Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gs_cpu.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module gs_cpu
35 use num_types
36 use gs_bcknd
37 use gs_ops
38 use, intrinsic :: iso_c_binding, only : c_ptr
39 implicit none
40 private
41
43 type, public, extends(gs_bcknd_t) :: gs_cpu_t
44 contains
45 procedure, pass(this) :: init => gs_cpu_init
46 procedure, pass(this) :: free => gs_cpu_free
47 procedure, pass(this) :: gather => gs_gather_cpu
48 procedure, pass(this) :: scatter => gs_scatter_cpu
49 end type gs_cpu_t
50
51contains
52
54 subroutine gs_cpu_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
55 class(gs_cpu_t), intent(inout) :: this
56 integer, intent(in) :: nlocal
57 integer, intent(in) :: nshared
58 integer, intent(in) :: nlcl_blks
59 integer, intent(in) :: nshrd_blks
60 end subroutine gs_cpu_init
61
63 subroutine gs_cpu_free(this)
64 class(gs_cpu_t), intent(inout) :: this
65 end subroutine gs_cpu_free
66
68 subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, bo, op, shrd)
69 integer, intent(in) :: m
70 integer, intent(in) :: n
71 integer, intent(in) :: nb
72 class(gs_cpu_t), intent(inout) :: this
73 real(kind=rp), dimension(m), intent(inout) :: v
74 integer, dimension(m), intent(inout) :: dg
75 real(kind=rp), dimension(n), intent(inout) :: u
76 integer, dimension(m), intent(inout) :: gd
77 integer, dimension(nb), intent(inout) :: b
78 integer, dimension(nb), intent(inout) :: bo
79 integer, intent(in) :: o
80 integer, intent(in) :: op
81 logical, intent(in) :: shrd
82
83 select case(op)
84 case (gs_op_add)
85 call gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, bo)
86 case (gs_op_mul)
87 call gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, bo)
88 case (gs_op_min)
89 call gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, bo)
90 case (gs_op_max)
91 call gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b, bo)
92 end select
93
94 end subroutine gs_gather_cpu
95
98 subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, bo)
99 integer, intent(in) :: m
100 integer, intent(in) :: n
101 integer, intent(in) :: nb
102 real(kind=rp), dimension(m), intent(inout) :: v
103 integer, dimension(m), intent(inout) :: dg
104 real(kind=rp), dimension(n), intent(inout) :: u
105 integer, dimension(m), intent(inout) :: gd
106 integer, dimension(nb), intent(inout) :: b
107 integer, dimension(nb), intent(inout) :: bo
108 integer, intent(in) :: o
109 integer :: i, j, k, blk_len
110 real(kind=rp) :: tmp
111
112 do concurrent(i = 1:nb)
113 k = bo(i)
114 blk_len = b(i)
115 tmp = u(gd(k + 1))
116 do j = 2, blk_len
117 tmp = tmp + u(gd(k + j))
118 end do
119 v(dg(k + 1)) = tmp
120 end do
121
122 if (o .lt. 0) then
123 do concurrent(i = abs(o):m)
124 v(dg(i)) = u(gd(i))
125 end do
126 else
127 do concurrent(i = o:m:2)
128 v(dg(i)) = u(gd(i)) + u(gd(i+1))
129 end do
130 end if
131
132 end subroutine gs_gather_kernel_add
133
136 subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, bo)
137 integer, intent(in) :: m
138 integer, intent(in) :: n
139 integer, intent(in) :: nb
140 real(kind=rp), dimension(m), intent(inout) :: v
141 integer, dimension(m), intent(inout) :: dg
142 real(kind=rp), dimension(n), intent(inout) :: u
143 integer, dimension(m), intent(inout) :: gd
144 integer, dimension(nb), intent(inout) :: b
145 integer, dimension(nb), intent(inout) :: bo
146 integer, intent(in) :: o
147 integer :: i, j, k, blk_len
148 real(kind=rp) :: tmp
149
150 do concurrent(i = 1:nb)
151 k = bo(i)
152 blk_len = b(i)
153 tmp = u(gd(k + 1))
154 do j = 2, blk_len
155 tmp = tmp * u(gd(k + j))
156 end do
157 v(dg(k + 1)) = tmp
158 end do
159
160 if (o .lt. 0) then
161 do concurrent(i = abs(o):m)
162 v(dg(i)) = u(gd(i))
163 end do
164 else
165 do concurrent(i = o:m:2)
166 v(dg(i)) = u(gd(i)) * u(gd(i+1))
167 end do
168 end if
169
170 end subroutine gs_gather_kernel_mul
171
174 subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, bo)
175 integer, intent(in) :: m
176 integer, intent(in) :: n
177 integer, intent(in) :: nb
178 real(kind=rp), dimension(m), intent(inout) :: v
179 integer, dimension(m), intent(inout) :: dg
180 real(kind=rp), dimension(n), intent(inout) :: u
181 integer, dimension(m), intent(inout) :: gd
182 integer, dimension(nb), intent(inout) :: b
183 integer, dimension(nb), intent(inout) :: bo
184 integer, intent(in) :: o
185 integer :: i, j, k, blk_len
186 real(kind=rp) :: tmp
187
188 do concurrent(i = 1:nb)
189 k = bo(i)
190 blk_len = b(i)
191 tmp = u(gd(k + 1))
192 do j = 2, blk_len
193 tmp = min(tmp, u(gd(k + j)))
194 end do
195 v(dg(k + 1)) = tmp
196 end do
197
198 if (o .lt. 0) then
199 do concurrent(i = abs(o):m)
200 v(dg(i)) = u(gd(i))
201 end do
202 else
203 do concurrent(i = o:m:2)
204 v(dg(i)) = min(u(gd(i)), u(gd(i+1)))
205 end do
206 end if
207
208 end subroutine gs_gather_kernel_min
209
212 subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b, bo)
213 integer, intent(in) :: m
214 integer, intent(in) :: n
215 integer, intent(in) :: nb
216 real(kind=rp), dimension(m), intent(inout) :: v
217 integer, dimension(m), intent(inout) :: dg
218 real(kind=rp), dimension(n), intent(inout) :: u
219 integer, dimension(m), intent(inout) :: gd
220 integer, dimension(nb), intent(inout) :: b
221 integer, dimension(nb), intent(inout) :: bo
222 integer, intent(in) :: o
223 integer :: i, j, k, blk_len
224 real(kind=rp) :: tmp
225
226 do concurrent(i = 1:nb)
227 k = bo(i)
228 blk_len = b(i)
229 tmp = u(gd(k + 1))
230 do j = 2, blk_len
231 tmp = max(tmp, u(gd(k + j)))
232 end do
233 v(dg(k + 1)) = tmp
234 end do
235
236 if (o .lt. 0) then
237 do concurrent(i = abs(o):m)
238 v(dg(i)) = u(gd(i))
239 end do
240 else
241 do concurrent(i = o:m:2)
242 v(dg(i)) = max(u(gd(i)), u(gd(i+1)))
243 end do
244 end if
245
246 end subroutine gs_gather_kernel_max
247
249 subroutine gs_scatter_cpu(this, v, m, dg, u, n, gd, nb, b, bo, shrd, event)
250 integer, intent(in) :: m
251 integer, intent(in) :: n
252 integer, intent(in) :: nb
253 class(gs_cpu_t), intent(inout) :: this
254 real(kind=rp), dimension(m), intent(inout) :: v
255 integer, dimension(m), intent(inout) :: dg
256 real(kind=rp), dimension(n), intent(inout) :: u
257 integer, dimension(m), intent(inout) :: gd
258 integer, dimension(nb), intent(inout) :: b
259 integer, dimension(nb), intent(inout) :: bo
260 logical, intent(in) :: shrd
261 type(c_ptr) :: event
262
263 call gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, bo)
264
265 end subroutine gs_scatter_cpu
266
268 subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, bo)
269 integer, intent(in) :: m
270 integer, intent(in) :: n
271 integer, intent(in) :: nb
272 real(kind=rp), dimension(m), intent(inout) :: v
273 integer, dimension(m), intent(inout) :: dg
274 real(kind=rp), dimension(n), intent(inout) :: u
275 integer, dimension(m), intent(inout) :: gd
276 integer, dimension(nb), intent(inout) :: b
277 integer, dimension(nb), intent(inout) :: bo
278 integer :: i, j, k, blk_len
279 real(kind=rp) :: tmp
280
281 do concurrent(i = 1:nb)
282 k = bo(i)
283 blk_len = b(i)
284 tmp = v(dg(k + 1))
285 do j = 1, blk_len
286 u(gd(k + j)) = tmp
287 end do
288 end do
289
290 do concurrent(i = (bo(nb) + b(nb) + 1):m)
291 u(gd(i)) = v(dg(i))
292 end do
293
294 end subroutine gs_scatter_kernel
295
296end module gs_cpu
Defines a gather-scatter backend.
Definition gs_bcknd.f90:34
Generic Gather-scatter backend for CPUs.
Definition gs_cpu.f90:34
subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, bo)
Scatter kernel .
Definition gs_cpu.f90:269
subroutine gs_cpu_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
Dummy backend initialisation.
Definition gs_cpu.f90:55
subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, bo)
Gather kernel for addition of data .
Definition gs_cpu.f90:99
subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, bo)
Gather kernel for multiplication of data .
Definition gs_cpu.f90:137
subroutine gs_cpu_free(this)
Dummy backend deallocation.
Definition gs_cpu.f90:64
subroutine gs_scatter_cpu(this, v, m, dg, u, n, gd, nb, b, bo, shrd, event)
Scatter kernel.
Definition gs_cpu.f90:250
subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, bo, op, shrd)
Gather kernel.
Definition gs_cpu.f90:69
subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, bo)
Gather kernel for minimum of data .
Definition gs_cpu.f90:175
subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b, bo)
Gather kernel for maximum of data .
Definition gs_cpu.f90:213
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
integer, parameter, public gs_op_max
Definition gs_ops.f90:36
integer, parameter, public gs_op_min
Definition gs_ops.f90:36
integer, parameter, public gs_op_mul
Definition gs_ops.f90:36
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Gather-scatter backend.
Definition gs_bcknd.f90:44
Gather-scatter backend for CPUs.
Definition gs_cpu.f90:43
#define max(a, b)
Definition tensor.cu:40