Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gs_cpu.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2021, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module gs_cpu
35 use num_types
36 use gs_bcknd
37 use gs_ops
38 use, intrinsic :: iso_c_binding, only : c_ptr
39 implicit none
40 private
41
43 type, public, extends(gs_bcknd_t) :: gs_cpu_t
44 contains
45 procedure, pass(this) :: init => gs_cpu_init
46 procedure, pass(this) :: free => gs_cpu_free
47 procedure, pass(this) :: gather => gs_gather_cpu
48 procedure, pass(this) :: scatter => gs_scatter_cpu
49 end type gs_cpu_t
50
51contains
52
54 subroutine gs_cpu_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
55 class(gs_cpu_t), intent(inout) :: this
56 integer, intent(in) :: nlocal
57 integer, intent(in) :: nshared
58 integer, intent(in) :: nlcl_blks
59 integer, intent(in) :: nshrd_blks
60 end subroutine gs_cpu_init
61
63 subroutine gs_cpu_free(this)
64 class(gs_cpu_t), intent(inout) :: this
65 end subroutine gs_cpu_free
66
68 subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
69 integer, intent(in) :: m
70 integer, intent(in) :: n
71 integer, intent(in) :: nb
72 class(gs_cpu_t), intent(inout) :: this
73 real(kind=rp), dimension(m), intent(inout) :: v
74 integer, dimension(m), intent(inout) :: dg
75 real(kind=rp), dimension(n), intent(inout) :: u
76 integer, dimension(m), intent(inout) :: gd
77 integer, dimension(nb), intent(inout) :: b
78 integer, intent(in) :: o
79 integer, intent(in) :: op
80 logical, intent(in) :: shrd
81
82 select case(op)
83 case (gs_op_add)
84 call gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
85 case (gs_op_mul)
86 call gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b)
87 case (gs_op_min)
88 call gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
89 case (gs_op_max)
90 call gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
91 end select
92
93 end subroutine gs_gather_cpu
94
97 subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
98 integer, intent(in) :: m
99 integer, intent(in) :: n
100 integer, intent(in) :: nb
101 real(kind=rp), dimension(m), intent(inout) :: v
102 integer, dimension(m), intent(inout) :: dg
103 real(kind=rp), dimension(n), intent(inout) :: u
104 integer, dimension(m), intent(inout) :: gd
105 integer, dimension(nb), intent(inout) :: b
106 integer, intent(in) :: o
107 integer :: i, j, k, blk_len
108 real(kind=rp) :: tmp
109
110 k = 0
111 do i = 1, nb
112 blk_len = b(i)
113 tmp = u(gd(k + 1))
114 do j = 2, blk_len
115 tmp = tmp + u(gd(k + j))
116 end do
117 v(dg(k + 1)) = tmp
118 k = k + blk_len
119 end do
120
121 if (o .lt. 0) then
122 do i = abs(o), m
123 v(dg(i)) = u(gd(i))
124 end do
125 else
126 do i = o, m, 2
127 tmp = u(gd(i)) + u(gd(i+1))
128 v(dg(i)) = tmp
129 end do
130 end if
131
132 end subroutine gs_gather_kernel_add
133
136 subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b)
137 integer, intent(in) :: m
138 integer, intent(in) :: n
139 integer, intent(in) :: nb
140 real(kind=rp), dimension(m), intent(inout) :: v
141 integer, dimension(m), intent(inout) :: dg
142 real(kind=rp), dimension(n), intent(inout) :: u
143 integer, dimension(m), intent(inout) :: gd
144 integer, dimension(nb), intent(inout) :: b
145 integer, intent(in) :: o
146 integer :: i, j, k, blk_len
147 real(kind=rp) :: tmp
148
149 k = 0
150 do i = 1, nb
151 blk_len = b(i)
152 tmp = u(gd(k + 1))
153 do j = 2, blk_len
154 tmp = tmp * u(gd(k + j))
155 end do
156 v(dg(k + 1)) = tmp
157 k = k + blk_len
158 end do
159
160 if (o .lt. 0) then
161 do i = abs(o), m
162 v(dg(i)) = u(gd(i))
163 end do
164 else
165 do i = o, m, 2
166 tmp = u(gd(i)) * u(gd(i+1))
167 v(dg(i)) = tmp
168 end do
169 end if
170
171 end subroutine gs_gather_kernel_mul
172
175 subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
176 integer, intent(in) :: m
177 integer, intent(in) :: n
178 integer, intent(in) :: nb
179 real(kind=rp), dimension(m), intent(inout) :: v
180 integer, dimension(m), intent(inout) :: dg
181 real(kind=rp), dimension(n), intent(inout) :: u
182 integer, dimension(m), intent(inout) :: gd
183 integer, dimension(nb), intent(inout) :: b
184 integer, intent(in) :: o
185 integer :: i, j, k, blk_len
186 real(kind=rp) :: tmp
187
188 k = 0
189 do i = 1, nb
190 blk_len = b(i)
191 tmp = u(gd(k + 1))
192 do j = 2, blk_len
193 tmp = min(tmp, u(gd(k + j)))
194 end do
195 v(dg(k + 1)) = tmp
196 k = k + blk_len
197 end do
198
199 if (o .lt. 0) then
200 do i = abs(o), m
201 v(dg(i)) = u(gd(i))
202 end do
203 else
204 do i = o, m, 2
205 tmp = min(u(gd(i)), u(gd(i+1)))
206 v(dg(i)) = tmp
207 end do
208 end if
209
210 end subroutine gs_gather_kernel_min
211
214 subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
215 integer, intent(in) :: m
216 integer, intent(in) :: n
217 integer, intent(in) :: nb
218 real(kind=rp), dimension(m), intent(inout) :: v
219 integer, dimension(m), intent(inout) :: dg
220 real(kind=rp), dimension(n), intent(inout) :: u
221 integer, dimension(m), intent(inout) :: gd
222 integer, dimension(nb), intent(inout) :: b
223 integer, intent(in) :: o
224 integer :: i, j, k, blk_len
225 real(kind=rp) :: tmp
226
227 k = 0
228 do i = 1, nb
229 blk_len = b(i)
230 tmp = u(gd(k + 1))
231 do j = 2, blk_len
232 tmp = max(tmp, u(gd(k + j)))
233 end do
234 v(dg(k + 1)) = tmp
235 k = k + blk_len
236 end do
237
238 if (o .lt. 0) then
239 do i = abs(o), m
240 v(dg(i)) = u(gd(i))
241 end do
242 else
243 do i = o, m, 2
244 tmp = max(u(gd(i)), u(gd(i+1)))
245 v(dg(i)) = tmp
246 end do
247 end if
248
249 end subroutine gs_gather_kernel_max
250
252 subroutine gs_scatter_cpu(this, v, m, dg, u, n, gd, nb, b, shrd, event)
253 integer, intent(in) :: m
254 integer, intent(in) :: n
255 integer, intent(in) :: nb
256 class(gs_cpu_t), intent(inout) :: this
257 real(kind=rp), dimension(m), intent(inout) :: v
258 integer, dimension(m), intent(inout) :: dg
259 real(kind=rp), dimension(n), intent(inout) :: u
260 integer, dimension(m), intent(inout) :: gd
261 integer, dimension(nb), intent(inout) :: b
262 logical, intent(in) :: shrd
263 type(c_ptr) :: event
264
265 call gs_scatter_kernel(v, m, dg, u, n, gd, nb, b)
266
267 end subroutine gs_scatter_cpu
268
270 subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b)
271 integer, intent(in) :: m
272 integer, intent(in) :: n
273 integer, intent(in) :: nb
274 real(kind=rp), dimension(m), intent(inout) :: v
275 integer, dimension(m), intent(inout) :: dg
276 real(kind=rp), dimension(n), intent(inout) :: u
277 integer, dimension(m), intent(inout) :: gd
278 integer, dimension(nb), intent(inout) :: b
279 integer :: i, j, k, blk_len
280 real(kind=rp) :: tmp
281
282 k = 0
283 do i = 1, nb
284 blk_len = b(i)
285 tmp = v(dg(k + 1))
286 do j = 1, blk_len
287 u(gd(k + j)) = tmp
288 end do
289 k = k + blk_len
290 end do
291
292 do i = k + 1, m
293 u(gd(i)) = v(dg(i))
294 end do
295
296 end subroutine gs_scatter_kernel
297
298end module gs_cpu
Defines a gather-scatter backend.
Definition gs_bcknd.f90:34
Generic Gather-scatter backend for CPUs.
Definition gs_cpu.f90:34
subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
Gather kernel for addition of data .
Definition gs_cpu.f90:98
subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b)
Scatter kernel .
Definition gs_cpu.f90:271
subroutine gs_cpu_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
Dummy backend initialisation.
Definition gs_cpu.f90:55
subroutine gs_scatter_cpu(this, v, m, dg, u, n, gd, nb, b, shrd, event)
Scatter kernel.
Definition gs_cpu.f90:253
subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b)
Gather kernel for multiplication of data .
Definition gs_cpu.f90:137
subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
Gather kernel.
Definition gs_cpu.f90:69
subroutine gs_cpu_free(this)
Dummy backend deallocation.
Definition gs_cpu.f90:64
subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
Gather kernel for minimum of data .
Definition gs_cpu.f90:176
subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
Gather kernel for maximum of data .
Definition gs_cpu.f90:215
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
integer, parameter, public gs_op_max
Definition gs_ops.f90:36
integer, parameter, public gs_op_min
Definition gs_ops.f90:36
integer, parameter, public gs_op_mul
Definition gs_ops.f90:36
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Gather-scatter backend.
Definition gs_bcknd.f90:44
Gather-scatter backend for CPUs.
Definition gs_cpu.f90:43
#define max(a, b)
Definition tensor.cu:40