Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use gather_scatter, only : gs_op_add
36 use num_types, only : rp, c_rp
38 use space, only : space_t
39 use coefs, only : coef_t
40 use field, only : field_t
41 use utils, only : neko_error
44 use, intrinsic :: iso_c_binding
45 implicit none
46 private
47
50
51#ifdef HAVE_HIP
52 interface
53 subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
54 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
55 bind(c, name = 'hip_dudxyz')
56 use, intrinsic :: iso_c_binding
57 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
58 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
59 integer(c_int) :: nel, lx
60 end subroutine hip_dudxyz
61 end interface
62
63 interface
64 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
65 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
66 bind(c, name = 'hip_cdtp')
67 use, intrinsic :: iso_c_binding
68 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
69 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
70 integer(c_int) :: nel, lx
71 end subroutine hip_cdtp
72 end interface
73
74 interface
75 subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
76 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
77 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
78 jacinv_d, nel, gdim, lx) &
79 bind(c, name = 'hip_conv1')
80 use, intrinsic :: iso_c_binding
81 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
82 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
83 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
84 type(c_ptr), value :: jacinv_d
85 integer(c_int) :: nel, gdim, lx
86 end subroutine hip_conv1
87 end interface
88
89 interface
90 subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
91 dx_d, dy_d, dz_d, &
92 drdx_d, dsdx_d, dtdx_d, &
93 drdy_d, dsdy_d, dtdy_d, &
94 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
95 bind(c, name = 'hip_opgrad')
96 use, intrinsic :: iso_c_binding
97 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
98 type(c_ptr), value :: dx_d, dy_d, dz_d
99 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
100 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
101 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
102 type(c_ptr), value :: w3_d
103 integer(c_int) :: nel, lx
104 end subroutine hip_opgrad
105 end interface
106
107 interface
108 subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
109 dx_d, dy_d, dz_d, &
110 drdx_d, dsdx_d, dtdx_d, &
111 drdy_d, dsdy_d, dtdy_d, &
112 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
113 bind(c, name = 'hip_lambda2')
114 use, intrinsic :: iso_c_binding
115 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
116 type(c_ptr), value :: dx_d, dy_d, dz_d
117 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
118 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
119 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
120 type(c_ptr), value :: jacinv_d
121 integer(c_int) :: nel, lx
122 end subroutine hip_lambda2
123 end interface
124
125 interface
126 real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
127 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
128 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
129 jacinv_d, nel, lx) &
130 bind(c, name = 'hip_cfl')
131 use, intrinsic :: iso_c_binding
132 import c_rp
133 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
134 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
135 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
136 real(c_rp) :: dt
137 integer(c_int) :: nel, lx
138 end function hip_cfl
139 end interface
140
141
142
143
144#elif HAVE_CUDA
145 interface
146 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
147 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
148 bind(c, name = 'cuda_dudxyz')
149 use, intrinsic :: iso_c_binding
150 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
151 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
152 integer(c_int) :: nel, lx
153 end subroutine cuda_dudxyz
154 end interface
155
156 interface
157 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
158 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
159 bind(c, name = 'cuda_cdtp')
160 use, intrinsic :: iso_c_binding
161 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
162 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
163 integer(c_int) :: nel, lx
164 end subroutine cuda_cdtp
165 end interface
166
167 interface
168 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
169 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
170 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
171 jacinv_d, nel, gdim, lx) &
172 bind(c, name = 'cuda_conv1')
173 use, intrinsic :: iso_c_binding
174 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
175 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
176 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
177 type(c_ptr), value :: jacinv_d
178 integer(c_int) :: nel, gdim, lx
179 end subroutine cuda_conv1
180 end interface
181
182 interface
183 subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
184 dx_d, dy_d, dz_d, &
185 drdx_d, dsdx_d, dtdx_d, &
186 drdy_d, dsdy_d, dtdy_d, &
187 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
188 bind(c, name = 'cuda_opgrad')
189 use, intrinsic :: iso_c_binding
190 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
191 type(c_ptr), value :: dx_d, dy_d, dz_d
192 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
193 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
194 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
195 type(c_ptr), value :: w3_d
196 integer(c_int) :: nel, lx
197 end subroutine cuda_opgrad
198 end interface
199
200 interface
201 subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
202 dx_d, dy_d, dz_d, &
203 drdx_d, dsdx_d, dtdx_d, &
204 drdy_d, dsdy_d, dtdy_d, &
205 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
206 bind(c, name = 'cuda_lambda2')
207 use, intrinsic :: iso_c_binding
208 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
209 type(c_ptr), value :: dx_d, dy_d, dz_d
210 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
211 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
212 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
213 type(c_ptr), value :: jacinv_d
214 integer(c_int) :: nel, lx
215 end subroutine cuda_lambda2
216 end interface
217
218 interface
219 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
220 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
221 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
222 jacinv_d, nel, lx) &
223 bind(c, name = 'cuda_cfl')
224 use, intrinsic :: iso_c_binding
225 import c_rp
226 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
227 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
228 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
229 real(c_rp) :: dt
230 integer(c_int) :: nel, lx
231 end function cuda_cfl
232 end interface
233
234
235
236
237#elif HAVE_OPENCL
238 interface
239 subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
240 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
241 bind(c, name = 'opencl_dudxyz')
242 use, intrinsic :: iso_c_binding
243 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
244 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
245 integer(c_int) :: nel, lx
246 end subroutine opencl_dudxyz
247 end interface
248
249 interface
250 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
251 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
252 bind(c, name = 'opencl_cdtp')
253 use, intrinsic :: iso_c_binding
254 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
255 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
256 integer(c_int) :: nel, lx
257 end subroutine opencl_cdtp
258 end interface
259
260 interface
261 subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
262 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
263 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
264 jacinv_d, nel, gdim, lx) &
265 bind(c, name = 'opencl_conv1')
266 use, intrinsic :: iso_c_binding
267 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
268 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
269 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
270 type(c_ptr), value :: jacinv_d
271 integer(c_int) :: nel, gdim, lx
272 end subroutine opencl_conv1
273 end interface
274
275 interface
276 subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
277 dx_d, dy_d, dz_d, &
278 drdx_d, dsdx_d, dtdx_d, &
279 drdy_d, dsdy_d, dtdy_d, &
280 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
281 bind(c, name = 'opencl_opgrad')
282 use, intrinsic :: iso_c_binding
283 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
284 type(c_ptr), value :: dx_d, dy_d, dz_d
285 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
286 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
287 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
288 type(c_ptr), value :: w3_d
289 integer(c_int) :: nel, lx
290 end subroutine opencl_opgrad
291 end interface
292
293 interface
294 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
295 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
296 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
297 jacinv_d, nel, lx) &
298 bind(c, name = 'opencl_cfl')
299 use, intrinsic :: iso_c_binding
300 import c_rp
301 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
302 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
303 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
304 real(c_rp) :: dt
305 integer(c_int) :: nel, lx
306 end function opencl_cfl
307 end interface
308
309 interface
310 subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
311 dx_d, dy_d, dz_d, &
312 drdx_d, dsdx_d, dtdx_d, &
313 drdy_d, dsdy_d, dtdy_d, &
314 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
315 bind(c, name = 'opencl_lambda2')
316 use, intrinsic :: iso_c_binding
317 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
318 type(c_ptr), value :: dx_d, dy_d, dz_d
319 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
320 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
321 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
322 type(c_ptr), value :: jacinv_d
323 integer(c_int) :: nel, lx
324 end subroutine opencl_lambda2
325 end interface
326
327
328#endif
329
330contains
331
332 subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
333 type(coef_t), intent(in), target :: coef
334 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(inout) :: du
335 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(in) :: u, dr, ds, dt
336 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
337
338 du_d = device_get_ptr(du)
339 u_d = device_get_ptr(u)
340
341 dr_d = device_get_ptr(dr)
342 ds_d = device_get_ptr(ds)
343 dt_d = device_get_ptr(dt)
344
345 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
346#ifdef HAVE_HIP
347 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
348 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
349 msh%nelv, xh%lx)
350#elif HAVE_CUDA
351 call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
352 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
353 msh%nelv, xh%lx)
354#elif HAVE_OPENCL
355 call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
356 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
357 msh%nelv, xh%lx)
358#else
359 call neko_error('No device backend configured')
360#endif
361 end associate
362
363 end subroutine opr_device_dudxyz
364
365 subroutine opr_device_opgrad(ux, uy, uz, u, coef)
366 type(coef_t), intent(in) :: coef
367 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
368 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
369 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
370 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
371 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
372
373 ux_d = device_get_ptr(ux)
374 uy_d = device_get_ptr(uy)
375 uz_d = device_get_ptr(uz)
376
377 u_d = device_get_ptr(u)
378
379 associate(xh => coef%Xh, msh => coef%msh)
380#ifdef HAVE_HIP
381 call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
382 xh%dx_d, xh%dy_d, xh%dz_d, &
383 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
384 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
385 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
386 xh%w3_d, msh%nelv, xh%lx)
387#elif HAVE_CUDA
388 call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
389 xh%dx_d, xh%dy_d, xh%dz_d, &
390 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
391 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
392 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
393 xh%w3_d, msh%nelv, xh%lx)
394#elif HAVE_OPENCL
395 call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
396 xh%dx_d, xh%dy_d, xh%dz_d, &
397 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
398 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
399 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
400 xh%w3_d, msh%nelv, xh%lx)
401#else
402 call neko_error('No device backend configured')
403#endif
404 end associate
405
406 end subroutine opr_device_opgrad
407 subroutine opr_device_lambda2(lambda2, u, v, w, coef)
408 type(coef_t), intent(in) :: coef
409 type(field_t), intent(inout) :: lambda2
410 type(field_t), intent(in) :: u, v, w
411#ifdef HAVE_HIP
412 call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
413 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
414 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
415 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
416 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
417 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
418#elif HAVE_CUDA
419 call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
420 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
421 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
422 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
423 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
424 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
425#elif HAVE_OPENCL
426 call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
427 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
428 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
429 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
430 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
431 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
432#else
433 call neko_error('No device backend configured')
434#endif
435 end subroutine opr_device_lambda2
436
437 subroutine opr_device_cdtp(dtx, x, dr, ds, dt, coef)
438 type(coef_t), intent(in) :: coef
439 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
440 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
441 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
442 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
443 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
444 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
445
446 dtx_d = device_get_ptr(dtx)
447 x_d = device_get_ptr(x)
448
449 dr_d = device_get_ptr(dr)
450 ds_d = device_get_ptr(ds)
451 dt_d = device_get_ptr(dt)
452
453 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
454#ifdef HAVE_HIP
455 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
456 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
457 msh%nelv, xh%lx)
458#elif HAVE_CUDA
459 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
460 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
461 msh%nelv, xh%lx)
462#elif HAVE_OPENCL
463 call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
464 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
465 msh%nelv, xh%lx)
466#else
467 call neko_error('No device backend configured')
468#endif
469 end associate
470
471 end subroutine opr_device_cdtp
472
473 subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
474 type(space_t), intent(in) :: xh
475 type(coef_t), intent(in) :: coef
476 integer, intent(in) :: nelv, gdim
477 real(kind=rp), intent(inout) :: du(xh%lxyz, nelv)
478 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
479 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
480 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
481 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
482 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
483
484 du_d = device_get_ptr(du)
485 u_d = device_get_ptr(u)
486
487 vx_d = device_get_ptr(vx)
488 vy_d = device_get_ptr(vy)
489 vz_d = device_get_ptr(vz)
490
491 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
492#ifdef HAVE_HIP
493 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
494 xh%dx_d, xh%dy_d, xh%dz_d, &
495 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
496 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
497 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
498 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
499#elif HAVE_CUDA
500 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
501 xh%dx_d, xh%dy_d, xh%dz_d, &
502 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
503 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
504 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
505 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
506#elif HAVE_OPENCL
507 call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
508 xh%dx_d, xh%dy_d, xh%dz_d, &
509 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
510 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
511 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
512 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
513#else
514 call neko_error('No device backend configured')
515#endif
516 end associate
517
518 end subroutine opr_device_conv1
519
520 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
521 type(field_t), intent(inout) :: w1
522 type(field_t), intent(inout) :: w2
523 type(field_t), intent(inout) :: w3
524 type(field_t), intent(in) :: u1
525 type(field_t), intent(in) :: u2
526 type(field_t), intent(in) :: u3
527 type(field_t), intent(inout) :: work1
528 type(field_t), intent(inout) :: work2
529 type(coef_t), intent(in) :: c_xh
530 type(c_ptr), optional, intent(inout) :: event
531 integer :: gdim, n, nelv
532
533 n = w1%dof%size()
534 gdim = c_xh%msh%gdim
535 nelv = c_xh%msh%nelv
536
537 ! this%work1=dw/dy ; this%work2=dv/dz
538#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
539#ifdef HAVE_HIP
540 call hip_dudxyz(work1%x_d, u3%x_d, &
541 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
542 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
543 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
544#elif HAVE_CUDA
545 call cuda_dudxyz(work1%x_d, u3%x_d, &
546 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
547 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
548 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
549#elif HAVE_OPENCL
550 call opencl_dudxyz(work1%x_d, u3%x_d, &
551 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
552 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
553 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
554#endif
555 if (gdim .eq. 3) then
556#ifdef HAVE_HIP
557 call hip_dudxyz(work2%x_d, u2%x_d, &
558 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
559 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
560 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
561#elif HAVE_CUDA
562 call cuda_dudxyz(work2%x_d, u2%x_d, &
563 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
564 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
565 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
566#elif HAVE_OPENCL
567 call opencl_dudxyz(work2%x_d, u2%x_d, &
568 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
569 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
570 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
571#endif
572 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
573 else
574 call device_copy(w1%x_d, work1%x_d, n)
575 endif
576 ! this%work1=du/dz ; this%work2=dw/dx
577 if (gdim .eq. 3) then
578#ifdef HAVE_HIP
579 call hip_dudxyz(work1%x_d, u1%x_d, &
580 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
581 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
582 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
583 call hip_dudxyz(work2%x_d, u3%x_d, &
584 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
585 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
586 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
587#elif HAVE_CUDA
588 call cuda_dudxyz(work1%x_d, u1%x_d, &
589 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
590 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
591 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
592 call cuda_dudxyz(work2%x_d, u3%x_d, &
593 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
594 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
595 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
596#elif HAVE_OPENCL
597 call opencl_dudxyz(work1%x_d, u1%x_d, &
598 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
599 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
600 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
601 call opencl_dudxyz(work2%x_d, u3%x_d, &
602 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
603 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
604 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
605#endif
606 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
607 else
608 call device_rzero (work1%x_d, n)
609#ifdef HAVE_HIP
610 call hip_dudxyz(work2%x_d, u3%x_d, &
611 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
612 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
613 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
614#elif HAVE_CUDA
615 call cuda_dudxyz(work2%x_d, u3%x_d, &
616 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
617 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
618 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
619#elif HAVE_OPENCL
620 call opencl_dudxyz(work2%x_d, u3%x_d, &
621 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
622 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
623 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
624#endif
625 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
626 endif
627 ! this%work1=dv/dx ; this%work2=du/dy
628#ifdef HAVE_HIP
629 call hip_dudxyz(work1%x_d, u2%x_d, &
630 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
631 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
632 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
633 call hip_dudxyz(work2%x_d, u1%x_d, &
634 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
635 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
636 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
637#elif HAVE_CUDA
638 call cuda_dudxyz(work1%x_d, u2%x_d, &
639 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
640 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
641 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
642 call cuda_dudxyz(work2%x_d, u1%x_d, &
643 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
644 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
645 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
646#elif HAVE_OPENCL
647 call opencl_dudxyz(work1%x_d, u2%x_d, &
648 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
649 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
650 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
651 call opencl_dudxyz(work2%x_d, u1%x_d, &
652 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
653 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
654 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
655#endif
656 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
657 !! BC dependent, Needs to change if cyclic
658
659 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
660
661 if (present(event)) then
662 call c_xh%gs_h%op(w1, gs_op_add, event)
663 call device_event_sync(event)
664 call c_xh%gs_h%op(w2, gs_op_add, event)
665 call device_event_sync(event)
666 call c_xh%gs_h%op(w3, gs_op_add, event)
667 call device_event_sync(event)
668 else
669 call c_xh%gs_h%op(w1, gs_op_add)
670 call c_xh%gs_h%op(w2, gs_op_add)
671 call c_xh%gs_h%op(w3, gs_op_add)
672 end if
673
674 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
675
676#else
677 call neko_error('No device backend configured')
678#endif
679
680 end subroutine opr_device_curl
681
682 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
683 type(space_t) :: xh
684 type(coef_t) :: coef
685 integer :: nelv, gdim
686 real(kind=rp) :: dt
687 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
688 real(kind=rp) :: cfl
689 type(c_ptr) :: u_d, v_d, w_d
690
691 u_d = device_get_ptr(u)
692 v_d = device_get_ptr(v)
693 w_d = device_get_ptr(w)
694
695#ifdef HAVE_HIP
696 cfl = hip_cfl(dt, u_d, v_d, w_d, &
697 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
698 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
699 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
700 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
701 coef%jacinv_d, nelv, xh%lx)
702#elif HAVE_CUDA
703 cfl = cuda_cfl(dt, u_d, v_d, w_d, &
704 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
705 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
706 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
707 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
708 coef%jacinv_d, nelv, xh%lx)
709#elif HAVE_OPENCL
710 cfl = opencl_cfl(dt, u_d, v_d, w_d, &
711 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
712 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
713 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
714 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
715 coef%jacinv_d, nelv, xh%lx)
716#else
717 cfl = 0.0_rp
718 call neko_error('No device backend configured')
719#endif
720 end function opr_device_cfl
721
722
723
724end module opr_device
725
Return the device pointer for an associated Fortran array.
Definition device.F90:96
Coefficients.
Definition coef.f90:34
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1309
Defines a field.
Definition field.f90:34
Gather-scatter.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators accelerator backends.
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.c:53
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.cu:64
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.c:53
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.cu:60
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.c:53
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition opr_lambda2.c:53
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.c:53
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.cu:59
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
The function space for the SEM solution fields.
Definition space.f90:62