Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use gather_scatter, only : gs_op_add
36 use num_types, only : rp, c_rp, i8
38 use space, only : space_t
39 use coefs, only : coef_t
40 use field, only : field_t
41 use utils, only : neko_error
46 use, intrinsic :: iso_c_binding
47 implicit none
48 private
49
55
56#ifdef HAVE_HIP
57 interface
58 subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
59 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
60 bind(c, name = 'hip_dudxyz')
61 use, intrinsic :: iso_c_binding
62 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
63 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
64 integer(c_int) :: nel, lx
65 end subroutine hip_dudxyz
66 end interface
67
68 interface
69 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
70 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
71 bind(c, name = 'hip_cdtp')
72 use, intrinsic :: iso_c_binding
73 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
74 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
75 integer(c_int) :: nel, lx
76 end subroutine hip_cdtp
77 end interface
78
79 interface
80 subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
81 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
82 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
83 jacinv_d, nel, gdim, lx) &
84 bind(c, name = 'hip_conv1')
85 use, intrinsic :: iso_c_binding
86 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
87 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
88 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
89 type(c_ptr), value :: jacinv_d
90 integer(c_int) :: nel, gdim, lx
91 end subroutine hip_conv1
92 end interface
93
94 interface
95 subroutine hip_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
96 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'hip_convect_scalar')
97 use, intrinsic :: iso_c_binding
98 type(c_ptr), value :: du_d, u_d
99 type(c_ptr), value :: cr_d, cs_d, ct_d
100 type(c_ptr), value :: dx_d, dy_d, dz_d
101 integer(c_int) :: nel, lx
102 end subroutine hip_convect_scalar
103 end interface
104
105 interface
106 subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
107 dx_d, dy_d, dz_d, &
108 drdx_d, dsdx_d, dtdx_d, &
109 drdy_d, dsdy_d, dtdy_d, &
110 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
111 bind(c, name = 'hip_opgrad')
112 use, intrinsic :: iso_c_binding
113 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
114 type(c_ptr), value :: dx_d, dy_d, dz_d
115 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
116 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
117 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
118 type(c_ptr), value :: w3_d
119 integer(c_int) :: nel, lx
120 end subroutine hip_opgrad
121 end interface
122
123 interface
124 subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
125 dx_d, dy_d, dz_d, &
126 drdx_d, dsdx_d, dtdx_d, &
127 drdy_d, dsdy_d, dtdy_d, &
128 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
129 bind(c, name = 'hip_lambda2')
130 use, intrinsic :: iso_c_binding
131 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
132 type(c_ptr), value :: dx_d, dy_d, dz_d
133 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
134 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
135 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
136 type(c_ptr), value :: jacinv_d
137 integer(c_int) :: nel, lx
138 end subroutine hip_lambda2
139 end interface
140
141 interface
142 real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
143 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
144 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
145 jacinv_d, nel, lx) &
146 bind(c, name = 'hip_cfl')
147 use, intrinsic :: iso_c_binding
148 import c_rp
149 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
150 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
151 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
152 real(c_rp) :: dt
153 integer(c_int) :: nel, lx
154 end function hip_cfl
155 end interface
156
157 interface
158 subroutine hip_rotate_cyc(vx_d, vy_d, vz_d, &
159 x_d, y_d, z_d, &
160 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
161 bind(c, name = 'hip_rotate_cyc')
162 use, intrinsic :: iso_c_binding
163 type(c_ptr), value :: vx_d, vy_d, vz_d
164 type(c_ptr), value :: x_d, y_d, z_d
165 type(c_ptr), value :: cyc_msk_d, R11_d, R12_d
166 integer(c_int) :: ncyc, idir
167 end subroutine hip_rotate_cyc
168 end interface
169
170 interface
171 subroutine hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
172 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
173 dtdz_d, w3_d, nel, lx) bind(c, name = 'hip_set_convect_rst')
174 use, intrinsic :: iso_c_binding
175 type(c_ptr), value :: cr_d, cs_d, ct_d
176 type(c_ptr), value :: cx_d, cy_d, cz_d
177 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
178 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
179 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
180 type(c_ptr), value :: w3_d
181 integer(c_int) :: nel, lx
182 end subroutine hip_set_convect_rst
183 end interface
184
185#elif HAVE_CUDA
186 interface
187 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
188 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
189 bind(c, name = 'cuda_dudxyz')
190 use, intrinsic :: iso_c_binding
191 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
192 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
193 integer(c_int) :: nel, lx
194 end subroutine cuda_dudxyz
195 end interface
196
197 interface
198 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
199 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
200 bind(c, name = 'cuda_cdtp')
201 use, intrinsic :: iso_c_binding
202 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
203 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
204 integer(c_int) :: nel, lx
205 end subroutine cuda_cdtp
206 end interface
207
208 interface
209 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
210 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
211 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
212 jacinv_d, nel, gdim, lx) &
213 bind(c, name = 'cuda_conv1')
214 use, intrinsic :: iso_c_binding
215 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
216 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
217 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
218 type(c_ptr), value :: jacinv_d
219 integer(c_int) :: nel, gdim, lx
220 end subroutine cuda_conv1
221 end interface
222
223 interface
224 subroutine cuda_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
225 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'cuda_convect_scalar')
226 use, intrinsic :: iso_c_binding
227 type(c_ptr), value :: du_d, u_d
228 type(c_ptr), value :: cr_d, cs_d, ct_d
229 type(c_ptr), value :: dx_d, dy_d, dz_d
230 integer(c_int) :: nel, lx
231 end subroutine cuda_convect_scalar
232 end interface
233
234 interface
235 subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
236 dx_d, dy_d, dz_d, &
237 drdx_d, dsdx_d, dtdx_d, &
238 drdy_d, dsdy_d, dtdy_d, &
239 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
240 bind(c, name = 'cuda_opgrad')
241 use, intrinsic :: iso_c_binding
242 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
243 type(c_ptr), value :: dx_d, dy_d, dz_d
244 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
245 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
246 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
247 type(c_ptr), value :: w3_d
248 integer(c_int) :: nel, lx
249 end subroutine cuda_opgrad
250 end interface
251
252 interface
253 subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
254 dx_d, dy_d, dz_d, &
255 drdx_d, dsdx_d, dtdx_d, &
256 drdy_d, dsdy_d, dtdy_d, &
257 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
258 bind(c, name = 'cuda_lambda2')
259 use, intrinsic :: iso_c_binding
260 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
261 type(c_ptr), value :: dx_d, dy_d, dz_d
262 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
263 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
264 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
265 type(c_ptr), value :: jacinv_d
266 integer(c_int) :: nel, lx
267 end subroutine cuda_lambda2
268 end interface
269
270 interface
271 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
272 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
273 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
274 jacinv_d, nel, lx) &
275 bind(c, name = 'cuda_cfl')
276 use, intrinsic :: iso_c_binding
277 import c_rp
278 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
279 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
280 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
281 real(c_rp) :: dt
282 integer(c_int) :: nel, lx
283 end function cuda_cfl
284 end interface
285
286 interface
287 subroutine cuda_rotate_cyc(vx_d, vy_d, vz_d, &
288 x_d, y_d, z_d, &
289 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
290 bind(c, name = 'cuda_rotate_cyc')
291 use, intrinsic :: iso_c_binding
292 type(c_ptr), value :: vx_d, vy_d, vz_d
293 type(c_ptr), value :: x_d, y_d, z_d
294 type(c_ptr), value :: cyc_msk_d, R11_d, R12_d
295 integer(c_int) :: ncyc, idir
296 end subroutine cuda_rotate_cyc
297 end interface
298
299 interface
300 subroutine cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
301 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
302 dtdz_d, w3_d, nel, lx) bind(c, name = 'cuda_set_convect_rst')
303 use, intrinsic :: iso_c_binding
304 type(c_ptr), value :: cr_d, cs_d, ct_d
305 type(c_ptr), value :: cx_d, cy_d, cz_d
306 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
307 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
308 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
309 type(c_ptr), value :: w3_d
310 integer(c_int) :: nel, lx
311 end subroutine cuda_set_convect_rst
312 end interface
313
314#elif HAVE_OPENCL
315 interface
316 subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
317 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
318 bind(c, name = 'opencl_dudxyz')
319 use, intrinsic :: iso_c_binding
320 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
321 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
322 integer(c_int) :: nel, lx
323 end subroutine opencl_dudxyz
324 end interface
325
326 interface
327 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
328 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
329 bind(c, name = 'opencl_cdtp')
330 use, intrinsic :: iso_c_binding
331 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
332 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
333 integer(c_int) :: nel, lx
334 end subroutine opencl_cdtp
335 end interface
336
337 interface
338 subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
339 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
340 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
341 jacinv_d, nel, gdim, lx) &
342 bind(c, name = 'opencl_conv1')
343 use, intrinsic :: iso_c_binding
344 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
345 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
346 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
347 type(c_ptr), value :: jacinv_d
348 integer(c_int) :: nel, gdim, lx
349 end subroutine opencl_conv1
350 end interface
351
352 interface
353 subroutine opencl_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
354 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'opencl_convect_scalar')
355 use, intrinsic :: iso_c_binding
356 type(c_ptr), value :: du_d, u_d
357 type(c_ptr), value :: cr_d, cs_d, ct_d
358 type(c_ptr), value :: dx_d, dy_d, dz_d
359 integer(c_int) :: nel, lx
360 end subroutine opencl_convect_scalar
361 end interface
362
363 interface
364 subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
365 dx_d, dy_d, dz_d, &
366 drdx_d, dsdx_d, dtdx_d, &
367 drdy_d, dsdy_d, dtdy_d, &
368 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
369 bind(c, name = 'opencl_opgrad')
370 use, intrinsic :: iso_c_binding
371 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
372 type(c_ptr), value :: dx_d, dy_d, dz_d
373 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
374 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
375 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
376 type(c_ptr), value :: w3_d
377 integer(c_int) :: nel, lx
378 end subroutine opencl_opgrad
379 end interface
380
381 interface
382 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
383 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
384 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
385 jacinv_d, nel, lx) &
386 bind(c, name = 'opencl_cfl')
387 use, intrinsic :: iso_c_binding
388 import c_rp
389 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
390 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
391 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
392 real(c_rp) :: dt
393 integer(c_int) :: nel, lx
394 end function opencl_cfl
395 end interface
396
397 interface
398 subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
399 dx_d, dy_d, dz_d, &
400 drdx_d, dsdx_d, dtdx_d, &
401 drdy_d, dsdy_d, dtdy_d, &
402 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
403 bind(c, name = 'opencl_lambda2')
404 use, intrinsic :: iso_c_binding
405 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
406 type(c_ptr), value :: dx_d, dy_d, dz_d
407 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
408 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
409 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
410 type(c_ptr), value :: jacinv_d
411 integer(c_int) :: nel, lx
412 end subroutine opencl_lambda2
413 end interface
414
415 interface
416 subroutine opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
417 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
418 dtdz_d, w3_d, nel, lx) bind(c, name = 'opencl_set_convect_rst')
419 use, intrinsic :: iso_c_binding
420 type(c_ptr), value :: cr_d, cs_d, ct_d
421 type(c_ptr), value :: cx_d, cy_d, cz_d
422 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
423 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
424 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
425 type(c_ptr), value :: w3_d
426 integer(c_int) :: nel, lx
427 end subroutine opencl_set_convect_rst
428 end interface
429
430#endif
431
432contains
433
434 subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
435 type(coef_t), intent(in), target :: coef
436 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(inout) :: du
437 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(in) :: u, dr, ds, dt
438 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
439
440 du_d = device_get_ptr(du)
441 u_d = device_get_ptr(u)
442
443 dr_d = device_get_ptr(dr)
444 ds_d = device_get_ptr(ds)
445 dt_d = device_get_ptr(dt)
446
447 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
448#ifdef HAVE_HIP
449 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
450 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
451 msh%nelv, xh%lx)
452#elif HAVE_CUDA
453 call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
454 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
455 msh%nelv, xh%lx)
456#elif HAVE_OPENCL
457 call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
458 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
459 msh%nelv, xh%lx)
460#else
461 call neko_error('No device backend configured')
462#endif
463 end associate
464
465 end subroutine opr_device_dudxyz
466
467 subroutine opr_device_opgrad(ux, uy, uz, u, coef)
468 type(coef_t), intent(in) :: coef
469 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
470 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
471 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
472 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
473 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
474
475 ux_d = device_get_ptr(ux)
476 uy_d = device_get_ptr(uy)
477 uz_d = device_get_ptr(uz)
478
479 u_d = device_get_ptr(u)
480
481 associate(xh => coef%Xh, msh => coef%msh)
482#ifdef HAVE_HIP
483 call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
484 xh%dx_d, xh%dy_d, xh%dz_d, &
485 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
486 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
487 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
488 xh%w3_d, msh%nelv, xh%lx)
489#elif HAVE_CUDA
490 call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
491 xh%dx_d, xh%dy_d, xh%dz_d, &
492 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
493 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
494 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
495 xh%w3_d, msh%nelv, xh%lx)
496#elif HAVE_OPENCL
497 call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
498 xh%dx_d, xh%dy_d, xh%dz_d, &
499 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
500 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
501 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
502 xh%w3_d, msh%nelv, xh%lx)
503#else
504 call neko_error('No device backend configured')
505#endif
506 end associate
507
508 end subroutine opr_device_opgrad
509
515 subroutine device_ortho(x_d, glb_n_points, n)
516 integer, intent(in) :: n
517 integer(kind=i8), intent(in) :: glb_n_points
518 type(c_ptr), intent(inout) :: x_d
519 real(kind=rp) :: c
520
521 c = device_glsum(x_d, n) / glb_n_points
522 call device_cadd(x_d, -c, n)
523
524 end subroutine device_ortho
525
526 subroutine opr_device_lambda2(lambda2, u, v, w, coef)
527 type(coef_t), intent(in) :: coef
528 type(field_t), intent(inout) :: lambda2
529 type(field_t), intent(in) :: u, v, w
530#ifdef HAVE_HIP
531 call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
532 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
533 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
534 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
535 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
536 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
537#elif HAVE_CUDA
538 call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
539 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
540 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
541 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
542 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
543 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
544#elif HAVE_OPENCL
545 call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
546 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
547 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
548 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
549 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
550 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
551#else
552 call neko_error('No device backend configured')
553#endif
554 end subroutine opr_device_lambda2
555
556 subroutine opr_device_cdtp(dtx, x, dr, ds, dt, coef)
557 type(coef_t), intent(in) :: coef
558 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
559 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
560 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
561 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
562 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
563 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
564
565 dtx_d = device_get_ptr(dtx)
566 x_d = device_get_ptr(x)
567
568 dr_d = device_get_ptr(dr)
569 ds_d = device_get_ptr(ds)
570 dt_d = device_get_ptr(dt)
571
572 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
573#ifdef HAVE_HIP
574 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
575 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
576 msh%nelv, xh%lx)
577#elif HAVE_CUDA
578 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
579 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
580 msh%nelv, xh%lx)
581#elif HAVE_OPENCL
582 call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
583 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
584 msh%nelv, xh%lx)
585#else
586 call neko_error('No device backend configured')
587#endif
588 end associate
589
590 end subroutine opr_device_cdtp
591
592 subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
593 type(space_t), intent(in) :: xh
594 type(coef_t), intent(in) :: coef
595 integer, intent(in) :: nelv, gdim
596 real(kind=rp), intent(inout) :: du(xh%lxyz, nelv)
597 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
598 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
599 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
600 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
601 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
602
603 du_d = device_get_ptr(du)
604 u_d = device_get_ptr(u)
605
606 vx_d = device_get_ptr(vx)
607 vy_d = device_get_ptr(vy)
608 vz_d = device_get_ptr(vz)
609
610 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
611#ifdef HAVE_HIP
612 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
613 xh%dx_d, xh%dy_d, xh%dz_d, &
614 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
615 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
616 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
617 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
618#elif HAVE_CUDA
619 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
620 xh%dx_d, xh%dy_d, xh%dz_d, &
621 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
622 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
623 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
624 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
625#elif HAVE_OPENCL
626 call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
627 xh%dx_d, xh%dy_d, xh%dz_d, &
628 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
629 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
630 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
631 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
632#else
633 call neko_error('No device backend configured')
634#endif
635 end associate
636
637 end subroutine opr_device_conv1
638
639 subroutine opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, &
640 Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
641 type(space_t), intent(in) :: xh_gl
642 type(space_t), intent(in) :: xh_gll
643 type(coef_t), intent(in) :: coef_gll
644 type(coef_t), intent(in) :: coef_gl
645 type(interpolator_t), intent(inout) :: gll_to_gl
646 real(kind=rp), intent(inout) :: &
647 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
648 type(c_ptr) :: cr_d, cs_d, ct_d, u_d
649 real(kind=rp) :: ud(xh_gl%lx*xh_gl%lx*xh_gl%lx)
650 type(c_ptr) :: du_d, ud_d
651 integer :: n_gl, n_gll
652
653 n_gll = coef_gl%msh%nelv * xh_gl%lxyz
654 n_gll = coef_gl%msh%nelv * xh_gll%lxyz
655
656 call device_map(ud, ud_d, n_gl)
657
658 du_d = device_get_ptr(du)
659
660 associate(xh => xh_gl, nelv => coef_gl%msh%nelv, lx => xh_gl%lx)
661#ifdef HAVE_HIP
662 call hip_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
663 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
664#elif HAVE_CUDA
665 call cuda_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
666 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
667#elif HAVE_OPENCL
668 call opencl_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
669 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
670#else
671 call neko_error('No device backend configured')
672#endif
673
674 call gll_to_gl%map(du, ud, nelv, xh_gll)
675 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
676 call device_col2(du_d, coef_gll%Binv_d, n_gll)
677
678 end associate
679
680 call device_free(ud_d)
681
682 end subroutine opr_device_convect_scalar
683
684 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
685 type(field_t), intent(inout) :: w1
686 type(field_t), intent(inout) :: w2
687 type(field_t), intent(inout) :: w3
688 type(field_t), intent(in) :: u1
689 type(field_t), intent(in) :: u2
690 type(field_t), intent(in) :: u3
691 type(field_t), intent(inout) :: work1
692 type(field_t), intent(inout) :: work2
693 type(coef_t), intent(in) :: c_xh
694 type(c_ptr), optional, intent(inout) :: event
695 integer :: gdim, n, nelv
696
697 n = w1%dof%size()
698 gdim = c_xh%msh%gdim
699 nelv = c_xh%msh%nelv
700
701 ! this%work1=dw/dy ; this%work2=dv/dz
702#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
703#ifdef HAVE_HIP
704 call hip_dudxyz(work1%x_d, u3%x_d, &
705 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
706 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
707 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
708#elif HAVE_CUDA
709 call cuda_dudxyz(work1%x_d, u3%x_d, &
710 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
711 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
712 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
713#elif HAVE_OPENCL
714 call opencl_dudxyz(work1%x_d, u3%x_d, &
715 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
716 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
717 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
718#endif
719 if (gdim .eq. 3) then
720#ifdef HAVE_HIP
721 call hip_dudxyz(work2%x_d, u2%x_d, &
722 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
723 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
724 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
725#elif HAVE_CUDA
726 call cuda_dudxyz(work2%x_d, u2%x_d, &
727 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
728 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
729 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
730#elif HAVE_OPENCL
731 call opencl_dudxyz(work2%x_d, u2%x_d, &
732 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
733 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
734 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
735#endif
736 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
737 else
738 call device_copy(w1%x_d, work1%x_d, n)
739 endif
740 ! this%work1=du/dz ; this%work2=dw/dx
741 if (gdim .eq. 3) then
742#ifdef HAVE_HIP
743 call hip_dudxyz(work1%x_d, u1%x_d, &
744 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
745 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
746 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
747 call hip_dudxyz(work2%x_d, u3%x_d, &
748 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
749 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
750 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
751#elif HAVE_CUDA
752 call cuda_dudxyz(work1%x_d, u1%x_d, &
753 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
754 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
755 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
756 call cuda_dudxyz(work2%x_d, u3%x_d, &
757 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
758 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
759 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
760#elif HAVE_OPENCL
761 call opencl_dudxyz(work1%x_d, u1%x_d, &
762 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
763 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
764 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
765 call opencl_dudxyz(work2%x_d, u3%x_d, &
766 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
767 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
768 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
769#endif
770 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
771 else
772 call device_rzero (work1%x_d, n)
773#ifdef HAVE_HIP
774 call hip_dudxyz(work2%x_d, u3%x_d, &
775 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
776 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
777 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
778#elif HAVE_CUDA
779 call cuda_dudxyz(work2%x_d, u3%x_d, &
780 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
781 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
782 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
783#elif HAVE_OPENCL
784 call opencl_dudxyz(work2%x_d, u3%x_d, &
785 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
786 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
787 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
788#endif
789 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
790 endif
791 ! this%work1=dv/dx ; this%work2=du/dy
792#ifdef HAVE_HIP
793 call hip_dudxyz(work1%x_d, u2%x_d, &
794 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
795 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
796 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
797 call hip_dudxyz(work2%x_d, u1%x_d, &
798 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
799 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
800 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
801#elif HAVE_CUDA
802 call cuda_dudxyz(work1%x_d, u2%x_d, &
803 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
804 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
805 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
806 call cuda_dudxyz(work2%x_d, u1%x_d, &
807 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
808 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
809 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
810#elif HAVE_OPENCL
811 call opencl_dudxyz(work1%x_d, u2%x_d, &
812 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
813 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
814 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
815 call opencl_dudxyz(work2%x_d, u1%x_d, &
816 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
817 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
818 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
819#endif
820 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
821 !! BC dependent, Needs to change if cyclic
822
823 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
824
825 if (present(event)) then
826 if(c_xh%cyclic) call opr_device_rotate_cyc_r4(w1%x, w2%x, w3%x, 1, c_xh)
827 call c_xh%gs_h%op(w1, gs_op_add, event)
828 call device_event_sync(event)
829 call c_xh%gs_h%op(w2, gs_op_add, event)
830 call device_event_sync(event)
831 call c_xh%gs_h%op(w3, gs_op_add, event)
832 call device_event_sync(event)
833 if(c_xh%cyclic) call opr_device_rotate_cyc_r4(w1%x, w2%x, w3%x, 0, c_xh)
834 else
835 if(c_xh%cyclic) call opr_device_rotate_cyc_r4(w1%x, w2%x, w3%x, 1, c_xh)
836 call c_xh%gs_h%op(w1, gs_op_add)
837 call c_xh%gs_h%op(w2, gs_op_add)
838 call c_xh%gs_h%op(w3, gs_op_add)
839 if(c_xh%cyclic) call opr_device_rotate_cyc_r4(w1%x, w2%x, w3%x, 0, c_xh)
840 end if
841
842 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
843
844#else
845 call neko_error('No device backend configured')
846#endif
847
848 end subroutine opr_device_curl
849
850 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
851 type(space_t) :: xh
852 type(coef_t) :: coef
853 integer :: nelv, gdim
854 real(kind=rp) :: dt
855 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
856 real(kind=rp) :: cfl
857 type(c_ptr) :: u_d, v_d, w_d
858
859 u_d = device_get_ptr(u)
860 v_d = device_get_ptr(v)
861 w_d = device_get_ptr(w)
862
863#ifdef HAVE_HIP
864 cfl = hip_cfl(dt, u_d, v_d, w_d, &
865 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
866 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
867 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
868 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
869 coef%jacinv_d, nelv, xh%lx)
870#elif HAVE_CUDA
871 cfl = cuda_cfl(dt, u_d, v_d, w_d, &
872 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
873 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
874 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
875 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
876 coef%jacinv_d, nelv, xh%lx)
877#elif HAVE_OPENCL
878 cfl = opencl_cfl(dt, u_d, v_d, w_d, &
879 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
880 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
881 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
882 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
883 coef%jacinv_d, nelv, xh%lx)
884#else
885 cfl = 0.0_rp
886 call neko_error('No device backend configured')
887#endif
888 end function opr_device_cfl
889
890 subroutine opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
891 type(coef_t) :: coef
892 integer :: idir, ncyc
893 real(rp), dimension(coef%Xh%lx*coef%Xh%ly*coef%Xh%lz*coef%msh%nelv) :: &
894 vx, vy, vz
895 type(c_ptr) :: vx_d, vy_d, vz_d
896
897 vx_d = device_get_ptr(vx)
898 vy_d = device_get_ptr(vy)
899 vz_d = device_get_ptr(vz)
900 ncyc = coef%cyc_msk(0) - 1
901
902 if (ncyc .le. 0) return
903
904#ifdef HAVE_HIP
905 call hip_rotate_cyc(vx_d, vy_d, vz_d, &
906 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
907 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
908 ncyc, idir)
909#elif HAVE_CUDA
910 call cuda_rotate_cyc(vx_d, vy_d, vz_d, &
911 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
912 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
913 ncyc, idir)
914#elif HAVE_OPENCL
915
916 call neko_error('No device backend configured for rotate_cyc')
917#else
918 call neko_error('No device backend configured for rotate_cyc')
919#endif
920 end subroutine opr_device_rotate_cyc_r1
921
922 subroutine opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
923 type(coef_t) :: coef
924 integer :: idir, ncyc
925 real(rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
926 vx, vy, vz
927 type(c_ptr) :: vx_d, vy_d, vz_d
928
929 vx_d = device_get_ptr(vx)
930 vy_d = device_get_ptr(vy)
931 vz_d = device_get_ptr(vz)
932 ncyc = coef%cyc_msk(0) - 1
933
934 if (ncyc .le. 0) return
935
936#ifdef HAVE_HIP
937 call hip_rotate_cyc(vx_d, vy_d, vz_d, &
938 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
939 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
940 ncyc, idir)
941#elif HAVE_CUDA
942 call cuda_rotate_cyc(vx_d, vy_d, vz_d, &
943 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
944 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
945 ncyc, idir)
946#elif HAVE_OPENCL
947
948 call neko_error('No device backend configured for rotate_cyc')
949#else
950 call neko_error('No device backend configured for rotate_cyc')
951#endif
952 end subroutine opr_device_rotate_cyc_r4
953
954 subroutine opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
955 Xh, coef)
956 type(space_t), intent(inout) :: xh
957 type(coef_t), intent(inout) :: coef
958 type(c_ptr) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d
959
960#ifdef HAVE_HIP
961 call hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
962 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
963 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
964 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
965 xh%w3_d, coef%msh%nelv, xh%lx)
966#elif HAVE_CUDA
967 call cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
968 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
969 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
970 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
971 xh%w3_d, coef%msh%nelv, xh%lx)
972#elif HAVE_OPENCL
973 call opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
974 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
975 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
976 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
977 xh%w3_d, coef%msh%nelv, xh%lx)
978#else
979 call neko_error('No device backend configured')
980#endif
981
982 end subroutine opr_device_set_convect_rst
983
984end module opr_device
985
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Coefficients.
Definition coef.f90:34
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public device_ortho(x_d, glb_n_points, n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.c:57
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.cu:64
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.c:57
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.cu:60
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.c:57
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition opr_lambda2.c:53
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.c:57
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.cu:59
void cuda_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63