Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use gather_scatter, only : gs_op_add
36 use num_types, only : rp, c_rp
38 use space, only : space_t
39 use coefs, only : coef_t
40 use field, only : field_t
41 use utils, only : neko_error
45 use, intrinsic :: iso_c_binding
46 implicit none
47 private
48
52
53#ifdef HAVE_HIP
54 interface
55 subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
56 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
57 bind(c, name = 'hip_dudxyz')
58 use, intrinsic :: iso_c_binding
59 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
60 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
61 integer(c_int) :: nel, lx
62 end subroutine hip_dudxyz
63 end interface
64
65 interface
66 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
67 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
68 bind(c, name = 'hip_cdtp')
69 use, intrinsic :: iso_c_binding
70 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
71 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
72 integer(c_int) :: nel, lx
73 end subroutine hip_cdtp
74 end interface
75
76 interface
77 subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
78 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
79 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
80 jacinv_d, nel, gdim, lx) &
81 bind(c, name = 'hip_conv1')
82 use, intrinsic :: iso_c_binding
83 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
84 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
85 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
86 type(c_ptr), value :: jacinv_d
87 integer(c_int) :: nel, gdim, lx
88 end subroutine hip_conv1
89 end interface
90
91 interface
92 subroutine hip_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
93 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'hip_convect_scalar')
94 use, intrinsic :: iso_c_binding
95 type(c_ptr), value :: du_d, u_d
96 type(c_ptr), value :: cr_d, cs_d, ct_d
97 type(c_ptr), value :: dx_d, dy_d, dz_d
98 integer(c_int) :: nel, lx
99 end subroutine hip_convect_scalar
100 end interface
101
102 interface
103 subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
104 dx_d, dy_d, dz_d, &
105 drdx_d, dsdx_d, dtdx_d, &
106 drdy_d, dsdy_d, dtdy_d, &
107 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
108 bind(c, name = 'hip_opgrad')
109 use, intrinsic :: iso_c_binding
110 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
111 type(c_ptr), value :: dx_d, dy_d, dz_d
112 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
113 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
114 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
115 type(c_ptr), value :: w3_d
116 integer(c_int) :: nel, lx
117 end subroutine hip_opgrad
118 end interface
119
120 interface
121 subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
122 dx_d, dy_d, dz_d, &
123 drdx_d, dsdx_d, dtdx_d, &
124 drdy_d, dsdy_d, dtdy_d, &
125 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
126 bind(c, name = 'hip_lambda2')
127 use, intrinsic :: iso_c_binding
128 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
129 type(c_ptr), value :: dx_d, dy_d, dz_d
130 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
131 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
132 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
133 type(c_ptr), value :: jacinv_d
134 integer(c_int) :: nel, lx
135 end subroutine hip_lambda2
136 end interface
137
138 interface
139 real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
140 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
141 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
142 jacinv_d, nel, lx) &
143 bind(c, name = 'hip_cfl')
144 use, intrinsic :: iso_c_binding
145 import c_rp
146 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
147 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
148 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
149 real(c_rp) :: dt
150 integer(c_int) :: nel, lx
151 end function hip_cfl
152 end interface
153
154 interface
155 subroutine hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
156 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
157 dtdz_d, w3_d, nel, lx) bind(c, name = 'hip_set_convect_rst')
158 use, intrinsic :: iso_c_binding
159 type(c_ptr), value :: cr_d, cs_d, ct_d
160 type(c_ptr), value :: cx_d, cy_d, cz_d
161 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
162 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
163 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
164 type(c_ptr), value :: w3_d
165 integer(c_int) :: nel, lx
166 end subroutine hip_set_convect_rst
167 end interface
168
169#elif HAVE_CUDA
170 interface
171 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
172 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
173 bind(c, name = 'cuda_dudxyz')
174 use, intrinsic :: iso_c_binding
175 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
176 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
177 integer(c_int) :: nel, lx
178 end subroutine cuda_dudxyz
179 end interface
180
181 interface
182 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
183 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
184 bind(c, name = 'cuda_cdtp')
185 use, intrinsic :: iso_c_binding
186 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
187 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
188 integer(c_int) :: nel, lx
189 end subroutine cuda_cdtp
190 end interface
191
192 interface
193 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
194 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
195 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
196 jacinv_d, nel, gdim, lx) &
197 bind(c, name = 'cuda_conv1')
198 use, intrinsic :: iso_c_binding
199 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
200 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
201 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
202 type(c_ptr), value :: jacinv_d
203 integer(c_int) :: nel, gdim, lx
204 end subroutine cuda_conv1
205 end interface
206
207 interface
208 subroutine cuda_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
209 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'cuda_convect_scalar')
210 use, intrinsic :: iso_c_binding
211 type(c_ptr), value :: du_d, u_d
212 type(c_ptr), value :: cr_d, cs_d, ct_d
213 type(c_ptr), value :: dx_d, dy_d, dz_d
214 integer(c_int) :: nel, lx
215 end subroutine cuda_convect_scalar
216 end interface
217
218 interface
219 subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
220 dx_d, dy_d, dz_d, &
221 drdx_d, dsdx_d, dtdx_d, &
222 drdy_d, dsdy_d, dtdy_d, &
223 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
224 bind(c, name = 'cuda_opgrad')
225 use, intrinsic :: iso_c_binding
226 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
227 type(c_ptr), value :: dx_d, dy_d, dz_d
228 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
229 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
230 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
231 type(c_ptr), value :: w3_d
232 integer(c_int) :: nel, lx
233 end subroutine cuda_opgrad
234 end interface
235
236 interface
237 subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
238 dx_d, dy_d, dz_d, &
239 drdx_d, dsdx_d, dtdx_d, &
240 drdy_d, dsdy_d, dtdy_d, &
241 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
242 bind(c, name = 'cuda_lambda2')
243 use, intrinsic :: iso_c_binding
244 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
245 type(c_ptr), value :: dx_d, dy_d, dz_d
246 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
247 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
248 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
249 type(c_ptr), value :: jacinv_d
250 integer(c_int) :: nel, lx
251 end subroutine cuda_lambda2
252 end interface
253
254 interface
255 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
256 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
257 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
258 jacinv_d, nel, lx) &
259 bind(c, name = 'cuda_cfl')
260 use, intrinsic :: iso_c_binding
261 import c_rp
262 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
263 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
264 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
265 real(c_rp) :: dt
266 integer(c_int) :: nel, lx
267 end function cuda_cfl
268 end interface
269
270 interface
271 subroutine cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
272 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
273 dtdz_d, w3_d, nel, lx) bind(c, name = 'cuda_set_convect_rst')
274 use, intrinsic :: iso_c_binding
275 type(c_ptr), value :: cr_d, cs_d, ct_d
276 type(c_ptr), value :: cx_d, cy_d, cz_d
277 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
278 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
279 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
280 type(c_ptr), value :: w3_d
281 integer(c_int) :: nel, lx
282 end subroutine cuda_set_convect_rst
283 end interface
284
285#elif HAVE_OPENCL
286 interface
287 subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
288 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
289 bind(c, name = 'opencl_dudxyz')
290 use, intrinsic :: iso_c_binding
291 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
292 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
293 integer(c_int) :: nel, lx
294 end subroutine opencl_dudxyz
295 end interface
296
297 interface
298 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
299 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
300 bind(c, name = 'opencl_cdtp')
301 use, intrinsic :: iso_c_binding
302 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
303 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
304 integer(c_int) :: nel, lx
305 end subroutine opencl_cdtp
306 end interface
307
308 interface
309 subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
310 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
311 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
312 jacinv_d, nel, gdim, lx) &
313 bind(c, name = 'opencl_conv1')
314 use, intrinsic :: iso_c_binding
315 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
316 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
317 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
318 type(c_ptr), value :: jacinv_d
319 integer(c_int) :: nel, gdim, lx
320 end subroutine opencl_conv1
321 end interface
322
323 interface
324 subroutine opencl_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
325 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'opencl_convect_scalar')
326 use, intrinsic :: iso_c_binding
327 type(c_ptr), value :: du_d, u_d
328 type(c_ptr), value :: cr_d, cs_d, ct_d
329 type(c_ptr), value :: dx_d, dy_d, dz_d
330 integer(c_int) :: nel, lx
331 end subroutine opencl_convect_scalar
332 end interface
333
334 interface
335 subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
336 dx_d, dy_d, dz_d, &
337 drdx_d, dsdx_d, dtdx_d, &
338 drdy_d, dsdy_d, dtdy_d, &
339 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
340 bind(c, name = 'opencl_opgrad')
341 use, intrinsic :: iso_c_binding
342 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
343 type(c_ptr), value :: dx_d, dy_d, dz_d
344 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
345 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
346 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
347 type(c_ptr), value :: w3_d
348 integer(c_int) :: nel, lx
349 end subroutine opencl_opgrad
350 end interface
351
352 interface
353 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
354 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
355 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
356 jacinv_d, nel, lx) &
357 bind(c, name = 'opencl_cfl')
358 use, intrinsic :: iso_c_binding
359 import c_rp
360 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
361 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
362 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
363 real(c_rp) :: dt
364 integer(c_int) :: nel, lx
365 end function opencl_cfl
366 end interface
367
368 interface
369 subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
370 dx_d, dy_d, dz_d, &
371 drdx_d, dsdx_d, dtdx_d, &
372 drdy_d, dsdy_d, dtdy_d, &
373 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
374 bind(c, name = 'opencl_lambda2')
375 use, intrinsic :: iso_c_binding
376 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
377 type(c_ptr), value :: dx_d, dy_d, dz_d
378 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
379 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
380 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
381 type(c_ptr), value :: jacinv_d
382 integer(c_int) :: nel, lx
383 end subroutine opencl_lambda2
384 end interface
385
386 interface
387 subroutine opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
388 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
389 dtdz_d, w3_d, nel, lx) bind(c, name = 'opencl_set_convect_rst')
390 use, intrinsic :: iso_c_binding
391 type(c_ptr), value :: cr_d, cs_d, ct_d
392 type(c_ptr), value :: cx_d, cy_d, cz_d
393 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
394 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
395 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
396 type(c_ptr), value :: w3_d
397 integer(c_int) :: nel, lx
398 end subroutine opencl_set_convect_rst
399 end interface
400
401#endif
402
403contains
404
405 subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
406 type(coef_t), intent(in), target :: coef
407 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(inout) :: du
408 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(in) :: u, dr, ds, dt
409 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
410
411 du_d = device_get_ptr(du)
412 u_d = device_get_ptr(u)
413
414 dr_d = device_get_ptr(dr)
415 ds_d = device_get_ptr(ds)
416 dt_d = device_get_ptr(dt)
417
418 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
419#ifdef HAVE_HIP
420 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
421 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
422 msh%nelv, xh%lx)
423#elif HAVE_CUDA
424 call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
425 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
426 msh%nelv, xh%lx)
427#elif HAVE_OPENCL
428 call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
429 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
430 msh%nelv, xh%lx)
431#else
432 call neko_error('No device backend configured')
433#endif
434 end associate
435
436 end subroutine opr_device_dudxyz
437
438 subroutine opr_device_opgrad(ux, uy, uz, u, coef)
439 type(coef_t), intent(in) :: coef
440 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
441 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
442 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
443 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
444 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
445
446 ux_d = device_get_ptr(ux)
447 uy_d = device_get_ptr(uy)
448 uz_d = device_get_ptr(uz)
449
450 u_d = device_get_ptr(u)
451
452 associate(xh => coef%Xh, msh => coef%msh)
453#ifdef HAVE_HIP
454 call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
455 xh%dx_d, xh%dy_d, xh%dz_d, &
456 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
457 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
458 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
459 xh%w3_d, msh%nelv, xh%lx)
460#elif HAVE_CUDA
461 call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
462 xh%dx_d, xh%dy_d, xh%dz_d, &
463 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
464 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
465 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
466 xh%w3_d, msh%nelv, xh%lx)
467#elif HAVE_OPENCL
468 call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
469 xh%dx_d, xh%dy_d, xh%dz_d, &
470 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
471 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
472 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
473 xh%w3_d, msh%nelv, xh%lx)
474#else
475 call neko_error('No device backend configured')
476#endif
477 end associate
478
479 end subroutine opr_device_opgrad
480 subroutine opr_device_lambda2(lambda2, u, v, w, coef)
481 type(coef_t), intent(in) :: coef
482 type(field_t), intent(inout) :: lambda2
483 type(field_t), intent(in) :: u, v, w
484#ifdef HAVE_HIP
485 call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
486 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
487 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
488 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
489 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
490 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
491#elif HAVE_CUDA
492 call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
493 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
494 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
495 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
496 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
497 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
498#elif HAVE_OPENCL
499 call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
500 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
501 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
502 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
503 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
504 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
505#else
506 call neko_error('No device backend configured')
507#endif
508 end subroutine opr_device_lambda2
509
510 subroutine opr_device_cdtp(dtx, x, dr, ds, dt, coef)
511 type(coef_t), intent(in) :: coef
512 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
513 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
514 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
515 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
516 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
517 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
518
519 dtx_d = device_get_ptr(dtx)
520 x_d = device_get_ptr(x)
521
522 dr_d = device_get_ptr(dr)
523 ds_d = device_get_ptr(ds)
524 dt_d = device_get_ptr(dt)
525
526 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
527#ifdef HAVE_HIP
528 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
529 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
530 msh%nelv, xh%lx)
531#elif HAVE_CUDA
532 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
533 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
534 msh%nelv, xh%lx)
535#elif HAVE_OPENCL
536 call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
537 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
538 msh%nelv, xh%lx)
539#else
540 call neko_error('No device backend configured')
541#endif
542 end associate
543
544 end subroutine opr_device_cdtp
545
546 subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
547 type(space_t), intent(in) :: xh
548 type(coef_t), intent(in) :: coef
549 integer, intent(in) :: nelv, gdim
550 real(kind=rp), intent(inout) :: du(xh%lxyz, nelv)
551 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
552 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
553 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
554 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
555 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
556
557 du_d = device_get_ptr(du)
558 u_d = device_get_ptr(u)
559
560 vx_d = device_get_ptr(vx)
561 vy_d = device_get_ptr(vy)
562 vz_d = device_get_ptr(vz)
563
564 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
565#ifdef HAVE_HIP
566 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
567 xh%dx_d, xh%dy_d, xh%dz_d, &
568 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
569 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
570 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
571 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
572#elif HAVE_CUDA
573 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
574 xh%dx_d, xh%dy_d, xh%dz_d, &
575 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
576 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
577 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
578 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
579#elif HAVE_OPENCL
580 call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
581 xh%dx_d, xh%dy_d, xh%dz_d, &
582 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
583 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
584 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
585 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
586#else
587 call neko_error('No device backend configured')
588#endif
589 end associate
590
591 end subroutine opr_device_conv1
592
593 subroutine opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, &
594 Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
595 type(space_t), intent(in) :: xh_gl
596 type(space_t), intent(in) :: xh_gll
597 type(coef_t), intent(in) :: coef_gll
598 type(coef_t), intent(in) :: coef_gl
599 type(interpolator_t), intent(inout) :: gll_to_gl
600 real(kind=rp), intent(inout) :: &
601 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
602 type(c_ptr) :: cr_d, cs_d, ct_d, u_d
603 real(kind=rp) :: ud(xh_gl%lx*xh_gl%lx*xh_gl%lx)
604 type(c_ptr) :: du_d, ud_d
605 integer :: n_gl, n_gll
606
607 n_gll = coef_gl%msh%nelv * xh_gl%lxyz
608 n_gll = coef_gl%msh%nelv * xh_gll%lxyz
609
610 call device_map(ud, ud_d, n_gl)
611
612 du_d = device_get_ptr(du)
613
614 associate(xh => xh_gl, nelv => coef_gl%msh%nelv, lx => xh_gl%lx)
615#ifdef HAVE_HIP
616 call hip_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
617 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
618#elif HAVE_CUDA
619 call cuda_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
620 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
621#elif HAVE_OPENCL
622 call opencl_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
623 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
624#else
625 call neko_error('No device backend configured')
626#endif
627
628 call gll_to_gl%map(du, ud, nelv, xh_gll)
629 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
630 call device_col2(du_d, coef_gll%Binv_d, n_gll)
631
632 end associate
633
634 call device_free(ud_d)
635
636 end subroutine opr_device_convect_scalar
637
638 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
639 type(field_t), intent(inout) :: w1
640 type(field_t), intent(inout) :: w2
641 type(field_t), intent(inout) :: w3
642 type(field_t), intent(in) :: u1
643 type(field_t), intent(in) :: u2
644 type(field_t), intent(in) :: u3
645 type(field_t), intent(inout) :: work1
646 type(field_t), intent(inout) :: work2
647 type(coef_t), intent(in) :: c_xh
648 type(c_ptr), optional, intent(inout) :: event
649 integer :: gdim, n, nelv
650
651 n = w1%dof%size()
652 gdim = c_xh%msh%gdim
653 nelv = c_xh%msh%nelv
654
655 ! this%work1=dw/dy ; this%work2=dv/dz
656#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
657#ifdef HAVE_HIP
658 call hip_dudxyz(work1%x_d, u3%x_d, &
659 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
660 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
661 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
662#elif HAVE_CUDA
663 call cuda_dudxyz(work1%x_d, u3%x_d, &
664 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
665 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
666 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
667#elif HAVE_OPENCL
668 call opencl_dudxyz(work1%x_d, u3%x_d, &
669 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
670 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
671 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
672#endif
673 if (gdim .eq. 3) then
674#ifdef HAVE_HIP
675 call hip_dudxyz(work2%x_d, u2%x_d, &
676 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
677 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
678 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
679#elif HAVE_CUDA
680 call cuda_dudxyz(work2%x_d, u2%x_d, &
681 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
682 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
683 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
684#elif HAVE_OPENCL
685 call opencl_dudxyz(work2%x_d, u2%x_d, &
686 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
687 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
688 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
689#endif
690 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
691 else
692 call device_copy(w1%x_d, work1%x_d, n)
693 endif
694 ! this%work1=du/dz ; this%work2=dw/dx
695 if (gdim .eq. 3) then
696#ifdef HAVE_HIP
697 call hip_dudxyz(work1%x_d, u1%x_d, &
698 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
699 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
700 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
701 call hip_dudxyz(work2%x_d, u3%x_d, &
702 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
703 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
704 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
705#elif HAVE_CUDA
706 call cuda_dudxyz(work1%x_d, u1%x_d, &
707 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
708 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
709 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
710 call cuda_dudxyz(work2%x_d, u3%x_d, &
711 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
712 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
713 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
714#elif HAVE_OPENCL
715 call opencl_dudxyz(work1%x_d, u1%x_d, &
716 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
717 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
718 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
719 call opencl_dudxyz(work2%x_d, u3%x_d, &
720 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
721 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
722 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
723#endif
724 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
725 else
726 call device_rzero (work1%x_d, n)
727#ifdef HAVE_HIP
728 call hip_dudxyz(work2%x_d, u3%x_d, &
729 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
730 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
731 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
732#elif HAVE_CUDA
733 call cuda_dudxyz(work2%x_d, u3%x_d, &
734 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
735 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
736 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
737#elif HAVE_OPENCL
738 call opencl_dudxyz(work2%x_d, u3%x_d, &
739 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
740 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
741 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
742#endif
743 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
744 endif
745 ! this%work1=dv/dx ; this%work2=du/dy
746#ifdef HAVE_HIP
747 call hip_dudxyz(work1%x_d, u2%x_d, &
748 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
749 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
750 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
751 call hip_dudxyz(work2%x_d, u1%x_d, &
752 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
753 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
754 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
755#elif HAVE_CUDA
756 call cuda_dudxyz(work1%x_d, u2%x_d, &
757 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
758 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
759 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
760 call cuda_dudxyz(work2%x_d, u1%x_d, &
761 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
762 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
763 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
764#elif HAVE_OPENCL
765 call opencl_dudxyz(work1%x_d, u2%x_d, &
766 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
767 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
768 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
769 call opencl_dudxyz(work2%x_d, u1%x_d, &
770 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
771 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
772 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
773#endif
774 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
775 !! BC dependent, Needs to change if cyclic
776
777 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
778
779 if (present(event)) then
780 call c_xh%gs_h%op(w1, gs_op_add, event)
781 call device_event_sync(event)
782 call c_xh%gs_h%op(w2, gs_op_add, event)
783 call device_event_sync(event)
784 call c_xh%gs_h%op(w3, gs_op_add, event)
785 call device_event_sync(event)
786 else
787 call c_xh%gs_h%op(w1, gs_op_add)
788 call c_xh%gs_h%op(w2, gs_op_add)
789 call c_xh%gs_h%op(w3, gs_op_add)
790 end if
791
792 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
793
794#else
795 call neko_error('No device backend configured')
796#endif
797
798 end subroutine opr_device_curl
799
800 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
801 type(space_t) :: xh
802 type(coef_t) :: coef
803 integer :: nelv, gdim
804 real(kind=rp) :: dt
805 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
806 real(kind=rp) :: cfl
807 type(c_ptr) :: u_d, v_d, w_d
808
809 u_d = device_get_ptr(u)
810 v_d = device_get_ptr(v)
811 w_d = device_get_ptr(w)
812
813#ifdef HAVE_HIP
814 cfl = hip_cfl(dt, u_d, v_d, w_d, &
815 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
816 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
817 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
818 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
819 coef%jacinv_d, nelv, xh%lx)
820#elif HAVE_CUDA
821 cfl = cuda_cfl(dt, u_d, v_d, w_d, &
822 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
823 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
824 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
825 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
826 coef%jacinv_d, nelv, xh%lx)
827#elif HAVE_OPENCL
828 cfl = opencl_cfl(dt, u_d, v_d, w_d, &
829 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
830 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
831 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
832 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
833 coef%jacinv_d, nelv, xh%lx)
834#else
835 cfl = 0.0_rp
836 call neko_error('No device backend configured')
837#endif
838 end function opr_device_cfl
839
840 subroutine opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
841 Xh, coef)
842 type(space_t), intent(inout) :: xh
843 type(coef_t), intent(inout) :: coef
844 type(c_ptr) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d
845
846#ifdef HAVE_HIP
847 call hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
848 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
849 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
850 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
851 xh%w3_d, coef%msh%nelv, xh%lx)
852#elif HAVE_CUDA
853 call cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
854 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
855 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
856 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
857 xh%w3_d, coef%msh%nelv, xh%lx)
858#elif HAVE_OPENCL
859 call opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
860 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
861 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
862 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
863 xh%w3_d, coef%msh%nelv, xh%lx)
864#else
865 call neko_error('No device backend configured')
866#endif
867
868 end subroutine opr_device_set_convect_rst
869
870end module opr_device
871
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Coefficients.
Definition coef.f90:34
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.c:57
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.cu:64
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.c:57
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.cu:60
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.c:57
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition opr_lambda2.c:53
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.c:57
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.cu:59
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63