Neko  0.8.1
A portable framework for high-order spectral element flow simulations
opr_device.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2022, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module opr_device
35  use gather_scatter
36  use num_types, only : rp, c_rp
37  use device_math
38  use device_mathops
39  use device, only : device_get_ptr
40  use space, only : space_t
41  use coefs, only : coef_t
42  use mesh, only : mesh_t
43  use field, only : field_t
44  use utils, only : neko_error
45  use comm
46  use, intrinsic :: iso_c_binding
47  implicit none
48  private
49 
52 
53 #ifdef HAVE_HIP
54  interface
55  subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
56  dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
57  bind(c, name='hip_dudxyz')
58  use, intrinsic :: iso_c_binding
59  type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
60  type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
61  integer(c_int) :: nel, lx
62  end subroutine hip_dudxyz
63  end interface
64 
65  interface
66  subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
67  dxt_d, dyt_d, dzt_d, B_d, jac_d, nel, lx) &
68  bind(c, name='hip_cdtp')
69  use, intrinsic :: iso_c_binding
70  type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
71  type(c_ptr), value :: dxt_d, dyt_d, dzt_d, B_d, jac_d
72  integer(c_int) :: nel, lx
73  end subroutine hip_cdtp
74  end interface
75 
76  interface
77  subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
78  dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
79  drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
80  jacinv_d, nel, gdim, lx) &
81  bind(c, name='hip_conv1')
82  use, intrinsic :: iso_c_binding
83  type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
84  type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
85  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
86  type(c_ptr), value :: jacinv_d
87  integer(c_int) :: nel, gdim, lx
88  end subroutine hip_conv1
89  end interface
90 
91  interface
92  subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
93  dx_d, dy_d, dz_d, &
94  drdx_d, dsdx_d, dtdx_d, &
95  drdy_d, dsdy_d, dtdy_d, &
96  drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
97  bind(c, name='hip_opgrad')
98  use, intrinsic :: iso_c_binding
99  type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
100  type(c_ptr), value :: dx_d, dy_d, dz_d
101  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
102  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
103  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
104  type(c_ptr), value :: w3_d
105  integer(c_int) :: nel, lx
106  end subroutine hip_opgrad
107  end interface
108 
109  interface
110  subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
111  dx_d, dy_d, dz_d, &
112  drdx_d, dsdx_d, dtdx_d, &
113  drdy_d, dsdy_d, dtdy_d, &
114  drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
115  bind(c, name='hip_lambda2')
116  use, intrinsic :: iso_c_binding
117  type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
118  type(c_ptr), value :: dx_d, dy_d, dz_d
119  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
120  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
121  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
122  type(c_ptr), value :: jacinv_d
123  integer(c_int) :: nel, lx
124  end subroutine hip_lambda2
125  end interface
126 
127  interface
128  real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
129  drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
130  drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
131  jacinv_d, nel, lx) &
132  bind(c, name='hip_cfl')
133  use, intrinsic :: iso_c_binding
134  import c_rp
135  type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
136  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
137  type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
138  real(c_rp) :: dt
139  integer(c_int) :: nel, lx
140  end function hip_cfl
141  end interface
142 #elif HAVE_CUDA
143  interface
144  subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
145  dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
146  bind(c, name='cuda_dudxyz')
147  use, intrinsic :: iso_c_binding
148  type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
149  type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
150  integer(c_int) :: nel, lx
151  end subroutine cuda_dudxyz
152  end interface
153 
154  interface
155  subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
156  dxt_d, dyt_d, dzt_d, B_d, jac_d, nel, lx) &
157  bind(c, name='cuda_cdtp')
158  use, intrinsic :: iso_c_binding
159  type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
160  type(c_ptr), value :: dxt_d, dyt_d, dzt_d, b_d, jac_d
161  integer(c_int) :: nel, lx
162  end subroutine cuda_cdtp
163  end interface
164 
165  interface
166  subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
167  dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
168  drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
169  jacinv_d, nel, gdim, lx) &
170  bind(c, name='cuda_conv1')
171  use, intrinsic :: iso_c_binding
172  type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
173  type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
174  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
175  type(c_ptr), value :: jacinv_d
176  integer(c_int) :: nel, gdim, lx
177  end subroutine cuda_conv1
178  end interface
179 
180  interface
181  subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
182  dx_d, dy_d, dz_d, &
183  drdx_d, dsdx_d, dtdx_d, &
184  drdy_d, dsdy_d, dtdy_d, &
185  drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
186  bind(c, name='cuda_opgrad')
187  use, intrinsic :: iso_c_binding
188  type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
189  type(c_ptr), value :: dx_d, dy_d, dz_d
190  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
191  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
192  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
193  type(c_ptr), value :: w3_d
194  integer(c_int) :: nel, lx
195  end subroutine cuda_opgrad
196  end interface
197 
198  interface
199  subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
200  dx_d, dy_d, dz_d, &
201  drdx_d, dsdx_d, dtdx_d, &
202  drdy_d, dsdy_d, dtdy_d, &
203  drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
204  bind(c, name='cuda_lambda2')
205  use, intrinsic :: iso_c_binding
206  type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
207  type(c_ptr), value :: dx_d, dy_d, dz_d
208  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
209  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
210  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
211  type(c_ptr), value :: jacinv_d
212  integer(c_int) :: nel, lx
213  end subroutine cuda_lambda2
214  end interface
215 
216 
217  interface
218  real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
219  drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
220  drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
221  jacinv_d, nel, lx) &
222  bind(c, name='cuda_cfl')
223  use, intrinsic :: iso_c_binding
224  import c_rp
225  type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
226  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
227  type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
228  real(c_rp) :: dt
229  integer(c_int) :: nel, lx
230  end function cuda_cfl
231  end interface
232 #elif HAVE_OPENCL
233  interface
234  subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
235  dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
236  bind(c, name='opencl_dudxyz')
237  use, intrinsic :: iso_c_binding
238  type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
239  type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
240  integer(c_int) :: nel, lx
241  end subroutine opencl_dudxyz
242  end interface
243 
244  interface
245  subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
246  dxt_d, dyt_d, dzt_d, B_d, jac_d, nel, lx) &
247  bind(c, name='opencl_cdtp')
248  use, intrinsic :: iso_c_binding
249  type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
250  type(c_ptr), value :: dxt_d, dyt_d, dzt_d, b_d, jac_d
251  integer(c_int) :: nel, lx
252  end subroutine opencl_cdtp
253  end interface
254 
255  interface
256  subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
257  dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
258  drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
259  jacinv_d, nel, gdim, lx) &
260  bind(c, name='opencl_conv1')
261  use, intrinsic :: iso_c_binding
262  type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
263  type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
264  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
265  type(c_ptr), value :: jacinv_d
266  integer(c_int) :: nel, gdim, lx
267  end subroutine opencl_conv1
268  end interface
269 
270  interface
271  subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
272  dx_d, dy_d, dz_d, &
273  drdx_d, dsdx_d, dtdx_d, &
274  drdy_d, dsdy_d, dtdy_d, &
275  drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
276  bind(c, name='opencl_opgrad')
277  use, intrinsic :: iso_c_binding
278  type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
279  type(c_ptr), value :: dx_d, dy_d, dz_d
280  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
281  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
282  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
283  type(c_ptr), value :: w3_d
284  integer(c_int) :: nel, lx
285  end subroutine opencl_opgrad
286  end interface
287 
288  interface
289  real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
290  drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
291  drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
292  jacinv_d, nel, lx) &
293  bind(c, name='opencl_cfl')
294  use, intrinsic :: iso_c_binding
295  import c_rp
296  type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
297  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
298  type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
299  real(c_rp) :: dt
300  integer(c_int) :: nel, lx
301  end function opencl_cfl
302  end interface
303 
304  interface
305  subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
306  dx_d, dy_d, dz_d, &
307  drdx_d, dsdx_d, dtdx_d, &
308  drdy_d, dsdy_d, dtdy_d, &
309  drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
310  bind(c, name='opencl_lambda2')
311  use, intrinsic :: iso_c_binding
312  type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
313  type(c_ptr), value :: dx_d, dy_d, dz_d
314  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
315  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
316  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
317  type(c_ptr), value :: jacinv_d
318  integer(c_int) :: nel, lx
319  end subroutine opencl_lambda2
320  end interface
321 #endif
322 
323 contains
324 
325  subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
326  type(coef_t), intent(in), target :: coef
327  real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly, & coef%Xh%lz,coef%msh%nelv), intent(inout) :: du
328  real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly, & coef%Xh%lz,coef%msh%nelv), intent(in) :: u, dr, ds, dt
329  type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
330 
331  du_d = device_get_ptr(du)
332  u_d = device_get_ptr(u)
333 
334  dr_d = device_get_ptr(dr)
335  ds_d = device_get_ptr(ds)
336  dt_d = device_get_ptr(dt)
337 
338  associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
339 #ifdef HAVE_HIP
340  call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
341  xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
342  msh%nelv, xh%lx)
343 #elif HAVE_CUDA
344  call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
345  xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
346  msh%nelv, xh%lx)
347 #elif HAVE_OPENCL
348  call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
349  xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
350  msh%nelv, xh%lx)
351 #else
352  call neko_error('No device backend configured')
353 #endif
354  end associate
355 
356  end subroutine opr_device_dudxyz
357 
358  subroutine opr_device_opgrad(ux, uy, uz, u, coef)
359  type(coef_t), intent(in) :: coef
360  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux
361  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy
362  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz
363  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: u
364  type(c_ptr) :: ux_d, uy_d, uz_d, u_d
365 
366  ux_d = device_get_ptr(ux)
367  uy_d = device_get_ptr(uy)
368  uz_d = device_get_ptr(uz)
369 
370  u_d = device_get_ptr(u)
371 
372  associate(xh => coef%Xh, msh => coef%msh)
373 #ifdef HAVE_HIP
374  call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
375  xh%dx_d, xh%dy_d, xh%dz_d, &
376  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
377  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
378  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
379  xh%w3_d, msh%nelv, xh%lx)
380 #elif HAVE_CUDA
381  call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
382  xh%dx_d, xh%dy_d, xh%dz_d, &
383  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
384  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
385  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
386  xh%w3_d, msh%nelv, xh%lx)
387 #elif HAVE_OPENCL
388  call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
389  xh%dx_d, xh%dy_d, xh%dz_d, &
390  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
391  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
392  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
393  xh%w3_d, msh%nelv, xh%lx)
394 #else
395  call neko_error('No device backend configured')
396 #endif
397  end associate
398 
399  end subroutine opr_device_opgrad
400  subroutine opr_device_lambda2(lambda2, u, v, w, coef)
401  type(coef_t), intent(in) :: coef
402  type(field_t), intent(inout) :: lambda2
403  type(field_t), intent(in) :: u, v, w
404 #ifdef HAVE_HIP
405  call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
406  coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
407  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
408  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
409  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
410  coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
411 #elif HAVE_CUDA
412  call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
413  coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
414  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
415  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
416  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
417  coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
418 #elif HAVE_OPENCL
419  call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
420  coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
421  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
422  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
423  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
424  coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
425 #else
426  call neko_error('No device backend configured')
427 #endif
428  end subroutine opr_device_lambda2
429 
430  subroutine opr_device_cdtp(dtx, x, dr,ds, dt, coef)
431  type(coef_t), intent(in) :: coef
432  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: dtx
433  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: x
434  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: dr
435  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: ds
436  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: dt
437  type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
438 
439  dtx_d = device_get_ptr(dtx)
440  x_d = device_get_ptr(x)
441 
442  dr_d = device_get_ptr(dr)
443  ds_d = device_get_ptr(ds)
444  dt_d = device_get_ptr(dt)
445 
446  associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
447 #ifdef HAVE_HIP
448  call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
449  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%B_d, &
450  coef%jac_d, msh%nelv, xh%lx)
451 #elif HAVE_CUDA
452  call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
453  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%B_d, &
454  coef%jac_d, msh%nelv, xh%lx)
455 #elif HAVE_OPENCL
456  call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
457  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%B_d, &
458  coef%jac_d, msh%nelv, xh%lx)
459 #else
460  call neko_error('No device backend configured')
461 #endif
462  end associate
463 
464  end subroutine opr_device_cdtp
465 
466  subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
467  type(space_t), intent(in) :: xh
468  type(coef_t), intent(in) :: coef
469  integer, intent(in) :: nelv, gdim
470  real(kind=rp), intent(inout) :: du(xh%lxyz,nelv)
471  real(kind=rp), intent(inout), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: u
472  real(kind=rp), intent(inout), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: vx
473  real(kind=rp), intent(inout), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: vy
474  real(kind=rp), intent(inout), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: vz
475  type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
476 
477  du_d = device_get_ptr(du)
478  u_d = device_get_ptr(u)
479 
480  vx_d = device_get_ptr(vx)
481  vy_d = device_get_ptr(vy)
482  vz_d = device_get_ptr(vz)
483 
484  associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
485 #ifdef HAVE_HIP
486  call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
487  xh%dx_d, xh%dy_d, xh%dz_d, &
488  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
489  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
490  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
491  coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
492 #elif HAVE_CUDA
493  call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
494  xh%dx_d, xh%dy_d, xh%dz_d, &
495  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
496  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
497  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
498  coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
499 #elif HAVE_OPENCL
500  call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
501  xh%dx_d, xh%dy_d, xh%dz_d, &
502  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
503  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
504  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
505  coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
506 #else
507  call neko_error('No device backend configured')
508 #endif
509  end associate
510 
511  end subroutine opr_device_conv1
512 
513  subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
514  type(field_t), intent(inout) :: w1
515  type(field_t), intent(inout) :: w2
516  type(field_t), intent(inout) :: w3
517  type(field_t), intent(inout) :: u1
518  type(field_t), intent(inout) :: u2
519  type(field_t), intent(inout) :: u3
520  type(field_t), intent(inout) :: work1
521  type(field_t), intent(inout) :: work2
522  type(coef_t), intent(in) :: c_xh
523  integer :: gdim, n, nelv
524 
525  n = w1%dof%size()
526  gdim = c_xh%msh%gdim
527  nelv = c_xh%msh%nelv
528 
529  ! this%work1=dw/dy ; this%work2=dv/dz
530 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
531 #ifdef HAVE_HIP
532  call hip_dudxyz(work1%x_d, u3%x_d, &
533  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
534  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
535  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
536 #elif HAVE_CUDA
537  call cuda_dudxyz(work1%x_d, u3%x_d, &
538  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
539  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
540  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
541 #elif HAVE_OPENCL
542  call opencl_dudxyz(work1%x_d, u3%x_d, &
543  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
544  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
545  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
546 #endif
547  if (gdim .eq. 3) then
548 #ifdef HAVE_HIP
549  call hip_dudxyz(work2%x_d, u2%x_d, &
550  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
551  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
552  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
553 #elif HAVE_CUDA
554  call cuda_dudxyz(work2%x_d, u2%x_d, &
555  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
556  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
557  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
558 #elif HAVE_OPENCL
559  call opencl_dudxyz(work2%x_d, u2%x_d, &
560  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
561  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
562  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
563 #endif
564  call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
565  else
566  call device_copy(w1%x_d, work1%x_d, n)
567  endif
568  ! this%work1=du/dz ; this%work2=dw/dx
569  if (gdim .eq. 3) then
570 #ifdef HAVE_HIP
571  call hip_dudxyz(work1%x_d, u1%x_d, &
572  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
573  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
574  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
575  call hip_dudxyz(work2%x_d, u3%x_d, &
576  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
577  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
578  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
579 #elif HAVE_CUDA
580  call cuda_dudxyz(work1%x_d, u1%x_d, &
581  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
582  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
583  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
584  call cuda_dudxyz(work2%x_d, u3%x_d, &
585  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
586  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
587  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
588 #elif HAVE_OPENCL
589  call opencl_dudxyz(work1%x_d, u1%x_d, &
590  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
591  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
592  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
593  call opencl_dudxyz(work2%x_d, u3%x_d, &
594  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
595  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
596  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
597 #endif
598  call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
599  else
600  call device_rzero (work1%x_d, n)
601 #ifdef HAVE_HIP
602  call hip_dudxyz(work2%x_d, u3%x_d, &
603  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
604  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
605  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
606 #elif HAVE_CUDA
607  call cuda_dudxyz(work2%x_d, u3%x_d, &
608  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
609  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
610  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
611 #elif HAVE_OPENCL
612  call opencl_dudxyz(work2%x_d, u3%x_d, &
613  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
614  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
615  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
616 #endif
617  call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
618  endif
619  ! this%work1=dv/dx ; this%work2=du/dy
620 #ifdef HAVE_HIP
621  call hip_dudxyz(work1%x_d, u2%x_d, &
622  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
623  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
624  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
625  call hip_dudxyz(work2%x_d, u1%x_d, &
626  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
627  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
628  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
629 #elif HAVE_CUDA
630  call cuda_dudxyz(work1%x_d, u2%x_d, &
631  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
632  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
633  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
634  call cuda_dudxyz(work2%x_d, u1%x_d, &
635  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
636  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
637  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
638 #elif HAVE_OPENCL
639  call opencl_dudxyz(work1%x_d, u2%x_d, &
640  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
641  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
642  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
643  call opencl_dudxyz(work2%x_d, u1%x_d, &
644  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
645  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
646  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
647 #endif
648  call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
649  !! BC dependent, Needs to change if cyclic
650 
651  call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
652  call c_xh%gs_h%op(w1, gs_op_add)
653  call c_xh%gs_h%op(w2, gs_op_add)
654  call c_xh%gs_h%op(w3, gs_op_add)
655  call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
656 
657 #else
658  call neko_error('No device backend configured')
659 #endif
660 
661  end subroutine opr_device_curl
662 
663  function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
664  type(space_t) :: xh
665  type(coef_t) :: coef
666  integer :: nelv, gdim
667  real(kind=rp) :: dt
668  real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: u, v, w
669  real(kind=rp) :: cfl
670  type(c_ptr) :: u_d, v_d, w_d
671 
672  u_d = device_get_ptr(u)
673  v_d = device_get_ptr(v)
674  w_d = device_get_ptr(w)
675 
676 #ifdef HAVE_HIP
677  cfl = hip_cfl(dt, u_d, v_d, w_d, &
678  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
679  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
680  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
681  xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
682  coef%jacinv_d, nelv, xh%lx)
683 #elif HAVE_CUDA
684  cfl = cuda_cfl(dt, u_d, v_d, w_d, &
685  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
686  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
687  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
688  xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
689  coef%jacinv_d, nelv, xh%lx)
690 #elif HAVE_OPENCL
691  cfl = opencl_cfl(dt, u_d, v_d, w_d, &
692  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
693  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
694  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
695  xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
696  coef%jacinv_d, nelv, xh%lx)
697 #else
698  cfl = 0.0_rp
699  call neko_error('No device backend configured')
700 #endif
701  end function opr_device_cfl
702 
703 end module opr_device
704 
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
Defines a field.
Definition: field.f90:34
Gather-scatter.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition: lambda2.f90:37
Defines a mesh.
Definition: mesh.f90:34
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Operators accelerator backends.
Definition: opr_device.F90:34
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
Definition: opr_device.F90:666
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
Definition: opr_device.F90:433
subroutine, public opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
Definition: opr_device.F90:469
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
Definition: opr_device.F90:326
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_device.F90:516
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
Definition: opr_device.F90:361
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Definition: opr_device.F90:403
Defines a function space.
Definition: space.f90:34
Utilities.
Definition: utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *B, void *jac, int *nel, int *lx)
Definition: opr_cdtp.c:52
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *B, void *jac, int *nel, int *lx)
Definition: opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition: opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition: opr_cfl.cu:54
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition: opr_conv1.c:52
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition: opr_conv1.cu:60
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition: opr_dudxyz.c:52
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition: opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition: opr_lambda2.c:52
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition: opr_lambda2.cu:59
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition: opr_opgrad.c:53
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition: opr_opgrad.cu:59
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:54
The function space for the SEM solution fields.
Definition: space.f90:62