Neko  0.9.99
A portable framework for high-order spectral element flow simulations
opr_device.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2022, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module opr_device
35  use gather_scatter, only : gs_op_add
36  use num_types, only : rp, c_rp
37  use device, only : device_get_ptr
38  use space, only : space_t
39  use coefs, only : coef_t
40  use field, only : field_t
41  use utils, only : neko_error
43  use device_mathops, only : device_opcolv
44  use comm
45  use, intrinsic :: iso_c_binding
46  implicit none
47  private
48 
51 
52 #ifdef HAVE_HIP
53  interface
54  subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
55  dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
56  bind(c, name = 'hip_dudxyz')
57  use, intrinsic :: iso_c_binding
58  type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
59  type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
60  integer(c_int) :: nel, lx
61  end subroutine hip_dudxyz
62  end interface
63 
64  interface
65  subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
66  dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
67  bind(c, name = 'hip_cdtp')
68  use, intrinsic :: iso_c_binding
69  type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
70  type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
71  integer(c_int) :: nel, lx
72  end subroutine hip_cdtp
73  end interface
74 
75  interface
76  subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
77  dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
78  drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
79  jacinv_d, nel, gdim, lx) &
80  bind(c, name = 'hip_conv1')
81  use, intrinsic :: iso_c_binding
82  type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
83  type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
84  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
85  type(c_ptr), value :: jacinv_d
86  integer(c_int) :: nel, gdim, lx
87  end subroutine hip_conv1
88  end interface
89 
90  interface
91  subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
92  dx_d, dy_d, dz_d, &
93  drdx_d, dsdx_d, dtdx_d, &
94  drdy_d, dsdy_d, dtdy_d, &
95  drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
96  bind(c, name = 'hip_opgrad')
97  use, intrinsic :: iso_c_binding
98  type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
99  type(c_ptr), value :: dx_d, dy_d, dz_d
100  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
101  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
102  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
103  type(c_ptr), value :: w3_d
104  integer(c_int) :: nel, lx
105  end subroutine hip_opgrad
106  end interface
107 
108  interface
109  subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
110  dx_d, dy_d, dz_d, &
111  drdx_d, dsdx_d, dtdx_d, &
112  drdy_d, dsdy_d, dtdy_d, &
113  drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
114  bind(c, name = 'hip_lambda2')
115  use, intrinsic :: iso_c_binding
116  type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
117  type(c_ptr), value :: dx_d, dy_d, dz_d
118  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
119  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
120  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
121  type(c_ptr), value :: jacinv_d
122  integer(c_int) :: nel, lx
123  end subroutine hip_lambda2
124  end interface
125 
126  interface
127  real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
128  drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
129  drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
130  jacinv_d, nel, lx) &
131  bind(c, name = 'hip_cfl')
132  use, intrinsic :: iso_c_binding
133  import c_rp
134  type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
135  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
136  type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
137  real(c_rp) :: dt
138  integer(c_int) :: nel, lx
139  end function hip_cfl
140  end interface
141 #elif HAVE_CUDA
142  interface
143  subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
144  dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
145  bind(c, name = 'cuda_dudxyz')
146  use, intrinsic :: iso_c_binding
147  type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
148  type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
149  integer(c_int) :: nel, lx
150  end subroutine cuda_dudxyz
151  end interface
152 
153  interface
154  subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
155  dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
156  bind(c, name = 'cuda_cdtp')
157  use, intrinsic :: iso_c_binding
158  type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
159  type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
160  integer(c_int) :: nel, lx
161  end subroutine cuda_cdtp
162  end interface
163 
164  interface
165  subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
166  dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
167  drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
168  jacinv_d, nel, gdim, lx) &
169  bind(c, name = 'cuda_conv1')
170  use, intrinsic :: iso_c_binding
171  type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
172  type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
173  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
174  type(c_ptr), value :: jacinv_d
175  integer(c_int) :: nel, gdim, lx
176  end subroutine cuda_conv1
177  end interface
178 
179  interface
180  subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
181  dx_d, dy_d, dz_d, &
182  drdx_d, dsdx_d, dtdx_d, &
183  drdy_d, dsdy_d, dtdy_d, &
184  drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
185  bind(c, name = 'cuda_opgrad')
186  use, intrinsic :: iso_c_binding
187  type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
188  type(c_ptr), value :: dx_d, dy_d, dz_d
189  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
190  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
191  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
192  type(c_ptr), value :: w3_d
193  integer(c_int) :: nel, lx
194  end subroutine cuda_opgrad
195  end interface
196 
197  interface
198  subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
199  dx_d, dy_d, dz_d, &
200  drdx_d, dsdx_d, dtdx_d, &
201  drdy_d, dsdy_d, dtdy_d, &
202  drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
203  bind(c, name = 'cuda_lambda2')
204  use, intrinsic :: iso_c_binding
205  type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
206  type(c_ptr), value :: dx_d, dy_d, dz_d
207  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
208  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
209  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
210  type(c_ptr), value :: jacinv_d
211  integer(c_int) :: nel, lx
212  end subroutine cuda_lambda2
213  end interface
214 
215 
216  interface
217  real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
218  drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
219  drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
220  jacinv_d, nel, lx) &
221  bind(c, name = 'cuda_cfl')
222  use, intrinsic :: iso_c_binding
223  import c_rp
224  type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
225  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
226  type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
227  real(c_rp) :: dt
228  integer(c_int) :: nel, lx
229  end function cuda_cfl
230  end interface
231 #elif HAVE_OPENCL
232  interface
233  subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
234  dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
235  bind(c, name = 'opencl_dudxyz')
236  use, intrinsic :: iso_c_binding
237  type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
238  type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
239  integer(c_int) :: nel, lx
240  end subroutine opencl_dudxyz
241  end interface
242 
243  interface
244  subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
245  dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
246  bind(c, name = 'opencl_cdtp')
247  use, intrinsic :: iso_c_binding
248  type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
249  type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
250  integer(c_int) :: nel, lx
251  end subroutine opencl_cdtp
252  end interface
253 
254  interface
255  subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
256  dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
257  drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
258  jacinv_d, nel, gdim, lx) &
259  bind(c, name = 'opencl_conv1')
260  use, intrinsic :: iso_c_binding
261  type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
262  type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
263  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
264  type(c_ptr), value :: jacinv_d
265  integer(c_int) :: nel, gdim, lx
266  end subroutine opencl_conv1
267  end interface
268 
269  interface
270  subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
271  dx_d, dy_d, dz_d, &
272  drdx_d, dsdx_d, dtdx_d, &
273  drdy_d, dsdy_d, dtdy_d, &
274  drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
275  bind(c, name = 'opencl_opgrad')
276  use, intrinsic :: iso_c_binding
277  type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
278  type(c_ptr), value :: dx_d, dy_d, dz_d
279  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
280  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
281  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
282  type(c_ptr), value :: w3_d
283  integer(c_int) :: nel, lx
284  end subroutine opencl_opgrad
285  end interface
286 
287  interface
288  real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
289  drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
290  drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
291  jacinv_d, nel, lx) &
292  bind(c, name = 'opencl_cfl')
293  use, intrinsic :: iso_c_binding
294  import c_rp
295  type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
296  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
297  type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
298  real(c_rp) :: dt
299  integer(c_int) :: nel, lx
300  end function opencl_cfl
301  end interface
302 
303  interface
304  subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
305  dx_d, dy_d, dz_d, &
306  drdx_d, dsdx_d, dtdx_d, &
307  drdy_d, dsdy_d, dtdy_d, &
308  drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
309  bind(c, name = 'opencl_lambda2')
310  use, intrinsic :: iso_c_binding
311  type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
312  type(c_ptr), value :: dx_d, dy_d, dz_d
313  type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
314  type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
315  type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
316  type(c_ptr), value :: jacinv_d
317  integer(c_int) :: nel, lx
318  end subroutine opencl_lambda2
319  end interface
320 #endif
321 
322 contains
323 
324  subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
325  type(coef_t), intent(in), target :: coef
326  real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(inout) :: du
327  real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(in) :: u, dr, ds, dt
328  type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
329 
330  du_d = device_get_ptr(du)
331  u_d = device_get_ptr(u)
332 
333  dr_d = device_get_ptr(dr)
334  ds_d = device_get_ptr(ds)
335  dt_d = device_get_ptr(dt)
336 
337  associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
338 #ifdef HAVE_HIP
339  call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
340  xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
341  msh%nelv, xh%lx)
342 #elif HAVE_CUDA
343  call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
344  xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
345  msh%nelv, xh%lx)
346 #elif HAVE_OPENCL
347  call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
348  xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
349  msh%nelv, xh%lx)
350 #else
351  call neko_error('No device backend configured')
352 #endif
353  end associate
354 
355  end subroutine opr_device_dudxyz
356 
357  subroutine opr_device_opgrad(ux, uy, uz, u, coef)
358  type(coef_t), intent(in) :: coef
359  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
360  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
361  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
362  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
363  type(c_ptr) :: ux_d, uy_d, uz_d, u_d
364 
365  ux_d = device_get_ptr(ux)
366  uy_d = device_get_ptr(uy)
367  uz_d = device_get_ptr(uz)
368 
369  u_d = device_get_ptr(u)
370 
371  associate(xh => coef%Xh, msh => coef%msh)
372 #ifdef HAVE_HIP
373  call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
374  xh%dx_d, xh%dy_d, xh%dz_d, &
375  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
376  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
377  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
378  xh%w3_d, msh%nelv, xh%lx)
379 #elif HAVE_CUDA
380  call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
381  xh%dx_d, xh%dy_d, xh%dz_d, &
382  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
383  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
384  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
385  xh%w3_d, msh%nelv, xh%lx)
386 #elif HAVE_OPENCL
387  call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
388  xh%dx_d, xh%dy_d, xh%dz_d, &
389  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
390  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
391  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
392  xh%w3_d, msh%nelv, xh%lx)
393 #else
394  call neko_error('No device backend configured')
395 #endif
396  end associate
397 
398  end subroutine opr_device_opgrad
399  subroutine opr_device_lambda2(lambda2, u, v, w, coef)
400  type(coef_t), intent(in) :: coef
401  type(field_t), intent(inout) :: lambda2
402  type(field_t), intent(in) :: u, v, w
403 #ifdef HAVE_HIP
404  call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
405  coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
406  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
407  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
408  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
409  coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
410 #elif HAVE_CUDA
411  call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
412  coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
413  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
414  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
415  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
416  coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
417 #elif HAVE_OPENCL
418  call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
419  coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
420  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
421  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
422  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
423  coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
424 #else
425  call neko_error('No device backend configured')
426 #endif
427  end subroutine opr_device_lambda2
428 
429  subroutine opr_device_cdtp(dtx, x, dr, ds, dt, coef)
430  type(coef_t), intent(in) :: coef
431  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
432  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
433  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
434  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
435  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
436  type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
437 
438  dtx_d = device_get_ptr(dtx)
439  x_d = device_get_ptr(x)
440 
441  dr_d = device_get_ptr(dr)
442  ds_d = device_get_ptr(ds)
443  dt_d = device_get_ptr(dt)
444 
445  associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
446 #ifdef HAVE_HIP
447  call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
448  xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
449  msh%nelv, xh%lx)
450 #elif HAVE_CUDA
451  call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
452  xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
453  msh%nelv, xh%lx)
454 #elif HAVE_OPENCL
455  call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
456  xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
457  msh%nelv, xh%lx)
458 #else
459  call neko_error('No device backend configured')
460 #endif
461  end associate
462 
463  end subroutine opr_device_cdtp
464 
465  subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
466  type(space_t), intent(in) :: xh
467  type(coef_t), intent(in) :: coef
468  integer, intent(in) :: nelv, gdim
469  real(kind=rp), intent(inout) :: du(xh%lxyz, nelv)
470  real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
471  real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
472  real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
473  real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
474  type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
475 
476  du_d = device_get_ptr(du)
477  u_d = device_get_ptr(u)
478 
479  vx_d = device_get_ptr(vx)
480  vy_d = device_get_ptr(vy)
481  vz_d = device_get_ptr(vz)
482 
483  associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
484 #ifdef HAVE_HIP
485  call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
486  xh%dx_d, xh%dy_d, xh%dz_d, &
487  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
488  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
489  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
490  coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
491 #elif HAVE_CUDA
492  call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
493  xh%dx_d, xh%dy_d, xh%dz_d, &
494  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
495  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
496  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
497  coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
498 #elif HAVE_OPENCL
499  call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
500  xh%dx_d, xh%dy_d, xh%dz_d, &
501  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
502  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
503  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
504  coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
505 #else
506  call neko_error('No device backend configured')
507 #endif
508  end associate
509 
510  end subroutine opr_device_conv1
511 
512  subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
513  type(field_t), intent(inout) :: w1
514  type(field_t), intent(inout) :: w2
515  type(field_t), intent(inout) :: w3
516  type(field_t), intent(inout) :: u1
517  type(field_t), intent(inout) :: u2
518  type(field_t), intent(inout) :: u3
519  type(field_t), intent(inout) :: work1
520  type(field_t), intent(inout) :: work2
521  type(coef_t), intent(in) :: c_xh
522  integer :: gdim, n, nelv
523 
524  n = w1%dof%size()
525  gdim = c_xh%msh%gdim
526  nelv = c_xh%msh%nelv
527 
528  ! this%work1=dw/dy ; this%work2=dv/dz
529 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
530 #ifdef HAVE_HIP
531  call hip_dudxyz(work1%x_d, u3%x_d, &
532  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
533  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
534  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
535 #elif HAVE_CUDA
536  call cuda_dudxyz(work1%x_d, u3%x_d, &
537  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
538  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
539  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
540 #elif HAVE_OPENCL
541  call opencl_dudxyz(work1%x_d, u3%x_d, &
542  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
543  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
544  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
545 #endif
546  if (gdim .eq. 3) then
547 #ifdef HAVE_HIP
548  call hip_dudxyz(work2%x_d, u2%x_d, &
549  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
550  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
551  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
552 #elif HAVE_CUDA
553  call cuda_dudxyz(work2%x_d, u2%x_d, &
554  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
555  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
556  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
557 #elif HAVE_OPENCL
558  call opencl_dudxyz(work2%x_d, u2%x_d, &
559  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
560  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
561  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
562 #endif
563  call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
564  else
565  call device_copy(w1%x_d, work1%x_d, n)
566  endif
567  ! this%work1=du/dz ; this%work2=dw/dx
568  if (gdim .eq. 3) then
569 #ifdef HAVE_HIP
570  call hip_dudxyz(work1%x_d, u1%x_d, &
571  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
572  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
573  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
574  call hip_dudxyz(work2%x_d, u3%x_d, &
575  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
576  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
577  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
578 #elif HAVE_CUDA
579  call cuda_dudxyz(work1%x_d, u1%x_d, &
580  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
581  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
582  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
583  call cuda_dudxyz(work2%x_d, u3%x_d, &
584  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
585  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
586  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
587 #elif HAVE_OPENCL
588  call opencl_dudxyz(work1%x_d, u1%x_d, &
589  c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
590  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
591  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
592  call opencl_dudxyz(work2%x_d, u3%x_d, &
593  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
594  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
595  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
596 #endif
597  call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
598  else
599  call device_rzero (work1%x_d, n)
600 #ifdef HAVE_HIP
601  call hip_dudxyz(work2%x_d, u3%x_d, &
602  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
603  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
604  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
605 #elif HAVE_CUDA
606  call cuda_dudxyz(work2%x_d, u3%x_d, &
607  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
608  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
609  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
610 #elif HAVE_OPENCL
611  call opencl_dudxyz(work2%x_d, u3%x_d, &
612  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
613  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
614  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
615 #endif
616  call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
617  endif
618  ! this%work1=dv/dx ; this%work2=du/dy
619 #ifdef HAVE_HIP
620  call hip_dudxyz(work1%x_d, u2%x_d, &
621  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
622  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
623  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
624  call hip_dudxyz(work2%x_d, u1%x_d, &
625  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
626  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
627  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
628 #elif HAVE_CUDA
629  call cuda_dudxyz(work1%x_d, u2%x_d, &
630  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
631  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
632  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
633  call cuda_dudxyz(work2%x_d, u1%x_d, &
634  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
635  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
636  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
637 #elif HAVE_OPENCL
638  call opencl_dudxyz(work1%x_d, u2%x_d, &
639  c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
640  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
641  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
642  call opencl_dudxyz(work2%x_d, u1%x_d, &
643  c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
644  c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
645  c_xh%jacinv_d, nelv, c_xh%Xh%lx)
646 #endif
647  call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
648  !! BC dependent, Needs to change if cyclic
649 
650  call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
651  call c_xh%gs_h%op(w1, gs_op_add)
652  call c_xh%gs_h%op(w2, gs_op_add)
653  call c_xh%gs_h%op(w3, gs_op_add)
654  call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
655 
656 #else
657  call neko_error('No device backend configured')
658 #endif
659 
660  end subroutine opr_device_curl
661 
662  function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
663  type(space_t) :: xh
664  type(coef_t) :: coef
665  integer :: nelv, gdim
666  real(kind=rp) :: dt
667  real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
668  real(kind=rp) :: cfl
669  type(c_ptr) :: u_d, v_d, w_d
670 
671  u_d = device_get_ptr(u)
672  v_d = device_get_ptr(v)
673  w_d = device_get_ptr(w)
674 
675 #ifdef HAVE_HIP
676  cfl = hip_cfl(dt, u_d, v_d, w_d, &
677  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
678  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
679  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
680  xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
681  coef%jacinv_d, nelv, xh%lx)
682 #elif HAVE_CUDA
683  cfl = cuda_cfl(dt, u_d, v_d, w_d, &
684  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
685  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
686  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
687  xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
688  coef%jacinv_d, nelv, xh%lx)
689 #elif HAVE_OPENCL
690  cfl = opencl_cfl(dt, u_d, v_d, w_d, &
691  coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
692  coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
693  coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
694  xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
695  coef%jacinv_d, nelv, xh%lx)
696 #else
697  cfl = 0.0_rp
698  call neko_error('No device backend configured')
699 #endif
700  end function opr_device_cfl
701 
702 end module opr_device
703 
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Definition: device_math.F90:76
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
Defines a field.
Definition: field.f90:34
Gather-scatter.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition: lambda2.f90:37
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Operators accelerator backends.
Definition: opr_device.F90:34
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
Definition: opr_device.F90:665
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
Definition: opr_device.F90:432
subroutine, public opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
Definition: opr_device.F90:468
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
Definition: opr_device.F90:325
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_device.F90:515
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
Definition: opr_device.F90:360
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Definition: opr_device.F90:402
Defines a function space.
Definition: space.f90:34
Utilities.
Definition: utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition: opr_cdtp.c:52
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition: opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition: opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition: opr_cfl.cu:54
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition: opr_conv1.c:52
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition: opr_conv1.cu:60
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition: opr_dudxyz.c:52
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition: opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition: opr_lambda2.c:52
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition: opr_lambda2.cu:59
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition: opr_opgrad.c:53
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition: opr_opgrad.cu:59
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
The function space for the SEM solution fields.
Definition: space.f90:62