Neko  0.9.99
A portable framework for high-order spectral element flow simulations
pnpn_res_device.F90
Go to the documentation of this file.
1 ! Copyright (c) 2022-2023, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34  use gather_scatter, only : gs_t, gs_op_add
35  use operators
36  use field, only : field_t
37  use ax_product, only : ax_t
38  use coefs, only : coef_t
39  use facet_normal, only : facet_normal_t
40  use mesh, only : mesh_t
41  use num_types, only : rp, c_rp
42  use space, only : space_t
43  use device_math
44  use device_mathops
46  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
48  implicit none
49  private
50 
51  type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_device_t
52  contains
53  procedure, nopass :: compute => pnpn_prs_res_device_compute
54  end type pnpn_prs_res_device_t
55 
56  type, public, extends(pnpn_vel_res_t) :: pnpn_vel_res_device_t
57  contains
58  procedure, nopass :: compute => pnpn_vel_res_device_compute
59  end type pnpn_vel_res_device_t
60 
61 #ifdef HAVE_HIP
62  interface
63  subroutine pnpn_prs_res_part1_hip(ta1_d, ta2_d, ta3_d, &
64  wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
65  B_d, h1_d, mu, rho, n) &
66  bind(c, name = 'pnpn_prs_res_part1_hip')
67  use, intrinsic :: iso_c_binding
68  import c_rp
69  implicit none
70  type(c_ptr), value :: ta1_d, ta2_d, ta3_d
71  type(c_ptr), value :: wa1_d, wa2_d, wa3_d
72  type(c_ptr), value :: f_u_d, f_v_d, f_w_d
73  type(c_ptr), value :: B_d, h1_d
74  real(c_rp) :: mu, rho
75  integer(c_int) :: n
76  end subroutine pnpn_prs_res_part1_hip
77  end interface
78 
79  interface
80  subroutine pnpn_prs_res_part2_hip(p_res_d, wa1_d, wa2_d, wa3_d, n) &
81  bind(c, name = 'pnpn_prs_res_part2_hip')
82  use, intrinsic :: iso_c_binding
83  implicit none
84  type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
85  integer(c_int) :: n
86  end subroutine pnpn_prs_res_part2_hip
87  end interface
88 
89  interface
90  subroutine pnpn_prs_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
91  bind(c, name = 'pnpn_prs_res_part3_hip')
92  use, intrinsic :: iso_c_binding
93  import c_rp
94  implicit none
95  type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
96  real(c_rp) :: dtbd
97  integer(c_int) :: n
98  end subroutine pnpn_prs_res_part3_hip
99  end interface
100 
101  interface
102  subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, &
103  ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
104  bind(c, name = 'pnpn_vel_res_update_hip')
105  use, intrinsic :: iso_c_binding
106  implicit none
107  type(c_ptr), value :: u_res_d, v_res_d, w_res_d
108  type(c_ptr), value :: ta1_d, ta2_d, ta3_d
109  type(c_ptr), value :: f_u_d, f_v_d, f_w_d
110  integer(c_int) :: n
111  end subroutine pnpn_vel_res_update_hip
112  end interface
113 #elif HAVE_CUDA
114  interface
115  subroutine pnpn_prs_res_part1_cuda(ta1_d, ta2_d, ta3_d, &
116  wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
117  B_d, h1_d, mu, rho, n) &
118  bind(c, name = 'pnpn_prs_res_part1_cuda')
119  use, intrinsic :: iso_c_binding
120  import c_rp
121  implicit none
122  type(c_ptr), value :: ta1_d, ta2_d, ta3_d
123  type(c_ptr), value :: wa1_d, wa2_d, wa3_d
124  type(c_ptr), value :: f_u_d, f_v_d, f_w_d
125  type(c_ptr), value :: B_d, h1_d
126  real(c_rp) :: mu, rho
127  integer(c_int) :: n
128  end subroutine pnpn_prs_res_part1_cuda
129  end interface
130 
131  interface
132  subroutine pnpn_prs_res_part2_cuda(p_res_d, wa1_d, wa2_d, wa3_d, n) &
133  bind(c, name = 'pnpn_prs_res_part2_cuda')
134  use, intrinsic :: iso_c_binding
135  implicit none
136  type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
137  integer(c_int) :: n
138  end subroutine pnpn_prs_res_part2_cuda
139  end interface
140 
141  interface
142  subroutine pnpn_prs_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
143  bind(c, name = 'pnpn_prs_res_part3_cuda')
144  use, intrinsic :: iso_c_binding
145  import c_rp
146  implicit none
147  type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
148  real(c_rp) :: dtbd
149  integer(c_int) :: n
150  end subroutine pnpn_prs_res_part3_cuda
151  end interface
152 
153  interface
154  subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, &
155  ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
156  bind(c, name = 'pnpn_vel_res_update_cuda')
157  use, intrinsic :: iso_c_binding
158  implicit none
159  type(c_ptr), value :: u_res_d, v_res_d, w_res_d
160  type(c_ptr), value :: ta1_d, ta2_d, ta3_d
161  type(c_ptr), value :: f_u_d, f_v_d, f_w_d
162  integer(c_int) :: n
163  end subroutine pnpn_vel_res_update_cuda
164  end interface
165 #elif HAVE_OPENCL
166  interface
167  subroutine pnpn_prs_res_part1_opencl(ta1_d, ta2_d, ta3_d, &
168  wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
169  B_d, h1_d, mu, rho, n) &
170  bind(c, name = 'pnpn_prs_res_part1_opencl')
171  use, intrinsic :: iso_c_binding
172  import c_rp
173  implicit none
174  type(c_ptr), value :: ta1_d, ta2_d, ta3_d
175  type(c_ptr), value :: wa1_d, wa2_d, wa3_d
176  type(c_ptr), value :: f_u_d, f_v_d, f_w_d
177  type(c_ptr), value :: B_d, h1_d
178  real(c_rp) :: mu, rho
179  integer(c_int) :: n
180  end subroutine pnpn_prs_res_part1_opencl
181  end interface
182 
183  interface
184  subroutine pnpn_prs_res_part2_opencl(p_res_d, wa1_d, wa2_d, wa3_d, n) &
185  bind(c, name = 'pnpn_prs_res_part2_opencl')
186  use, intrinsic :: iso_c_binding
187  implicit none
188  type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
189  integer(c_int) :: n
190  end subroutine pnpn_prs_res_part2_opencl
191  end interface
192 
193  interface
194  subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, &
195  n) bind(c, name = 'pnpn_prs_res_part3_opencl')
196  use, intrinsic :: iso_c_binding
197  import c_rp
198  implicit none
199  type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
200  real(c_rp) :: dtbd
201  integer(c_int) :: n
202  end subroutine pnpn_prs_res_part3_opencl
203  end interface
204 
205  interface
206  subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, &
207  ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
208  bind(c, name = 'pnpn_vel_res_update_opencl')
209  use, intrinsic :: iso_c_binding
210  implicit none
211  type(c_ptr), value :: u_res_d, v_res_d, w_res_d
212  type(c_ptr), value :: ta1_d, ta2_d, ta3_d
213  type(c_ptr), value :: f_u_d, f_v_d, f_w_d
214  integer(c_int) :: n
215  end subroutine pnpn_vel_res_update_opencl
216  end interface
217 #endif
218 
219 
220 contains
221 
222  subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
223  f_x, f_y, f_z, c_Xh, gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd, dt,&
224  mu, rho)
225  type(field_t), intent(inout) :: p, u, v, w
226  type(field_t), intent(inout) :: u_e, v_e, w_e
227  type(field_t), intent(inout) :: p_res
228  type(field_t), intent(inout) :: f_x, f_y, f_z
229  type(coef_t), intent(inout) :: c_Xh
230  type(gs_t), intent(inout) :: gs_Xh
231  type(facet_normal_t), intent(inout) :: bc_prs_surface
232  type(facet_normal_t), intent(inout) :: bc_sym_surface
233  class(ax_t), intent(inout) :: Ax
234  real(kind=rp), intent(inout) :: bd
235  real(kind=rp), intent(in) :: dt
236  type(field_t), intent(in) :: mu
237  type(field_t), intent(in) :: rho
238  real(kind=rp) :: dtbd
239  real(kind=rp) :: mu_val, rho_val
240  integer :: n, gdim
241  type(field_t), pointer :: ta1, ta2, ta3, wa1, wa2, wa3, work1, work2
242  integer :: temp_indices(8)
243 
244 
245  ! We assume the material properties are constant
246  mu_val = mu%x(1,1,1,1)
247  rho_val = rho%x(1,1,1,1)
248 
249  call neko_scratch_registry%request_field(ta1, temp_indices(1))
250  call neko_scratch_registry%request_field(ta2, temp_indices(2))
251  call neko_scratch_registry%request_field(ta3, temp_indices(3))
252  call neko_scratch_registry%request_field(wa1, temp_indices(4))
253  call neko_scratch_registry%request_field(wa2, temp_indices(5))
254  call neko_scratch_registry%request_field(wa3, temp_indices(6))
255  call neko_scratch_registry%request_field(work1, temp_indices(7))
256  call neko_scratch_registry%request_field(work2, temp_indices(8))
257 
258  n = u%dof%size()
259  gdim = c_xh%msh%gdim
260 
261  call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_xh)
262  call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_xh)
263 
264 
265 #ifdef HAVE_HIP
266  call pnpn_prs_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, &
267  wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
268  c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
269 
270 #elif HAVE_CUDA
271  call pnpn_prs_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, &
272  wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
273  c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
274 #elif HAVE_OPENCL
275  call pnpn_prs_res_part1_opencl(ta1%x_d, ta2%x_d, ta3%x_d, &
276  wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_z%x_d, f_z%x_d, &
277  c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
278 #endif
279  c_xh%ifh2 = .false.
280  call device_cfill(c_xh%h1_d, 1.0_rp / rho_val, n)
281 
282  call gs_xh%op(ta1, gs_op_add)
283  call gs_xh%op(ta2, gs_op_add)
284  call gs_xh%op(ta3, gs_op_add)
285 
286  call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_xh%Binv_d, gdim, n)
287 
288  call cdtp(wa1%x, ta1%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
289  call cdtp(wa2%x, ta2%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
290  call cdtp(wa3%x, ta3%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
291 
292  call ax%compute(p_res%x, p%x, c_xh, p%msh, p%Xh)
293 
294 #ifdef HAVE_HIP
295  call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
296 #elif HAVE_CUDA
297  call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
298 #elif HAVE_OPENCL
299  call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
300 #endif
301 
302  !
303  ! Surface velocity terms
304  call device_rzero(wa1%x_d, n)
305  call device_rzero(wa2%x_d, n)
306  call device_rzero(wa3%x_d, n)
307  dtbd = 1.0_rp
308 
309  call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, ta1%x_d, &
310  ta2%x_d, ta3%x_d)
311 
312 #ifdef HAVE_HIP
313  call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
314 #elif HAVE_CUDA
315  call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
316 #elif HAVE_OPENCL
317  call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, &
318  n)
319 #endif
320  !
321  dtbd = bd / dt
322 
323  call device_rzero(ta1%x_d, n)
324  call device_rzero(ta2%x_d, n)
325  call device_rzero(ta3%x_d, n)
326 
327  call bc_prs_surface%apply_surfvec_dev(ta1%x_d, ta2%x_d, ta3%x_d, &
328  u%x_d, v%x_d, w%x_d)
329 
330 #ifdef HAVE_HIP
331  call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
332 #elif HAVE_CUDA
333  call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
334 #elif HAVE_OPENCL
335  call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd,&
336  n)
337 #endif
338 
339  call neko_scratch_registry%relinquish_field(temp_indices)
340 
341  end subroutine pnpn_prs_res_device_compute
342 
343  subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, &
344  p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
345  class(ax_t), intent(in) :: Ax
346  type(mesh_t), intent(inout) :: msh
347  type(space_t), intent(inout) :: Xh
348  type(field_t), intent(inout) :: p, u, v, w
349  type(field_t), intent(inout) :: u_res, v_res, w_res
350  type(field_t), intent(inout) :: f_x, f_y, f_z
351  type(coef_t), intent(inout) :: c_Xh
352  type(field_t), intent(in) :: mu
353  type(field_t), intent(in) :: rho
354  real(kind=rp), intent(in) :: bd
355  real(kind=rp), intent(in) :: dt
356  integer, intent(in) :: n
357  integer :: temp_indices(3)
358  type(field_t), pointer :: ta1, ta2, ta3
359  real(kind=rp) :: mu_val, rho_val
360 
361  ! We assume the material properties are constant
362  mu_val = mu%x(1,1,1,1)
363  rho_val = rho%x(1,1,1,1)
364 
365  call device_cfill(c_xh%h1_d, mu_val, n)
366  call device_cfill(c_xh%h2_d, rho_val * (bd / dt), n)
367  c_xh%ifh2 = .true.
368 
369  call ax%compute_vector(u_res%x, v_res%x, w_res%x, &
370  u%x, v%x, w%x, c_xh, msh, xh)
371 
372  call neko_scratch_registry%request_field(ta1, temp_indices(1))
373  call neko_scratch_registry%request_field(ta2, temp_indices(2))
374  call neko_scratch_registry%request_field(ta3, temp_indices(3))
375 
376  call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_xh)
377 
378 #ifdef HAVE_HIP
379  call pnpn_vel_res_update_hip(u_res%x_d, v_res%x_d, w_res%x_d, &
380  ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
381 #elif HAVE_CUDA
382  call pnpn_vel_res_update_cuda(u_res%x_d, v_res%x_d, w_res%x_d, &
383  ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
384 #elif HAVE_OPENCL
385  call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, &
386  ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
387 #endif
388 
389  call neko_scratch_registry%relinquish_field(temp_indices)
390  end subroutine pnpn_vel_res_device_compute
391 
392 end module pnpn_res_device
Defines a Matrix-vector product.
Definition: ax.f90:34
Coefficients.
Definition: coef.f90:34
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Dirichlet condition applied in the facet normal direction.
Defines a field.
Definition: field.f90:34
Gather-scatter.
Defines a mesh.
Definition: mesh.f90:34
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Operators.
Definition: operators.f90:34
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
Definition: operators.f90:171
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
Definition: operators.f90:362
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
Definition: operators.f90:230
subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_Xh, gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd, dt, mu, rho)
Defines Pressure and velocity residuals in the Pn-Pn formulation.
Definition: pnpn_res.f90:34
Defines a registry for storing and requesting temporary fields This can be used when you have a funct...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition: space.f90:34
void pnpn_prs_res_part3_opencl(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition: pnpn_res.c:108
void pnpn_prs_res_part1_opencl(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition: pnpn_res.c:44
void pnpn_prs_res_part2_opencl(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition: pnpn_res.c:82
void pnpn_vel_res_update_opencl(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition: pnpn_res.c:135
void pnpn_prs_res_part2_cuda(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition: pnpn_res.cu:63
void pnpn_prs_res_part3_cuda(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition: pnpn_res.cu:77
void pnpn_vel_res_update_cuda(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition: pnpn_res.cu:92
void pnpn_prs_res_part1_cuda(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition: pnpn_res.cu:43
Base type for a matrix-vector product providing .
Definition: ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Dirichlet condition in facet normal direction.
Abstract type to compute pressure residual.
Definition: pnpn_res.f90:47
Abstract type to compute velocity residual.
Definition: pnpn_res.f90:53
The function space for the SEM solution fields.
Definition: space.f90:62