Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pnpn_res_device.F90
Go to the documentation of this file.
1! Copyright (c) 2022-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use gather_scatter, only : gs_t, gs_op_add
35 use operators
36 use field, only : field_t
37 use ax_product, only : ax_t
38 use coefs, only : coef_t
40 use mesh, only : mesh_t
41 use num_types, only : rp, c_rp
42 use space, only : space_t
43 use device_math
46 use device, only : device_event_sync
47 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
49 implicit none
50 private
51
52 type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_device_t
53 contains
54 procedure, nopass :: compute => pnpn_prs_res_device_compute
56
57 type, public, extends(pnpn_vel_res_t) :: pnpn_vel_res_device_t
58 contains
59 procedure, nopass :: compute => pnpn_vel_res_device_compute
61
62#ifdef HAVE_HIP
63 interface
64 subroutine pnpn_prs_res_part1_hip(ta1_d, ta2_d, ta3_d, &
65 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
66 B_d, h1_d, mu, rho, n) &
67 bind(c, name = 'pnpn_prs_res_part1_hip')
68 use, intrinsic :: iso_c_binding
69 import c_rp
70 implicit none
71 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
72 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
73 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
74 type(c_ptr), value :: B_d, h1_d
75 real(c_rp) :: mu, rho
76 integer(c_int) :: n
77 end subroutine pnpn_prs_res_part1_hip
78 end interface
79
80 interface
81 subroutine pnpn_prs_res_part2_hip(p_res_d, wa1_d, wa2_d, wa3_d, n) &
82 bind(c, name = 'pnpn_prs_res_part2_hip')
83 use, intrinsic :: iso_c_binding
84 implicit none
85 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
86 integer(c_int) :: n
87 end subroutine pnpn_prs_res_part2_hip
88 end interface
89
90 interface
91 subroutine pnpn_prs_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
92 bind(c, name = 'pnpn_prs_res_part3_hip')
93 use, intrinsic :: iso_c_binding
94 import c_rp
95 implicit none
96 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
97 real(c_rp) :: dtbd
98 integer(c_int) :: n
99 end subroutine pnpn_prs_res_part3_hip
100 end interface
101
102 interface
103 subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, &
104 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
105 bind(c, name = 'pnpn_vel_res_update_hip')
106 use, intrinsic :: iso_c_binding
107 implicit none
108 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
109 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
110 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
111 integer(c_int) :: n
112 end subroutine pnpn_vel_res_update_hip
113 end interface
114#elif HAVE_CUDA
115 interface
116 subroutine pnpn_prs_res_part1_cuda(ta1_d, ta2_d, ta3_d, &
117 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
118 B_d, h1_d, mu, rho, n) &
119 bind(c, name = 'pnpn_prs_res_part1_cuda')
120 use, intrinsic :: iso_c_binding
121 import c_rp
122 implicit none
123 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
124 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
125 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
126 type(c_ptr), value :: B_d, h1_d
127 real(c_rp) :: mu, rho
128 integer(c_int) :: n
129 end subroutine pnpn_prs_res_part1_cuda
130 end interface
131
132 interface
133 subroutine pnpn_prs_res_part2_cuda(p_res_d, wa1_d, wa2_d, wa3_d, n) &
134 bind(c, name = 'pnpn_prs_res_part2_cuda')
135 use, intrinsic :: iso_c_binding
136 implicit none
137 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
138 integer(c_int) :: n
139 end subroutine pnpn_prs_res_part2_cuda
140 end interface
141
142 interface
143 subroutine pnpn_prs_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
144 bind(c, name = 'pnpn_prs_res_part3_cuda')
145 use, intrinsic :: iso_c_binding
146 import c_rp
147 implicit none
148 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
149 real(c_rp) :: dtbd
150 integer(c_int) :: n
151 end subroutine pnpn_prs_res_part3_cuda
152 end interface
153
154 interface
155 subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, &
156 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
157 bind(c, name = 'pnpn_vel_res_update_cuda')
158 use, intrinsic :: iso_c_binding
159 implicit none
160 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
161 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
162 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
163 integer(c_int) :: n
164 end subroutine pnpn_vel_res_update_cuda
165 end interface
166#elif HAVE_OPENCL
167 interface
168 subroutine pnpn_prs_res_part1_opencl(ta1_d, ta2_d, ta3_d, &
169 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
170 B_d, h1_d, mu, rho, n) &
171 bind(c, name = 'pnpn_prs_res_part1_opencl')
172 use, intrinsic :: iso_c_binding
173 import c_rp
174 implicit none
175 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
176 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
177 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
178 type(c_ptr), value :: B_d, h1_d
179 real(c_rp) :: mu, rho
180 integer(c_int) :: n
181 end subroutine pnpn_prs_res_part1_opencl
182 end interface
183
184 interface
185 subroutine pnpn_prs_res_part2_opencl(p_res_d, wa1_d, wa2_d, wa3_d, n) &
186 bind(c, name = 'pnpn_prs_res_part2_opencl')
187 use, intrinsic :: iso_c_binding
188 implicit none
189 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
190 integer(c_int) :: n
191 end subroutine pnpn_prs_res_part2_opencl
192 end interface
193
194 interface
195 subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, &
196 n) bind(c, name = 'pnpn_prs_res_part3_opencl')
197 use, intrinsic :: iso_c_binding
198 import c_rp
199 implicit none
200 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
201 real(c_rp) :: dtbd
202 integer(c_int) :: n
203 end subroutine pnpn_prs_res_part3_opencl
204 end interface
205
206 interface
207 subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, &
208 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
209 bind(c, name = 'pnpn_vel_res_update_opencl')
210 use, intrinsic :: iso_c_binding
211 implicit none
212 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
213 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
214 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
215 integer(c_int) :: n
216 end subroutine pnpn_vel_res_update_opencl
217 end interface
218#endif
219
220
221contains
222
223 subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
224 f_x, f_y, f_z, c_Xh, gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd, dt,&
225 mu, rho, event)
226 type(field_t), intent(inout) :: p, u, v, w
227 type(field_t), intent(in) :: u_e, v_e, w_e
228 type(field_t), intent(inout) :: p_res
229 type(field_t), intent(in) :: f_x, f_y, f_z
230 type(coef_t), intent(inout) :: c_Xh
231 type(gs_t), intent(inout) :: gs_Xh
232 type(facet_normal_t), intent(in) :: bc_prs_surface
233 type(facet_normal_t), intent(in) :: bc_sym_surface
234 class(ax_t), intent(inout) :: Ax
235 real(kind=rp), intent(in) :: bd
236 real(kind=rp), intent(in) :: dt
237 type(field_t), intent(in) :: mu
238 type(field_t), intent(in) :: rho
239 type(c_ptr), intent(inout) :: event
240 real(kind=rp) :: dtbd
241 real(kind=rp) :: mu_val, rho_val
242 integer :: n, gdim
243 type(field_t), pointer :: ta1, ta2, ta3, wa1, wa2, wa3, work1, work2
244 integer :: temp_indices(8)
245
246
247 ! We assume the material properties are constant
248 mu_val = mu%x(1,1,1,1)
249 rho_val = rho%x(1,1,1,1)
250
251 call neko_scratch_registry%request_field(ta1, temp_indices(1), .false.)
252 call neko_scratch_registry%request_field(ta2, temp_indices(2), .false.)
253 call neko_scratch_registry%request_field(ta3, temp_indices(3), .false.)
254 call neko_scratch_registry%request_field(wa1, temp_indices(4), .false.)
255 call neko_scratch_registry%request_field(wa2, temp_indices(5), .false.)
256 call neko_scratch_registry%request_field(wa3, temp_indices(6), .false.)
257 call neko_scratch_registry%request_field(work1, temp_indices(7), .false.)
258 call neko_scratch_registry%request_field(work2, temp_indices(8), .false.)
259
260 n = u%dof%size()
261 gdim = c_xh%msh%gdim
262
263 call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_xh, event)
264 call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_xh, event)
265
266
267#ifdef HAVE_HIP
268 call pnpn_prs_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, &
269 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
270 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
271
272#elif HAVE_CUDA
273 call pnpn_prs_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, &
274 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
275 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
276#elif HAVE_OPENCL
277 call pnpn_prs_res_part1_opencl(ta1%x_d, ta2%x_d, ta3%x_d, &
278 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
279 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
280#endif
281 c_xh%ifh2 = .false.
282 call device_cfill(c_xh%h1_d, 1.0_rp / rho_val, n)
283
284 call rotate_cyc(ta1%x, ta2%x, ta3%x, 1, c_xh)
285 call gs_xh%op(ta1, gs_op_add, event)
286 call device_event_sync(event)
287 call gs_xh%op(ta2, gs_op_add, event)
288 call device_event_sync(event)
289 call gs_xh%op(ta3, gs_op_add, event)
290 call device_event_sync(event)
291 call rotate_cyc(ta1%x, ta2%x, ta3%x, 0, c_xh)
292
293 call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_xh%Binv_d, gdim, n)
294
295 call cdtp(wa1%x, ta1%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
296 call cdtp(wa2%x, ta2%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
297 call cdtp(wa3%x, ta3%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
298
299 call ax%compute(p_res%x, p%x, c_xh, p%msh, p%Xh)
300
301#ifdef HAVE_HIP
302 call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
303#elif HAVE_CUDA
304 call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
305#elif HAVE_OPENCL
306 call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
307#endif
308
309 !
310 ! Surface velocity terms
311 call device_rzero(wa1%x_d, n)
312 call device_rzero(wa2%x_d, n)
313 call device_rzero(wa3%x_d, n)
314 dtbd = 1.0_rp
315
316 call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, ta1%x_d, &
317 ta2%x_d, ta3%x_d)
318
319#ifdef HAVE_HIP
320 call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
321#elif HAVE_CUDA
322 call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
323#elif HAVE_OPENCL
324 call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, &
325 n)
326#endif
327 !
328 dtbd = bd / dt
329
330 call device_rzero(ta1%x_d, n)
331 call device_rzero(ta2%x_d, n)
332 call device_rzero(ta3%x_d, n)
333
334 call bc_prs_surface%apply_surfvec_dev(ta1%x_d, ta2%x_d, ta3%x_d, &
335 u%x_d, v%x_d, w%x_d)
336
337#ifdef HAVE_HIP
338 call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
339#elif HAVE_CUDA
340 call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
341#elif HAVE_OPENCL
342 call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd,&
343 n)
344#endif
345
346 call neko_scratch_registry%relinquish_field(temp_indices)
347
348 end subroutine pnpn_prs_res_device_compute
349
350 subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, &
351 p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
352 class(ax_t), intent(in) :: Ax
353 type(mesh_t), intent(inout) :: msh
354 type(space_t), intent(inout) :: Xh
355 type(field_t), intent(inout) :: p, u, v, w
356 type(field_t), intent(inout) :: u_res, v_res, w_res
357 type(field_t), intent(in) :: f_x, f_y, f_z
358 type(coef_t), intent(inout) :: c_Xh
359 type(field_t), intent(in) :: mu
360 type(field_t), intent(in) :: rho
361 real(kind=rp), intent(in) :: bd
362 real(kind=rp), intent(in) :: dt
363 integer, intent(in) :: n
364 integer :: temp_indices(3)
365 type(field_t), pointer :: ta1, ta2, ta3
366 real(kind=rp) :: mu_val, rho_val
367
368 ! We assume the material properties are constant
369 mu_val = mu%x(1,1,1,1)
370 rho_val = rho%x(1,1,1,1)
371
372 call device_cfill(c_xh%h1_d, mu_val, n)
373 call device_cfill(c_xh%h2_d, rho_val * (bd / dt), n)
374 c_xh%ifh2 = .true.
375
376 call ax%compute_vector(u_res%x, v_res%x, w_res%x, &
377 u%x, v%x, w%x, c_xh, msh, xh)
378
379 call neko_scratch_registry%request_field(ta1, temp_indices(1), .false.)
380 call neko_scratch_registry%request_field(ta2, temp_indices(2), .false.)
381 call neko_scratch_registry%request_field(ta3, temp_indices(3), .false.)
382
383 call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_xh)
384
385#ifdef HAVE_HIP
386 call pnpn_vel_res_update_hip(u_res%x_d, v_res%x_d, w_res%x_d, &
387 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
388#elif HAVE_CUDA
389 call pnpn_vel_res_update_cuda(u_res%x_d, v_res%x_d, w_res%x_d, &
390 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
391#elif HAVE_OPENCL
392 call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, &
393 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
394#endif
395
396 call neko_scratch_registry%relinquish_field(temp_indices)
397 end subroutine pnpn_vel_res_device_compute
398
399end module pnpn_res_device
Defines a Matrix-vector product.
Definition ax.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
Dirichlet condition applied in the facet normal direction.
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_xh, gs_xh, bc_prs_surface, bc_sym_surface, ax, bd, dt, mu, rho, event)
subroutine pnpn_vel_res_device_compute(ax, u, v, w, u_res, v_res, w_res, p, f_x, f_y, f_z, c_xh, msh, xh, mu, rho, bd, dt, n)
Defines Pressure and velocity residuals in the Pn-Pn formulation.
Definition pnpn_res.f90:34
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
void pnpn_prs_res_part3_opencl(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.c:108
void pnpn_prs_res_part1_opencl(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.c:44
void pnpn_prs_res_part2_opencl(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.c:82
void pnpn_vel_res_update_opencl(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.c:135
void pnpn_prs_res_part2_cuda(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.cu:63
void pnpn_prs_res_part3_cuda(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.cu:77
void pnpn_vel_res_update_cuda(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.cu:92
void pnpn_prs_res_part1_cuda(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.cu:43
Base type for a matrix-vector product providing .
Definition ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
Dirichlet condition in facet normal direction.
Abstract type to compute pressure residual.
Definition pnpn_res.f90:48
Abstract type to compute velocity residual.
Definition pnpn_res.f90:54
The function space for the SEM solution fields.
Definition space.f90:63