Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pnpn_res_device.F90
Go to the documentation of this file.
1! Copyright (c) 2022-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use gather_scatter, only : gs_t, gs_op_add
35 use operators
36 use field, only : field_t
37 use ax_product, only : ax_t
38 use coefs, only : coef_t
40 use mesh, only : mesh_t
41 use num_types, only : rp, c_rp
42 use space, only : space_t
43 use device_math
46 use device, only : device_event_sync
47 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
49 implicit none
50 private
51
52 type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_device_t
53 contains
54 procedure, nopass :: compute => pnpn_prs_res_device_compute
56
57 type, public, extends(pnpn_vel_res_t) :: pnpn_vel_res_device_t
58 contains
59 procedure, nopass :: compute => pnpn_vel_res_device_compute
61
62#ifdef HAVE_HIP
63 interface
64 subroutine pnpn_prs_res_part1_hip(ta1_d, ta2_d, ta3_d, &
65 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
66 B_d, h1_d, mu, rho, n) &
67 bind(c, name = 'pnpn_prs_res_part1_hip')
68 use, intrinsic :: iso_c_binding
69 import c_rp
70 implicit none
71 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
72 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
73 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
74 type(c_ptr), value :: B_d, h1_d
75 real(c_rp) :: mu, rho
76 integer(c_int) :: n
77 end subroutine pnpn_prs_res_part1_hip
78 end interface
79
80 interface
81 subroutine pnpn_prs_res_part2_hip(p_res_d, wa1_d, wa2_d, wa3_d, n) &
82 bind(c, name = 'pnpn_prs_res_part2_hip')
83 use, intrinsic :: iso_c_binding
84 implicit none
85 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
86 integer(c_int) :: n
87 end subroutine pnpn_prs_res_part2_hip
88 end interface
89
90 interface
91 subroutine pnpn_prs_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
92 bind(c, name = 'pnpn_prs_res_part3_hip')
93 use, intrinsic :: iso_c_binding
94 import c_rp
95 implicit none
96 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
97 real(c_rp) :: dtbd
98 integer(c_int) :: n
99 end subroutine pnpn_prs_res_part3_hip
100 end interface
101
102 interface
103 subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, &
104 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
105 bind(c, name = 'pnpn_vel_res_update_hip')
106 use, intrinsic :: iso_c_binding
107 implicit none
108 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
109 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
110 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
111 integer(c_int) :: n
112 end subroutine pnpn_vel_res_update_hip
113 end interface
114#elif HAVE_CUDA
115 interface
116 subroutine pnpn_prs_res_part1_cuda(ta1_d, ta2_d, ta3_d, &
117 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
118 B_d, h1_d, mu, rho, n) &
119 bind(c, name = 'pnpn_prs_res_part1_cuda')
120 use, intrinsic :: iso_c_binding
121 import c_rp
122 implicit none
123 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
124 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
125 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
126 type(c_ptr), value :: B_d, h1_d
127 real(c_rp) :: mu, rho
128 integer(c_int) :: n
129 end subroutine pnpn_prs_res_part1_cuda
130 end interface
131
132 interface
133 subroutine pnpn_prs_res_part2_cuda(p_res_d, wa1_d, wa2_d, wa3_d, n) &
134 bind(c, name = 'pnpn_prs_res_part2_cuda')
135 use, intrinsic :: iso_c_binding
136 implicit none
137 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
138 integer(c_int) :: n
139 end subroutine pnpn_prs_res_part2_cuda
140 end interface
141
142 interface
143 subroutine pnpn_prs_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
144 bind(c, name = 'pnpn_prs_res_part3_cuda')
145 use, intrinsic :: iso_c_binding
146 import c_rp
147 implicit none
148 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
149 real(c_rp) :: dtbd
150 integer(c_int) :: n
151 end subroutine pnpn_prs_res_part3_cuda
152 end interface
153
154 interface
155 subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, &
156 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
157 bind(c, name = 'pnpn_vel_res_update_cuda')
158 use, intrinsic :: iso_c_binding
159 implicit none
160 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
161 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
162 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
163 integer(c_int) :: n
164 end subroutine pnpn_vel_res_update_cuda
165 end interface
166#elif HAVE_OPENCL
167 interface
168 subroutine pnpn_prs_res_part1_opencl(ta1_d, ta2_d, ta3_d, &
169 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
170 B_d, h1_d, mu, rho, n) &
171 bind(c, name = 'pnpn_prs_res_part1_opencl')
172 use, intrinsic :: iso_c_binding
173 import c_rp
174 implicit none
175 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
176 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
177 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
178 type(c_ptr), value :: B_d, h1_d
179 real(c_rp) :: mu, rho
180 integer(c_int) :: n
181 end subroutine pnpn_prs_res_part1_opencl
182 end interface
183
184 interface
185 subroutine pnpn_prs_res_part2_opencl(p_res_d, wa1_d, wa2_d, wa3_d, n) &
186 bind(c, name = 'pnpn_prs_res_part2_opencl')
187 use, intrinsic :: iso_c_binding
188 implicit none
189 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
190 integer(c_int) :: n
191 end subroutine pnpn_prs_res_part2_opencl
192 end interface
193
194 interface
195 subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, &
196 n) bind(c, name = 'pnpn_prs_res_part3_opencl')
197 use, intrinsic :: iso_c_binding
198 import c_rp
199 implicit none
200 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
201 real(c_rp) :: dtbd
202 integer(c_int) :: n
203 end subroutine pnpn_prs_res_part3_opencl
204 end interface
205
206 interface
207 subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, &
208 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
209 bind(c, name = 'pnpn_vel_res_update_opencl')
210 use, intrinsic :: iso_c_binding
211 implicit none
212 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
213 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
214 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
215 integer(c_int) :: n
216 end subroutine pnpn_vel_res_update_opencl
217 end interface
218#endif
219
220
221contains
222
223 subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
224 f_x, f_y, f_z, c_Xh, gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd, dt,&
225 mu, rho, event)
226 type(field_t), intent(inout) :: p, u, v, w
227 type(field_t), intent(in) :: u_e, v_e, w_e
228 type(field_t), intent(inout) :: p_res
229 type(field_t), intent(in) :: f_x, f_y, f_z
230 type(coef_t), intent(inout) :: c_Xh
231 type(gs_t), intent(inout) :: gs_Xh
232 type(facet_normal_t), intent(in) :: bc_prs_surface
233 type(facet_normal_t), intent(in) :: bc_sym_surface
234 class(ax_t), intent(inout) :: Ax
235 real(kind=rp), intent(in) :: bd
236 real(kind=rp), intent(in) :: dt
237 type(field_t), intent(in) :: mu
238 type(field_t), intent(in) :: rho
239 type(c_ptr), intent(inout) :: event
240 real(kind=rp) :: dtbd
241 real(kind=rp) :: mu_val, rho_val
242 integer :: n, gdim
243 type(field_t), pointer :: ta1, ta2, ta3, wa1, wa2, wa3, work1, work2
244 integer :: temp_indices(8)
245
246
247 ! We assume the material properties are constant
248 mu_val = mu%x(1,1,1,1)
249 rho_val = rho%x(1,1,1,1)
250
251 call neko_scratch_registry%request_field(ta1, temp_indices(1))
252 call neko_scratch_registry%request_field(ta2, temp_indices(2))
253 call neko_scratch_registry%request_field(ta3, temp_indices(3))
254 call neko_scratch_registry%request_field(wa1, temp_indices(4))
255 call neko_scratch_registry%request_field(wa2, temp_indices(5))
256 call neko_scratch_registry%request_field(wa3, temp_indices(6))
257 call neko_scratch_registry%request_field(work1, temp_indices(7))
258 call neko_scratch_registry%request_field(work2, temp_indices(8))
259
260 n = u%dof%size()
261 gdim = c_xh%msh%gdim
262
263 call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_xh, event)
264 call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_xh, event)
265
266
267#ifdef HAVE_HIP
268 call pnpn_prs_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, &
269 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
270 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
271
272#elif HAVE_CUDA
273 call pnpn_prs_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, &
274 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
275 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
276#elif HAVE_OPENCL
277 call pnpn_prs_res_part1_opencl(ta1%x_d, ta2%x_d, ta3%x_d, &
278 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
279 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
280#endif
281 c_xh%ifh2 = .false.
282 call device_cfill(c_xh%h1_d, 1.0_rp / rho_val, n)
283
284 call gs_xh%op(ta1, gs_op_add, event)
285 call device_event_sync(event)
286 call gs_xh%op(ta2, gs_op_add, event)
287 call device_event_sync(event)
288 call gs_xh%op(ta3, gs_op_add, event)
289 call device_event_sync(event)
290
291 call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_xh%Binv_d, gdim, n)
292
293 call cdtp(wa1%x, ta1%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
294 call cdtp(wa2%x, ta2%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
295 call cdtp(wa3%x, ta3%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
296
297 call ax%compute(p_res%x, p%x, c_xh, p%msh, p%Xh)
298
299#ifdef HAVE_HIP
300 call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
301#elif HAVE_CUDA
302 call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
303#elif HAVE_OPENCL
304 call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
305#endif
306
307 !
308 ! Surface velocity terms
309 call device_rzero(wa1%x_d, n)
310 call device_rzero(wa2%x_d, n)
311 call device_rzero(wa3%x_d, n)
312 dtbd = 1.0_rp
313
314 call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, ta1%x_d, &
315 ta2%x_d, ta3%x_d)
316
317#ifdef HAVE_HIP
318 call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
319#elif HAVE_CUDA
320 call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
321#elif HAVE_OPENCL
322 call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, &
323 n)
324#endif
325 !
326 dtbd = bd / dt
327
328 call device_rzero(ta1%x_d, n)
329 call device_rzero(ta2%x_d, n)
330 call device_rzero(ta3%x_d, n)
331
332 call bc_prs_surface%apply_surfvec_dev(ta1%x_d, ta2%x_d, ta3%x_d, &
333 u%x_d, v%x_d, w%x_d)
334
335#ifdef HAVE_HIP
336 call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
337#elif HAVE_CUDA
338 call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
339#elif HAVE_OPENCL
340 call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd,&
341 n)
342#endif
343
344 call neko_scratch_registry%relinquish_field(temp_indices)
345
346 end subroutine pnpn_prs_res_device_compute
347
348 subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, &
349 p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
350 class(ax_t), intent(in) :: Ax
351 type(mesh_t), intent(inout) :: msh
352 type(space_t), intent(inout) :: Xh
353 type(field_t), intent(inout) :: p, u, v, w
354 type(field_t), intent(inout) :: u_res, v_res, w_res
355 type(field_t), intent(in) :: f_x, f_y, f_z
356 type(coef_t), intent(inout) :: c_Xh
357 type(field_t), intent(in) :: mu
358 type(field_t), intent(in) :: rho
359 real(kind=rp), intent(in) :: bd
360 real(kind=rp), intent(in) :: dt
361 integer, intent(in) :: n
362 integer :: temp_indices(3)
363 type(field_t), pointer :: ta1, ta2, ta3
364 real(kind=rp) :: mu_val, rho_val
365
366 ! We assume the material properties are constant
367 mu_val = mu%x(1,1,1,1)
368 rho_val = rho%x(1,1,1,1)
369
370 call device_cfill(c_xh%h1_d, mu_val, n)
371 call device_cfill(c_xh%h2_d, rho_val * (bd / dt), n)
372 c_xh%ifh2 = .true.
373
374 call ax%compute_vector(u_res%x, v_res%x, w_res%x, &
375 u%x, v%x, w%x, c_xh, msh, xh)
376
377 call neko_scratch_registry%request_field(ta1, temp_indices(1))
378 call neko_scratch_registry%request_field(ta2, temp_indices(2))
379 call neko_scratch_registry%request_field(ta3, temp_indices(3))
380
381 call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_xh)
382
383#ifdef HAVE_HIP
384 call pnpn_vel_res_update_hip(u_res%x_d, v_res%x_d, w_res%x_d, &
385 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
386#elif HAVE_CUDA
387 call pnpn_vel_res_update_cuda(u_res%x_d, v_res%x_d, w_res%x_d, &
388 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
389#elif HAVE_OPENCL
390 call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, &
391 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
392#endif
393
394 call neko_scratch_registry%relinquish_field(temp_indices)
395 end subroutine pnpn_vel_res_device_compute
396
397end module pnpn_res_device
Defines a Matrix-vector product.
Definition ax.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1309
Dirichlet condition applied in the facet normal direction.
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_xh, gs_xh, bc_prs_surface, bc_sym_surface, ax, bd, dt, mu, rho, event)
subroutine pnpn_vel_res_device_compute(ax, u, v, w, u_res, v_res, w_res, p, f_x, f_y, f_z, c_xh, msh, xh, mu, rho, bd, dt, n)
Defines Pressure and velocity residuals in the Pn-Pn formulation.
Definition pnpn_res.f90:34
Defines a registry for storing and requesting temporary fields This can be used when you have a funct...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
void pnpn_prs_res_part3_opencl(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.c:108
void pnpn_prs_res_part1_opencl(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.c:44
void pnpn_prs_res_part2_opencl(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.c:82
void pnpn_vel_res_update_opencl(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.c:135
void pnpn_prs_res_part2_cuda(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.cu:63
void pnpn_prs_res_part3_cuda(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.cu:77
void pnpn_vel_res_update_cuda(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.cu:92
void pnpn_prs_res_part1_cuda(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.cu:43
Base type for a matrix-vector product providing .
Definition ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Dirichlet condition in facet normal direction.
Abstract type to compute pressure residual.
Definition pnpn_res.f90:48
Abstract type to compute velocity residual.
Definition pnpn_res.f90:54
The function space for the SEM solution fields.
Definition space.f90:62