Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
pnpn_res_device.F90
Go to the documentation of this file.
1! Copyright (c) 2022-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use gather_scatter, only : gs_t, gs_op_add
35 use operators
36 use field, only : field_t
37 use ax_product, only : ax_t
38 use coefs, only : coef_t
40 use mesh, only : mesh_t
41 use num_types, only : rp, c_rp
42 use space, only : space_t
43 use device_math
47 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
49 implicit none
50 private
51
52 type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_device_t
53 contains
54 procedure, nopass :: compute => pnpn_prs_res_device_compute
56
57 type, public, extends(pnpn_vel_res_t) :: pnpn_vel_res_device_t
58 contains
59 procedure, nopass :: compute => pnpn_vel_res_device_compute
61
62#ifdef HAVE_HIP
63 interface
64 subroutine pnpn_prs_res_part1_hip(ta1_d, ta2_d, ta3_d, &
65 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
66 B_d, h1_d, mu, rho, n) &
67 bind(c, name = 'pnpn_prs_res_part1_hip')
68 use, intrinsic :: iso_c_binding
69 import c_rp
70 implicit none
71 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
72 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
73 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
74 type(c_ptr), value :: B_d, h1_d
75 real(c_rp) :: mu, rho
76 integer(c_int) :: n
77 end subroutine pnpn_prs_res_part1_hip
78 end interface
79
80 interface
81 subroutine pnpn_prs_res_part2_hip(p_res_d, wa1_d, wa2_d, wa3_d, n) &
82 bind(c, name = 'pnpn_prs_res_part2_hip')
83 use, intrinsic :: iso_c_binding
84 implicit none
85 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
86 integer(c_int) :: n
87 end subroutine pnpn_prs_res_part2_hip
88 end interface
89
90 interface
91 subroutine pnpn_prs_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
92 bind(c, name = 'pnpn_prs_res_part3_hip')
93 use, intrinsic :: iso_c_binding
94 import c_rp
95 implicit none
96 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
97 real(c_rp) :: dtbd
98 integer(c_int) :: n
99 end subroutine pnpn_prs_res_part3_hip
100 end interface
101
102 interface
103 subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, &
104 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
105 bind(c, name = 'pnpn_vel_res_update_hip')
106 use, intrinsic :: iso_c_binding
107 implicit none
108 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
109 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
110 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
111 integer(c_int) :: n
112 end subroutine pnpn_vel_res_update_hip
113 end interface
114#elif HAVE_CUDA
115 interface
116 subroutine pnpn_prs_res_part1_cuda(ta1_d, ta2_d, ta3_d, &
117 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
118 B_d, h1_d, mu, rho, n) &
119 bind(c, name = 'pnpn_prs_res_part1_cuda')
120 use, intrinsic :: iso_c_binding
121 import c_rp
122 implicit none
123 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
124 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
125 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
126 type(c_ptr), value :: B_d, h1_d
127 real(c_rp) :: mu, rho
128 integer(c_int) :: n
129 end subroutine pnpn_prs_res_part1_cuda
130 end interface
131
132 interface
133 subroutine pnpn_prs_res_part2_cuda(p_res_d, wa1_d, wa2_d, wa3_d, n) &
134 bind(c, name = 'pnpn_prs_res_part2_cuda')
135 use, intrinsic :: iso_c_binding
136 implicit none
137 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
138 integer(c_int) :: n
139 end subroutine pnpn_prs_res_part2_cuda
140 end interface
141
142 interface
143 subroutine pnpn_prs_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
144 bind(c, name = 'pnpn_prs_res_part3_cuda')
145 use, intrinsic :: iso_c_binding
146 import c_rp
147 implicit none
148 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
149 real(c_rp) :: dtbd
150 integer(c_int) :: n
151 end subroutine pnpn_prs_res_part3_cuda
152 end interface
153
154 interface
155 subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, &
156 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
157 bind(c, name = 'pnpn_vel_res_update_cuda')
158 use, intrinsic :: iso_c_binding
159 implicit none
160 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
161 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
162 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
163 integer(c_int) :: n
164 end subroutine pnpn_vel_res_update_cuda
165 end interface
166#elif HAVE_OPENCL
167 interface
168 subroutine pnpn_prs_res_part1_opencl(ta1_d, ta2_d, ta3_d, &
169 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
170 B_d, h1_d, mu, rho, n) &
171 bind(c, name = 'pnpn_prs_res_part1_opencl')
172 use, intrinsic :: iso_c_binding
173 import c_rp
174 implicit none
175 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
176 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
177 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
178 type(c_ptr), value :: B_d, h1_d
179 real(c_rp) :: mu, rho
180 integer(c_int) :: n
181 end subroutine pnpn_prs_res_part1_opencl
182 end interface
183
184 interface
185 subroutine pnpn_prs_res_part2_opencl(p_res_d, wa1_d, wa2_d, wa3_d, n) &
186 bind(c, name = 'pnpn_prs_res_part2_opencl')
187 use, intrinsic :: iso_c_binding
188 implicit none
189 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
190 integer(c_int) :: n
191 end subroutine pnpn_prs_res_part2_opencl
192 end interface
193
194 interface
195 subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, &
196 n) bind(c, name = 'pnpn_prs_res_part3_opencl')
197 use, intrinsic :: iso_c_binding
198 import c_rp
199 implicit none
200 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
201 real(c_rp) :: dtbd
202 integer(c_int) :: n
203 end subroutine pnpn_prs_res_part3_opencl
204 end interface
205
206 interface
207 subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, &
208 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
209 bind(c, name = 'pnpn_vel_res_update_opencl')
210 use, intrinsic :: iso_c_binding
211 implicit none
212 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
213 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
214 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
215 integer(c_int) :: n
216 end subroutine pnpn_vel_res_update_opencl
217 end interface
218#endif
219
220
221contains
222
223 subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
224 f_x, f_y, f_z, c_Xh, gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd, dt,&
225 mu, rho, event)
226 type(field_t), intent(inout) :: p, u, v, w
227 type(field_t), intent(in) :: u_e, v_e, w_e
228 type(field_t), intent(inout) :: p_res
229 type(field_t), intent(in) :: f_x, f_y, f_z
230 type(coef_t), intent(inout) :: c_Xh
231 type(gs_t), intent(inout) :: gs_Xh
232 type(facet_normal_t), intent(in) :: bc_prs_surface
233 type(facet_normal_t), intent(in) :: bc_sym_surface
234 class(ax_t), intent(inout) :: Ax
235 real(kind=rp), intent(in) :: bd
236 real(kind=rp), intent(in) :: dt
237 type(field_t), intent(in) :: mu
238 type(field_t), intent(in) :: rho
239 type(c_ptr), intent(inout) :: event
240 real(kind=rp) :: dtbd
241 real(kind=rp) :: mu_val, rho_val
242 integer :: n, gdim
243 type(field_t), pointer :: ta1, ta2, ta3, wa1, wa2, wa3, work1, work2
244 integer :: temp_indices(8)
245
246
247 ! We assume the material properties are constant
248 mu_val = mu%x(1,1,1,1)
249 rho_val = rho%x(1,1,1,1)
250
251 call neko_scratch_registry%request_field(ta1, temp_indices(1))
252 call neko_scratch_registry%request_field(ta2, temp_indices(2))
253 call neko_scratch_registry%request_field(ta3, temp_indices(3))
254 call neko_scratch_registry%request_field(wa1, temp_indices(4))
255 call neko_scratch_registry%request_field(wa2, temp_indices(5))
256 call neko_scratch_registry%request_field(wa3, temp_indices(6))
257 call neko_scratch_registry%request_field(work1, temp_indices(7))
258 call neko_scratch_registry%request_field(work2, temp_indices(8))
259
260 n = u%dof%size()
261 gdim = c_xh%msh%gdim
262
263 call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_xh, event)
264 call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_xh, event)
265
266
267#ifdef HAVE_HIP
268 call pnpn_prs_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, &
269 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
270 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
271
272#elif HAVE_CUDA
273 call pnpn_prs_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, &
274 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
275 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
276#elif HAVE_OPENCL
277 call pnpn_prs_res_part1_opencl(ta1%x_d, ta2%x_d, ta3%x_d, &
278 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_z%x_d, f_z%x_d, &
279 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
280#endif
281 c_xh%ifh2 = .false.
282 call device_cfill(c_xh%h1_d, 1.0_rp / rho_val, n)
283
284 call gs_xh%op(ta1, gs_op_add, event)
285 call gs_xh%op(ta2, gs_op_add, event)
286 call gs_xh%op(ta3, gs_op_add, event)
288
289 call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_xh%Binv_d, gdim, n)
290
291 call cdtp(wa1%x, ta1%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
292 call cdtp(wa2%x, ta2%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
293 call cdtp(wa3%x, ta3%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
294
295 call ax%compute(p_res%x, p%x, c_xh, p%msh, p%Xh)
296
297#ifdef HAVE_HIP
298 call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
299#elif HAVE_CUDA
300 call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
301#elif HAVE_OPENCL
302 call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
303#endif
304
305 !
306 ! Surface velocity terms
307 call device_rzero(wa1%x_d, n)
308 call device_rzero(wa2%x_d, n)
309 call device_rzero(wa3%x_d, n)
310 dtbd = 1.0_rp
311
312 call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, ta1%x_d, &
313 ta2%x_d, ta3%x_d)
314
315#ifdef HAVE_HIP
316 call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
317#elif HAVE_CUDA
318 call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
319#elif HAVE_OPENCL
320 call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, &
321 n)
322#endif
323 !
324 dtbd = bd / dt
325
326 call device_rzero(ta1%x_d, n)
327 call device_rzero(ta2%x_d, n)
328 call device_rzero(ta3%x_d, n)
329
330 call bc_prs_surface%apply_surfvec_dev(ta1%x_d, ta2%x_d, ta3%x_d, &
331 u%x_d, v%x_d, w%x_d)
332
333#ifdef HAVE_HIP
334 call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
335#elif HAVE_CUDA
336 call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
337#elif HAVE_OPENCL
338 call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd,&
339 n)
340#endif
341
342 call neko_scratch_registry%relinquish_field(temp_indices)
343
344 end subroutine pnpn_prs_res_device_compute
345
346 subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, &
347 p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
348 class(ax_t), intent(in) :: Ax
349 type(mesh_t), intent(inout) :: msh
350 type(space_t), intent(inout) :: Xh
351 type(field_t), intent(inout) :: p, u, v, w
352 type(field_t), intent(inout) :: u_res, v_res, w_res
353 type(field_t), intent(in) :: f_x, f_y, f_z
354 type(coef_t), intent(inout) :: c_Xh
355 type(field_t), intent(in) :: mu
356 type(field_t), intent(in) :: rho
357 real(kind=rp), intent(in) :: bd
358 real(kind=rp), intent(in) :: dt
359 integer, intent(in) :: n
360 integer :: temp_indices(3)
361 type(field_t), pointer :: ta1, ta2, ta3
362 real(kind=rp) :: mu_val, rho_val
363
364 ! We assume the material properties are constant
365 mu_val = mu%x(1,1,1,1)
366 rho_val = rho%x(1,1,1,1)
367
368 call device_cfill(c_xh%h1_d, mu_val, n)
369 call device_cfill(c_xh%h2_d, rho_val * (bd / dt), n)
370 c_xh%ifh2 = .true.
371
372 call ax%compute_vector(u_res%x, v_res%x, w_res%x, &
373 u%x, v%x, w%x, c_xh, msh, xh)
374
375 call neko_scratch_registry%request_field(ta1, temp_indices(1))
376 call neko_scratch_registry%request_field(ta2, temp_indices(2))
377 call neko_scratch_registry%request_field(ta3, temp_indices(3))
378
379 call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_xh)
380
381#ifdef HAVE_HIP
382 call pnpn_vel_res_update_hip(u_res%x_d, v_res%x_d, w_res%x_d, &
383 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
384#elif HAVE_CUDA
385 call pnpn_vel_res_update_cuda(u_res%x_d, v_res%x_d, w_res%x_d, &
386 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
387#elif HAVE_OPENCL
388 call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, &
389 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
390#endif
391
392 call neko_scratch_registry%relinquish_field(temp_indices)
393 end subroutine pnpn_vel_res_device_compute
394
395end module pnpn_res_device
Defines a Matrix-vector product.
Definition ax.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1244
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1138
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:50
Dirichlet condition applied in the facet normal direction.
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_xh, gs_xh, bc_prs_surface, bc_sym_surface, ax, bd, dt, mu, rho, event)
subroutine pnpn_vel_res_device_compute(ax, u, v, w, u_res, v_res, w_res, p, f_x, f_y, f_z, c_xh, msh, xh, mu, rho, bd, dt, n)
Defines Pressure and velocity residuals in the Pn-Pn formulation.
Definition pnpn_res.f90:34
Defines a registry for storing and requesting temporary fields This can be used when you have a funct...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
void pnpn_prs_res_part3_opencl(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.c:108
void pnpn_prs_res_part1_opencl(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.c:44
void pnpn_prs_res_part2_opencl(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.c:82
void pnpn_vel_res_update_opencl(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.c:135
void pnpn_prs_res_part2_cuda(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.cu:63
void pnpn_prs_res_part3_cuda(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.cu:77
void pnpn_vel_res_update_cuda(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.cu:92
void pnpn_prs_res_part1_cuda(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.cu:43
Base type for a matrix-vector product providing .
Definition ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Dirichlet condition in facet normal direction.
Abstract type to compute pressure residual.
Definition pnpn_res.f90:48
Abstract type to compute velocity residual.
Definition pnpn_res.f90:54
The function space for the SEM solution fields.
Definition space.f90:62