Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pnpn_res_device.F90
Go to the documentation of this file.
1! Copyright (c) 2022-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use gather_scatter, only : gs_t, gs_op_add
35 use operators
36 use field, only : field_t
37 use ax_product, only : ax_t
38 use coefs, only : coef_t
40 use mesh, only : mesh_t
41 use num_types, only : rp, c_rp
42 use space, only : space_t
43 use device_math
46 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
48 implicit none
49 private
50
51 type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_device_t
52 contains
53 procedure, nopass :: compute => pnpn_prs_res_device_compute
55
56 type, public, extends(pnpn_vel_res_t) :: pnpn_vel_res_device_t
57 contains
58 procedure, nopass :: compute => pnpn_vel_res_device_compute
60
61#ifdef HAVE_HIP
62 interface
63 subroutine pnpn_prs_res_part1_hip(ta1_d, ta2_d, ta3_d, &
64 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
65 B_d, h1_d, mu, rho, n) &
66 bind(c, name = 'pnpn_prs_res_part1_hip')
67 use, intrinsic :: iso_c_binding
68 import c_rp
69 implicit none
70 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
71 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
72 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
73 type(c_ptr), value :: B_d, h1_d
74 real(c_rp) :: mu, rho
75 integer(c_int) :: n
76 end subroutine pnpn_prs_res_part1_hip
77 end interface
78
79 interface
80 subroutine pnpn_prs_res_part2_hip(p_res_d, wa1_d, wa2_d, wa3_d, n) &
81 bind(c, name = 'pnpn_prs_res_part2_hip')
82 use, intrinsic :: iso_c_binding
83 implicit none
84 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
85 integer(c_int) :: n
86 end subroutine pnpn_prs_res_part2_hip
87 end interface
88
89 interface
90 subroutine pnpn_prs_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
91 bind(c, name = 'pnpn_prs_res_part3_hip')
92 use, intrinsic :: iso_c_binding
93 import c_rp
94 implicit none
95 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
96 real(c_rp) :: dtbd
97 integer(c_int) :: n
98 end subroutine pnpn_prs_res_part3_hip
99 end interface
100
101 interface
102 subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, &
103 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
104 bind(c, name = 'pnpn_vel_res_update_hip')
105 use, intrinsic :: iso_c_binding
106 implicit none
107 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
108 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
109 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
110 integer(c_int) :: n
111 end subroutine pnpn_vel_res_update_hip
112 end interface
113#elif HAVE_CUDA
114 interface
115 subroutine pnpn_prs_res_part1_cuda(ta1_d, ta2_d, ta3_d, &
116 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
117 B_d, h1_d, mu, rho, n) &
118 bind(c, name = 'pnpn_prs_res_part1_cuda')
119 use, intrinsic :: iso_c_binding
120 import c_rp
121 implicit none
122 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
123 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
124 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
125 type(c_ptr), value :: B_d, h1_d
126 real(c_rp) :: mu, rho
127 integer(c_int) :: n
128 end subroutine pnpn_prs_res_part1_cuda
129 end interface
130
131 interface
132 subroutine pnpn_prs_res_part2_cuda(p_res_d, wa1_d, wa2_d, wa3_d, n) &
133 bind(c, name = 'pnpn_prs_res_part2_cuda')
134 use, intrinsic :: iso_c_binding
135 implicit none
136 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
137 integer(c_int) :: n
138 end subroutine pnpn_prs_res_part2_cuda
139 end interface
140
141 interface
142 subroutine pnpn_prs_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
143 bind(c, name = 'pnpn_prs_res_part3_cuda')
144 use, intrinsic :: iso_c_binding
145 import c_rp
146 implicit none
147 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
148 real(c_rp) :: dtbd
149 integer(c_int) :: n
150 end subroutine pnpn_prs_res_part3_cuda
151 end interface
152
153 interface
154 subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, &
155 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
156 bind(c, name = 'pnpn_vel_res_update_cuda')
157 use, intrinsic :: iso_c_binding
158 implicit none
159 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
160 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
161 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
162 integer(c_int) :: n
163 end subroutine pnpn_vel_res_update_cuda
164 end interface
165#elif HAVE_OPENCL
166 interface
167 subroutine pnpn_prs_res_part1_opencl(ta1_d, ta2_d, ta3_d, &
168 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
169 B_d, h1_d, mu, rho, n) &
170 bind(c, name = 'pnpn_prs_res_part1_opencl')
171 use, intrinsic :: iso_c_binding
172 import c_rp
173 implicit none
174 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
175 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
176 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
177 type(c_ptr), value :: B_d, h1_d
178 real(c_rp) :: mu, rho
179 integer(c_int) :: n
180 end subroutine pnpn_prs_res_part1_opencl
181 end interface
182
183 interface
184 subroutine pnpn_prs_res_part2_opencl(p_res_d, wa1_d, wa2_d, wa3_d, n) &
185 bind(c, name = 'pnpn_prs_res_part2_opencl')
186 use, intrinsic :: iso_c_binding
187 implicit none
188 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
189 integer(c_int) :: n
190 end subroutine pnpn_prs_res_part2_opencl
191 end interface
192
193 interface
194 subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, &
195 n) bind(c, name = 'pnpn_prs_res_part3_opencl')
196 use, intrinsic :: iso_c_binding
197 import c_rp
198 implicit none
199 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
200 real(c_rp) :: dtbd
201 integer(c_int) :: n
202 end subroutine pnpn_prs_res_part3_opencl
203 end interface
204
205 interface
206 subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, &
207 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
208 bind(c, name = 'pnpn_vel_res_update_opencl')
209 use, intrinsic :: iso_c_binding
210 implicit none
211 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
212 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
213 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
214 integer(c_int) :: n
215 end subroutine pnpn_vel_res_update_opencl
216 end interface
217#endif
218
219
220contains
221
222 subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
223 f_x, f_y, f_z, c_Xh, gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd, dt,&
224 mu, rho)
225 type(field_t), intent(inout) :: p, u, v, w
226 type(field_t), intent(in) :: u_e, v_e, w_e
227 type(field_t), intent(inout) :: p_res
228 type(field_t), intent(in) :: f_x, f_y, f_z
229 type(coef_t), intent(inout) :: c_Xh
230 type(gs_t), intent(inout) :: gs_Xh
231 type(facet_normal_t), intent(in) :: bc_prs_surface
232 type(facet_normal_t), intent(in) :: bc_sym_surface
233 class(ax_t), intent(inout) :: Ax
234 real(kind=rp), intent(in) :: bd
235 real(kind=rp), intent(in) :: dt
236 type(field_t), intent(in) :: mu
237 type(field_t), intent(in) :: rho
238 real(kind=rp) :: dtbd
239 real(kind=rp) :: mu_val, rho_val
240 integer :: n, gdim
241 type(field_t), pointer :: ta1, ta2, ta3, wa1, wa2, wa3, work1, work2
242 integer :: temp_indices(8)
243
244
245 ! We assume the material properties are constant
246 mu_val = mu%x(1,1,1,1)
247 rho_val = rho%x(1,1,1,1)
248
249 call neko_scratch_registry%request_field(ta1, temp_indices(1))
250 call neko_scratch_registry%request_field(ta2, temp_indices(2))
251 call neko_scratch_registry%request_field(ta3, temp_indices(3))
252 call neko_scratch_registry%request_field(wa1, temp_indices(4))
253 call neko_scratch_registry%request_field(wa2, temp_indices(5))
254 call neko_scratch_registry%request_field(wa3, temp_indices(6))
255 call neko_scratch_registry%request_field(work1, temp_indices(7))
256 call neko_scratch_registry%request_field(work2, temp_indices(8))
257
258 n = u%dof%size()
259 gdim = c_xh%msh%gdim
260
261 call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_xh)
262 call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_xh)
263
264
265#ifdef HAVE_HIP
266 call pnpn_prs_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, &
267 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
268 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
269
270#elif HAVE_CUDA
271 call pnpn_prs_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, &
272 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
273 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
274#elif HAVE_OPENCL
275 call pnpn_prs_res_part1_opencl(ta1%x_d, ta2%x_d, ta3%x_d, &
276 wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_z%x_d, f_z%x_d, &
277 c_xh%B_d, c_xh%h1_d, mu_val, rho_val, n)
278#endif
279 c_xh%ifh2 = .false.
280 call device_cfill(c_xh%h1_d, 1.0_rp / rho_val, n)
281
282 call gs_xh%op(ta1, gs_op_add)
283 call gs_xh%op(ta2, gs_op_add)
284 call gs_xh%op(ta3, gs_op_add)
285
286 call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_xh%Binv_d, gdim, n)
287
288 call cdtp(wa1%x, ta1%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
289 call cdtp(wa2%x, ta2%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
290 call cdtp(wa3%x, ta3%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
291
292 call ax%compute(p_res%x, p%x, c_xh, p%msh, p%Xh)
293
294#ifdef HAVE_HIP
295 call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
296#elif HAVE_CUDA
297 call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
298#elif HAVE_OPENCL
299 call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
300#endif
301
302 !
303 ! Surface velocity terms
304 call device_rzero(wa1%x_d, n)
305 call device_rzero(wa2%x_d, n)
306 call device_rzero(wa3%x_d, n)
307 dtbd = 1.0_rp
308
309 call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, ta1%x_d, &
310 ta2%x_d, ta3%x_d)
311
312#ifdef HAVE_HIP
313 call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
314#elif HAVE_CUDA
315 call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
316#elif HAVE_OPENCL
317 call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, &
318 n)
319#endif
320 !
321 dtbd = bd / dt
322
323 call device_rzero(ta1%x_d, n)
324 call device_rzero(ta2%x_d, n)
325 call device_rzero(ta3%x_d, n)
326
327 call bc_prs_surface%apply_surfvec_dev(ta1%x_d, ta2%x_d, ta3%x_d, &
328 u%x_d, v%x_d, w%x_d)
329
330#ifdef HAVE_HIP
331 call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
332#elif HAVE_CUDA
333 call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
334#elif HAVE_OPENCL
335 call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd,&
336 n)
337#endif
338
339 call neko_scratch_registry%relinquish_field(temp_indices)
340
341 end subroutine pnpn_prs_res_device_compute
342
343 subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, &
344 p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
345 class(ax_t), intent(in) :: Ax
346 type(mesh_t), intent(inout) :: msh
347 type(space_t), intent(inout) :: Xh
348 type(field_t), intent(inout) :: p, u, v, w
349 type(field_t), intent(inout) :: u_res, v_res, w_res
350 type(field_t), intent(in) :: f_x, f_y, f_z
351 type(coef_t), intent(inout) :: c_Xh
352 type(field_t), intent(in) :: mu
353 type(field_t), intent(in) :: rho
354 real(kind=rp), intent(in) :: bd
355 real(kind=rp), intent(in) :: dt
356 integer, intent(in) :: n
357 integer :: temp_indices(3)
358 type(field_t), pointer :: ta1, ta2, ta3
359 real(kind=rp) :: mu_val, rho_val
360
361 ! We assume the material properties are constant
362 mu_val = mu%x(1,1,1,1)
363 rho_val = rho%x(1,1,1,1)
364
365 call device_cfill(c_xh%h1_d, mu_val, n)
366 call device_cfill(c_xh%h2_d, rho_val * (bd / dt), n)
367 c_xh%ifh2 = .true.
368
369 call ax%compute_vector(u_res%x, v_res%x, w_res%x, &
370 u%x, v%x, w%x, c_xh, msh, xh)
371
372 call neko_scratch_registry%request_field(ta1, temp_indices(1))
373 call neko_scratch_registry%request_field(ta2, temp_indices(2))
374 call neko_scratch_registry%request_field(ta3, temp_indices(3))
375
376 call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_xh)
377
378#ifdef HAVE_HIP
379 call pnpn_vel_res_update_hip(u_res%x_d, v_res%x_d, w_res%x_d, &
380 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
381#elif HAVE_CUDA
382 call pnpn_vel_res_update_cuda(u_res%x_d, v_res%x_d, w_res%x_d, &
383 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
384#elif HAVE_OPENCL
385 call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, &
386 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
387#endif
388
389 call neko_scratch_registry%relinquish_field(temp_indices)
390 end subroutine pnpn_vel_res_device_compute
391
392end module pnpn_res_device
Defines a Matrix-vector product.
Definition ax.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Dirichlet condition applied in the facet normal direction.
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_xh, gs_xh, bc_prs_surface, bc_sym_surface, ax, bd, dt, mu, rho)
subroutine pnpn_vel_res_device_compute(ax, u, v, w, u_res, v_res, w_res, p, f_x, f_y, f_z, c_xh, msh, xh, mu, rho, bd, dt, n)
Defines Pressure and velocity residuals in the Pn-Pn formulation.
Definition pnpn_res.f90:34
Defines a registry for storing and requesting temporary fields This can be used when you have a funct...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
void pnpn_prs_res_part3_opencl(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.c:108
void pnpn_prs_res_part1_opencl(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.c:44
void pnpn_prs_res_part2_opencl(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.c:82
void pnpn_vel_res_update_opencl(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.c:135
void pnpn_prs_res_part2_cuda(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.cu:63
void pnpn_prs_res_part3_cuda(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.cu:77
void pnpn_vel_res_update_cuda(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.cu:92
void pnpn_prs_res_part1_cuda(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.cu:43
Base type for a matrix-vector product providing .
Definition ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Dirichlet condition in facet normal direction.
Abstract type to compute pressure residual.
Definition pnpn_res.f90:47
Abstract type to compute velocity residual.
Definition pnpn_res.f90:53
The function space for the SEM solution fields.
Definition space.f90:62