Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pnpn_res_stress_device.F90
Go to the documentation of this file.
1
3 use gather_scatter, only : gs_t, gs_op_add
4 use utils, only : neko_error
6 use field, only : field_t
7 use ax_product, only : ax_t
8 use coefs, only : coef_t
12 use mesh, only : mesh_t
13 use num_types, only : rp, c_rp
14 use space, only : space_t
15 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
19 implicit none
20 private
21
25 contains
26 procedure, nopass :: compute => pnpn_prs_res_stress_device_compute
28
32 contains
33 procedure, nopass :: compute => pnpn_vel_res_stress_device_compute
35
36#ifdef HAVE_HIP
37 interface
38 subroutine pnpn_prs_stress_res_part1_hip(ta1_d, ta2_d, ta3_d, &
39 wa1_d, wa2_d, wa3_d, s11_d, s22_d, s33_d, &
40 s12_d, s13_d, s23_d, f_u_d, f_v_d, f_w_d, &
41 B_d, h1_d, rho_d, n) &
42 bind(c, name = 'pnpn_prs_stress_res_part1_hip')
43 use, intrinsic :: iso_c_binding
44 import c_rp
45 implicit none
46 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
47 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
48 type(c_ptr), value :: s11_d, s22_d, s33_d, s12_d, s13_d, s23_d
49 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
50 type(c_ptr), value :: B_d, h1_d, rho_d
51 integer(c_int) :: n
53 end interface
54
55 interface
56 subroutine pnpn_prs_res_part2_hip(p_res_d, wa1_d, wa2_d, wa3_d, n) &
57 bind(c, name = 'pnpn_prs_res_part2_hip')
58 use, intrinsic :: iso_c_binding
59 implicit none
60 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
61 integer(c_int) :: n
62 end subroutine pnpn_prs_res_part2_hip
63 end interface
64
65 interface
66 subroutine pnpn_prs_stress_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, &
67 wa1_d, wa2_d, wa3_d, dtbd, n) &
68 bind(c, name = 'pnpn_prs_stress_res_part3_hip')
69 use, intrinsic :: iso_c_binding
70 import c_rp
71 implicit none
72 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
73 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
74 real(c_rp) :: dtbd
75 integer(c_int) :: n
77 end interface
78
79 interface
80 subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, &
81 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
82 bind(c, name = 'pnpn_vel_res_update_hip')
83 use, intrinsic :: iso_c_binding
84 implicit none
85 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
86 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
87 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
88 integer(c_int) :: n
89 end subroutine pnpn_vel_res_update_hip
90 end interface
91#elif HAVE_CUDA
92 interface
93 subroutine pnpn_prs_stress_res_part1_cuda(ta1_d, ta2_d, ta3_d, &
94 wa1_d, wa2_d, wa3_d, s11_d, s22_d, s33_d, &
95 s12_d, s13_d, s23_d, f_u_d, f_v_d, f_w_d, &
96 B_d, h1_d, rho_d, n) &
97 bind(c, name = 'pnpn_prs_stress_res_part1_cuda')
98 use, intrinsic :: iso_c_binding
99 import c_rp
100 implicit none
101 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
102 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
103 type(c_ptr), value :: s11_d, s22_d, s33_d, s12_d, s13_d, s23_d
104 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
105 type(c_ptr), value :: B_d, h1_d, rho_d
106 integer(c_int) :: n
107 end subroutine pnpn_prs_stress_res_part1_cuda
108 end interface
109
110 interface
111 subroutine pnpn_prs_res_part2_cuda(p_res_d, wa1_d, wa2_d, wa3_d, n) &
112 bind(c, name = 'pnpn_prs_res_part2_cuda')
113 use, intrinsic :: iso_c_binding
114 implicit none
115 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
116 integer(c_int) :: n
117 end subroutine pnpn_prs_res_part2_cuda
118 end interface
119
120 interface
121 subroutine pnpn_prs_stress_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, &
122 wa1_d, wa2_d, wa3_d, dtbd, n) &
123 bind(c, name = 'pnpn_prs_stress_res_part3_cuda')
124 use, intrinsic :: iso_c_binding
125 import c_rp
126 implicit none
127 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
128 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
129 real(c_rp) :: dtbd
130 integer(c_int) :: n
131 end subroutine pnpn_prs_stress_res_part3_cuda
132 end interface
133
134 interface
135 subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, &
136 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
137 bind(c, name = 'pnpn_vel_res_update_cuda')
138 use, intrinsic :: iso_c_binding
139 implicit none
140 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
141 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
142 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
143 integer(c_int) :: n
144 end subroutine pnpn_vel_res_update_cuda
145 end interface
146#elif HAVE_OPENCL
147 interface
148 subroutine pnpn_prs_res_part1_opencl(ta1_d, ta2_d, ta3_d, &
149 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
150 B_d, h1_d, mu, rho, n) &
151 bind(c, name = 'pnpn_prs_res_part1_opencl')
152 use, intrinsic :: iso_c_binding
153 import c_rp
154 implicit none
155 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
156 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
157 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
158 type(c_ptr), value :: B_d, h1_d
159 real(c_rp) :: mu, rho
160 integer(c_int) :: n
161 end subroutine pnpn_prs_res_part1_opencl
162 end interface
163
164 interface
165 subroutine pnpn_prs_res_part2_opencl(p_res_d, wa1_d, wa2_d, wa3_d, n) &
166 bind(c, name = 'pnpn_prs_res_part2_opencl')
167 use, intrinsic :: iso_c_binding
168 implicit none
169 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
170 integer(c_int) :: n
171 end subroutine pnpn_prs_res_part2_opencl
172 end interface
173
174 interface
175 subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, &
176 wa1_d, wa2_d, wa3_d, dtbd, n) &
177 bind(c, name = 'pnpn_prs_res_part3_opencl')
178 use, intrinsic :: iso_c_binding
179 import c_rp
180 implicit none
181 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
182 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
183 real(c_rp) :: dtbd
184 integer(c_int) :: n
185 end subroutine pnpn_prs_res_part3_opencl
186 end interface
187
188 interface
189 subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, &
190 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
191 bind(c, name = 'pnpn_vel_res_update_opencl')
192 use, intrinsic :: iso_c_binding
193 implicit none
194 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
195 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
196 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
197 integer(c_int) :: n
198 end subroutine pnpn_vel_res_update_opencl
199 end interface
200#endif
201
202contains
203
204 subroutine pnpn_prs_res_stress_device_compute(p, p_res, u, v, w, u_e, v_e,&
205 w_e, f_x, f_y, f_z, c_Xh, gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd,&
206 dt, mu, rho)
207 type(field_t), intent(inout) :: p, u, v, w
208 type(field_t), intent(in) :: u_e, v_e, w_e
209 type(field_t), intent(inout) :: p_res
210 type(field_t), intent(in) :: f_x, f_y, f_z
211 type(coef_t), intent(inout) :: c_Xh
212 type(gs_t), intent(inout) :: gs_Xh
213 type(facet_normal_t), intent(in) :: bc_prs_surface
214 type(facet_normal_t), intent(in) :: bc_sym_surface
215 class(ax_t), intent(inout) :: Ax
216 real(kind=rp), intent(in) :: bd
217 real(kind=rp), intent(in) :: dt
218 type(field_t), intent(in) :: mu
219 type(field_t), intent(in) :: rho
220 real(kind=rp) :: dtbd
221 integer :: n, nelv, lxyz, gdim
222 integer :: i, e
223 ! Work arrays
224 type(field_t), pointer :: ta1, ta2, ta3, wa1, wa2, wa3, work1, work2, work3
225 type(field_t), pointer :: s11, s22, s33, s12, s13, s23
226 integer :: temp_indices(15)
227
228 ! Work arrays
229 call neko_scratch_registry%request_field(ta1, temp_indices(1))
230 call neko_scratch_registry%request_field(ta2, temp_indices(2))
231 call neko_scratch_registry%request_field(ta3, temp_indices(3))
232 call neko_scratch_registry%request_field(wa1, temp_indices(4))
233 call neko_scratch_registry%request_field(wa2, temp_indices(5))
234 call neko_scratch_registry%request_field(wa3, temp_indices(6))
235 call neko_scratch_registry%request_field(work1, temp_indices(7))
236 call neko_scratch_registry%request_field(work2, temp_indices(8))
237 call neko_scratch_registry%request_field(work3, temp_indices(9))
238
239 ! Stress tensor
240 call neko_scratch_registry%request_field(s11, temp_indices(10))
241 call neko_scratch_registry%request_field(s22, temp_indices(11))
242 call neko_scratch_registry%request_field(s33, temp_indices(12))
243 call neko_scratch_registry%request_field(s12, temp_indices(13))
244 call neko_scratch_registry%request_field(s13, temp_indices(14))
245 call neko_scratch_registry%request_field(s23, temp_indices(15))
246
247 n = c_xh%dof%size()
248 lxyz = c_xh%Xh%lxyz
249 nelv = c_xh%msh%nelv
250 gdim = c_xh%msh%gdim
251
252 call device_copy(c_xh%h1_d, rho%x_d, n)
253 call device_invcol1(c_xh%h1_d, n)
254 call device_rzero(c_xh%h2_d, n)
255 c_xh%ifh2 = .false.
256
257 ! mu times the double curl of the velocity
258 call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_xh)
259 call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_xh)
260
261 call device_col2(wa1%x_d, mu%x_d, n)
262 call device_col2(wa2%x_d, mu%x_d, n)
263 call device_col2(wa3%x_d, mu%x_d, n)
264
265
266 ! The strain rate tensor
267 call strain_rate(s11%x, s22%x, s33%x, s12%x, s13%x, s23%x, &
268 u_e, v_e, w_e, c_xh)
269
270
271 ! Gradient of viscosity * 2
272 !call opgrad(ta1%x, ta2%x, ta3%x, mu%x, c_Xh)
273 call dudxyz(ta1%x, mu%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
274 call dudxyz(ta2%x, mu%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
275 call dudxyz(ta3%x, mu%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
276
277#ifdef HAVE_HIP
278 call pnpn_prs_stress_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, &
279 wa1%x_d, wa2%x_d, wa3%x_d, &
280 s11%x_d, s22%x_d, s33%x_d, s12%x_d, s13%x_d, s23%x_d, &
281 f_x%x_d, f_y%x_d, f_z%x_d, &
282 c_xh%B_d, c_xh%h1_d, rho%x_d, n)
283#elif HAVE_CUDA
284 call pnpn_prs_stress_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, &
285 wa1%x_d, wa2%x_d, wa3%x_d, &
286 s11%x_d, s22%x_d, s33%x_d, s12%x_d, s13%x_d, s23%x_d, &
287 f_x%x_d, f_y%x_d, f_z%x_d, &
288 c_xh%B_d, c_xh%h1_d, rho%x_d, n)
289#else
290 call neko_error('No device backend configured')
291#endif
292
293 call gs_xh%op(ta1, gs_op_add)
294 call gs_xh%op(ta2, gs_op_add)
295 call gs_xh%op(ta3, gs_op_add)
296
297 call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_xh%Binv_d, gdim, n)
298
299 ! Compute the components of the divergence of the rhs
300 call cdtp(wa1%x, ta1%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
301 call cdtp(wa2%x, ta2%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
302 call cdtp(wa3%x, ta3%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
303
304 ! The laplacian of the pressure
305 call ax%compute(p_res%x, p%x, c_xh, p%msh, p%Xh)
306
307#ifdef HAVE_HIP
308 call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
309#elif HAVE_CUDA
310 call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
311#elif HAVE_OPENCL
312 call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
313#endif
314
315 !
316 ! Surface velocity terms
317 !
318 call device_rzero(wa1%x_d, n)
319 call device_rzero(wa2%x_d, n)
320 call device_rzero(wa3%x_d, n)
321
322 call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, &
323 ta1%x_d , ta2%x_d, ta3%x_d)
324
325 dtbd = bd / dt
326 call device_rzero(ta1%x_d, n)
327 call device_rzero(ta2%x_d, n)
328 call device_rzero(ta3%x_d, n)
329
330 call bc_prs_surface%apply_surfvec_dev(ta1%x_d, ta2%x_d, ta3%x_d, &
331 u%x_D, v%x_d, w%x_d)
332
333#ifdef HAVE_HIP
334 call pnpn_prs_stress_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, &
335 wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
336#elif HAVE_CUDA
337 call pnpn_prs_stress_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, &
338 wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
339#else
340 call neko_error('No device backend configured')
341#endif
342
343 call neko_scratch_registry%relinquish_field(temp_indices)
344
346
347 subroutine pnpn_vel_res_stress_device_compute(Ax, u, v, w, u_res, v_res, &
348 w_res, p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
349 class(ax_t), intent(in) :: Ax
350 type(mesh_t), intent(inout) :: msh
351 type(space_t), intent(inout) :: Xh
352 type(field_t), intent(inout) :: p, u, v, w
353 type(field_t), intent(inout) :: u_res, v_res, w_res
354 type(field_t), intent(in) :: f_x, f_y, f_z
355 type(coef_t), intent(inout) :: c_Xh
356 type(field_t), intent(in) :: mu
357 type(field_t), intent(in) :: rho
358 real(kind=rp), intent(in) :: bd
359 real(kind=rp), intent(in) :: dt
360 real(kind=rp) :: bddt
361 integer :: temp_indices(3)
362 type(field_t), pointer :: ta1, ta2, ta3
363 integer, intent(in) :: n
364 integer :: i
365
366 call device_copy(c_xh%h1_d, mu%x_d, n)
367 call device_copy(c_xh%h2_d, rho%x_d, n)
368
369 bddt = bd / dt
370 call device_cmult(c_xh%h2_d, bddt, n)
371
372 c_xh%ifh2 = .true.
373
374 ! Viscous stresses
375 call ax%compute_vector(u_res%x, v_res%x, w_res%x, u%x, v%x, w%x, c_xh,&
376 msh, xh)
377
378 call neko_scratch_registry%request_field(ta1, temp_indices(1))
379 call neko_scratch_registry%request_field(ta2, temp_indices(2))
380 call neko_scratch_registry%request_field(ta3, temp_indices(3))
381
382 ! Pressure gradient
383 call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_xh)
384
385 ! Sum all the terms
386#ifdef HAVE_HIP
387 call pnpn_vel_res_update_hip(u_res%x_d, v_res%x_d, w_res%x_d, &
388 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
389#elif HAVE_CUDA
390 call pnpn_vel_res_update_cuda(u_res%x_d, v_res%x_d, w_res%x_d, &
391 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
392#elif HAVE_OPENCL
393 call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, &
394 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
395#endif
396
397 call neko_scratch_registry%relinquish_field(temp_indices)
398
400
401end module pnpn_res_stress_device
Defines a Matrix-vector product.
Definition ax.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_invcol1(a_d, n)
Invert a vector .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
Compute a dot product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Dirichlet condition applied in the facet normal direction.
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e 0.5 * du_i/dx_j + du_j/dx_i.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
Definition operators.f90:76
Residuals in the Pn-Pn formulation (device version)
subroutine pnpn_vel_res_stress_device_compute(ax, u, v, w, u_res, v_res, w_res, p, f_x, f_y, f_z, c_xh, msh, xh, mu, rho, bd, dt, n)
subroutine pnpn_prs_res_stress_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_xh, gs_xh, bc_prs_surface, bc_sym_surface, ax, bd, dt, mu, rho)
Defines Pressure and velocity residuals in the Pn-Pn formulation.
Definition pnpn_res.f90:34
Defines a registry for storing and requesting temporary fields This can be used when you have a funct...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void pnpn_prs_res_part3_opencl(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.c:108
void pnpn_prs_res_part1_opencl(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.c:44
void pnpn_prs_res_part2_opencl(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.c:82
void pnpn_vel_res_update_opencl(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.c:135
void pnpn_prs_res_part2_cuda(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.cu:63
void pnpn_vel_res_update_cuda(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.cu:92
void pnpn_prs_stress_res_part1_cuda(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *s11, void *s22, void *s33, void *s12, void *s13, void *s23, void *f_u, void *f_v, void *f_w, void *B, void *h1, void *rho, int *n)
void pnpn_prs_stress_res_part3_cuda(void *p_res, void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, real *dtbd, int *n)
Base type for a matrix-vector product providing .
Definition ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Dirichlet condition in facet normal direction.
Device implementation of the pressure residual for the PnPn fluid with full viscous stress formulatio...
Device implementation of the velocity residual for the PnPn fluid with full viscous stress formulatio...
Abstract type to compute pressure residual.
Definition pnpn_res.f90:47
Abstract type to compute velocity residual.
Definition pnpn_res.f90:53
The function space for the SEM solution fields.
Definition space.f90:62