Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pnpn_res_stress_device.F90
Go to the documentation of this file.
1
3 use gather_scatter, only : gs_t, gs_op_add
4 use utils, only : neko_error
6 use field, only : field_t
7 use ax_product, only : ax_t
8 use coefs, only : coef_t
12 use mesh, only : mesh_t
13 use num_types, only : rp, c_rp
14 use space, only : space_t
15 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
19 implicit none
20 private
21
25 contains
26 procedure, nopass :: compute => pnpn_prs_res_stress_device_compute
28
32 contains
33 procedure, nopass :: compute => pnpn_vel_res_stress_device_compute
35
36#ifdef HAVE_HIP
37 interface
38 subroutine pnpn_prs_stress_res_part1_hip(ta1_d, ta2_d, ta3_d, &
39 wa1_d, wa2_d, wa3_d, s11_d, s22_d, s33_d, &
40 s12_d, s13_d, s23_d, f_u_d, f_v_d, f_w_d, &
41 B_d, h1_d, rho_d, n) &
42 bind(c, name = 'pnpn_prs_stress_res_part1_hip')
43 use, intrinsic :: iso_c_binding
44 import c_rp
45 implicit none
46 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
47 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
48 type(c_ptr), value :: s11_d, s22_d, s33_d, s12_d, s13_d, s23_d
49 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
50 type(c_ptr), value :: B_d, h1_d, rho_d
51 integer(c_int) :: n
53 end interface
54
55 interface
56 subroutine pnpn_prs_res_part2_hip(p_res_d, wa1_d, wa2_d, wa3_d, n) &
57 bind(c, name = 'pnpn_prs_res_part2_hip')
58 use, intrinsic :: iso_c_binding
59 implicit none
60 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
61 integer(c_int) :: n
62 end subroutine pnpn_prs_res_part2_hip
63 end interface
64
65 interface
66 subroutine pnpn_prs_stress_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, &
67 wa1_d, wa2_d, wa3_d, dtbd, n) &
68 bind(c, name = 'pnpn_prs_stress_res_part3_hip')
69 use, intrinsic :: iso_c_binding
70 import c_rp
71 implicit none
72 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
73 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
74 real(c_rp) :: dtbd
75 integer(c_int) :: n
77 end interface
78
79 interface
80 subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, &
81 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
82 bind(c, name = 'pnpn_vel_res_update_hip')
83 use, intrinsic :: iso_c_binding
84 implicit none
85 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
86 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
87 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
88 integer(c_int) :: n
89 end subroutine pnpn_vel_res_update_hip
90 end interface
91#elif HAVE_CUDA
92 interface
93 subroutine pnpn_prs_stress_res_part1_cuda(ta1_d, ta2_d, ta3_d, &
94 wa1_d, wa2_d, wa3_d, s11_d, s22_d, s33_d, &
95 s12_d, s13_d, s23_d, f_u_d, f_v_d, f_w_d, &
96 B_d, h1_d, rho_d, n) &
97 bind(c, name = 'pnpn_prs_stress_res_part1_cuda')
98 use, intrinsic :: iso_c_binding
99 import c_rp
100 implicit none
101 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
102 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
103 type(c_ptr), value :: s11_d, s22_d, s33_d, s12_d, s13_d, s23_d
104 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
105 type(c_ptr), value :: B_d, h1_d, rho_d
106 integer(c_int) :: n
107 end subroutine pnpn_prs_stress_res_part1_cuda
108 end interface
109
110 interface
111 subroutine pnpn_prs_res_part2_cuda(p_res_d, wa1_d, wa2_d, wa3_d, n) &
112 bind(c, name = 'pnpn_prs_res_part2_cuda')
113 use, intrinsic :: iso_c_binding
114 implicit none
115 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
116 integer(c_int) :: n
117 end subroutine pnpn_prs_res_part2_cuda
118 end interface
119
120 interface
121 subroutine pnpn_prs_stress_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, &
122 wa1_d, wa2_d, wa3_d, dtbd, n) &
123 bind(c, name = 'pnpn_prs_stress_res_part3_cuda')
124 use, intrinsic :: iso_c_binding
125 import c_rp
126 implicit none
127 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
128 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
129 real(c_rp) :: dtbd
130 integer(c_int) :: n
131 end subroutine pnpn_prs_stress_res_part3_cuda
132 end interface
133
134 interface
135 subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, &
136 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
137 bind(c, name = 'pnpn_vel_res_update_cuda')
138 use, intrinsic :: iso_c_binding
139 implicit none
140 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
141 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
142 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
143 integer(c_int) :: n
144 end subroutine pnpn_vel_res_update_cuda
145 end interface
146#elif HAVE_OPENCL
147 interface
148 subroutine pnpn_prs_res_part1_opencl(ta1_d, ta2_d, ta3_d, &
149 wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
150 B_d, h1_d, mu, rho, n) &
151 bind(c, name = 'pnpn_prs_res_part1_opencl')
152 use, intrinsic :: iso_c_binding
153 import c_rp
154 implicit none
155 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
156 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
157 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
158 type(c_ptr), value :: B_d, h1_d
159 real(c_rp) :: mu, rho
160 integer(c_int) :: n
161 end subroutine pnpn_prs_res_part1_opencl
162 end interface
163
164 interface
165 subroutine pnpn_prs_res_part2_opencl(p_res_d, wa1_d, wa2_d, wa3_d, n) &
166 bind(c, name = 'pnpn_prs_res_part2_opencl')
167 use, intrinsic :: iso_c_binding
168 implicit none
169 type(c_ptr), value :: p_res_d, wa1_d, wa2_d, wa3_d
170 integer(c_int) :: n
171 end subroutine pnpn_prs_res_part2_opencl
172 end interface
173
174 interface
175 subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, &
176 wa1_d, wa2_d, wa3_d, dtbd, n) &
177 bind(c, name = 'pnpn_prs_res_part3_opencl')
178 use, intrinsic :: iso_c_binding
179 import c_rp
180 implicit none
181 type(c_ptr), value :: p_res_d, ta1_d, ta2_d, ta3_d
182 type(c_ptr), value :: wa1_d, wa2_d, wa3_d
183 real(c_rp) :: dtbd
184 integer(c_int) :: n
185 end subroutine pnpn_prs_res_part3_opencl
186 end interface
187
188 interface
189 subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, &
190 ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
191 bind(c, name = 'pnpn_vel_res_update_opencl')
192 use, intrinsic :: iso_c_binding
193 implicit none
194 type(c_ptr), value :: u_res_d, v_res_d, w_res_d
195 type(c_ptr), value :: ta1_d, ta2_d, ta3_d
196 type(c_ptr), value :: f_u_d, f_v_d, f_w_d
197 integer(c_int) :: n
198 end subroutine pnpn_vel_res_update_opencl
199 end interface
200#endif
201
202contains
203
204 subroutine pnpn_prs_res_stress_device_compute(p, p_res, u, v, w, u_e, v_e,&
205 w_e, f_x, f_y, f_z, c_Xh, gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd,&
206 dt, mu, rho, event)
207 type(field_t), intent(inout) :: p, u, v, w
208 type(field_t), intent(in) :: u_e, v_e, w_e
209 type(field_t), intent(inout) :: p_res
210 type(field_t), intent(in) :: f_x, f_y, f_z
211 type(coef_t), intent(inout) :: c_Xh
212 type(gs_t), intent(inout) :: gs_Xh
213 type(facet_normal_t), intent(in) :: bc_prs_surface
214 type(facet_normal_t), intent(in) :: bc_sym_surface
215 class(ax_t), intent(inout) :: Ax
216 real(kind=rp), intent(in) :: bd
217 real(kind=rp), intent(in) :: dt
218 type(field_t), intent(in) :: mu
219 type(field_t), intent(in) :: rho
220 type(c_ptr), intent(inout) :: event
221 real(kind=rp) :: dtbd
222 integer :: n, nelv, lxyz, gdim
223 integer :: i, e
224 ! Work arrays
225 type(field_t), pointer :: ta1, ta2, ta3, wa1, wa2, wa3, work1, work2, work3
226 type(field_t), pointer :: s11, s22, s33, s12, s13, s23
227 integer :: temp_indices(15)
228
229 ! Work arrays
230 call neko_scratch_registry%request_field(ta1, temp_indices(1))
231 call neko_scratch_registry%request_field(ta2, temp_indices(2))
232 call neko_scratch_registry%request_field(ta3, temp_indices(3))
233 call neko_scratch_registry%request_field(wa1, temp_indices(4))
234 call neko_scratch_registry%request_field(wa2, temp_indices(5))
235 call neko_scratch_registry%request_field(wa3, temp_indices(6))
236 call neko_scratch_registry%request_field(work1, temp_indices(7))
237 call neko_scratch_registry%request_field(work2, temp_indices(8))
238 call neko_scratch_registry%request_field(work3, temp_indices(9))
239
240 ! Stress tensor
241 call neko_scratch_registry%request_field(s11, temp_indices(10))
242 call neko_scratch_registry%request_field(s22, temp_indices(11))
243 call neko_scratch_registry%request_field(s33, temp_indices(12))
244 call neko_scratch_registry%request_field(s12, temp_indices(13))
245 call neko_scratch_registry%request_field(s13, temp_indices(14))
246 call neko_scratch_registry%request_field(s23, temp_indices(15))
247
248 n = c_xh%dof%size()
249 lxyz = c_xh%Xh%lxyz
250 nelv = c_xh%msh%nelv
251 gdim = c_xh%msh%gdim
252
253 call device_copy(c_xh%h1_d, rho%x_d, n)
254 call device_invcol1(c_xh%h1_d, n)
255 call device_rzero(c_xh%h2_d, n)
256 c_xh%ifh2 = .false.
257
258 ! mu times the double curl of the velocity
259 call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_xh, event)
260 call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_xh, event)
261
262 call device_col2(wa1%x_d, mu%x_d, n)
263 call device_col2(wa2%x_d, mu%x_d, n)
264 call device_col2(wa3%x_d, mu%x_d, n)
265
266
267 ! The strain rate tensor
268 call strain_rate(s11%x, s22%x, s33%x, s12%x, s13%x, s23%x, &
269 u_e, v_e, w_e, c_xh)
270
271
272 ! Gradient of viscosity * 2
273 !call opgrad(ta1%x, ta2%x, ta3%x, mu%x, c_Xh)
274 call dudxyz(ta1%x, mu%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
275 call dudxyz(ta2%x, mu%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
276 call dudxyz(ta3%x, mu%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
277
278#ifdef HAVE_HIP
279 call pnpn_prs_stress_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, &
280 wa1%x_d, wa2%x_d, wa3%x_d, &
281 s11%x_d, s22%x_d, s33%x_d, s12%x_d, s13%x_d, s23%x_d, &
282 f_x%x_d, f_y%x_d, f_z%x_d, &
283 c_xh%B_d, c_xh%h1_d, rho%x_d, n)
284#elif HAVE_CUDA
285 call pnpn_prs_stress_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, &
286 wa1%x_d, wa2%x_d, wa3%x_d, &
287 s11%x_d, s22%x_d, s33%x_d, s12%x_d, s13%x_d, s23%x_d, &
288 f_x%x_d, f_y%x_d, f_z%x_d, &
289 c_xh%B_d, c_xh%h1_d, rho%x_d, n)
290#else
291 call neko_error('No device backend configured')
292#endif
293
294 call gs_xh%op(ta1, gs_op_add)
295 call gs_xh%op(ta2, gs_op_add)
296 call gs_xh%op(ta3, gs_op_add)
297
298 call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_xh%Binv_d, gdim, n)
299
300 ! Compute the components of the divergence of the rhs
301 call cdtp(wa1%x, ta1%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
302 call cdtp(wa2%x, ta2%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
303 call cdtp(wa3%x, ta3%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
304
305 ! The laplacian of the pressure
306 call ax%compute(p_res%x, p%x, c_xh, p%msh, p%Xh)
307
308#ifdef HAVE_HIP
309 call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
310#elif HAVE_CUDA
311 call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
312#elif HAVE_OPENCL
313 call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
314#endif
315
316 !
317 ! Surface velocity terms
318 !
319 call device_rzero(wa1%x_d, n)
320 call device_rzero(wa2%x_d, n)
321 call device_rzero(wa3%x_d, n)
322
323 call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, &
324 ta1%x_d , ta2%x_d, ta3%x_d)
325
326 dtbd = bd / dt
327 call device_rzero(ta1%x_d, n)
328 call device_rzero(ta2%x_d, n)
329 call device_rzero(ta3%x_d, n)
330
331 call bc_prs_surface%apply_surfvec_dev(ta1%x_d, ta2%x_d, ta3%x_d, &
332 u%x_D, v%x_d, w%x_d)
333
334#ifdef HAVE_HIP
335 call pnpn_prs_stress_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, &
336 wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
337#elif HAVE_CUDA
338 call pnpn_prs_stress_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, &
339 wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
340#else
341 call neko_error('No device backend configured')
342#endif
343
344 call neko_scratch_registry%relinquish_field(temp_indices)
345
347
348 subroutine pnpn_vel_res_stress_device_compute(Ax, u, v, w, u_res, v_res, &
349 w_res, p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
350 class(ax_t), intent(in) :: Ax
351 type(mesh_t), intent(inout) :: msh
352 type(space_t), intent(inout) :: Xh
353 type(field_t), intent(inout) :: p, u, v, w
354 type(field_t), intent(inout) :: u_res, v_res, w_res
355 type(field_t), intent(in) :: f_x, f_y, f_z
356 type(coef_t), intent(inout) :: c_Xh
357 type(field_t), intent(in) :: mu
358 type(field_t), intent(in) :: rho
359 real(kind=rp), intent(in) :: bd
360 real(kind=rp), intent(in) :: dt
361 real(kind=rp) :: bddt
362 integer :: temp_indices(3)
363 type(field_t), pointer :: ta1, ta2, ta3
364 integer, intent(in) :: n
365 integer :: i
366
367 call device_copy(c_xh%h1_d, mu%x_d, n)
368 call device_copy(c_xh%h2_d, rho%x_d, n)
369
370 bddt = bd / dt
371 call device_cmult(c_xh%h2_d, bddt, n)
372
373 c_xh%ifh2 = .true.
374
375 ! Viscous stresses
376 call ax%compute_vector(u_res%x, v_res%x, w_res%x, u%x, v%x, w%x, c_xh,&
377 msh, xh)
378
379 call neko_scratch_registry%request_field(ta1, temp_indices(1))
380 call neko_scratch_registry%request_field(ta2, temp_indices(2))
381 call neko_scratch_registry%request_field(ta3, temp_indices(3))
382
383 ! Pressure gradient
384 call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_xh)
385
386 ! Sum all the terms
387#ifdef HAVE_HIP
388 call pnpn_vel_res_update_hip(u_res%x_d, v_res%x_d, w_res%x_d, &
389 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
390#elif HAVE_CUDA
391 call pnpn_vel_res_update_cuda(u_res%x_d, v_res%x_d, w_res%x_d, &
392 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
393#elif HAVE_OPENCL
394 call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, &
395 ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
396#endif
397
398 call neko_scratch_registry%relinquish_field(temp_indices)
399
401
402end module pnpn_res_stress_device
Defines a Matrix-vector product.
Definition ax.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Dirichlet condition applied in the facet normal direction.
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e 0.5 * du_i/dx_j + du_j/dx_i.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
Definition operators.f90:80
Residuals in the Pn-Pn formulation (device version)
subroutine pnpn_vel_res_stress_device_compute(ax, u, v, w, u_res, v_res, w_res, p, f_x, f_y, f_z, c_xh, msh, xh, mu, rho, bd, dt, n)
subroutine pnpn_prs_res_stress_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_xh, gs_xh, bc_prs_surface, bc_sym_surface, ax, bd, dt, mu, rho, event)
Defines Pressure and velocity residuals in the Pn-Pn formulation.
Definition pnpn_res.f90:34
Defines a registry for storing and requesting temporary fields This can be used when you have a funct...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void pnpn_prs_res_part3_opencl(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
Definition pnpn_res.c:108
void pnpn_prs_res_part1_opencl(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
Definition pnpn_res.c:44
void pnpn_prs_res_part2_opencl(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.c:82
void pnpn_vel_res_update_opencl(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.c:135
void pnpn_prs_res_part2_cuda(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
Definition pnpn_res.cu:63
void pnpn_vel_res_update_cuda(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)
Definition pnpn_res.cu:92
void pnpn_prs_stress_res_part1_cuda(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *s11, void *s22, void *s33, void *s12, void *s13, void *s23, void *f_u, void *f_v, void *f_w, void *B, void *h1, void *rho, int *n)
void pnpn_prs_stress_res_part3_cuda(void *p_res, void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, real *dtbd, int *n)
Base type for a matrix-vector product providing .
Definition ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Dirichlet condition in facet normal direction.
Device implementation of the pressure residual for the PnPn fluid with full viscous stress formulatio...
Device implementation of the velocity residual for the PnPn fluid with full viscous stress formulatio...
Abstract type to compute pressure residual.
Definition pnpn_res.f90:48
Abstract type to compute velocity residual.
Definition pnpn_res.f90:54
The function space for the SEM solution fields.
Definition space.f90:62