Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_cpu.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module opr_cpu
35 use num_types, only : rp, dp, xp
36 use space, only : space_t
37 use coefs, only : coef_t
38 use math, only : sub3, copy, rzero, pi
39 use gather_scatter, only : gs_op_add
41 use mathops, only : opcolv
42 implicit none
43 private
44
45 public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, &
46 opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2, &
47 opr_cpu_convect_scalar, opr_cpu_set_convect_rst, &
49
50
51 interface
52 module subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
53 type(coef_t), intent(in), target :: coef
54 real(kind=rp), intent(inout), &
55 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: du
56 real(kind=rp), intent(in), &
57 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
58 u, dr, ds, dt
59 end subroutine opr_cpu_dudxyz
60
61 module subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
62 type(coef_t), intent(in) :: coef
63 integer, intent(in) :: e_start, e_end
64 real(kind=rp), intent(inout) :: ux(coef%Xh%lxyz, e_end - e_start + 1)
65 real(kind=rp), intent(inout) :: uy(coef%Xh%lxyz, e_end - e_start + 1)
66 real(kind=rp), intent(inout) :: uz(coef%Xh%lxyz, e_end - e_start + 1)
67 real(kind=rp), intent(in) :: u(coef%Xh%lxyz, e_end - e_start + 1)
68 end subroutine opr_cpu_opgrad
69
70 module subroutine opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, e_start, e_end)
71 type(coef_t), intent(in) :: coef
72 integer, intent(in) :: e_start, e_end
73 real(kind=rp), intent(inout) :: dtx(coef%Xh%lxyz, e_end - e_start + 1)
74 real(kind=rp), intent(inout) :: x(coef%Xh%lxyz, e_end - e_start + 1)
75 real(kind=rp), intent(in) :: dr(coef%Xh%lxyz, e_end - e_start + 1)
76 real(kind=rp), intent(in) :: ds(coef%Xh%lxyz, e_end - e_start + 1)
77 real(kind=rp), intent(in) :: dt(coef%Xh%lxyz, e_end - e_start + 1)
78 end subroutine opr_cpu_cdtp
79
80 module subroutine opr_cpu_conv1(du, u, vx, vy, vz, xh, &
81 coef, e_start, e_end)
82 type(space_t), intent(in) :: Xh
83 type(coef_t), intent(in) :: coef
84 integer, intent(in) :: e_start, e_end
85 real(kind=rp), intent(inout) :: du(xh%lxyz, e_end - e_start + 1)
86 real(kind=rp), intent(in) :: &
87 u(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
88 real(kind=rp), intent(in) :: &
89 vx(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
90 real(kind=rp), intent(in) :: &
91 vy(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
92 real(kind=rp), intent(in) :: &
93 vz(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
94 end subroutine opr_cpu_conv1
95
96 module subroutine opr_cpu_convect_scalar(du, u, cr, cs, ct, xh_gll, &
97 xh_gl, coef_gll, coef_gl, gll_to_gl)
98 type(space_t), intent(in) :: Xh_GL
99 type(space_t), intent(in) :: Xh_GLL
100 type(coef_t), intent(in) :: coef_GLL
101 type(coef_t), intent(in) :: coef_GL
102 type(interpolator_t), intent(inout) :: GLL_to_GL
103 real(kind=rp), intent(inout) :: &
104 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
105 real(kind=rp), intent(inout) :: &
106 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
107 real(kind=rp), intent(inout) :: cr(xh_gl%lxyz, coef_gl%msh%nelv)
108 real(kind=rp), intent(inout) :: cs(xh_gl%lxyz, coef_gl%msh%nelv)
109 real(kind=rp), intent(inout) :: ct(xh_gl%lxyz, coef_gl%msh%nelv)
110
111 end subroutine opr_cpu_convect_scalar
112
113 module subroutine opr_cpu_set_convect_rst(cr, cs, ct, cx, cy, cz, &
114 xh, coef)
115 type(space_t), intent(inout) :: Xh
116 type(coef_t), intent(inout) :: coef
117 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
118 intent(inout) :: cr, cs, ct
119 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
120 intent(in) :: cx, cy, cz
121 end subroutine opr_cpu_set_convect_rst
122 end interface
123
124contains
125
126 subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
127 type(coef_t), intent(in) :: c_xh
128 real(kind=rp), intent(inout), &
129 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w1
130 real(kind=rp), intent(inout), &
131 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w2
132 real(kind=rp), intent(inout), &
133 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w3
134 real(kind=rp), intent(in), &
135 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u1
136 real(kind=rp), intent(in), &
137 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u2
138 real(kind=rp), intent(in), &
139 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u3
140 real(kind=rp), intent(inout), &
141 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: work1
142 real(kind=rp), intent(inout), &
143 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: work2
144 integer :: gdim, n
145
146 n = c_xh%dof%size()
147 gdim = c_xh%msh%gdim
148
149 ! this%work1=dw/dy ; this%work2=dv/dz
150 call opr_cpu_dudxyz(work1, u3, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
151 if (gdim .eq. 3) then
152 call opr_cpu_dudxyz(work2, u2, c_xh%drdz, c_xh%dsdz, &
153 c_xh%dtdz, c_xh)
154 call sub3(w1, work1, work2, n)
155 else
156 call copy(w1, work1, n)
157 end if
158 ! this%work1=du/dz ; this%work2=dw/dx
159 if (gdim .eq. 3) then
160 call opr_cpu_dudxyz(work1, u1, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
161 call opr_cpu_dudxyz(work2, u3, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
162 call sub3(w2, work1, work2, n)
163 else
164 call rzero(work1, n)
165 call opr_cpu_dudxyz(work2, u3, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
166 call sub3(w2, work1, work2, n)
167 end if
168 ! this%work1=dv/dx ; this%work2=du/dy
169 call opr_cpu_dudxyz(work1, u2, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
170 call opr_cpu_dudxyz(work2, u1, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
171 call sub3(w3, work1, work2, n)
172 !! BC dependent, Needs to change if cyclic
173
174 call opcolv(w1, w2, w3, c_xh%B, gdim, n)
175 if (c_xh%cyclic) call opr_cpu_rotate_cyc_r4(w1, w2, w3, 1, c_xh)
176 call c_xh%gs_h%op(w1, n, gs_op_add)
177 call c_xh%gs_h%op(w2, n, gs_op_add)
178 call c_xh%gs_h%op(w3, n, gs_op_add)
179 if (c_xh%cyclic) call opr_cpu_rotate_cyc_r4(w1, w2, w3, 0, c_xh)
180 call opcolv(w1, w2, w3, c_xh%Binv, gdim, n)
181
182 end subroutine opr_cpu_curl
183
184 function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
185 type(space_t) :: xh
186 type(coef_t) :: coef
187 integer :: nelv, gdim
188 real(kind=rp) :: dt
189 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
190 real(kind=rp) :: cflr, cfls, cflt, cflm
191 real(kind=rp) :: ur, us, ut
192 real(kind=rp) :: cfl
193 integer :: i, j, k, e
194 cfl = 0d0
195 if (gdim .eq. 3) then
196 !$omp parallel do reduction(max:cfl) private(e, k, j, i, ur, us, ut, &
197 !$omp& cflr, cfls, cflt, cflm)
198 do e = 1, nelv
199 do k = 1, xh%lz
200 do j = 1, xh%ly
201 do i = 1, xh%lx
202 ur = ( u(i,j,k,e)*coef%drdx(i,j,k,e) &
203 + v(i,j,k,e)*coef%drdy(i,j,k,e) &
204 + w(i,j,k,e)*coef%drdz(i,j,k,e) ) &
205 * coef%jacinv(i,j,k,e)
206 us = ( u(i,j,k,e)*coef%dsdx(i,j,k,e) &
207 + v(i,j,k,e)*coef%dsdy(i,j,k,e) &
208 + w(i,j,k,e)*coef%dsdz(i,j,k,e) ) &
209 * coef%jacinv(i,j,k,e)
210 ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
211 + v(i,j,k,e)*coef%dtdy(i,j,k,e) &
212 + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) &
213 * coef%jacinv(i,j,k,e)
214
215 cflr = abs(dt*ur*xh%dr_inv(i))
216 cfls = abs(dt*us*xh%ds_inv(j))
217 cflt = abs(dt*ut*xh%dt_inv(k))
218
219 cflm = cflr + cfls + cflt
220 cfl = max(cfl, cflm)
221 end do
222 end do
223 end do
224 end do
225 !$omp end parallel do
226 else
227 !$omp parallel do reduction(max:cfl) private(e, j, i, ur, us, &
228 !$omp& cflr, cfls, cflm)
229 do e = 1, nelv
230 do j = 1, xh%ly
231 do i = 1, xh%lx
232 ur = ( u(i,j,1,e)*coef%drdx(i,j,1,e) &
233 + v(i,j,1,e)*coef%drdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
234 us = ( u(i,j,1,e)*coef%dsdx(i,j,1,e) &
235 + v(i,j,1,e)*coef%dsdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
236
237 cflr = abs(dt*ur*xh%dr_inv(i))
238 cfls = abs(dt*us*xh%ds_inv(j))
239
240 cflm = cflr + cfls
241 cfl = max(cfl, cflm)
242
243 end do
244 end do
245 end do
246 !$omp end parallel do
247 end if
248 end function opr_cpu_cfl
249
250 subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
251 type(coef_t), intent(in) :: coef
252 real(kind=rp), intent(inout), &
253 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: lambda2
254 real(kind=rp), intent(in), &
255 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: u
256 real(kind=rp), intent(in), &
257 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: v
258 real(kind=rp), intent(in), &
259 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: w
260 real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
261 integer :: e, i
262 real(kind=xp) :: eigen(3), b, c, d, q, r, theta, l2
263 real(kind=xp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
264 real(kind=xp) :: a11, a22, a33, a12, a13, a23
265 real(kind=xp) :: msk1, msk2, msk3
266
267 do e = 1, coef%msh%nelv
268 call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
269 u(1,1,1,e), coef,e,e)
270 call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
271 v(1,1,1,e), coef,e,e)
272 call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
273 w(1,1,1,e), coef,e,e)
274
275 do i = 1, coef%Xh%lxyz
276 s11 = grad(i,1,1)
277 s22 = grad(i,2,2)
278 s33 = grad(i,3,3)
279
280
281 s12 = 0.5_xp*(grad(i,1,2) + grad(i,2,1))
282 s13 = 0.5_xp*(grad(i,1,3) + grad(i,3,1))
283 s23 = 0.5_xp*(grad(i,2,3) + grad(i,3,2))
284
285 o12 = 0.5_xp*(grad(i,1,2) - grad(i,2,1))
286 o13 = 0.5_xp*(grad(i,1,3) - grad(i,3,1))
287 o23 = 0.5_xp*(grad(i,2,3) - grad(i,3,2))
288
289 a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
290 a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23
291 a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23
292
293 a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
294 a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
295 a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
296
297
298 b = -(a11 + a22 + a33)
299 c = -(a12*a12 + a13*a13 + a23*a23 &
300 - a11 * a22 - a11 * a33 - a22 * a33)
301 d = -(2.0_xp * a12 * a13 * a23 - a11 * a23*a23 &
302 - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33)
303
304
305 q = (3.0_xp * c - b*b) / 9.0_xp
306 r = (9.0_xp * c * b - 27.0_xp * d - 2.0_xp * b*b*b) / 54.0_xp
307 theta = acos( r / sqrt(-q*q*q) )
308
309 eigen(1) = 2.0_xp * sqrt(-q) * cos(theta / 3.0_xp) - b / 3.0_xp
310 eigen(2) = 2.0_xp * sqrt(-q) * &
311 cos((theta + 2.0_xp * pi) / 3.0_xp) - b / 3.0_xp
312 eigen(3) = 2.0_xp * sqrt(-q) * &
313 cos((theta + 4.0_xp * pi) / 3.0_xp) - b / 3.0_xp
314 msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
315 .and. eigen(1) .le. eigen(3) .or. eigen(3) &
316 .le. eigen(1) .and. eigen(1) .le. eigen(2) )
317 msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
318 .and. eigen(2) .le. eigen(3) .or. eigen(3) &
319 .le. eigen(2) .and. eigen(2) .le. eigen(1))
320 msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
321 .and. eigen(3) .le. eigen(2) .or. eigen(2) &
322 .le. eigen(3) .and. eigen(3) .le. eigen(1))
323
324 l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
325 lambda2(i, 1, 1, e) = l2 / real(coef%B(i, 1, 1, e)**2, xp)
326 end do
327 end do
328
329 end subroutine opr_cpu_lambda2
330
331 subroutine opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
332 real(kind=rp), dimension(:), intent(inout) :: vx, vy, vz
333 integer, intent(in) :: idir
334 type(coef_t), intent(in) :: coef
335 integer :: i, j, ncyc
336 real(kind=rp) :: vnor, vtan
337
338 ncyc = coef%cyc_msk(0) - 1
339
340 do i = 1, ncyc
341 j = coef%cyc_msk(i)
342
343 if (idir .eq. 1) then
344 vnor = vx(j) * coef%R11(i) + vy(j) * coef%R12(i)
345 vtan = -vx(j) * coef%R12(i) + vy(j) * coef%R11(i)
346 else if (idir .eq. 0) then
347 vnor = vx(j) * coef%R11(i) - vy(j) * coef%R12(i)
348 vtan = vx(j) * coef%R12(i) + vy(j) * coef%R11(i)
349 end if
350
351 vx(j) = vnor
352 vy(j) = vtan
353
354 end do
355 end subroutine opr_cpu_rotate_cyc_r1
356
357
358 subroutine opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
359 real(kind=rp), dimension(:,:,:,:), intent(inout) :: vx, vy, vz
360 integer, intent(in) :: idir
361 type(coef_t), intent(in) :: coef
362 integer :: i, j, ncyc
363 real(kind=rp) :: vnor, vtan
364
365 ncyc = coef%cyc_msk(0) - 1
366
367 do i = 1, ncyc
368 j = coef%cyc_msk(i)
369
370 if (idir .eq. 1) then
371 vnor = vx(j, 1, 1, 1) * coef%R11(i) + vy(j, 1, 1, 1) * coef%R12(i)
372 vtan = -vx(j, 1, 1, 1) * coef%R12(i) + vy(j, 1, 1, 1) * coef%R11(i)
373 else if (idir .eq. 0) then
374 vnor = vx(j, 1, 1, 1) * coef%R11(i) - vy(j, 1, 1, 1) * coef%R12(i)
375 vtan = vx(j, 1, 1, 1) * coef%R12(i) + vy(j, 1, 1, 1) * coef%R11(i)
376 end if
377
378 vx(j, 1, 1, 1) = vnor
379 vy(j, 1, 1, 1) = vtan
380
381 end do
382
383 end subroutine opr_cpu_rotate_cyc_r4
384
385end module opr_cpu
double real
Coefficients.
Definition coef.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
real(kind=rp), parameter, public pi
Definition math.f90:76
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:961
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:289
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition mathops.f90:65
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition mathops.f90:101
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:127
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:332
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:185
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:251
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:359
Defines a function space.
Definition space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63
#define max(a, b)
Definition tensor.cu:40