Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_cpu.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module opr_cpu
35 use num_types, only : rp, dp, xp
36 use space, only : space_t
37 use coefs, only : coef_t
38 use math, only : sub3, copy, rzero, pi
39 use gather_scatter, only : gs_op_add
41 use mathops, only : opcolv
42 implicit none
43 private
44
45 public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, &
46 opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2, &
47 opr_cpu_convect_scalar, opr_cpu_set_convect_rst, &
49
50
51 interface
52 module subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
53 type(coef_t), intent(in), target :: coef
54 real(kind=rp), intent(inout), &
55 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: du
56 real(kind=rp), intent(in), &
57 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
58 u, dr, ds, dt
59 end subroutine opr_cpu_dudxyz
60
61 module subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
62 type(coef_t), intent(in) :: coef
63 integer, intent(in) :: e_start, e_end
64 real(kind=rp), intent(inout) :: ux(coef%Xh%lxyz, e_end - e_start + 1)
65 real(kind=rp), intent(inout) :: uy(coef%Xh%lxyz, e_end - e_start + 1)
66 real(kind=rp), intent(inout) :: uz(coef%Xh%lxyz, e_end - e_start + 1)
67 real(kind=rp), intent(in) :: u(coef%Xh%lxyz, e_end - e_start + 1)
68 end subroutine opr_cpu_opgrad
69
70 module subroutine opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, e_start, e_end)
71 type(coef_t), intent(in) :: coef
72 integer, intent(in) :: e_start, e_end
73 real(kind=rp), intent(inout) :: dtx(coef%Xh%lxyz, e_end - e_start + 1)
74 real(kind=rp), intent(inout) :: x(coef%Xh%lxyz, e_end - e_start + 1)
75 real(kind=rp), intent(in) :: dr(coef%Xh%lxyz, e_end - e_start + 1)
76 real(kind=rp), intent(in) :: ds(coef%Xh%lxyz, e_end - e_start + 1)
77 real(kind=rp), intent(in) :: dt(coef%Xh%lxyz, e_end - e_start + 1)
78 end subroutine opr_cpu_cdtp
79
80 module subroutine opr_cpu_conv1(du, u, vx, vy, vz, xh, &
81 coef, e_start, e_end)
82 type(space_t), intent(in) :: Xh
83 type(coef_t), intent(in) :: coef
84 integer, intent(in) :: e_start, e_end
85 real(kind=rp), intent(inout) :: du(xh%lxyz, e_end - e_start + 1)
86 real(kind=rp), intent(in) :: &
87 u(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
88 real(kind=rp), intent(in) :: &
89 vx(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
90 real(kind=rp), intent(in) :: &
91 vy(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
92 real(kind=rp), intent(in) :: &
93 vz(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
94 end subroutine opr_cpu_conv1
95
96 module subroutine opr_cpu_convect_scalar(du, u, cr, cs, ct, xh_gll, &
97 xh_gl, coef_gll, coef_gl, gll_to_gl)
98 type(space_t), intent(in) :: Xh_GL
99 type(space_t), intent(in) :: Xh_GLL
100 type(coef_t), intent(in) :: coef_GLL
101 type(coef_t), intent(in) :: coef_GL
102 type(interpolator_t), intent(inout) :: GLL_to_GL
103 real(kind=rp), intent(inout) :: &
104 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
105 real(kind=rp), intent(inout) :: &
106 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
107 real(kind=rp), intent(inout) :: cr(xh_gl%lxyz, coef_gl%msh%nelv)
108 real(kind=rp), intent(inout) :: cs(xh_gl%lxyz, coef_gl%msh%nelv)
109 real(kind=rp), intent(inout) :: ct(xh_gl%lxyz, coef_gl%msh%nelv)
110
111 end subroutine opr_cpu_convect_scalar
112
113 module subroutine opr_cpu_set_convect_rst(cr, cs, ct, cx, cy, cz, &
114 xh, coef)
115 type(space_t), intent(inout) :: Xh
116 type(coef_t), intent(inout) :: coef
117 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
118 intent(inout) :: cr, cs, ct
119 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
120 intent(in) :: cx, cy, cz
121 end subroutine opr_cpu_set_convect_rst
122 end interface
123
124contains
125
126 subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
127 type(coef_t), intent(in) :: c_xh
128 real(kind=rp), intent(inout), &
129 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w1
130 real(kind=rp), intent(inout), &
131 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w2
132 real(kind=rp), intent(inout), &
133 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w3
134 real(kind=rp), intent(in), &
135 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u1
136 real(kind=rp), intent(in), &
137 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u2
138 real(kind=rp), intent(in), &
139 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u3
140 real(kind=rp), intent(inout), &
141 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: work1
142 real(kind=rp), intent(inout), &
143 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: work2
144 integer :: gdim, n
145
146 n = c_xh%dof%size()
147 gdim = c_xh%msh%gdim
148
149 ! this%work1=dw/dy ; this%work2=dv/dz
150 call opr_cpu_dudxyz(work1, u3, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
151 if (gdim .eq. 3) then
152 call opr_cpu_dudxyz(work2, u2, c_xh%drdz, c_xh%dsdz, &
153 c_xh%dtdz, c_xh)
154 call sub3(w1, work1, work2, n)
155 else
156 call copy(w1, work1, n)
157 end if
158 ! this%work1=du/dz ; this%work2=dw/dx
159 if (gdim .eq. 3) then
160 call opr_cpu_dudxyz(work1, u1, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
161 call opr_cpu_dudxyz(work2, u3, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
162 call sub3(w2, work1, work2, n)
163 else
164 call rzero(work1, n)
165 call opr_cpu_dudxyz(work2, u3, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
166 call sub3(w2, work1, work2, n)
167 end if
168 ! this%work1=dv/dx ; this%work2=du/dy
169 call opr_cpu_dudxyz(work1, u2, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
170 call opr_cpu_dudxyz(work2, u1, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
171 call sub3(w3, work1, work2, n)
172 !! BC dependent, Needs to change if cyclic
173
174 call opcolv(w1, w2, w3, c_xh%B, gdim, n)
175 if (c_xh%cyclic) call opr_cpu_rotate_cyc_r4(w1, w2, w3, 1, c_xh)
176 call c_xh%gs_h%op(w1, n, gs_op_add)
177 call c_xh%gs_h%op(w2, n, gs_op_add)
178 call c_xh%gs_h%op(w3, n, gs_op_add)
179 if (c_xh%cyclic) call opr_cpu_rotate_cyc_r4(w1, w2, w3, 0, c_xh)
180 call opcolv(w1, w2, w3, c_xh%Binv, gdim, n)
181
182 end subroutine opr_cpu_curl
183
184 function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
185 type(space_t) :: xh
186 type(coef_t) :: coef
187 integer :: nelv, gdim
188 real(kind=rp) :: dt
189 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
190 real(kind=rp) :: cflr, cfls, cflt, cflm
191 real(kind=rp) :: ur, us, ut
192 real(kind=rp) :: cfl
193 integer :: i, j, k, e
194 cfl = 0d0
195 if (gdim .eq. 3) then
196 do e = 1, nelv
197 do k = 1, xh%lz
198 do j = 1, xh%ly
199 do i = 1, xh%lx
200 ur = ( u(i,j,k,e)*coef%drdx(i,j,k,e) &
201 + v(i,j,k,e)*coef%drdy(i,j,k,e) &
202 + w(i,j,k,e)*coef%drdz(i,j,k,e) ) &
203 * coef%jacinv(i,j,k,e)
204 us = ( u(i,j,k,e)*coef%dsdx(i,j,k,e) &
205 + v(i,j,k,e)*coef%dsdy(i,j,k,e) &
206 + w(i,j,k,e)*coef%dsdz(i,j,k,e) ) &
207 * coef%jacinv(i,j,k,e)
208 ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
209 + v(i,j,k,e)*coef%dtdy(i,j,k,e) &
210 + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) &
211 * coef%jacinv(i,j,k,e)
212
213 cflr = abs(dt*ur*xh%dr_inv(i))
214 cfls = abs(dt*us*xh%ds_inv(j))
215 cflt = abs(dt*ut*xh%dt_inv(k))
216
217 cflm = cflr + cfls + cflt
218 cfl = max(cfl, cflm)
219 end do
220 end do
221 end do
222 end do
223 else
224 do e = 1, nelv
225 do j = 1, xh%ly
226 do i = 1, xh%lx
227 ur = ( u(i,j,1,e)*coef%drdx(i,j,1,e) &
228 + v(i,j,1,e)*coef%drdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
229 us = ( u(i,j,1,e)*coef%dsdx(i,j,1,e) &
230 + v(i,j,1,e)*coef%dsdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
231
232 cflr = abs(dt*ur*xh%dr_inv(i))
233 cfls = abs(dt*us*xh%ds_inv(j))
234
235 cflm = cflr + cfls
236 cfl = max(cfl, cflm)
237
238 end do
239 end do
240 end do
241 end if
242 end function opr_cpu_cfl
243
244 subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
245 type(coef_t), intent(in) :: coef
246 real(kind=rp), intent(inout), &
247 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: lambda2
248 real(kind=rp), intent(in), &
249 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: u
250 real(kind=rp), intent(in), &
251 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: v
252 real(kind=rp), intent(in), &
253 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: w
254 real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
255 integer :: e, i
256 real(kind=xp) :: eigen(3), b, c, d, q, r, theta, l2
257 real(kind=xp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
258 real(kind=xp) :: a11, a22, a33, a12, a13, a23
259 real(kind=xp) :: msk1, msk2, msk3
260
261 do e = 1, coef%msh%nelv
262 call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
263 u(1,1,1,e), coef,e,e)
264 call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
265 v(1,1,1,e), coef,e,e)
266 call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
267 w(1,1,1,e), coef,e,e)
268
269 do i = 1, coef%Xh%lxyz
270 s11 = grad(i,1,1)
271 s22 = grad(i,2,2)
272 s33 = grad(i,3,3)
273
274
275 s12 = 0.5_xp*(grad(i,1,2) + grad(i,2,1))
276 s13 = 0.5_xp*(grad(i,1,3) + grad(i,3,1))
277 s23 = 0.5_xp*(grad(i,2,3) + grad(i,3,2))
278
279 o12 = 0.5_xp*(grad(i,1,2) - grad(i,2,1))
280 o13 = 0.5_xp*(grad(i,1,3) - grad(i,3,1))
281 o23 = 0.5_xp*(grad(i,2,3) - grad(i,3,2))
282
283 a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
284 a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23
285 a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23
286
287 a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
288 a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
289 a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
290
291
292 b = -(a11 + a22 + a33)
293 c = -(a12*a12 + a13*a13 + a23*a23 &
294 - a11 * a22 - a11 * a33 - a22 * a33)
295 d = -(2.0_xp * a12 * a13 * a23 - a11 * a23*a23 &
296 - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33)
297
298
299 q = (3.0_xp * c - b*b) / 9.0_xp
300 r = (9.0_xp * c * b - 27.0_xp * d - 2.0_xp * b*b*b) / 54.0_xp
301 theta = acos( r / sqrt(-q*q*q) )
302
303 eigen(1) = 2.0_xp * sqrt(-q) * cos(theta / 3.0_xp) - b / 3.0_xp
304 eigen(2) = 2.0_xp * sqrt(-q) * &
305 cos((theta + 2.0_xp * pi) / 3.0_xp) - b / 3.0_xp
306 eigen(3) = 2.0_xp * sqrt(-q) * &
307 cos((theta + 4.0_xp * pi) / 3.0_xp) - b / 3.0_xp
308 msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
309 .and. eigen(1) .le. eigen(3) .or. eigen(3) &
310 .le. eigen(1) .and. eigen(1) .le. eigen(2) )
311 msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
312 .and. eigen(2) .le. eigen(3) .or. eigen(3) &
313 .le. eigen(2) .and. eigen(2) .le. eigen(1))
314 msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
315 .and. eigen(3) .le. eigen(2) .or. eigen(2) &
316 .le. eigen(3) .and. eigen(3) .le. eigen(1))
317
318 l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
319 lambda2(i, 1, 1, e) = l2 / real(coef%B(i, 1, 1, e)**2, xp)
320 end do
321 end do
322
323 end subroutine opr_cpu_lambda2
324
325 subroutine opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
326 real(kind=rp), dimension(:), intent(inout) :: vx, vy, vz
327 integer, intent(in) :: idir
328 type(coef_t), intent(in) :: coef
329 integer :: i, j, ncyc
330 real(kind=rp) :: vnor, vtan
331
332 ncyc = coef%cyc_msk(0) - 1
333
334 do i = 1, ncyc
335 j = coef%cyc_msk(i)
336
337 if (idir .eq. 1) then
338 vnor = vx(j) * coef%R11(i) + vy(j) * coef%R12(i)
339 vtan = -vx(j) * coef%R12(i) + vy(j) * coef%R11(i)
340 else if (idir .eq. 0) then
341 vnor = vx(j) * coef%R11(i) - vy(j) * coef%R12(i)
342 vtan = vx(j) * coef%R12(i) + vy(j) * coef%R11(i)
343 end if
344
345 vx(j) = vnor
346 vy(j) = vtan
347
348 end do
349 end subroutine opr_cpu_rotate_cyc_r1
350
351
352 subroutine opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
353 real(kind=rp), dimension(:,:,:,:), intent(inout) :: vx, vy, vz
354 integer, intent(in) :: idir
355 type(coef_t), intent(in) :: coef
356 integer :: i, j, ncyc
357 real(kind=rp) :: vnor, vtan
358
359 ncyc = coef%cyc_msk(0) - 1
360
361 do i = 1, ncyc
362 j = coef%cyc_msk(i)
363
364 if (idir .eq. 1) then
365 vnor = vx(j, 1, 1, 1) * coef%R11(i) + vy(j, 1, 1, 1) * coef%R12(i)
366 vtan = -vx(j, 1, 1, 1) * coef%R12(i) + vy(j, 1, 1, 1) * coef%R11(i)
367 else if (idir .eq. 0) then
368 vnor = vx(j, 1, 1, 1) * coef%R11(i) - vy(j, 1, 1, 1) * coef%R12(i)
369 vtan = vx(j, 1, 1, 1) * coef%R12(i) + vy(j, 1, 1, 1) * coef%R11(i)
370 end if
371
372 vx(j, 1, 1, 1) = vnor
373 vy(j, 1, 1, 1) = vtan
374
375 end do
376
377 end subroutine opr_cpu_rotate_cyc_r4
378
379end module opr_cpu
double real
Coefficients.
Definition coef.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
real(kind=rp), parameter, public pi
Definition math.f90:76
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:961
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:289
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition mathops.f90:65
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition mathops.f90:101
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:127
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:326
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:185
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:245
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:353
Defines a function space.
Definition space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63
#define max(a, b)
Definition tensor.cu:40