Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_cpu.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module opr_cpu
35 use num_types, only : rp, dp, xp
36 use space, only : space_t
37 use coefs, only : coef_t
38 use math, only : sub3, copy, rzero, pi
39 use field, only : field_t
40 use gather_scatter, only : gs_op_add
42 use mathops, only : opcolv
43 implicit none
44 private
45
46 public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, &
47 opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2, &
48 opr_cpu_convect_scalar, opr_cpu_set_convect_rst, &
50
51
52 interface
53 module subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
54 type(coef_t), intent(in), target :: coef
55 real(kind=rp), intent(inout), &
56 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: du
57 real(kind=rp), intent(in), &
58 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
59 u, dr, ds, dt
60 end subroutine opr_cpu_dudxyz
61
62 module subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
63 type(coef_t), intent(in) :: coef
64 integer, intent(in) :: e_start, e_end
65 real(kind=rp), intent(inout) :: ux(coef%Xh%lxyz, e_end - e_start + 1)
66 real(kind=rp), intent(inout) :: uy(coef%Xh%lxyz, e_end - e_start + 1)
67 real(kind=rp), intent(inout) :: uz(coef%Xh%lxyz, e_end - e_start + 1)
68 real(kind=rp), intent(in) :: u(coef%Xh%lxyz, e_end - e_start + 1)
69 end subroutine opr_cpu_opgrad
70
71 module subroutine opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, e_start, e_end)
72 type(coef_t), intent(in) :: coef
73 integer, intent(in) :: e_start, e_end
74 real(kind=rp), intent(inout) :: dtx(coef%Xh%lxyz, e_end - e_start + 1)
75 real(kind=rp), intent(inout) :: x(coef%Xh%lxyz, e_end - e_start + 1)
76 real(kind=rp), intent(in) :: dr(coef%Xh%lxyz, e_end - e_start + 1)
77 real(kind=rp), intent(in) :: ds(coef%Xh%lxyz, e_end - e_start + 1)
78 real(kind=rp), intent(in) :: dt(coef%Xh%lxyz, e_end - e_start + 1)
79 end subroutine opr_cpu_cdtp
80
81 module subroutine opr_cpu_conv1(du, u, vx, vy, vz, xh, &
82 coef, e_start, e_end)
83 type(space_t), intent(in) :: Xh
84 type(coef_t), intent(in) :: coef
85 integer, intent(in) :: e_start, e_end
86 real(kind=rp), intent(inout) :: du(xh%lxyz, e_end - e_start + 1)
87 real(kind=rp), intent(inout) :: &
88 u(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
89 real(kind=rp), intent(inout) :: &
90 vx(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
91 real(kind=rp), intent(inout) :: &
92 vy(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
93 real(kind=rp), intent(inout) :: &
94 vz(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
95 end subroutine opr_cpu_conv1
96
97 module subroutine opr_cpu_convect_scalar(du, u, cr, cs, ct, xh_gll, &
98 xh_gl, coef_gll, coef_gl, gll_to_gl)
99 type(space_t), intent(in) :: Xh_GL
100 type(space_t), intent(in) :: Xh_GLL
101 type(coef_t), intent(in) :: coef_GLL
102 type(coef_t), intent(in) :: coef_GL
103 type(interpolator_t), intent(inout) :: GLL_to_GL
104 real(kind=rp), intent(inout) :: &
105 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
106 real(kind=rp), intent(inout) :: &
107 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
108 real(kind=rp), intent(inout) :: cr(xh_gl%lxyz, coef_gl%msh%nelv)
109 real(kind=rp), intent(inout) :: cs(xh_gl%lxyz, coef_gl%msh%nelv)
110 real(kind=rp), intent(inout) :: ct(xh_gl%lxyz, coef_gl%msh%nelv)
111
112 end subroutine opr_cpu_convect_scalar
113
114 module subroutine opr_cpu_set_convect_rst(cr, cs, ct, cx, cy, cz, &
115 xh, coef)
116 type(space_t), intent(inout) :: Xh
117 type(coef_t), intent(inout) :: coef
118 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
119 intent(inout) :: cr, cs, ct
120 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
121 intent(in) :: cx, cy, cz
122 end subroutine opr_cpu_set_convect_rst
123 end interface
124
125contains
126
127 subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
128 type(field_t), intent(inout) :: w1
129 type(field_t), intent(inout) :: w2
130 type(field_t), intent(inout) :: w3
131 type(field_t), intent(in) :: u1
132 type(field_t), intent(in) :: u2
133 type(field_t), intent(in) :: u3
134 type(field_t), intent(inout) :: work1
135 type(field_t), intent(inout) :: work2
136 type(coef_t), intent(in) :: c_xh
137 integer :: gdim, n
138
139 n = w1%dof%size()
140 gdim = c_xh%msh%gdim
141
142 ! this%work1=dw/dy ; this%work2=dv/dz
143 call opr_cpu_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
144 if (gdim .eq. 3) then
145 call opr_cpu_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, &
146 c_xh%dtdz, c_xh)
147 call sub3(w1%x, work1%x, work2%x, n)
148 else
149 call copy(w1%x, work1%x, n)
150 end if
151 ! this%work1=du/dz ; this%work2=dw/dx
152 if (gdim .eq. 3) then
153 call opr_cpu_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
154 call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
155 call sub3(w2%x, work1%x, work2%x, n)
156 else
157 call rzero(work1%x, n)
158 call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
159 call sub3(w2%x, work1%x, work2%x, n)
160 end if
161 ! this%work1=dv/dx ; this%work2=du/dy
162 call opr_cpu_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
163 call opr_cpu_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
164 call sub3(w3%x, work1%x, work2%x, n)
165 !! BC dependent, Needs to change if cyclic
166
167 call opcolv(w1%x, w2%x, w3%x, c_xh%B, gdim, n)
168 if (c_xh%cyclic) call opr_cpu_rotate_cyc_r4(w1%x, w2%x, w3%x, 1, c_xh)
169 call c_xh%gs_h%op(w1, gs_op_add)
170 call c_xh%gs_h%op(w2, gs_op_add)
171 call c_xh%gs_h%op(w3, gs_op_add)
172 if (c_xh%cyclic) call opr_cpu_rotate_cyc_r4(w1%x, w2%x, w3%x, 0, c_xh)
173 call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
174
175 end subroutine opr_cpu_curl
176
177 function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
178 type(space_t) :: xh
179 type(coef_t) :: coef
180 integer :: nelv, gdim
181 real(kind=rp) :: dt
182 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
183 real(kind=rp) :: cflr, cfls, cflt, cflm
184 real(kind=rp) :: ur, us, ut
185 real(kind=rp) :: cfl
186 integer :: i, j, k, e
187 cfl = 0d0
188 if (gdim .eq. 3) then
189 do e = 1, nelv
190 do k = 1, xh%lz
191 do j = 1, xh%ly
192 do i = 1, xh%lx
193 ur = ( u(i,j,k,e)*coef%drdx(i,j,k,e) &
194 + v(i,j,k,e)*coef%drdy(i,j,k,e) &
195 + w(i,j,k,e)*coef%drdz(i,j,k,e) ) &
196 * coef%jacinv(i,j,k,e)
197 us = ( u(i,j,k,e)*coef%dsdx(i,j,k,e) &
198 + v(i,j,k,e)*coef%dsdy(i,j,k,e) &
199 + w(i,j,k,e)*coef%dsdz(i,j,k,e) ) &
200 * coef%jacinv(i,j,k,e)
201 ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
202 + v(i,j,k,e)*coef%dtdy(i,j,k,e) &
203 + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) &
204 * coef%jacinv(i,j,k,e)
205
206 cflr = abs(dt*ur*xh%dr_inv(i))
207 cfls = abs(dt*us*xh%ds_inv(j))
208 cflt = abs(dt*ut*xh%dt_inv(k))
209
210 cflm = cflr + cfls + cflt
211 cfl = max(cfl, cflm)
212 end do
213 end do
214 end do
215 end do
216 else
217 do e = 1, nelv
218 do j = 1, xh%ly
219 do i = 1, xh%lx
220 ur = ( u(i,j,1,e)*coef%drdx(i,j,1,e) &
221 + v(i,j,1,e)*coef%drdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
222 us = ( u(i,j,1,e)*coef%dsdx(i,j,1,e) &
223 + v(i,j,1,e)*coef%dsdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
224
225 cflr = abs(dt*ur*xh%dr_inv(i))
226 cfls = abs(dt*us*xh%ds_inv(j))
227
228 cflm = cflr + cfls
229 cfl = max(cfl, cflm)
230
231 end do
232 end do
233 end do
234 end if
235 end function opr_cpu_cfl
236
237 subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
238 type(coef_t), intent(in) :: coef
239 type(field_t), intent(inout) :: lambda2
240 type(field_t), intent(in) :: u, v, w
241 real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
242 integer :: e, i
243 real(kind=xp) :: eigen(3), b, c, d, q, r, theta, l2
244 real(kind=xp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
245 real(kind=xp) :: a11, a22, a33, a12, a13, a23
246 real(kind=xp) :: msk1, msk2, msk3
247
248 do e = 1, coef%msh%nelv
249 call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
250 u%x(1,1,1,e), coef,e,e)
251 call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
252 v%x(1,1,1,e), coef,e,e)
253 call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
254 w%x(1,1,1,e), coef,e,e)
255
256 do i = 1, coef%Xh%lxyz
257 s11 = grad(i,1,1)
258 s22 = grad(i,2,2)
259 s33 = grad(i,3,3)
260
261
262 s12 = 0.5_xp*(grad(i,1,2) + grad(i,2,1))
263 s13 = 0.5_xp*(grad(i,1,3) + grad(i,3,1))
264 s23 = 0.5_xp*(grad(i,2,3) + grad(i,3,2))
265
266 o12 = 0.5_xp*(grad(i,1,2) - grad(i,2,1))
267 o13 = 0.5_xp*(grad(i,1,3) - grad(i,3,1))
268 o23 = 0.5_xp*(grad(i,2,3) - grad(i,3,2))
269
270 a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
271 a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23
272 a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23
273
274 a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
275 a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
276 a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
277
278
279 b = -(a11 + a22 + a33)
280 c = -(a12*a12 + a13*a13 + a23*a23 &
281 - a11 * a22 - a11 * a33 - a22 * a33)
282 d = -(2.0_xp * a12 * a13 * a23 - a11 * a23*a23 &
283 - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33)
284
285
286 q = (3.0_xp * c - b*b) / 9.0_xp
287 r = (9.0_xp * c * b - 27.0_xp * d - 2.0_xp * b*b*b) / 54.0_xp
288 theta = acos( r / sqrt(-q*q*q) )
289
290 eigen(1) = 2.0_xp * sqrt(-q) * cos(theta / 3.0_xp) - b / 3.0_xp
291 eigen(2) = 2.0_xp * sqrt(-q) * &
292 cos((theta + 2.0_xp * pi) / 3.0_xp) - b / 3.0_xp
293 eigen(3) = 2.0_xp * sqrt(-q) * &
294 cos((theta + 4.0_xp * pi) / 3.0_xp) - b / 3.0_xp
295 msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
296 .and. eigen(1) .le. eigen(3) .or. eigen(3) &
297 .le. eigen(1) .and. eigen(1) .le. eigen(2) )
298 msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
299 .and. eigen(2) .le. eigen(3) .or. eigen(3) &
300 .le. eigen(2) .and. eigen(2) .le. eigen(1))
301 msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
302 .and. eigen(3) .le. eigen(2) .or. eigen(2) &
303 .le. eigen(3) .and. eigen(3) .le. eigen(1))
304
305 l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
306 lambda2%x(i, 1, 1, e) = l2 / real(coef%B(i, 1, 1, e)**2, xp)
307 end do
308 end do
309
310 end subroutine opr_cpu_lambda2
311
312 subroutine opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
313 real(kind=rp), dimension(:), intent(inout) :: vx, vy, vz
314 integer, intent(in) :: idir
315 type(coef_t), intent(in) :: coef
316 integer :: i, j, ncyc
317 real(kind=rp) :: vnor, vtan
318
319 ncyc = coef%cyc_msk(0) - 1
320
321 do i = 1, ncyc
322 j = coef%cyc_msk(i)
323
324 if (idir .eq. 1) then
325 vnor = vx(j) * coef%R11(i) + vy(j) * coef%R12(i)
326 vtan = -vx(j) * coef%R12(i) + vy(j) * coef%R11(i)
327 else if (idir .eq. 0) then
328 vnor = vx(j) * coef%R11(i) - vy(j) * coef%R12(i)
329 vtan = vx(j) * coef%R12(i) + vy(j) * coef%R11(i)
330 end if
331
332 vx(j) = vnor
333 vy(j) = vtan
334
335 end do
336 end subroutine opr_cpu_rotate_cyc_r1
337
338
339 subroutine opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
340 real(kind=rp), dimension(:,:,:,:), intent(inout) :: vx, vy, vz
341 integer, intent(in) :: idir
342 type(coef_t), intent(in) :: coef
343 integer :: i, j, ncyc
344 real(kind=rp) :: vnor, vtan
345
346 ncyc = coef%cyc_msk(0) - 1
347
348 do i = 1, ncyc
349 j = coef%cyc_msk(i)
350
351 if (idir .eq. 1) then
352 vnor = vx(j, 1, 1, 1) * coef%R11(i) + vy(j, 1, 1, 1) * coef%R12(i)
353 vtan = -vx(j, 1, 1, 1) * coef%R12(i) + vy(j, 1, 1, 1) * coef%R11(i)
354 else if (idir .eq. 0) then
355 vnor = vx(j, 1, 1, 1) * coef%R11(i) - vy(j, 1, 1, 1) * coef%R12(i)
356 vtan = vx(j, 1, 1, 1) * coef%R12(i) + vy(j, 1, 1, 1) * coef%R11(i)
357 end if
358
359 vx(j, 1, 1, 1) = vnor
360 vy(j, 1, 1, 1) = vtan
361
362 end do
363
364 end subroutine opr_cpu_rotate_cyc_r4
365
366end module opr_cpu
double real
Coefficients.
Definition coef.f90:34
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
real(kind=rp), parameter, public pi
Definition math.f90:75
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:781
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition mathops.f90:65
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition mathops.f90:97
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:128
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:313
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:178
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:238
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:340
Defines a function space.
Definition space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63
#define max(a, b)
Definition tensor.cu:40