Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_cpu.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module opr_cpu
35 use num_types, only : rp, dp, xp
36 use space, only : space_t
37 use coefs, only : coef_t
38 use math, only : sub3, copy, rzero, pi
39 use field, only : field_t
40 use gather_scatter, only : gs_op_add
42 use mathops, only : opcolv
43 implicit none
44 private
45
46 public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, &
47 opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2, &
48 opr_cpu_convect_scalar, opr_cpu_set_convect_rst, &
50
51
52 interface
53 module subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
54 type(coef_t), intent(in), target :: coef
55 real(kind=rp), intent(inout), &
56 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: du
57 real(kind=rp), intent(in), &
58 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
59 u, dr, ds, dt
60 end subroutine opr_cpu_dudxyz
61
62 module subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
63 type(coef_t), intent(in) :: coef
64 integer, intent(in) :: e_start, e_end
65 real(kind=rp), intent(inout) :: ux(coef%Xh%lxyz, e_end - e_start + 1)
66 real(kind=rp), intent(inout) :: uy(coef%Xh%lxyz, e_end - e_start + 1)
67 real(kind=rp), intent(inout) :: uz(coef%Xh%lxyz, e_end - e_start + 1)
68 real(kind=rp), intent(in) :: u(coef%Xh%lxyz, e_end - e_start + 1)
69 end subroutine opr_cpu_opgrad
70
71 module subroutine opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, e_start, e_end)
72 type(coef_t), intent(in) :: coef
73 integer, intent(in) :: e_start, e_end
74 real(kind=rp), intent(inout) :: dtx(coef%Xh%lxyz, e_end - e_start + 1)
75 real(kind=rp), intent(inout) :: x(coef%Xh%lxyz, e_end - e_start + 1)
76 real(kind=rp), intent(in) :: dr(coef%Xh%lxyz, e_end - e_start + 1)
77 real(kind=rp), intent(in) :: ds(coef%Xh%lxyz, e_end - e_start + 1)
78 real(kind=rp), intent(in) :: dt(coef%Xh%lxyz, e_end - e_start + 1)
79 end subroutine opr_cpu_cdtp
80
81 module subroutine opr_cpu_conv1(du, u, vx, vy, vz, xh, &
82 coef, e_start, e_end)
83 type(space_t), intent(in) :: Xh
84 type(coef_t), intent(in) :: coef
85 integer, intent(in) :: e_start, e_end
86 real(kind=rp), intent(inout) :: du(xh%lxyz, e_end - e_start + 1)
87 real(kind=rp), intent(inout) :: &
88 u(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
89 real(kind=rp), intent(inout) :: &
90 vx(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
91 real(kind=rp), intent(inout) :: &
92 vy(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
93 real(kind=rp), intent(inout) :: &
94 vz(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
95 end subroutine opr_cpu_conv1
96
97 module subroutine opr_cpu_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, &
98 coef_gll, coef_gl, gll_to_gl)
99 type(space_t), intent(in) :: Xh_GL
100 type(space_t), intent(in) :: Xh_GLL
101 type(coef_t), intent(in) :: coef_GLL
102 type(coef_t), intent(in) :: coef_GL
103 type(interpolator_t), intent(inout) :: GLL_to_GL
104 real(kind=rp), intent(inout) :: &
105 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
106 real(kind=rp), intent(inout) :: &
107 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
108 real(kind=rp), intent(inout) :: cr(xh_gl%lxyz, coef_gl%msh%nelv)
109 real(kind=rp), intent(inout) :: cs(xh_gl%lxyz, coef_gl%msh%nelv)
110 real(kind=rp), intent(inout) :: ct(xh_gl%lxyz, coef_gl%msh%nelv)
111
112 end subroutine opr_cpu_convect_scalar
113
114 module subroutine opr_cpu_set_convect_rst(cr, cs, ct, cx, cy, cz, &
115 xh, coef)
116 type(space_t), intent(inout) :: Xh
117 type(coef_t), intent(inout) :: coef
118 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
119 intent(inout) :: cr, cs, ct
120 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
121 intent(in) :: cx, cy, cz
122 end subroutine opr_cpu_set_convect_rst
123 end interface
124
125contains
126
127 subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
128 type(field_t), intent(inout) :: w1
129 type(field_t), intent(inout) :: w2
130 type(field_t), intent(inout) :: w3
131 type(field_t), intent(in) :: u1
132 type(field_t), intent(in) :: u2
133 type(field_t), intent(in) :: u3
134 type(field_t), intent(inout) :: work1
135 type(field_t), intent(inout) :: work2
136 type(coef_t), intent(in) :: c_xh
137 integer :: gdim, n
138
139 n = w1%dof%size()
140 gdim = c_xh%msh%gdim
141
142 ! this%work1=dw/dy ; this%work2=dv/dz
143 call opr_cpu_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
144 if (gdim .eq. 3) then
145 call opr_cpu_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, &
146 c_xh%dtdz, c_xh)
147 call sub3(w1%x, work1%x, work2%x, n)
148 else
149 call copy(w1%x, work1%x, n)
150 end if
151 ! this%work1=du/dz ; this%work2=dw/dx
152 if (gdim .eq. 3) then
153 call opr_cpu_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, &
154 c_xh%dtdz, c_xh)
155 call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
156 c_xh%dtdx, c_xh)
157 call sub3(w2%x, work1%x, work2%x, n)
158 else
159 call rzero(work1%x, n)
160 call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
161 c_xh%dtdx, c_xh)
162 call sub3(w2%x, work1%x, work2%x, n)
163 end if
164 ! this%work1=dv/dx ; this%work2=du/dy
165 call opr_cpu_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
166 call opr_cpu_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
167 call sub3(w3%x, work1%x, work2%x, n)
168 !! BC dependent, Needs to change if cyclic
169
170 call opcolv(w1%x, w2%x, w3%x, c_xh%B, gdim, n)
171 if(c_xh%cyclic) call opr_cpu_rotate_cyc_r4(w1%x, w2%x, w3%x, 1, c_xh)
172 call c_xh%gs_h%op(w1, gs_op_add)
173 call c_xh%gs_h%op(w2, gs_op_add)
174 call c_xh%gs_h%op(w3, gs_op_add)
175 if(c_xh%cyclic) call opr_cpu_rotate_cyc_r4(w1%x, w2%x, w3%x, 0, c_xh)
176 call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
177
178 end subroutine opr_cpu_curl
179
180 function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
181 type(space_t) :: xh
182 type(coef_t) :: coef
183 integer :: nelv, gdim
184 real(kind=rp) :: dt
185 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
186 real(kind=rp) :: cflr, cfls, cflt, cflm
187 real(kind=rp) :: ur, us, ut
188 real(kind=rp) :: cfl
189 integer :: i, j, k, e
190 cfl = 0d0
191 if (gdim .eq. 3) then
192 do e = 1, nelv
193 do k = 1, xh%lz
194 do j = 1, xh%ly
195 do i = 1, xh%lx
196 ur = ( u(i,j,k,e)*coef%drdx(i,j,k,e) &
197 + v(i,j,k,e)*coef%drdy(i,j,k,e) &
198 + w(i,j,k,e)*coef%drdz(i,j,k,e) ) &
199 * coef%jacinv(i,j,k,e)
200 us = ( u(i,j,k,e)*coef%dsdx(i,j,k,e) &
201 + v(i,j,k,e)*coef%dsdy(i,j,k,e) &
202 + w(i,j,k,e)*coef%dsdz(i,j,k,e) ) &
203 * coef%jacinv(i,j,k,e)
204 ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
205 + v(i,j,k,e)*coef%dtdy(i,j,k,e) &
206 + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) &
207 * coef%jacinv(i,j,k,e)
208
209 cflr = abs(dt*ur*xh%dr_inv(i))
210 cfls = abs(dt*us*xh%ds_inv(j))
211 cflt = abs(dt*ut*xh%dt_inv(k))
212
213 cflm = cflr + cfls + cflt
214 cfl = max(cfl, cflm)
215 end do
216 end do
217 end do
218 end do
219 else
220 do e = 1, nelv
221 do j = 1, xh%ly
222 do i = 1, xh%lx
223 ur = ( u(i,j,1,e)*coef%drdx(i,j,1,e) &
224 + v(i,j,1,e)*coef%drdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
225 us = ( u(i,j,1,e)*coef%dsdx(i,j,1,e) &
226 + v(i,j,1,e)*coef%dsdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
227
228 cflr = abs(dt*ur*xh%dr_inv(i))
229 cfls = abs(dt*us*xh%ds_inv(j))
230
231 cflm = cflr + cfls
232 cfl = max(cfl, cflm)
233
234 end do
235 end do
236 end do
237 end if
238 end function opr_cpu_cfl
239
240 subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
241 type(coef_t), intent(in) :: coef
242 type(field_t), intent(inout) :: lambda2
243 type(field_t), intent(in) :: u, v, w
244 real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
245 integer :: e, i
246 real(kind=xp) :: eigen(3), b, c, d, q, r, theta, l2
247 real(kind=xp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
248 real(kind=xp) :: a11, a22, a33, a12, a13, a23
249 real(kind=xp) :: msk1, msk2, msk3
250
251 do e = 1, coef%msh%nelv
252 call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
253 u%x(1,1,1,e), coef,e,e)
254 call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
255 v%x(1,1,1,e), coef,e,e)
256 call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
257 w%x(1,1,1,e), coef,e,e)
258
259 do i = 1, coef%Xh%lxyz
260 s11 = grad(i,1,1)
261 s22 = grad(i,2,2)
262 s33 = grad(i,3,3)
263
264
265 s12 = 0.5_xp*(grad(i,1,2) + grad(i,2,1))
266 s13 = 0.5_xp*(grad(i,1,3) + grad(i,3,1))
267 s23 = 0.5_xp*(grad(i,2,3) + grad(i,3,2))
268
269 o12 = 0.5_xp*(grad(i,1,2) - grad(i,2,1))
270 o13 = 0.5_xp*(grad(i,1,3) - grad(i,3,1))
271 o23 = 0.5_xp*(grad(i,2,3) - grad(i,3,2))
272
273 a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
274 a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23
275 a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23
276
277 a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
278 a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
279 a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
280
281
282 b = -(a11 + a22 + a33)
283 c = -(a12*a12 + a13*a13 + a23*a23 &
284 - a11 * a22 - a11 * a33 - a22 * a33)
285 d = -(2.0_xp * a12 * a13 * a23 - a11 * a23*a23 &
286 - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33)
287
288
289 q = (3.0_xp * c - b*b) / 9.0_xp
290 r = (9.0_xp * c * b - 27.0_xp * d - 2.0_xp * b*b*b) / 54.0_xp
291 theta = acos( r / sqrt(-q*q*q) )
292
293 eigen(1) = 2.0_xp * sqrt(-q) * cos(theta / 3.0_xp) - b / 3.0_xp
294 eigen(2) = 2.0_xp * sqrt(-q) * cos((theta + 2.0_xp * pi) / 3.0_xp) - b / 3.0_xp
295 eigen(3) = 2.0_xp * sqrt(-q) * cos((theta + 4.0_xp * pi) / 3.0_xp) - b / 3.0_xp
296 msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
297 .and. eigen(1) .le. eigen(3) .or. eigen(3) &
298 .le. eigen(1) .and. eigen(1) .le. eigen(2) )
299 msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
300 .and. eigen(2) .le. eigen(3) .or. eigen(3) &
301 .le. eigen(2) .and. eigen(2) .le. eigen(1))
302 msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
303 .and. eigen(3) .le. eigen(2) .or. eigen(2) &
304 .le. eigen(3) .and. eigen(3) .le. eigen(1))
305
306 l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
307 lambda2%x(i,1,1,e) = l2/(real(coef%B(i,1,1,e)**2,xp))
308 end do
309 end do
310
311 end subroutine opr_cpu_lambda2
312
313 subroutine opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
314 use iso_c_binding
315 real(kind=rp), dimension(:), intent(inout) :: vx, vy, vz
316 integer, intent(in) :: idir
317 type(coef_t), intent(in) :: coef
318 integer :: i, j, ncyc
319 real(kind=rp) :: vnor, vtan
320
321 ncyc = coef%cyc_msk(0) - 1
322
323 do i = 1, ncyc
324 j = coef%cyc_msk(i)
325
326 if (idir.eq.1) then
327 vnor = vx(j) * coef%R11(i) + vy(j) * coef%R12(i)
328 vtan = -vx(j) * coef%R12(i) + vy(j) * coef%R11(i)
329 else if(idir.eq.0) then
330 vnor = vx(j) * coef%R11(i) - vy(j) * coef%R12(i)
331 vtan = vx(j) * coef%R12(i) + vy(j) * coef%R11(i)
332 end if
333
334 vx(j) = vnor
335 vy(j) = vtan
336
337 end do
338 end subroutine opr_cpu_rotate_cyc_r1
339
340
341 subroutine opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
342 use iso_c_binding
343 real(kind=rp), dimension(:,:,:,:), intent(inout) :: vx, vy, vz
344 integer, intent(in) :: idir
345 type(coef_t), intent(in) :: coef
346 integer :: i, j, ncyc
347 real(kind=rp) :: vnor, vtan
348
349 ncyc = coef%cyc_msk(0) - 1
350
351 do i = 1, ncyc
352 j = coef%cyc_msk(i)
353
354 if (idir.eq.1) then
355 vnor = vx(j, 1, 1, 1) * coef%R11(i) + vy(j, 1, 1, 1) * coef%R12(i)
356 vtan =-vx(j, 1, 1, 1) * coef%R12(i) + vy(j, 1, 1, 1) * coef%R11(i)
357 else if(idir.eq.0) then
358 vnor = vx(j, 1, 1, 1) * coef%R11(i) - vy(j, 1, 1, 1) * coef%R12(i)
359 vtan = vx(j, 1, 1, 1) * coef%R12(i) + vy(j, 1, 1, 1) * coef%R11(i)
360 end if
361
362 vx(j, 1, 1, 1) = vnor
363 vy(j, 1, 1, 1) = vtan
364
365 end do
366
367 end subroutine opr_cpu_rotate_cyc_r4
368
369end module opr_cpu
double real
Coefficients.
Definition coef.f90:34
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
real(kind=rp), parameter, public pi
Definition math.f90:75
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:781
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition mathops.f90:65
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition mathops.f90:97
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:128
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:314
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:181
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:241
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:342
Defines a function space.
Definition space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63
#define max(a, b)
Definition tensor.cu:40