Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_cpu.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module opr_cpu
35 use num_types, only : rp, dp, xp
36 use space, only : space_t
37 use coefs, only : coef_t
38 use math, only : sub3, copy, rzero, pi
39 use field, only : field_t
40 use gather_scatter, only : gs_op_add
42 use mathops, only : opcolv
43 implicit none
44 private
45
46 public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, &
47 opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2, &
48 opr_cpu_convect_scalar, opr_cpu_set_convect_rst
49
50 interface
51 module subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
52 type(coef_t), intent(in), target :: coef
53 real(kind=rp), intent(inout), &
54 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: du
55 real(kind=rp), intent(in), &
56 dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
57 u, dr, ds, dt
58 end subroutine opr_cpu_dudxyz
59
60 module subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
61 type(coef_t), intent(in) :: coef
62 integer, intent(in) :: e_start, e_end
63 real(kind=rp), intent(inout) :: ux(coef%Xh%lxyz, e_end - e_start + 1)
64 real(kind=rp), intent(inout) :: uy(coef%Xh%lxyz, e_end - e_start + 1)
65 real(kind=rp), intent(inout) :: uz(coef%Xh%lxyz, e_end - e_start + 1)
66 real(kind=rp), intent(in) :: u(coef%Xh%lxyz, e_end - e_start + 1)
67 end subroutine opr_cpu_opgrad
68
69 module subroutine opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, e_start, e_end)
70 type(coef_t), intent(in) :: coef
71 integer, intent(in) :: e_start, e_end
72 real(kind=rp), intent(inout) :: dtx(coef%Xh%lxyz, e_end - e_start + 1)
73 real(kind=rp), intent(inout) :: x(coef%Xh%lxyz, e_end - e_start + 1)
74 real(kind=rp), intent(in) :: dr(coef%Xh%lxyz, e_end - e_start + 1)
75 real(kind=rp), intent(in) :: ds(coef%Xh%lxyz, e_end - e_start + 1)
76 real(kind=rp), intent(in) :: dt(coef%Xh%lxyz, e_end - e_start + 1)
77 end subroutine opr_cpu_cdtp
78
79 module subroutine opr_cpu_conv1(du, u, vx, vy, vz, xh, &
80 coef, e_start, e_end)
81 type(space_t), intent(in) :: Xh
82 type(coef_t), intent(in) :: coef
83 integer, intent(in) :: e_start, e_end
84 real(kind=rp), intent(inout) :: du(xh%lxyz, e_end - e_start + 1)
85 real(kind=rp), intent(inout) :: &
86 u(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
87 real(kind=rp), intent(inout) :: &
88 vx(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
89 real(kind=rp), intent(inout) :: &
90 vy(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
91 real(kind=rp), intent(inout) :: &
92 vz(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
93 end subroutine opr_cpu_conv1
94
95 module subroutine opr_cpu_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, &
96 coef_gll, coef_gl, gll_to_gl)
97 type(space_t), intent(in) :: Xh_GL
98 type(space_t), intent(in) :: Xh_GLL
99 type(coef_t), intent(in) :: coef_GLL
100 type(coef_t), intent(in) :: coef_GL
101 type(interpolator_t), intent(inout) :: GLL_to_GL
102 real(kind=rp), intent(inout) :: &
103 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
104 real(kind=rp), intent(inout) :: &
105 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
106 real(kind=rp), intent(inout) :: cr(xh_gl%lxyz, coef_gl%msh%nelv)
107 real(kind=rp), intent(inout) :: cs(xh_gl%lxyz, coef_gl%msh%nelv)
108 real(kind=rp), intent(inout) :: ct(xh_gl%lxyz, coef_gl%msh%nelv)
109
110 end subroutine opr_cpu_convect_scalar
111
112 module subroutine opr_cpu_set_convect_rst(cr, cs, ct, cx, cy, cz, &
113 xh, coef)
114 type(space_t), intent(inout) :: Xh
115 type(coef_t), intent(inout) :: coef
116 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
117 intent(inout) :: cr, cs, ct
118 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
119 intent(in) :: cx, cy, cz
120 end subroutine opr_cpu_set_convect_rst
121 end interface
122
123contains
124
125 subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
126 type(field_t), intent(inout) :: w1
127 type(field_t), intent(inout) :: w2
128 type(field_t), intent(inout) :: w3
129 type(field_t), intent(in) :: u1
130 type(field_t), intent(in) :: u2
131 type(field_t), intent(in) :: u3
132 type(field_t), intent(inout) :: work1
133 type(field_t), intent(inout) :: work2
134 type(coef_t), intent(in) :: c_xh
135 integer :: gdim, n
136
137 n = w1%dof%size()
138 gdim = c_xh%msh%gdim
139
140 ! this%work1=dw/dy ; this%work2=dv/dz
141 call opr_cpu_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
142 if (gdim .eq. 3) then
143 call opr_cpu_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, &
144 c_xh%dtdz, c_xh)
145 call sub3(w1%x, work1%x, work2%x, n)
146 else
147 call copy(w1%x, work1%x, n)
148 end if
149 ! this%work1=du/dz ; this%work2=dw/dx
150 if (gdim .eq. 3) then
151 call opr_cpu_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, &
152 c_xh%dtdz, c_xh)
153 call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
154 c_xh%dtdx, c_xh)
155 call sub3(w2%x, work1%x, work2%x, n)
156 else
157 call rzero(work1%x, n)
158 call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
159 c_xh%dtdx, c_xh)
160 call sub3(w2%x, work1%x, work2%x, n)
161 end if
162 ! this%work1=dv/dx ; this%work2=du/dy
163 call opr_cpu_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
164 call opr_cpu_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
165 call sub3(w3%x, work1%x, work2%x, n)
166 !! BC dependent, Needs to change if cyclic
167
168 call opcolv(w1%x, w2%x, w3%x, c_xh%B, gdim, n)
169 call c_xh%gs_h%op(w1, gs_op_add)
170 call c_xh%gs_h%op(w2, gs_op_add)
171 call c_xh%gs_h%op(w3, gs_op_add)
172 call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
173
174 end subroutine opr_cpu_curl
175
176 function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
177 type(space_t) :: xh
178 type(coef_t) :: coef
179 integer :: nelv, gdim
180 real(kind=rp) :: dt
181 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
182 real(kind=rp) :: cflr, cfls, cflt, cflm
183 real(kind=rp) :: ur, us, ut
184 real(kind=rp) :: cfl
185 integer :: i, j, k, e
186 cfl = 0d0
187 if (gdim .eq. 3) then
188 do e = 1, nelv
189 do k = 1, xh%lz
190 do j = 1, xh%ly
191 do i = 1, xh%lx
192 ur = ( u(i,j,k,e)*coef%drdx(i,j,k,e) &
193 + v(i,j,k,e)*coef%drdy(i,j,k,e) &
194 + w(i,j,k,e)*coef%drdz(i,j,k,e) ) &
195 * coef%jacinv(i,j,k,e)
196 us = ( u(i,j,k,e)*coef%dsdx(i,j,k,e) &
197 + v(i,j,k,e)*coef%dsdy(i,j,k,e) &
198 + w(i,j,k,e)*coef%dsdz(i,j,k,e) ) &
199 * coef%jacinv(i,j,k,e)
200 ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
201 + v(i,j,k,e)*coef%dtdy(i,j,k,e) &
202 + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) &
203 * coef%jacinv(i,j,k,e)
204
205 cflr = abs(dt*ur*xh%dr_inv(i))
206 cfls = abs(dt*us*xh%ds_inv(j))
207 cflt = abs(dt*ut*xh%dt_inv(k))
208
209 cflm = cflr + cfls + cflt
210 cfl = max(cfl, cflm)
211 end do
212 end do
213 end do
214 end do
215 else
216 do e = 1, nelv
217 do j = 1, xh%ly
218 do i = 1, xh%lx
219 ur = ( u(i,j,1,e)*coef%drdx(i,j,1,e) &
220 + v(i,j,1,e)*coef%drdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
221 us = ( u(i,j,1,e)*coef%dsdx(i,j,1,e) &
222 + v(i,j,1,e)*coef%dsdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
223
224 cflr = abs(dt*ur*xh%dr_inv(i))
225 cfls = abs(dt*us*xh%ds_inv(j))
226
227 cflm = cflr + cfls
228 cfl = max(cfl, cflm)
229
230 end do
231 end do
232 end do
233 end if
234 end function opr_cpu_cfl
235
236 subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
237 type(coef_t), intent(in) :: coef
238 type(field_t), intent(inout) :: lambda2
239 type(field_t), intent(in) :: u, v, w
240 real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
241 integer :: e, i
242 real(kind=xp) :: eigen(3), b, c, d, q, r, theta, l2
243 real(kind=xp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
244 real(kind=xp) :: a11, a22, a33, a12, a13, a23
245 real(kind=xp) :: msk1, msk2, msk3
246
247 do e = 1, coef%msh%nelv
248 call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
249 u%x(1,1,1,e), coef,e,e)
250 call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
251 v%x(1,1,1,e), coef,e,e)
252 call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
253 w%x(1,1,1,e), coef,e,e)
254
255 do i = 1, coef%Xh%lxyz
256 s11 = grad(i,1,1)
257 s22 = grad(i,2,2)
258 s33 = grad(i,3,3)
259
260
261 s12 = 0.5_xp*(grad(i,1,2) + grad(i,2,1))
262 s13 = 0.5_xp*(grad(i,1,3) + grad(i,3,1))
263 s23 = 0.5_xp*(grad(i,2,3) + grad(i,3,2))
264
265 o12 = 0.5_xp*(grad(i,1,2) - grad(i,2,1))
266 o13 = 0.5_xp*(grad(i,1,3) - grad(i,3,1))
267 o23 = 0.5_xp*(grad(i,2,3) - grad(i,3,2))
268
269 a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
270 a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23
271 a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23
272
273 a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
274 a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
275 a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
276
277
278 b = -(a11 + a22 + a33)
279 c = -(a12*a12 + a13*a13 + a23*a23 &
280 - a11 * a22 - a11 * a33 - a22 * a33)
281 d = -(2.0_xp * a12 * a13 * a23 - a11 * a23*a23 &
282 - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33)
283
284
285 q = (3.0_xp * c - b*b) / 9.0_xp
286 r = (9.0_xp * c * b - 27.0_xp * d - 2.0_xp * b*b*b) / 54.0_xp
287 theta = acos( r / sqrt(-q*q*q) )
288
289 eigen(1) = 2.0_xp * sqrt(-q) * cos(theta / 3.0_xp) - b / 3.0_xp
290 eigen(2) = 2.0_xp * sqrt(-q) * cos((theta + 2.0_xp * pi) / 3.0_xp) - b / 3.0_xp
291 eigen(3) = 2.0_xp * sqrt(-q) * cos((theta + 4.0_xp * pi) / 3.0_xp) - b / 3.0_xp
292 msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
293 .and. eigen(1) .le. eigen(3) .or. eigen(3) &
294 .le. eigen(1) .and. eigen(1) .le. eigen(2) )
295 msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
296 .and. eigen(2) .le. eigen(3) .or. eigen(3) &
297 .le. eigen(2) .and. eigen(2) .le. eigen(1))
298 msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
299 .and. eigen(3) .le. eigen(2) .or. eigen(2) &
300 .le. eigen(3) .and. eigen(3) .le. eigen(1))
301
302 l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
303 lambda2%x(i,1,1,e) = l2/(real(coef%B(i,1,1,e)**2,xp))
304 end do
305 end do
306
307 end subroutine opr_cpu_lambda2
308
309
310
311end module opr_cpu
double real
Coefficients.
Definition coef.f90:34
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
real(kind=rp), parameter, public pi
Definition math.f90:75
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:781
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition mathops.f90:65
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition mathops.f90:97
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:126
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:177
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:237
Defines a function space.
Definition space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63
#define max(a, b)
Definition tensor.cu:40