Neko  0.9.0
A portable framework for high-order spectral element flow simulations
opr_cpu.f90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module opr_cpu
35  use num_types, only : rp, dp, xp
36  use space, only : space_t
37  use coefs, only : coef_t
38  use math, only : sub3, copy, rzero, pi
39  use field, only : field_t
40  use gather_scatter, only : gs_op_add
41  use interpolation, only : interpolator_t
42  use mathops, only : opcolv
43  implicit none
44  private
45 
46  public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, &
47  opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2, &
48  opr_cpu_convect_scalar, opr_cpu_set_convect_rst
49 
50  interface
51  module subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
52  type(coef_t), intent(in), target :: coef
53  real(kind=rp), intent(inout), &
54  dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: du
55  real(kind=rp), intent(in), &
56  dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
57  u, dr, ds, dt
58  end subroutine opr_cpu_dudxyz
59 
60  module subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
61  type(coef_t), intent(in) :: coef
62  integer, intent(in) :: e_start, e_end
63  real(kind=rp), intent(inout) :: ux(coef%Xh%lxyz, e_end - e_start + 1)
64  real(kind=rp), intent(inout) :: uy(coef%Xh%lxyz, e_end - e_start + 1)
65  real(kind=rp), intent(inout) :: uz(coef%Xh%lxyz, e_end - e_start + 1)
66  real(kind=rp), intent(in) :: u(coef%Xh%lxyz, e_end - e_start + 1)
67  end subroutine opr_cpu_opgrad
68 
69  module subroutine opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, e_start, e_end)
70  type(coef_t), intent(in) :: coef
71  integer, intent(in) :: e_start, e_end
72  real(kind=rp), intent(inout) :: dtx(coef%Xh%lxyz, e_end - e_start + 1)
73  real(kind=rp), intent(inout) :: x(coef%Xh%lxyz, e_end - e_start + 1)
74  real(kind=rp), intent(in) :: dr(coef%Xh%lxyz, e_end - e_start + 1)
75  real(kind=rp), intent(in) :: ds(coef%Xh%lxyz, e_end - e_start + 1)
76  real(kind=rp), intent(in) :: dt(coef%Xh%lxyz, e_end - e_start + 1)
77  end subroutine opr_cpu_cdtp
78 
79  module subroutine opr_cpu_conv1(du, u, vx, vy, vz, xh, &
80  coef, e_start, e_end)
81  type(space_t), intent(in) :: Xh
82  type(coef_t), intent(in) :: coef
83  integer, intent(in) :: e_start, e_end
84  real(kind=rp), intent(inout) :: du(xh%lxyz, e_end - e_start + 1)
85  real(kind=rp), intent(inout) :: &
86  u(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
87  real(kind=rp), intent(inout) :: &
88  vx(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
89  real(kind=rp), intent(inout) :: &
90  vy(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
91  real(kind=rp), intent(inout) :: &
92  vz(xh%lx, xh%ly, xh%lz, e_end - e_start + 1)
93  end subroutine opr_cpu_conv1
94 
95  module subroutine opr_cpu_convect_scalar(du, u, c, xh_gll, xh_gl, &
96  coef_gll, coef_gl, gll_to_gl)
97  type(space_t), intent(in) :: Xh_GL
98  type(space_t), intent(in) :: Xh_GLL
99  type(coef_t), intent(in) :: coef_GLL
100  type(coef_t), intent(in) :: coef_GL
101  type(interpolator_t), intent(inout) :: GLL_to_GL
102  real(kind=rp), intent(inout) :: &
103  du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
104  real(kind=rp), intent(inout) :: &
105  u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
106  real(kind=rp), intent(inout) :: c(xh_gl%lxyz, coef_gl%msh%nelv, 3)
107 
108  end subroutine opr_cpu_convect_scalar
109 
110  module subroutine opr_cpu_set_convect_rst(cr, cs, ct, cx, cy, cz, &
111  xh, coef)
112  type(space_t), intent(inout) :: Xh
113  type(coef_t), intent(inout) :: coef
114  real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
115  intent(inout) :: cr, cs, ct
116  real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
117  intent(in) :: cx, cy, cz
118  end subroutine opr_cpu_set_convect_rst
119  end interface
120 
121 contains
122 
123  subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
124  type(field_t), intent(inout) :: w1
125  type(field_t), intent(inout) :: w2
126  type(field_t), intent(inout) :: w3
127  type(field_t), intent(inout) :: u1
128  type(field_t), intent(inout) :: u2
129  type(field_t), intent(inout) :: u3
130  type(field_t), intent(inout) :: work1
131  type(field_t), intent(inout) :: work2
132  type(coef_t), intent(in) :: c_xh
133  integer :: gdim, n
134 
135  n = w1%dof%size()
136  gdim = c_xh%msh%gdim
137 
138  ! this%work1=dw/dy ; this%work2=dv/dz
139  call opr_cpu_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
140  if (gdim .eq. 3) then
141  call opr_cpu_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, &
142  c_xh%dtdz, c_xh)
143  call sub3(w1%x, work1%x, work2%x, n)
144  else
145  call copy(w1%x, work1%x, n)
146  end if
147  ! this%work1=du/dz ; this%work2=dw/dx
148  if (gdim .eq. 3) then
149  call opr_cpu_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, &
150  c_xh%dtdz, c_xh)
151  call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
152  c_xh%dtdx, c_xh)
153  call sub3(w2%x, work1%x, work2%x, n)
154  else
155  call rzero(work1%x, n)
156  call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
157  c_xh%dtdx, c_xh)
158  call sub3(w2%x, work1%x, work2%x, n)
159  end if
160  ! this%work1=dv/dx ; this%work2=du/dy
161  call opr_cpu_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
162  call opr_cpu_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
163  call sub3(w3%x, work1%x, work2%x, n)
164  !! BC dependent, Needs to change if cyclic
165 
166  call opcolv(w1%x, w2%x, w3%x, c_xh%B, gdim, n)
167  call c_xh%gs_h%op(w1, gs_op_add)
168  call c_xh%gs_h%op(w2, gs_op_add)
169  call c_xh%gs_h%op(w3, gs_op_add)
170  call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
171 
172  end subroutine opr_cpu_curl
173 
174  function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
175  type(space_t) :: xh
176  type(coef_t) :: coef
177  integer :: nelv, gdim
178  real(kind=rp) :: dt
179  real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
180  real(kind=rp) :: cflr, cfls, cflt, cflm
181  real(kind=rp) :: ur, us, ut
182  real(kind=rp) :: cfl
183  integer :: i, j, k, e
184  cfl = 0d0
185  if (gdim .eq. 3) then
186  do e = 1, nelv
187  do k = 1, xh%lz
188  do j = 1, xh%ly
189  do i = 1, xh%lx
190  ur = ( u(i,j,k,e)*coef%drdx(i,j,k,e) &
191  + v(i,j,k,e)*coef%drdy(i,j,k,e) &
192  + w(i,j,k,e)*coef%drdz(i,j,k,e) ) &
193  * coef%jacinv(i,j,k,e)
194  us = ( u(i,j,k,e)*coef%dsdx(i,j,k,e) &
195  + v(i,j,k,e)*coef%dsdy(i,j,k,e) &
196  + w(i,j,k,e)*coef%dsdz(i,j,k,e) ) &
197  * coef%jacinv(i,j,k,e)
198  ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
199  + v(i,j,k,e)*coef%dtdy(i,j,k,e) &
200  + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) &
201  * coef%jacinv(i,j,k,e)
202 
203  cflr = abs(dt*ur*xh%dr_inv(i))
204  cfls = abs(dt*us*xh%ds_inv(j))
205  cflt = abs(dt*ut*xh%dt_inv(k))
206 
207  cflm = cflr + cfls + cflt
208  cfl = max(cfl, cflm)
209  end do
210  end do
211  end do
212  end do
213  else
214  do e = 1, nelv
215  do j = 1, xh%ly
216  do i = 1, xh%lx
217  ur = ( u(i,j,1,e)*coef%drdx(i,j,1,e) &
218  + v(i,j,1,e)*coef%drdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
219  us = ( u(i,j,1,e)*coef%dsdx(i,j,1,e) &
220  + v(i,j,1,e)*coef%dsdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
221 
222  cflr = abs(dt*ur*xh%dr_inv(i))
223  cfls = abs(dt*us*xh%ds_inv(j))
224 
225  cflm = cflr + cfls
226  cfl = max(cfl, cflm)
227 
228  end do
229  end do
230  end do
231  end if
232  end function opr_cpu_cfl
233 
234  subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
235  type(coef_t), intent(in) :: coef
236  type(field_t), intent(inout) :: lambda2
237  type(field_t), intent(in) :: u, v, w
238  real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
239  integer :: e, i
240  real(kind=xp) :: eigen(3), b, c, d, q, r, theta, l2
241  real(kind=xp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
242  real(kind=xp) :: a11, a22, a33, a12, a13, a23
243  real(kind=xp) :: msk1, msk2, msk3
244 
245  do e = 1, coef%msh%nelv
246  call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
247  u%x(1,1,1,e), coef,e,e)
248  call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
249  v%x(1,1,1,e), coef,e,e)
250  call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
251  w%x(1,1,1,e), coef,e,e)
252 
253  do i = 1, coef%Xh%lxyz
254  s11 = grad(i,1,1)
255  s22 = grad(i,2,2)
256  s33 = grad(i,3,3)
257 
258 
259  s12 = 0.5_xp*(grad(i,1,2) + grad(i,2,1))
260  s13 = 0.5_xp*(grad(i,1,3) + grad(i,3,1))
261  s23 = 0.5_xp*(grad(i,2,3) + grad(i,3,2))
262 
263  o12 = 0.5_xp*(grad(i,1,2) - grad(i,2,1))
264  o13 = 0.5_xp*(grad(i,1,3) - grad(i,3,1))
265  o23 = 0.5_xp*(grad(i,2,3) - grad(i,3,2))
266 
267  a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
268  a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23
269  a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23
270 
271  a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
272  a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
273  a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
274 
275 
276  b = -(a11 + a22 + a33)
277  c = -(a12*a12 + a13*a13 + a23*a23 &
278  - a11 * a22 - a11 * a33 - a22 * a33)
279  d = -(2.0_xp * a12 * a13 * a23 - a11 * a23*a23 &
280  - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33)
281 
282 
283  q = (3.0_xp * c - b*b) / 9.0_xp
284  r = (9.0_xp * c * b - 27.0_xp * d - 2.0_xp * b*b*b) / 54.0_xp
285  theta = acos( r / sqrt(-q*q*q) )
286 
287  eigen(1) = 2.0_xp * sqrt(-q) * cos(theta / 3.0_xp) - b / 3.0_xp
288  eigen(2) = 2.0_xp * sqrt(-q) * cos((theta + 2.0_xp * pi) / 3.0_xp) - b / 3.0_xp
289  eigen(3) = 2.0_xp * sqrt(-q) * cos((theta + 4.0_xp * pi) / 3.0_xp) - b / 3.0_xp
290  msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
291  .and. eigen(1) .le. eigen(3) .or. eigen(3) &
292  .le. eigen(1) .and. eigen(1) .le. eigen(2) )
293  msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
294  .and. eigen(2) .le. eigen(3) .or. eigen(3) &
295  .le. eigen(2) .and. eigen(2) .le. eigen(1))
296  msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
297  .and. eigen(3) .le. eigen(2) .or. eigen(2) &
298  .le. eigen(3) .and. eigen(3) .le. eigen(1))
299 
300  l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
301  lambda2%x(i,1,1,e) = l2/(real(coef%B(i,1,1,e)**2,xp))
302  end do
303  end do
304 
305  end subroutine opr_cpu_lambda2
306 
307 end module opr_cpu
double real
Definition: device_config.h:12
Coefficients.
Definition: coef.f90:34
Defines a field.
Definition: field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition: lambda2.f90:37
Definition: math.f90:60
real(kind=rp), parameter, public pi
Definition: math.f90:76
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition: math.f90:642
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:239
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:195
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition: mathops.f90:65
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition: mathops.f90:97
integer, parameter, public xp
Definition: num_types.f90:14
integer, parameter, public dp
Definition: num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Operators CPU backend.
Definition: opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_cpu.f90:124
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition: opr_cpu.f90:235
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
Definition: opr_cpu.f90:175
Defines a function space.
Definition: space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition: space.f90:62
#define max(a, b)
Definition: tensor.cu:40