Neko  0.8.1
A portable framework for high-order spectral element flow simulations
operators.f90
Go to the documentation of this file.
1 ! Copyright (c) 2020-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module operators
37  use num_types, only : rp
47  use space, only : space_t
48  use coefs, only : coef_t
49  use field, only : field_t
50  use math, only : glsum, cmult, add2, cadd
51  use device, only : c_ptr, device_get_ptr
53  use comm
54  implicit none
55  private
56 
57  public :: dudxyz, opgrad, ortho, cdtp, conv1, curl, cfl,&
59 
60 contains
61 
69  subroutine dudxyz (du, u, dr, ds, dt, coef)
70  type(coef_t), intent(in), target :: coef
71  real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), &
72  intent(inout) :: du
73  real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), &
74  intent(in) :: u, dr, ds, dt
75 
76  if (neko_bcknd_sx .eq. 1) then
77  call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
78  else if (neko_bcknd_xsmm .eq. 1) then
79  call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
80  else if (neko_bcknd_device .eq. 1) then
81  call opr_device_dudxyz(du, u, dr, ds, dt, coef)
82  else
83  call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
84  end if
85 
86  end subroutine dudxyz
87 
99  subroutine opgrad(ux, uy, uz, u, coef, es, ee)
100  type(coef_t), intent(in) :: coef
101  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux
102  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy
103  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz
104  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: u
105  integer, optional :: es, ee
106  integer :: eblk_start, eblk_end
107 
108  if (present(es)) then
109  eblk_start = es
110  else
111  eblk_start = 1
112  end if
113 
114  if (present(ee)) then
115  eblk_end = ee
116  else
117  eblk_end = coef%msh%nelv
118  end if
119 
120  if (neko_bcknd_sx .eq. 1) then
121  call opr_sx_opgrad(ux, uy, uz, u, coef)
122  else if (neko_bcknd_xsmm .eq. 1) then
123  call opr_xsmm_opgrad(ux, uy, uz, u, coef)
124  else if (neko_bcknd_device .eq. 1) then
125  call opr_device_opgrad(ux, uy, uz, u, coef)
126  else
127  call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
128  end if
129 
130  end subroutine opgrad
131 
136  subroutine ortho(x, n, glb_n)
137  integer, intent(in) :: n
138  integer, intent(in) :: glb_n
139  real(kind=rp), dimension(n), intent(inout) :: x
140  real(kind=rp) :: rlam
141 
142  rlam = glsum(x, n)/glb_n
143  call cadd(x, -rlam, n)
144 
145  end subroutine ortho
146 
156  subroutine cdtp (dtx, x, dr, ds, dt, coef)
157  type(coef_t), intent(in) :: coef
158  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: dtx
159  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: x
160  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: dr
161  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: ds
162  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: dt
163 
164  if (neko_bcknd_sx .eq. 1) then
165  call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
166  else if (neko_bcknd_xsmm .eq. 1) then
167  call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
168  else if (neko_bcknd_device .eq. 1) then
169  call opr_device_cdtp(dtx, x, dr, ds, dt, coef)
170  else
171  call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef)
172  end if
173 
174  end subroutine cdtp
175 
186  subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
187  type(space_t), intent(inout) :: xh
188  type(coef_t), intent(inout) :: coef
189  real(kind=rp), intent(inout) :: du(xh%lxyz,coef%msh%nelv)
190  real(kind=rp), intent(inout) :: u(xh%lx,xh%ly,xh%lz,coef%msh%nelv)
191  real(kind=rp), intent(inout) :: vx(xh%lx,xh%ly,xh%lz,coef%msh%nelv)
192  real(kind=rp), intent(inout) :: vy(xh%lx,xh%ly,xh%lz,coef%msh%nelv)
193  real(kind=rp), intent(inout) :: vz(xh%lx,xh%ly,xh%lz,coef%msh%nelv)
194  integer, optional :: es, ee
195  integer :: eblk_end, eblk_start
196 
197  associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
198  if (present(es)) then
199  eblk_start = es
200  else
201  eblk_start = 1
202  end if
203 
204  if (present(ee)) then
205  eblk_end = ee
206  else
207  eblk_end = coef%msh%nelv
208  end if
209 
210  if (neko_bcknd_sx .eq. 1) then
211  call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
212  else if (neko_bcknd_xsmm .eq. 1) then
213  call opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
214  else if (neko_bcknd_device .eq. 1) then
215  call opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
216  else
217  call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
218  end if
219  end associate
220 
221  end subroutine conv1
222 
223  !! Compute the curl fo a vector field.
224  !! @param w1 Will store the x component of the curl.
225  !! @param w2 Will store the y component of the curl.
226  !! @param w3 Will store the z component of the curl.
227  !! @param u1 The x component of the vector field.
228  !! @param u2 The y component of the vector field.
229  !! @param u3 The z component of the vector field.
230  !! @param work1 A temporary array for computations.
231  !! @param work2 A temporary array for computations.
232  !! @param coef The SEM coefficients.
233  subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
234  type(field_t), intent(inout) :: w1
235  type(field_t), intent(inout) :: w2
236  type(field_t), intent(inout) :: w3
237  type(field_t), intent(inout) :: u1
238  type(field_t), intent(inout) :: u2
239  type(field_t), intent(inout) :: u3
240  type(field_t), intent(inout) :: work1
241  type(field_t), intent(inout) :: work2
242  type(coef_t), intent(in) :: coef
243 
244  if (neko_bcknd_sx .eq. 1) then
245  call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
246  else if (neko_bcknd_xsmm .eq. 1) then
247  call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
248  else if (neko_bcknd_device .eq. 1) then
249  call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
250  else
251  call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
252  end if
253 
254  end subroutine curl
255 
256  !! Compute the CFL number
257  !! @param dt The timestep.
258  !! @param u The x component of velocity.
259  !! @param v The y component of velocity.
260  !! @param w The z component of velocity.
261  !! @param Xh The SEM function space.
262  !! @param coef The SEM coefficients.
263  !! @param nelv The total number of elements.
264  !! @param gdim Number of geometric dimensions.
265  function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
266  type(space_t), intent(in) :: xh
267  type(coef_t), intent(in) :: coef
268  integer, intent(in) :: nelv, gdim
269  real(kind=rp), intent(in) :: dt
270  real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv), intent(in) :: u, v, w
271  real(kind=rp) :: cfl
272  integer :: ierr
273 
274  if (neko_bcknd_sx .eq. 1) then
275  cfl = opr_sx_cfl(dt, u, v, w, xh, coef, nelv, gdim)
276  else if (neko_bcknd_device .eq. 1) then
277  cfl = opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
278  else
279  cfl = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
280  end if
281 
282  if (.not. neko_device_mpi) then
283  call mpi_allreduce(mpi_in_place, cfl, 1, &
284  mpi_real_precision, mpi_max, neko_comm, ierr)
285  end if
286 
287  end function cfl
288 
301  subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
302  type(field_t), intent(in) :: u, v, w
303  type(coef_t), intent(in) :: coef
304  real(kind=rp), intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
305  real(kind=rp), intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
306  real(kind=rp), intent(inout) :: s33(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
307  real(kind=rp), intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
308  real(kind=rp), intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
309  real(kind=rp), intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
310 
311  type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
312 
313  integer :: nelv, lxyz
314 
315  if (neko_bcknd_device .eq. 1) then
316  s11_d = device_get_ptr(s11)
317  s22_d = device_get_ptr(s22)
318  s33_d = device_get_ptr(s33)
319  s12_d = device_get_ptr(s12)
320  s23_d = device_get_ptr(s23)
321  s13_d = device_get_ptr(s13)
322  endif
323 
324  nelv = u%msh%nelv
325  lxyz = u%Xh%lxyz
326 
327  ! we use s11 as a work array here
328  call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
329  call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
330  if (neko_bcknd_device .eq. 1) then
331  call device_add2(s12_d, s11_d, nelv*lxyz)
332  else
333  call add2(s12, s11, nelv*lxyz)
334  endif
335 
336  call dudxyz (s13, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
337  call dudxyz (s11, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
338  if (neko_bcknd_device .eq. 1) then
339  call device_add2(s13_d, s11_d, nelv*lxyz)
340  else
341  call add2(s13, s11, nelv*lxyz)
342  endif
343 
344  call dudxyz (s23, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
345  call dudxyz (s11, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
346  if (neko_bcknd_device .eq. 1) then
347  call device_add2(s23_d, s11_d, nelv*lxyz)
348  else
349  call add2(s23, s11, nelv*lxyz)
350  endif
351 
352  call dudxyz (s11, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
353  call dudxyz (s22, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
354  call dudxyz (s33, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
355 
356  if (neko_bcknd_device .eq. 1) then
357  call device_cmult(s11_d, 2.0_rp, nelv*lxyz)
358  call device_cmult(s22_d, 2.0_rp, nelv*lxyz)
359  call device_cmult(s33_d, 2.0_rp, nelv*lxyz)
360  else
361  call cmult(s11, 2.0_rp, nelv*lxyz)
362  call cmult(s22, 2.0_rp, nelv*lxyz)
363  call cmult(s33, 2.0_rp, nelv*lxyz)
364  endif
365 
366  end subroutine strain_rate
367 
374  subroutine lambda2op(lambda2, u, v, w, coef)
375  type(coef_t), intent(in) :: coef
376  type(field_t), intent(inout) :: lambda2
377  type(field_t), intent(in) :: u, v, w
378 
379  if (neko_bcknd_sx .eq. 1) then
380  call opr_sx_lambda2(lambda2, u, v, w, coef)
381  else if (neko_bcknd_device .eq. 1) then
382  call opr_device_lambda2(lambda2, u, v, w, coef)
383  else
384  call opr_cpu_lambda2(lambda2, u, v, w, coef)
385  end if
386 
387  end subroutine lambda2op
388 
389 end module operators
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
subroutine, public device_add2(a_d, b_d, n)
subroutine, public device_cmult(a_d, c, n)
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
Defines a field.
Definition: field.f90:34
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition: lambda2.f90:37
Definition: math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition: math.f90:246
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition: math.f90:258
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition: math.f90:282
subroutine, public add2(a, b, n)
Vector addition .
Definition: math.f90:503
Build configurations.
Definition: neko_config.f90:34
integer, parameter neko_bcknd_sx
Definition: neko_config.f90:39
integer, parameter neko_bcknd_device
Definition: neko_config.f90:44
logical, parameter neko_device_mpi
Definition: neko_config.f90:46
integer, parameter neko_bcknd_xsmm
Definition: neko_config.f90:40
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Operators.
Definition: operators.f90:34
subroutine, public cdtp(dtx, x, dr, ds, dt, coef)
Apply D^T to a scalar field, where D is the derivative matrix.
Definition: operators.f90:157
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the gradient of a scalar field.
Definition: operators.f90:100
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
Definition: operators.f90:234
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute double the strain rate tensor, i.e du_i/dx_j + du_j/dx_i.
Definition: operators.f90:302
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
Definition: operators.f90:375
subroutine, public ortho(x, n, glb_n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
Definition: operators.f90:137
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
Definition: operators.f90:70
real(kind=rp) function, public cfl(dt, u, v, w, Xh, coef, nelv, gdim)
Definition: operators.f90:266
subroutine, public conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
Compute the advection term.
Definition: operators.f90:187
Operators CPU backend.
Definition: opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_cpu.f90:422
subroutine, public opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
Definition: opr_cpu.f90:55
subroutine, public opr_cpu_conv1(du, u, vx, vy, vz, Xh, coef, e_start, e_end)
Definition: opr_cpu.f90:316
subroutine, public opr_cpu_cdtp(dtx, x, dr, ds, dt, coef)
Definition: opr_cpu.f90:259
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition: opr_cpu.f90:526
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
Definition: opr_cpu.f90:469
subroutine, public opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
Definition: opr_cpu.f90:112
Operators accelerator backends.
Definition: opr_device.F90:34
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
Definition: opr_device.F90:666
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
Definition: opr_device.F90:433
subroutine, public opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
Definition: opr_device.F90:469
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
Definition: opr_device.F90:326
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_device.F90:516
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
Definition: opr_device.F90:361
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Definition: opr_device.F90:403
Operators SX-Aurora backend.
Definition: opr_sx.f90:2
subroutine, public opr_sx_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
Definition: opr_sx.f90:290
subroutine, public opr_sx_opgrad(ux, uy, uz, u, coef)
Definition: opr_sx.f90:79
subroutine, public opr_sx_dudxyz(du, u, dr, ds, dt, coef)
Definition: opr_sx.f90:25
real(kind=rp) function, public opr_sx_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
Definition: opr_sx.f90:436
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_sx.f90:389
subroutine, public opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
Definition: opr_sx.f90:219
subroutine, public opr_sx_lambda2(lambda2, u, v, w, coef)
Definition: opr_sx.f90:547
Operators libxsmm backend.
Definition: opr_xsmm.F90:61
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition: opr_xsmm.F90:234
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition: opr_xsmm.F90:88
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition: opr_xsmm.F90:140
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_xsmm.F90:359
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
Definition: opr_xsmm.F90:288
Defines a function space.
Definition: space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:54
The function space for the SEM solution fields.
Definition: space.f90:62