39 opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
42 opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
43 opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
61 use,
intrinsic :: iso_c_binding, only : c_ptr
77 subroutine dudxyz (du, u, dr, ds, dt, coef)
78 type(
coef_t),
intent(in),
target :: coef
79 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(inout) :: du
80 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(in) :: u, dr, ds, dt
83 call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
89 call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
100 subroutine div(res, ux, uy, uz, coef)
101 type(
coef_t),
intent(in),
target :: coef
102 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(inout) :: res
103 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(in) :: ux, uy, uz
115 call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)
118 call dudxyz(work%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
122 call add2(res, work%x, work%size())
126 call dudxyz(work%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
130 call add2(res, work%x, work%size())
143 subroutine grad(ux, uy, uz, u, coef)
144 type(
coef_t),
intent(in) :: coef
145 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
146 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
147 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
148 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
150 call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
151 call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
152 call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
168 subroutine opgrad(ux, uy, uz, u, coef, es, ee)
169 type(
coef_t),
intent(in) :: coef
170 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
171 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
172 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
173 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
174 integer,
optional :: es, ee
175 integer :: eblk_start, eblk_end
177 if (
present(es))
then
183 if (
present(ee))
then
186 eblk_end = coef%msh%nelv
190 call opr_sx_opgrad(ux, uy, uz, u, coef)
196 call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
205 subroutine ortho(x, glb_n_points, n)
206 integer,
intent(in) :: n
207 integer(kind=i8),
intent(in) :: glb_n_points
208 real(kind=
rp),
dimension(n),
intent(inout) :: x
216 c =
glsum(x, n)/glb_n_points
233 subroutine cdtp (dtx, x, dr, ds, dt, coef, es, ee)
234 type(
coef_t),
intent(in) :: coef
235 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: dtx
236 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: x
237 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
238 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: ds
239 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dt
240 integer,
optional :: es, ee
241 integer :: eblk_start, eblk_end
243 if (
present(es))
then
249 if (
present(ee))
then
252 eblk_end = coef%msh%nelv
256 call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
262 call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
277 subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
278 type(
space_t),
intent(in) :: xh
279 type(
coef_t),
intent(in) :: coef
280 real(kind=
rp),
intent(inout) :: du(xh%lxyz, coef%msh%nelv)
281 real(kind=
rp),
intent(inout) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
282 real(kind=
rp),
intent(inout) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
283 real(kind=
rp),
intent(inout) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
284 real(kind=
rp),
intent(inout) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
285 integer,
optional :: es, ee
286 integer :: eblk_end, eblk_start
288 associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
289 if (
present(es))
then
295 if (
present(ee))
then
298 eblk_end = coef%msh%nelv
302 call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
308 call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
331 type(space_t),
intent(in) :: xh_gl
332 type(space_t),
intent(in) :: xh_gll
333 type(coef_t),
intent(in) :: coef_GLL
334 type(coef_t),
intent(in) :: coef_GL
335 type(interpolator_t),
intent(inout) :: GLL_to_GL
336 real(kind=rp),
intent(inout) :: &
337 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
338 real(kind=rp),
intent(inout) :: &
339 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
340 real(kind=rp),
intent(inout) :: c(xh_gl%lxyz, coef_gl%msh%nelv, 3)
342 if (neko_bcknd_sx .eq. 1)
then
343 call opr_sx_convect_scalar(du, u, c, xh_gll, xh_gl, &
344 coef_gll, coef_gl, gll_to_gl)
345 else if (neko_bcknd_xsmm .eq. 1)
then
346 call opr_xsmm_convect_scalar(du, u, c, xh_gll, xh_gl, &
347 coef_gll, coef_gl, gll_to_gl)
349 call opr_cpu_convect_scalar(du, u, c, xh_gll, xh_gl, &
350 coef_gll, coef_gl, gll_to_gl)
333 type(coef_t),
intent(in) :: coef_GLL
…
365 subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
366 type(field_t),
intent(inout) :: w1
367 type(field_t),
intent(inout) :: w2
368 type(field_t),
intent(inout) :: w3
369 type(field_t),
intent(in) :: u1
370 type(field_t),
intent(in) :: u2
371 type(field_t),
intent(in) :: u3
372 type(field_t),
intent(inout) :: work1
373 type(field_t),
intent(inout) :: work2
374 type(coef_t),
intent(in) :: coef
375 type(c_ptr),
optional,
intent(inout) :: event
377 if (neko_bcknd_sx .eq. 1)
then
378 call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
379 else if (neko_bcknd_xsmm .eq. 1)
then
380 call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
381 else if (neko_bcknd_device .eq. 1)
then
382 if (
present(event))
then
383 call opr_device_curl(w1, w2, w3, u1, u2, u3, &
384 work1, work2, coef, event)
386 call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
389 call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
369 type(field_t),
intent(in) :: u1
…
403 function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
404 type(space_t),
intent(in) :: xh
405 type(coef_t),
intent(in) :: coef
406 integer,
intent(in) :: nelv, gdim
407 real(kind=rp),
intent(in) :: dt
408 real(kind=rp),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv),
intent(in) :: u, v, w
412 if (neko_bcknd_sx .eq. 1)
then
413 cfl = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
414 else if (neko_bcknd_device .eq. 1)
then
415 cfl = opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
417 cfl = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
420 if (.not. neko_device_mpi)
then
421 call mpi_allreduce(mpi_in_place,
cfl, 1, &
422 mpi_real_precision, mpi_max, neko_comm, ierr)
439 subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
440 type(field_t),
intent(in) :: u, v, w
441 type(coef_t),
intent(in) :: coef
442 real(kind=rp),
intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
443 real(kind=rp),
intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
444 real(kind=rp),
intent(inout) :: s33(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
445 real(kind=rp),
intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
446 real(kind=rp),
intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
447 real(kind=rp),
intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
449 type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
451 integer :: nelv, lxyz
453 if (neko_bcknd_device .eq. 1)
then
454 s11_d = device_get_ptr(s11)
455 s22_d = device_get_ptr(s22)
456 s33_d = device_get_ptr(s33)
457 s12_d = device_get_ptr(s12)
458 s23_d = device_get_ptr(s23)
459 s13_d = device_get_ptr(s13)
466 call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
467 call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
468 if (neko_bcknd_device .eq. 1)
then
469 call device_add2(s12_d, s11_d, nelv*lxyz)
471 call add2(s12, s11, nelv*lxyz)
474 call dudxyz (s13, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
475 call dudxyz (s11, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
476 if (neko_bcknd_device .eq. 1)
then
477 call device_add2(s13_d, s11_d, nelv*lxyz)
479 call add2(s13, s11, nelv*lxyz)
482 call dudxyz (s23, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
483 call dudxyz (s11, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
484 if (neko_bcknd_device .eq. 1)
then
485 call device_add2(s23_d, s11_d, nelv*lxyz)
487 call add2(s23, s11, nelv*lxyz)
490 call dudxyz (s11, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
491 call dudxyz (s22, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
492 call dudxyz (s33, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
494 if (neko_bcknd_device .eq. 1)
then
495 call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
496 call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
497 call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
499 call cmult(s12, 0.5_rp, nelv*lxyz)
500 call cmult(s13, 0.5_rp, nelv*lxyz)
501 call cmult(s23, 0.5_rp, nelv*lxyz)
512 subroutine lambda2op(lambda2, u, v, w, coef)
513 type(coef_t),
intent(in) :: coef
514 type(field_t),
intent(inout) ::
lambda2
515 type(field_t),
intent(in) :: u, v, w
517 if (neko_bcknd_sx .eq. 1)
then
518 call opr_sx_lambda2(
lambda2, u, v, w, coef)
519 else if (neko_bcknd_device .eq. 1)
then
520 call opr_device_lambda2(
lambda2, u, v, w, coef)
522 call opr_cpu_lambda2(
lambda2, u, v, w, coef)
538 type(space_t),
intent(inout) :: xh
539 type(coef_t),
intent(inout) :: coef
540 real(kind=rp),
dimension(Xh%lxyz, coef%msh%nelv), &
541 intent(inout) :: cr, cs, ct
542 real(kind=rp),
dimension(Xh%lxyz, coef%msh%nelv), &
543 intent(in) :: cx, cy, cz
545 if (neko_bcknd_sx .eq. 1)
then
546 call opr_sx_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
547 else if (neko_bcknd_xsmm .eq. 1)
then
548 call opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
550 call opr_cpu_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
570 subroutine runge_kutta(phi, c_r1, c_r23, c_r4, Xh_GLL, Xh_GL, coef, &
571 coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
572 type(space_t),
intent(in) :: xh_gll
573 type(space_t),
intent(inout) :: xh_gl
574 type(coef_t),
intent(in) :: coef
575 type(coef_t),
intent(inout) :: coef_gl
576 type(interpolator_t) :: gll_to_gl
577 real(kind=rp),
intent(inout) :: tau, dtau
578 integer,
intent(in) :: n, nel, n_gl
579 real(kind=rp),
dimension(n),
intent(inout) :: phi
580 real(kind=rp),
dimension(3 * n_GL),
intent(inout) :: c_r1, c_r23, c_r4
581 real(kind=rp) :: c1, c2, c3
583 real(kind=rp),
dimension(n) :: u1, r1, r2, r3, r4
584 real(kind=rp),
dimension(n_GL) :: u1_gl
592 call invcol3 (u1, phi, coef%B, n)
593 call gll_to_gl%map(u1_gl, u1, nel, xh_gl)
596 call col2(r1, coef%B, n)
599 call add3s2 (u1, phi, r1, c1, c2, n)
600 call invcol2 (u1, coef%B, n)
601 call gll_to_gl%map(u1_gl, u1, nel, xh_gl)
604 call col2(r2, coef%B, n)
607 call add3s2 (u1, phi, r2, c1, c2, n)
608 call invcol2 (u1, coef%B, n)
609 call gll_to_gl%map(u1_gl, u1, nel, xh_gl)
612 call col2(r3, coef%B, n)
615 call add3s2 (u1, phi, r3, c1, c3, n)
616 call invcol2 (u1, coef%B, n)
617 call gll_to_gl%map(u1_gl, u1, nel, xh_gl)
620 call col2(r4, coef%B, n)
625 phi(i) = phi(i) + c1 * (r1(i) + r4(i)) + c2 * (r2(i) + r3(i))
574 type(coef_t),
intent(in) :: coef
…
541 intent(inout) :: cr, cs, ct
…
443 real(kind=rp),
intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
…
407 real(kind=rp),
intent(in) :: dt
…
281 real(kind=
rp),
intent(inout) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
…
237 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
…
172 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
…
147 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
…
102 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(inout) :: res
…
77 subroutine dudxyz (du, u, dr, ds, dt, coef)
…
Return the device pointer for an associated Fortran array.
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_cadd(a_d, c, n)
Add a scalar to vector .
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Device abstraction, common interface for various accelerators.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
subroutine, public cmult(a, c, n)
Multiplication by constant c .
subroutine, public invcol2(a, b, n)
Vector division .
subroutine, public cadd(a, s, n)
Add a scalar to vector .
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public invcol3(a, b, c, n)
Invert a vector .
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_xsmm
integer, parameter, public i8
integer, parameter, public rp
Global precision used in computations.
subroutine, public runge_kutta(phi, c_r1, c_r23, c_r4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
subroutine, public ortho(x, glb_n_points, n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
subroutine convect_scalar(du, u, c, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the to the scalar field u, used in the OIFS scheme.
real(kind=rp) function, public cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine, public div(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
subroutine, public grad(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e 0.5 * du_i/dx_j + du_j/dx_i.
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Operators accelerator backends.
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Operators SX-Aurora backend.
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Operators libxsmm backend.
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
subroutine, public opr_xsmm_convect_scalar(du, u, c, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Defines a registry for storing and requesting temporary fields This can be used when you have a funct...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Interpolation between two space::space_t.
The function space for the SEM solution fields.