39       opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
 
   42       opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
 
   43       opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
 
   64  use mpi_f08, 
only : mpi_allreduce, mpi_in_place, mpi_max, mpi_sum
 
   65  use, 
intrinsic :: iso_c_binding, only : c_ptr
 
   81  subroutine dudxyz (du, u, dr, ds, dt, coef)
 
   82    type(
coef_t), 
intent(in), 
target :: coef
 
   83    real(kind=
rp), 
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
         coef%msh%nelv), 
intent(inout) :: du
 
   84    real(kind=
rp), 
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
         coef%msh%nelv), 
intent(in) :: u, dr, ds, dt
 
   87       call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
 
   93       call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
 
  104  subroutine div(res, ux, uy, uz, coef)
 
  105    type(
coef_t), 
intent(in), 
target :: coef
 
  106    real(kind=
rp), 
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
         coef%msh%nelv), 
intent(inout) :: res
 
  107    real(kind=
rp), 
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
         coef%msh%nelv), 
intent(in) :: ux, uy, uz
 
  119    call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)
 
  122    call dudxyz(work%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
 
  126       call add2(res, work%x, work%size())
 
  130    call dudxyz(work%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
 
  134       call add2(res, work%x, work%size())
 
  147  subroutine grad(ux, uy, uz, u, coef)
 
  148    type(
coef_t), 
intent(in) :: coef
 
  149    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(inout) :: ux
 
  150    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(inout) :: uy
 
  151    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(inout) :: uz
 
  152    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(in) :: u
 
  154    call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
 
  155    call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
 
  156    call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
 
  172  subroutine opgrad(ux, uy, uz, u, coef, es, ee)
 
  173    type(
coef_t), 
intent(in) :: coef
 
  174    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(inout) :: ux
 
  175    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(inout) :: uy
 
  176    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(inout) :: uz
 
  177    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(in) :: u
 
  178    integer, 
optional :: es, ee
 
  179    integer :: eblk_start, eblk_end
 
  181    if (
present(es)) 
then 
  187    if (
present(ee)) 
then 
  190       eblk_end = coef%msh%nelv
 
  194       call opr_sx_opgrad(ux, uy, uz, u, coef)
 
  200       call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
 
  209  subroutine ortho(x, glb_n_points, n)
 
  210    integer, 
intent(in) :: n
 
  211    integer(kind=i8), 
intent(in) :: glb_n_points
 
  212    real(kind=
rp), 
dimension(n), 
intent(inout) :: x
 
  220       c = 
glsum(x, n)/glb_n_points
 
  237  subroutine cdtp (dtx, x, dr, ds, dt, coef, es, ee)
 
  238    type(
coef_t), 
intent(in) :: coef
 
  239    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(inout) :: dtx
 
  240    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(inout) :: x
 
  241    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(in) :: dr
 
  242    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(in) :: ds
 
  243    real(kind=
rp), 
dimension(coef%Xh%lxyz, coef%msh%nelv), 
intent(in) :: dt
 
  244    integer, 
optional :: es, ee
 
  245    integer :: eblk_start, eblk_end
 
  247    if (
present(es)) 
then 
  253    if (
present(ee)) 
then 
  256       eblk_end = coef%msh%nelv
 
  260       call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
 
  266       call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
 
  281  subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
 
  282    type(
space_t), 
intent(in) :: xh
 
  283    type(
coef_t), 
intent(in) :: coef
 
  284    real(kind=
rp), 
intent(inout) :: du(xh%lxyz, coef%msh%nelv)
 
  285    real(kind=
rp), 
intent(inout) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
 
  286    real(kind=
rp), 
intent(inout) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
 
  287    real(kind=
rp), 
intent(inout) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
 
  288    real(kind=
rp), 
intent(inout) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
 
  289    integer, 
optional :: es, ee
 
  290    integer :: eblk_end, eblk_start
 
  292    associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
 
  293      if (
present(es)) 
then 
  299      if (
present(ee)) 
then 
  302         eblk_end = coef%msh%nelv
 
  306         call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
 
  312         call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
 
  335  subroutine convect_scalar(du, u, cr, cs, ct, Xh_GLL, Xh_GL, coef_GLL, &
 
  337    type(space_t), 
intent(in) :: xh_gl
 
  338    type(space_t), 
intent(in) :: xh_gll
 
  339    type(coef_t), 
intent(in) :: coef_GLL
 
  340    type(coef_t), 
intent(in) :: coef_GL
 
  341    type(interpolator_t), 
intent(inout) :: GLL_to_GL
 
  342    real(kind=rp), 
intent(inout) :: &
 
  343         du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
 
  344    real(kind=rp), 
intent(inout) :: &
 
  345         u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
 
  346    type(field_t), 
intent(inout) :: cr, cs, ct
 
  349    if (neko_bcknd_sx .eq. 1) 
then 
  350       call opr_sx_convect_scalar(du, u, cr%x, cs%x, ct%x, &
 
  351            xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
 
  352    else if (neko_bcknd_xsmm .eq. 1) 
then 
  353       call opr_xsmm_convect_scalar(du, u, cr%x, cs%x, ct%x, &
 
  354            xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
 
  355    else if (neko_bcknd_device .eq. 1) 
then 
  356       u_d = device_get_ptr(u)
 
  357       call opr_device_convect_scalar(du, u_d, cr%x_d, cs%x_d, ct%x_d, &
 
  358            xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
 
  360       call opr_cpu_convect_scalar(du, u, cr%x, cs%x, ct%x, &
 
  361            xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
 
 
  376  subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
 
  377    type(field_t), 
intent(inout) :: w1
 
  378    type(field_t), 
intent(inout) :: w2
 
  379    type(field_t), 
intent(inout) :: w3
 
  380    type(field_t), 
intent(in) :: u1
 
  381    type(field_t), 
intent(in) :: u2
 
  382    type(field_t), 
intent(in) :: u3
 
  383    type(field_t), 
intent(inout) :: work1
 
  384    type(field_t), 
intent(inout) :: work2
 
  385    type(coef_t), 
intent(in) :: coef
 
  386    type(c_ptr), 
optional, 
intent(inout) :: event
 
  388    if (neko_bcknd_sx .eq. 1) 
then 
  389       call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
 
  390    else if (neko_bcknd_xsmm .eq. 1) 
then 
  391       call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
 
  392    else if (neko_bcknd_device .eq. 1) 
then 
  393       if (
present(event)) 
then 
  394          call opr_device_curl(w1, w2, w3, u1, u2, u3, &
 
  395               work1, work2, coef, event)
 
  397          call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
 
  400       call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
 
 
  414  function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
 
  415    type(space_t), 
intent(in) :: xh
 
  416    type(coef_t), 
intent(in) :: coef
 
  417    integer, 
intent(in) :: nelv, gdim
 
  418    real(kind=rp), 
intent(in) :: dt
 
  419    real(kind=rp), 
dimension(Xh%lx, Xh%ly, Xh%lz, nelv), 
intent(in) :: u, v, w
 
  423    if (neko_bcknd_sx .eq. 1) 
then 
  424       cfl = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
 
  425    else if (neko_bcknd_device .eq. 1) 
then 
  426       cfl = opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
 
  428       cfl = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
 
  431    if (.not. neko_device_mpi) 
then 
  432       call mpi_allreduce(mpi_in_place, 
cfl, 1, &
 
  433            mpi_real_precision, mpi_max, neko_comm, ierr)
 
 
  446    type(space_t), 
intent(in) :: xh
 
  447    type(coef_t), 
intent(in) :: coef
 
  448    integer, 
intent(in) :: nelv, gdim
 
  449    real(kind=rp), 
intent(in) :: dt
 
  450    real(kind=rp), 
dimension(Xh%lx, Xh%ly, Xh%lz, nelv), 
intent(in) :: max_wave_speed
 
  453    type(field_t), 
pointer :: zero_vector
 
  456    n = xh%lx * xh%ly * xh%lz * nelv
 
  459    call neko_scratch_registry%request_field(zero_vector, ind)
 
  462    call field_rzero(zero_vector)
 
  465    cfl_compressible = 
cfl(dt, max_wave_speed, zero_vector%x, zero_vector%x, xh, coef, nelv, gdim)
 
  468    call neko_scratch_registry%relinquish_field(ind)
 
  484  subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
 
  485    type(field_t), 
intent(in) :: u, v, w
 
  486    type(coef_t), 
intent(in) :: coef
 
  487    real(kind=rp), 
intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
 
  488    real(kind=rp), 
intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
 
  489    real(kind=rp), 
intent(inout) :: s33(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
 
  490    real(kind=rp), 
intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
 
  491    real(kind=rp), 
intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
 
  492    real(kind=rp), 
intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
 
  494    type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
 
  496    integer :: nelv, lxyz
 
  498    if (neko_bcknd_device .eq. 1) 
then 
  499       s11_d = device_get_ptr(s11)
 
  500       s22_d = device_get_ptr(s22)
 
  501       s33_d = device_get_ptr(s33)
 
  502       s12_d = device_get_ptr(s12)
 
  503       s23_d = device_get_ptr(s23)
 
  504       s13_d = device_get_ptr(s13)
 
  511    call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
 
  512    call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
 
  513    if (neko_bcknd_device .eq. 1) 
then 
  514       call device_add2(s12_d, s11_d, nelv*lxyz)
 
  516       call add2(s12, s11, nelv*lxyz)
 
  519    call dudxyz (s13, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
 
  520    call dudxyz (s11, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
 
  521    if (neko_bcknd_device .eq. 1) 
then 
  522       call device_add2(s13_d, s11_d, nelv*lxyz)
 
  524       call add2(s13, s11, nelv*lxyz)
 
  527    call dudxyz (s23, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
 
  528    call dudxyz (s11, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
 
  529    if (neko_bcknd_device .eq. 1) 
then 
  530       call device_add2(s23_d, s11_d, nelv*lxyz)
 
  532       call add2(s23, s11, nelv*lxyz)
 
  535    call dudxyz (s11, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
 
  536    call dudxyz (s22, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
 
  537    call dudxyz (s33, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
 
  539    if (neko_bcknd_device .eq. 1) 
then 
  540       call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
 
  541       call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
 
  542       call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
 
  544       call cmult(s12, 0.5_rp, nelv*lxyz)
 
  545       call cmult(s13, 0.5_rp, nelv*lxyz)
 
  546       call cmult(s23, 0.5_rp, nelv*lxyz)
 
  557  subroutine lambda2op(lambda2, u, v, w, coef)
 
  558    type(coef_t), 
intent(in) :: coef
 
  559    type(field_t), 
intent(inout) :: 
lambda2 
  560    type(field_t), 
intent(in) :: u, v, w
 
  562    if (neko_bcknd_sx .eq. 1) 
then 
  563       call opr_sx_lambda2(
lambda2, u, v, w, coef)
 
  564    else if (neko_bcknd_device .eq. 1) 
then 
  565       call opr_device_lambda2(
lambda2, u, v, w, coef)
 
  567       call opr_cpu_lambda2(
lambda2, u, v, w, coef)
 
  583    type(space_t), 
intent(inout) :: xh
 
  584    type(coef_t), 
intent(inout) :: coef
 
  585    type(field_t), 
intent(inout) :: cr, cs, ct
 
  586    real(kind=rp), 
dimension(Xh%lxyz, coef%msh%nelv), &
 
  587         intent(in) :: cx, cy, cz
 
  588    type(c_ptr) :: cx_d, cy_d, cz_d
 
  590    if (neko_bcknd_sx .eq. 1) 
then 
  591       call opr_sx_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
 
  592    else if (neko_bcknd_xsmm .eq. 1) 
then 
  593       call opr_xsmm_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
 
  594    else if (neko_bcknd_device .eq. 1) 
then 
  595       cx_d = device_get_ptr(cx)
 
  596       cy_d = device_get_ptr(cy)
 
  597       cz_d = device_get_ptr(cz)
 
  598       call opr_device_set_convect_rst(cr%x_d, cs%x_d, ct%x_d, &
 
  599       cx_d, cy_d, cz_d, xh, coef)
 
  601       call opr_cpu_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
 
  621  subroutine runge_kutta(phi, conv_k1, conv_k23, conv_k4, Xh_GLL, Xh_GL, &
 
  622       coef, coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
 
  623    type(space_t), 
intent(in) :: xh_gll
 
  624    type(space_t), 
intent(inout) :: xh_gl
 
  625    type(coef_t), 
intent(in) :: coef
 
  626    type(coef_t), 
intent(inout) :: coef_gl
 
  627    type(interpolator_t) :: gll_to_gl
 
  628    real(kind=rp), 
intent(inout) :: tau, dtau
 
  629    integer, 
intent(in) :: n, nel, n_gl
 
  630    type(field_t), 
intent(inout) :: phi
 
  631    type(field_list_t) :: conv_k1, conv_k23, conv_k4
 
  632    real(kind=rp) :: c1, c2, c3
 
  633    type(field_t), 
pointer :: u1, k1, k2, k3, k4
 
  634    real(kind=rp), 
dimension(n_GL) :: u1_gl
 
  635    integer :: ind(5), i, e
 
  636    type(c_ptr) :: u1_gl_d
 
  638    call neko_scratch_registry%request_field(u1, ind(1))
 
  639    call neko_scratch_registry%request_field(k1, ind(2))
 
  640    call neko_scratch_registry%request_field(k2, ind(3))
 
  641    call neko_scratch_registry%request_field(k3, ind(4))
 
  642    call neko_scratch_registry%request_field(k4, ind(5))
 
  648    if (neko_bcknd_device .eq. 1) 
then 
  649       call device_map(u1_gl, u1_gl_d, n_gl)
 
  652       call device_invcol3(u1%x_d, phi%x_d, coef%B_d, n)
 
  653       call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
 
  655                           conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
 
  656                           xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
 
  657       call device_col2(k1%x_d, coef%B_d, n)
 
  660       call device_add3s2(u1%x_d, phi%x_d, k1%x_d, c1, c2, n)
 
  661       call device_invcol2(u1%x_d, coef%B_d, n)
 
  662       call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
 
  664                           conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
 
  665                           xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
 
  666       call device_col2(k2%x_d, coef%B_d, n)
 
  669       call device_add3s2(u1%x_d, phi%x_d, k2%x_d, c1, c2, n)
 
  670       call device_invcol2(u1%x_d, coef%B_d, n)
 
  671       call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
 
  673                           conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
 
  674                           xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
 
  675       call device_col2(k3%x_d, coef%B_d, n)
 
  678       call device_add3s2(u1%x_d, phi%x_d, k3%x_d, c1, c3, n)
 
  679       call device_invcol2(u1%x_d, coef%B_d, n)
 
  680       call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
 
  682                           conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
 
  683                           xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
 
  684       call device_col2(k4%x_d, coef%B_d, n)
 
  689       call device_add5s4(phi%x_d, k1%x_d, k2%x_d, k3%x_d, k4%x_d, &
 
  692       call device_free(u1_gl_d)
 
  697       call invcol3(u1%x, phi%x, coef%B, n)
 
  698       call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
 
  700                           conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
 
  701                           xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
 
  702       call col2(k1%x, coef%B, n)
 
  705       call add3s2(u1%x, phi%x, k1%x, c1, c2, n)
 
  706       call invcol2(u1%x, coef%B, n)
 
  707       call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
 
  709                           conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
 
  710                           xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
 
  711       call col2(k2%x, coef%B, n)
 
  714       call add3s2(u1%x, phi%x, k2%x, c1, c2, n)
 
  715       call invcol2(u1%x, coef%B, n)
 
  716       call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
 
  718                           conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
 
  719                           xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
 
  720       call col2(k3%x, coef%B, n)
 
  723       call add3s2(u1%x, phi%x, k3%x, c1, c3, n)
 
  724       call invcol2(u1%x, coef%B, n)
 
  725       call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
 
  727                           conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
 
  728                           xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
 
  729       call col2(k4%x, coef%B, n)
 
  733       call add5s4(phi%x, k1%x, k2%x, k3%x, k4%x, c1, c2, c2, c1, n)
 
  736    call neko_scratch_registry%relinquish_field(ind)
 
 
 
 
 
 
 
 
 
 
 
 
 
Return the device pointer for an associated Fortran array.
 
Map a Fortran array to a device (allocate and associate)
 
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
 
type(mpi_comm), public neko_comm
MPI communicator.
 
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition $a = a + b$.
 
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
 
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns $a = c_1 b + c_2 c$.
 
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c, $a = c \cdot a$.
 
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector $a = b$.
 
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division $a = b / c$.
 
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication $a = a \cdot b$.
 
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns $a = a + c_1 b + c_2 c + c_3 d + c_4 e$.
 
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division $a = a / b$.
 
Device abstraction, common interface for various accelerators.
 
subroutine, public device_free(x_d)
Deallocate memory on the device.
 
subroutine, public field_rzero(a, n)
Zero a real vector.
 
Routines to interpolate between different spaces.
 
A simulation component that computes lambda2. The values are stored in the field registry under the na...
 
subroutine, public cmult(a, c, n)
Multiplication by constant c, $a = c \cdot a$.
 
subroutine, public invcol2(a, b, n)
Vector division $a = a / b$.
 
subroutine, public cadd(a, s, n)
Add a scalar to vector $a_i = a_i + s$.
 
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
 
subroutine, public add2(a, b, n)
Vector addition $a = a + b$.
 
subroutine, public invcol3(a, b, c, n)
Vector division $a = b / c$.
 
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns $a = c_1 b + c_2 c$.
 
subroutine, public col2(a, b, n)
Vector multiplication $a = a \cdot b$.
 
subroutine, public copy(a, b, n)
Copy a vector $a = b$.
 
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns $a = a + c_1 b + c_2 c + c_3 d + c_4 e$.
 
subroutine, public rzero(a, n)
Zero a real vector.
 
integer, parameter neko_bcknd_sx
 
integer, parameter neko_bcknd_device
 
logical, parameter neko_device_mpi
 
integer, parameter neko_bcknd_xsmm
 
integer, parameter, public i8
 
integer, parameter, public rp
Global precision used in computations.
 
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
 
subroutine, public ortho(x, glb_n_points, n)
Orthogonalize with regard to vector $(1, 1, \ldots, 1)^T$.
 
real(kind=rp) function, public cfl(dt, u, v, w, xh, coef, nelv, gdim)
 
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
 
subroutine convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the scalar field u, used in the OIFS scheme.
 
subroutine, public div(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
 
real(kind=rp) function, public cfl_compressible(dt, max_wave_speed, xh, coef, nelv, gdim)
 
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
 
subroutine, public grad(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
 
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
 
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e. $s_{ij} = 0.5 \, (\partial u_i / \partial x_j + \partial u_j / \partial x_i)$.
 
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
 
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
 
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
 
subroutine, public runge_kutta(phi, conv_k1, conv_k23, conv_k4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
 
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
 
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
 
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
 
Operators accelerator backends.
 
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
 
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
 
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
 
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
 
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
 
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
 
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
 
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
 
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
 
Operators SX-Aurora backend.
 
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
 
Operators libxsmm backend.
 
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
 
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
 
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
 
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
 
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
 
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
 
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
 
Defines a registry for storing and requesting temporary fields. This can be used when you have a funct...
 
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
 
Defines a function space.
 
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
 
field_list_t, To be able to group fields together
 
Interpolation between two space::space_t.
 
The function space for the SEM solution fields.