d6/d05/opr__xsmm_8F90_source.html

! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.

!

! The UChicago Argonne, LLC as Operator of Argonne National

! Laboratory holds copyright in the Software. The copyright holder

! reserves all rights except those expressly granted to licensees,

! and U.S. Government license rights.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

! 1. Redistributions of source code must retain the above copyright

! notice, this list of conditions and the disclaimer below.

!

! 2. Redistributions in binary form must reproduce the above copyright

! notice, this list of conditions and the disclaimer (as noted below)

! in the documentation and/or other materials provided with the

! distribution.

!

! 3. Neither the name of ANL nor the names of its contributors

! may be used to endorse or promote products derived from this software

! without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL

! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF

! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED

! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

!

! Additional BSD Notice

! ---------------------

! 1. This notice is required to be provided under our contract with

! the U.S. Department of Energy (DOE). This work was produced at

! Argonne National Laboratory under Contract

! No. DE-AC02-06CH11357 with the DOE.

!

! 2. Neither the United States Government nor UCHICAGO ARGONNE,

! LLC nor any of their employees, makes any warranty,

! express or implied, or assumes any liability or responsibility for the

! accuracy, completeness, or usefulness of any information, apparatus,

! product, or process disclosed, or represents that its use would not

! infringe privately-owned rights.

!

! 3. Also, reference herein to any specific commercial products, process,

! or services by trade name, trademark, manufacturer or otherwise does

! not necessarily constitute or imply its endorsement, recommendation,

! or favoring by the United States Government or UCHICAGO ARGONNE LLC.

! The views and opinions of authors expressed

! herein do not necessarily state or reflect those of the United States

! Government or UCHICAGO ARGONNE, LLC, and shall

! not be used for advertising or product endorsement purposes.

!

module opr_xsmm

  use num_types, only : rp

  use mxm_wrapper, only : mxm

  use space, only : space_t

  use coefs, only : coef_t

  use math, only : rzero, col2, col3, sub3, add2, addcol3, invcol2, copy

  use mesh, only : mesh_t

  use field, only : field_t

  use interpolation, only : interpolator_t

  use gather_scatter, only : gs_t, gs_op_add

  use mathops, only : opcolv

#ifdef HAVE_LIBXSMM

  use libxsmm, only: libxsmm_mmcall => libxsmm_dmmcall_abc, &

                     libxsmm_dmmfunction, libxsmm_dispatch, &

                     libxsmm_prefetch_auto

#endif

  implicit none

  private


  public :: opr_xsmm_dudxyz, opr_xsmm_opgrad, opr_xsmm_cdtp, opr_xsmm_conv1, &

            opr_xsmm_curl, opr_xsmm_convect_scalar, opr_xsmm_set_convect_rst


#ifdef HAVE_LIBXSMM

  ! Module-level libxsmm kernel handles executed by local_grad3_xsmm.
  ! They are (re)dispatched by opr_xsmm_opgrad and opr_xsmm_convect_scalar,
  ! so both routines write to the same three handles.
  ! lgrad_xmm1 is dispatched with (m, n, k) = (n+1, (n+1)**2, n+1).
  type(libxsmm_dmmfunction), private :: lgrad_xmm1

  ! lgrad_xmm2 is dispatched with (m, n, k) = (n+1, n+1, n+1).
  type(libxsmm_dmmfunction), private :: lgrad_xmm2

  ! lgrad_xmm3 is dispatched with (m, n, k) = ((n+1)**2, n+1, n+1).
  type(libxsmm_dmmfunction), private :: lgrad_xmm3

#endif


contains


  !> Derivative of `u` along one physical direction, libxsmm backend.
  !! For every element the routine forms
  !! `du = dr * (dx u) + ds * (u dyt) [+ dt * (u dzt)]`
  !! (the bracketed term only when `gdim /= 2`) and finally scales the whole
  !! field by `coef%jacinv`.
  !! @param du   Result, overwritten.
  !! @param u    Field to differentiate.
  !! @param dr   Metric factor multiplying the r-derivative.
  !! @param ds   Metric factor multiplying the s-derivative.
  !! @param dt   Metric factor multiplying the t-derivative (unused in 2D).
  !! @param coef Coefficients providing the space, mesh, `jacinv` and `dof`.
  !! @note The body is compiled only when HAVE_LIBXSMM is defined; otherwise
  !! the routine returns without touching `du`.
  !! @note The kernels are dispatched once, on the first call, for the sizes
  !! of the space passed in that call.
  subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
    type(coef_t), intent(in), target :: coef
    real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
                             coef%msh%nelv), intent(inout) ::  du
    real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
                             coef%msh%nelv), intent(in) ::  u, dr, ds, dt
#ifdef HAVE_LIBXSMM
    ! Per-element scratch holding one reference-space derivative
    real(kind=rp) :: work(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz)
    type(space_t), pointer :: xh
    type(mesh_t), pointer :: msh
    integer :: iel, kz, nyz, npts
    type(libxsmm_dmmfunction), save :: dudxyz_xmm1
    type(libxsmm_dmmfunction), save :: dudxyz_xmm2
    type(libxsmm_dmmfunction), save :: dudxyz_xmm3
    logical, save :: dudxyz_xsmm_init = .false.

    xh => coef%Xh
    msh => coef%msh
    nyz = xh%ly*xh%lz
    npts = xh%lx*xh%ly*xh%lz

    ! One-time kernel generation: r-, s- (per z-plane) and t-direction
    if (.not. dudxyz_xsmm_init) then
       call libxsmm_dispatch(dudxyz_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       call libxsmm_dispatch(dudxyz_xmm2, xh%lx, xh%ly, xh%ly, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       call libxsmm_dispatch(dudxyz_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       dudxyz_xsmm_init = .true.
    end if

    if (msh%gdim == 2) then
       ! 2D elements go through the generic mxm wrapper
       do iel = 1, msh%nelv
          call mxm(xh%dx, xh%lx, u(1,1,1,iel), xh%lx, du(1,1,1,iel), nyz)
          call col2(du(1,1,1,iel), dr(1,1,1,iel), npts)
          call mxm(u(1,1,1,iel), xh%lx, xh%dyt, xh%ly, work, xh%ly)
          call addcol3(du(1,1,1,iel), work, ds(1,1,1,iel), npts)
       end do
    else
       do iel = 1, msh%nelv
          ! r-direction
          call libxsmm_mmcall(dudxyz_xmm1, xh%dx, u(1,1,1,iel), du(1,1,1,iel))
          call col2(du(1,1,1,iel), dr(1,1,1,iel), npts)
          ! s-direction, one z-plane at a time
          do kz = 1, xh%lz
             call libxsmm_mmcall(dudxyz_xmm2, u(1,1,kz,iel), xh%dyt, &
                                 work(1,1,kz))
          end do
          call addcol3(du(1,1,1,iel), work, ds(1,1,1,iel), npts)
          ! t-direction
          call libxsmm_mmcall(dudxyz_xmm3, u(1,1,1,iel), xh%dzt, work)
          call addcol3(du(1,1,1,iel), work, dt(1,1,1,iel), npts)
       end do
    end if

    call col2(du, coef%jacinv, coef%dof%n_dofs)
#endif
  end subroutine opr_xsmm_dudxyz


  subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef) …

  !> Weighted gradient of a scalar field, libxsmm backend.
  !! For every element the reference-space derivatives `ur`, `us` (and `ut`
  !! in 3D) of `u` are computed and combined with the metric terms stored in
  !! `coef`, each point being scaled by the weight `coef%Xh%w3`.
  !! @param ux   x-component of the result, overwritten.
  !! @param uy   y-component of the result, overwritten.
  !! @param uz   z-component of the result, overwritten (3D only).
  !! @param u    Scalar field.
  !! @param coef Coefficients providing the space, mesh and metric terms.
  !! @note The body is compiled only when HAVE_LIBXSMM is defined; otherwise
  !! the routine returns without touching the outputs.
  subroutine opr_xsmm_opgrad(ux, uy, uz, u, coef)
    type(coef_t), intent(in) :: coef
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
#ifdef HAVE_LIBXSMM
    real(kind=rp) :: ur(coef%Xh%lxyz)
    real(kind=rp) :: us(coef%Xh%lxyz)
    real(kind=rp) :: ut(coef%Xh%lxyz)
    integer :: e, i, n

    n = coef%Xh%lx - 1

    ! The lgrad_xmm* handles are module variables that
    ! opr_xsmm_convect_scalar also dispatches, possibly for another
    ! polynomial order. A routine-local "already initialised" flag cannot
    ! see such a change, so the handles are dispatched on every call to
    ! guarantee that they match this call's `n`.
    call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
         alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
    call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
         alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
    call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
         alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)

    do e = 1, coef%msh%nelv
       if (coef%msh%gdim .eq. 3) then
          call local_grad3_xsmm(ur, us, ut, u(1,e), n, coef%Xh%dx, coef%Xh%dxt)
          do i = 1, coef%Xh%lxyz
             ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
                  + us(i) * coef%dsdx(i,1,1,e) &
                  + ut(i) * coef%dtdx(i,1,1,e) )
             uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
                  + us(i) * coef%dsdy(i,1,1,e) &
                  + ut(i) * coef%dtdy(i,1,1,e) )
             uz(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdz(i,1,1,e) &
                  + us(i) * coef%dsdz(i,1,1,e) &
                  + ut(i) * coef%dtdz(i,1,1,e) )
          end do
       else
          ! 2D: only the r- and s-derivatives exist, uz is left untouched
          call local_grad2(ur, us, u(1,e), n, coef%Xh%dx, coef%Xh%dyt)
          do i = 1, coef%Xh%lxyz
             ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
                  + us(i) * coef%dsdx(i,1,1,e) )
             uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
                  + us(i) * coef%dsdy(i,1,1,e) )
          end do
       end if
    end do
#endif
  end subroutine opr_xsmm_opgrad


    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux …

  !> Reference-space gradient of one 3D element using the module-level
  !! libxsmm kernels `lgrad_xmm1`, `lgrad_xmm2` and `lgrad_xmm3`.
  !! Computes `ur = D u`, `us(:,:,k) = u(:,:,k) Dt` for every plane `k`, and
  !! `ut = u Dt` with `u` viewed as an `(n+1)**2 x (n+1)` matrix.
  !! @param ur, us, ut Derivatives along r, s and t, overwritten.
  !! @param u  Element data on an `(n+1)**3` grid.
  !! @param n  Polynomial order (points per direction minus one).
  !! @param D  Derivative matrix.
  !! @param Dt Second factor used for the s- and t-products.
  !! @note The caller must have dispatched the three handles for this `n`.
  !! @note Without HAVE_LIBXSMM the routine does nothing.
  subroutine local_grad3_xsmm(ur, us, ut, u, n, D, Dt)
    integer, intent(in) :: n
    real(kind=rp), intent(inout) :: ur(0:n, 0:n, 0:n)
    real(kind=rp), intent(inout) :: us(0:n, 0:n, 0:n)
    real(kind=rp), intent(inout) :: ut(0:n, 0:n, 0:n)
    real(kind=rp), intent(in) :: u(0:n, 0:n, 0:n)
    real(kind=rp), intent(in) :: d(0:n, 0:n)
    real(kind=rp), intent(in) :: dt(0:n, 0:n)
#ifdef HAVE_LIBXSMM
    integer :: plane

    ! r-direction: one product over the whole element
    call libxsmm_mmcall(lgrad_xmm1, d, u, ur)

    ! s-direction: one small product per z-plane
    do plane = 0, n
       call libxsmm_mmcall(lgrad_xmm2, u(0,0,plane), dt, us(0,0,plane))
    end do

    ! t-direction: one product over the whole element
    call libxsmm_mmcall(lgrad_xmm3, u, dt, ut)
#endif
  end subroutine local_grad3_xsmm


    real(kind=rp), intent(inout) :: ur(0:n, 0:n, 0:n) …

  !> Reference-space gradient of one 2D element via the generic `mxm`
  !! wrapper: `ur = D u` and `us = u Dt`.
  !! @param ur, us Derivatives along r and s, overwritten.
  !! @param u  Element data on an `(n+1) x (n+1)` grid.
  !! @param n  Polynomial order (points per direction minus one).
  !! @param D  Left factor of the r-product.
  !! @param Dt Right factor of the s-product.
  subroutine local_grad2(ur, us, u, n, D, Dt)
    integer, intent(in) :: n
    real(kind=rp), intent(inout) :: ur(0:n, 0:n)
    real(kind=rp), intent(inout) :: us(0:n, 0:n)
    real(kind=rp), intent(in) :: u(0:n, 0:n)
    real(kind=rp), intent(in) :: d(0:n, 0:n)
    real(kind=rp), intent(in) :: dt(0:n, 0:n)
    integer :: npts

    ! Every matrix involved is square with npts rows and columns
    npts = n + 1

    call mxm(d, npts, u, npts, ur, npts)
    call mxm(u, npts, dt, npts, us, npts)
  end subroutine local_grad2


    real(kind=rp), intent(inout) :: ur(0:n, 0:n) …

  !> Apply the transposed derivative operators to `x`, libxsmm backend.
  !! Per element: `wx = B * x / jac`, then
  !! `dtx = dxt (wx*dr) + (wx*ds) dy (plane by plane) + (wx*dt) dz`.
  !! @param dtx  Result, overwritten.
  !! @param x    Input field (only read here).
  !! @param dr   Factor applied before the r-direction product.
  !! @param ds   Factor applied before the s-direction product.
  !! @param dt   Factor applied before the t-direction product.
  !! @param coef Coefficients providing the space, mesh, `B` and `jac`.
  !! @note The body is compiled only when HAVE_LIBXSMM is defined; otherwise
  !! the routine returns without touching `dtx`.
  !! @note The kernels are dispatched once, on the first call, for the sizes
  !! of the space passed in that call.
  subroutine opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
    type(coef_t), intent(in) :: coef
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
#ifdef HAVE_LIBXSMM
    real(kind=rp) :: wx(coef%Xh%lxyz)
    real(kind=rp) :: ta1(coef%Xh%lxyz)
    real(kind=rp) :: ta2(coef%Xh%lxyz)
    integer :: iel, kz, nxy, first
    type(space_t), pointer :: xh
    type(libxsmm_dmmfunction), save :: cdtp_xmm1
    type(libxsmm_dmmfunction), save :: cdtp_xmm2
    type(libxsmm_dmmfunction), save :: cdtp_xmm3
    logical, save :: cdtp_xsmm_init = .false.

    xh => coef%Xh
    ! Number of points in one z-plane
    nxy = xh%lx*xh%ly

    if (.not. cdtp_xsmm_init) then
       call libxsmm_dispatch(cdtp_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       call libxsmm_dispatch(cdtp_xmm2, xh%lx, xh%ly, xh%ly, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       call libxsmm_dispatch(cdtp_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       cdtp_xsmm_init = .true.
    end if

    do iel = 1, coef%msh%nelv
       ! wx = B * x / jac
       call col3(wx, coef%B(1,1,1,iel), x(1,iel), xh%lxyz)
       call invcol2(wx, coef%jac(1,1,1,iel), xh%lxyz)

       ! r-direction contribution initialises dtx
       call col3(ta1, wx, dr(1,iel), xh%lxyz)
       call libxsmm_mmcall(cdtp_xmm1, xh%dxt, ta1, dtx(1,iel))

       ! s-direction contribution, one z-plane at a time
       call col3 (ta1, wx, ds(1,iel), xh%lxyz)
       do kz = 1, xh%lz
          first = (kz - 1)*nxy + 1
          call libxsmm_mmcall(cdtp_xmm2, ta1(first), xh%dy, ta2(first))
       end do
       call add2(dtx(1,iel), ta2, xh%lxyz)

       ! t-direction contribution
       call col3(ta1, wx, dt(1,iel), xh%lxyz)
       call libxsmm_mmcall(cdtp_xmm3, ta1, xh%dz, ta2)
       call add2 (dtx(1,iel), ta2, xh%lxyz)
    end do
#endif
  end subroutine opr_xsmm_cdtp


    real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx …

  !> Convective term `vel . grad(u)`, libxsmm backend.
  !! Per element the reference-space derivatives of `u` are computed and
  !! combined as
  !! `du = jacinv * ( vx*(drdx*dudr + dsdx*duds + dtdx*dudt)
  !!                + vy*(drdy*dudr + dsdy*duds + dtdy*dudt)
  !!                + vz*(drdz*dudr + dsdz*duds + dtdz*dudt) )`,
  !! with the `dudt` and `vz` terms dropped when `gdim /= 3`.
  !! @param du   Result, overwritten.
  !! @param u    Advected field (only read here).
  !! @param vx, vy, vz Velocity components (only read here).
  !! @param Xh   Function space providing sizes and derivative matrices.
  !! @param coef Coefficients providing `jacinv` and the metric terms.
  !! @param nelv Number of elements.
  !! @param gdim Geometric dimension.
  !! @note The body is compiled only when HAVE_LIBXSMM is defined; otherwise
  !! the routine returns without touching `du`.
  !! @note The kernels are dispatched once, on the first call, for the sizes
  !! of the space passed in that call.
  subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)
    type(space_t), intent(in) :: xh
    type(coef_t), intent(in) :: coef
    integer, intent(in) :: nelv, gdim
    real(kind=rp), intent(inout) ::  du(xh%lxyz, nelv)
    real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) ::  u
    real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) ::  vx
    real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) ::  vy
    real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) ::  vz
#ifdef HAVE_LIBXSMM
    ! Per-element reference-space derivatives of u
    real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz) :: dudr, duds, dudt
    integer :: ie, iz, i
    type(libxsmm_dmmfunction), save :: conv1_xmm1
    type(libxsmm_dmmfunction), save :: conv1_xmm2
    type(libxsmm_dmmfunction), save :: conv1_xmm3
    logical, save :: conv1_xsmm_init = .false.

    if (.not. conv1_xsmm_init) then
       ! dx (lx x lx) times u viewed as lx x (ly*lz): the column count of
       ! the product is ly*lz, matching opr_xsmm_dudxyz and opr_xsmm_cdtp.
       call libxsmm_dispatch(conv1_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       call libxsmm_dispatch(conv1_xmm2, xh%lx, xh%ly, xh%ly, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       call libxsmm_dispatch(conv1_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
            alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
       conv1_xsmm_init = .true.
    end if

    !   Compute vel.grad(u)
    do ie = 1, nelv
       if (gdim .eq. 3) then
          call libxsmm_mmcall(conv1_xmm1, xh%dx, u(1,1,1, ie), dudr)
          do iz = 1, xh%lz
             call libxsmm_mmcall(conv1_xmm2, u(1,1, iz, ie), xh%dyt,&
                                 duds(1,1, iz))
          end do
          call libxsmm_mmcall(conv1_xmm3, u(1,1,1, ie), xh%dzt, dudt)
          do i = 1, xh%lxyz
             du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
                  vx(i,1,1, ie) * ( &
                  coef%drdx(i,1,1, ie) * dudr(i,1,1) &
                  + coef%dsdx(i,1,1, ie) * duds(i,1,1) &
                  + coef%dtdx(i,1,1, ie) * dudt(i,1,1)) &
                  + vy(i,1,1, ie) * ( &
                  coef%drdy(i,1,1, ie) * dudr(i,1,1) &
                  + coef%dsdy(i,1,1, ie) * duds(i,1,1) &
                  + coef%dtdy(i,1,1, ie) * dudt(i,1,1)) &
                  + vz(i,1,1, ie) * ( &
                  coef%drdz(i,1,1, ie) * dudr(i,1,1) &
                  + coef%dsdz(i,1,1, ie) * duds(i,1,1) &
                  + coef%dtdz(i,1,1, ie) * dudt(i,1,1)))
          end do
       else
          !        2D
          call mxm(xh%dx, xh%lx, u(1,1,1, ie), xh%lx, dudr, xh%lyz)
          call mxm(u(1,1,1, ie), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
          do i = 1, xh%lxyz
             du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
                  vx(i,1,1, ie) * ( &
                  coef%drdx(i,1,1, ie) * dudr(i,1,1) &
                  + coef%dsdx(i,1,1, ie) * duds(i,1,1)) &
                  + vy(i,1,1, ie) * ( &
                  coef%drdy(i,1,1, ie) * dudr(i,1,1) &
                  + coef%dsdy(i,1,1, ie) * duds(i,1,1)))
          end do
       end if
    end do
#endif
  end subroutine opr_xsmm_conv1


    type(coef_t), intent(in) :: coef …

  !> Convective term for a scalar, evaluated on the `Xh_GL` space and mapped
  !! back to `Xh_GLL`, libxsmm backend.
  !! Per element: `ud = c(:,e,1)*ur + c(:,e,2)*us + c(:,e,3)*ut`, where
  !! `ur`, `us`, `ut` are the reference-space derivatives of `u`. `ud` is
  !! then passed through `GLL_to_GL%map` into `du`, summed with the
  !! gather-scatter operation of `coef_GLL` and multiplied by
  !! `coef_GLL%Binv`.
  !! @param du        Result on the `Xh_GLL` space, overwritten.
  !! @param u         Scalar field on the `Xh_GL` space.
  !! @param c         Convecting field on `Xh_GL`, one slab per direction.
  !! @param Xh_GLL    Target function space.
  !! @param Xh_GL     Function space on which the term is evaluated.
  !! @param coef_GLL  Coefficients for `Xh_GLL`.
  !! @param coef_GL   Coefficients for `Xh_GL`.
  !! @param GLL_to_GL Interpolator between the two spaces.
  subroutine opr_xsmm_convect_scalar(du, u, c, Xh_GLL, Xh_GL, coef_GLL, &
                                     coef_GL, GLL_to_GL)
    type(space_t), intent(in) :: xh_gl
    type(space_t), intent(in) :: xh_gll
    type(coef_t), intent(in) :: coef_gll
    type(coef_t), intent(in) :: coef_gl
    type(interpolator_t), intent(inout) :: gll_to_gl
    real(kind=rp), intent(inout) :: du(xh_gll%lx, xh_gll%ly, xh_gll%lz, &
                                       coef_gl%msh%nelv)
    real(kind=rp), intent(inout) :: u(xh_gl%lxyz, coef_gl%msh%nelv)
    real(kind=rp), intent(inout) :: c(xh_gl%lxyz, coef_gl%msh%nelv, 3)
    real(kind=rp) :: ur(xh_gl%lxyz)
    real(kind=rp) :: us(xh_gl%lxyz)
    real(kind=rp) :: ut(xh_gl%lxyz)
    real(kind=rp) :: ud(xh_gl%lxyz, coef_gl%msh%nelv)
    integer :: e, i, n, n_gll

    n = coef_gl%Xh%lx - 1
    n_gll = coef_gll%msh%nelv * xh_gll%lxyz

#ifdef HAVE_LIBXSMM
    ! The lgrad_xmm* handles are module variables that opr_xsmm_opgrad also
    ! dispatches, possibly for another polynomial order. A routine-local
    ! "already initialised" flag cannot see such a change, so the handles
    ! are dispatched on every call to guarantee that they match this
    ! call's `n`.
    call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
         alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
    call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
         alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
    call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
         alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)

    do e = 1, coef_gll%msh%nelv
       call local_grad3_xsmm(ur, us, ut, u(1,e), n, xh_gl%dx, xh_gl%dxt)
       do i = 1, xh_gl%lxyz
          ud(i,e) = c(i,e,1) * ur(i) + c(i,e,2) * us(i) + c(i,e,3) * ut(i)
       end do
    end do
#endif

    ! NOTE(review): without HAVE_LIBXSMM `ud` is never written before it is
    ! mapped below - confirm this routine is unreachable in such builds.
    call gll_to_gl%map(du, ud, coef_gl%msh%nelv, xh_gll)
    call coef_gll%gs_h%op(du, n_gll, gs_op_add)
    call col2(du, coef_gll%Binv, n_gll)
  end subroutine opr_xsmm_convect_scalar


    type(space_t), intent(in) :: xh_gl …

  !> Curl of the vector field `(u1, u2, u3)`, built from calls to
  !! opr_xsmm_dudxyz.
  !! In 3D: `w1 = du3/dy - du2/dz`, `w2 = du1/dz - du3/dx`,
  !! `w3 = du2/dx - du1/dy`. When `gdim /= 3` the z-derivatives are dropped,
  !! giving `w1 = du3/dy` and `w2 = -du3/dx`.
  !! The components are then passed through `opcolv` with `c_Xh%B`, summed
  !! with the gather-scatter operation and passed through `opcolv` with
  !! `c_Xh%Binv`.
  !! @param w1, w2, w3   Components of the curl, overwritten.
  !! @param u1, u2, u3   Components of the input field.
  !! @param work1, work2 Scratch fields, overwritten.
  !! @param c_Xh         Coefficients for the space the fields live on.
  subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
    type(field_t), intent(inout) :: w1
    type(field_t), intent(inout) :: w2
    type(field_t), intent(inout) :: w3
    type(field_t), intent(inout) :: u1
    type(field_t), intent(inout) :: u2
    type(field_t), intent(inout) :: u3
    type(field_t), intent(inout) :: work1
    type(field_t), intent(inout) :: work2
    type(coef_t), intent(in)  :: c_xh
    integer :: ndim, ndofs

    ndofs = w1%dof%size()
    ndim = c_xh%msh%gdim

    ! work1 = dw/dy, needed for w1 in both 2D and 3D
    call opr_xsmm_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)

    if (ndim == 3) then
       ! w1 = dw/dy - dv/dz
       call opr_xsmm_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, &
                            c_xh%dtdz, c_xh)
       call sub3(w1%x, work1%x, work2%x, ndofs)

       ! w2 = du/dz - dw/dx
       call opr_xsmm_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, &
                            c_xh%dtdz, c_xh)
       call opr_xsmm_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
                            c_xh%dtdx, c_xh)
       call sub3(w2%x, work1%x, work2%x, ndofs)
    else
       ! w1 = dw/dy
       call copy(w1%x, work1%x, ndofs)

       ! w2 = 0 - dw/dx
       call rzero (work1%x, ndofs)
       call opr_xsmm_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
                            c_xh%dtdx, c_xh)
       call sub3(w2%x, work1%x, work2%x, ndofs)
    end if

    ! w3 = dv/dx - du/dy
    call opr_xsmm_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
    call opr_xsmm_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
    call sub3(w3%x, work1%x, work2%x, ndofs)

    !!    BC dependent, Needs to change if cyclic
    call opcolv(w1%x, w2%x, w3%x, c_xh%B, ndim, ndofs)
    call c_xh%gs_h%op(w1, gs_op_add)
    call c_xh%gs_h%op(w2, gs_op_add)
    call c_xh%gs_h%op(w3, gs_op_add)
    call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, ndim, ndofs)
  end subroutine opr_xsmm_curl


    type(field_t), intent(inout) :: w2 …

  !> Transform the convecting field `(cx, cy, cz)` into its reference-space
  !! components, scaled by the weights `Xh%w3`:
  !! `cr = w3 * (cx*drdx + cy*drdy + cz*drdz)`, and likewise `cs` and `ct`
  !! with the `ds*` and `dt*` metric terms.
  !! @param cr, cs, ct Reference-space components, overwritten.
  !! @param cx, cy, cz Physical-space components.
  !! @param Xh   Function space providing `lxyz` and `w3` (only read here).
  !! @param coef Coefficients providing the metric terms (only read here).
  subroutine opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
    type(space_t), intent(inout) :: xh
    type(coef_t), intent(inout) :: coef
    real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
                   intent(inout) :: cr, cs, ct
    real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
                   intent(in) :: cx, cy, cz
    integer :: e, i, nxyz

    associate(drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, &
      dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, &
      dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz, &
      nelv => coef%msh%nelv, w3 => xh%w3)

      ! Loop over exactly the declared first extent of the arguments.
      ! Using lx**3 here would run past the arrays whenever
      ! lx*ly*lz /= lx**3 (e.g. lz = 1).
      nxyz = xh%lxyz

      do e = 1, nelv
         do i = 1, nxyz
            cr(i,e) = w3(i,1,1) * (cx(i,e) * drdx(i,1,1,e) &
                        + cy(i,e) * drdy(i,1,1,e) &
                        + cz(i,e) * drdz(i,1,1,e))
            cs(i,e) = w3(i,1,1) * (cx(i,e) * dsdx(i,1,1,e) &
                        + cy(i,e) * dsdy(i,1,1,e) &
                        + cz(i,e) * dsdz(i,1,1,e))
            ct(i,e) = w3(i,1,1) * (cx(i,e) * dtdx(i,1,1,e) &
                        + cy(i,e) * dtdy(i,1,1,e) &
                        + cz(i,e) * dtdz(i,1,1,e))
         end do
      end do
    end associate
  end subroutine opr_xsmm_set_convect_rst


    type(coef_t), intent(inout) :: coef …

end module opr_xsmm


coefs
Coefficients.
Definition coef.f90:34

field
Defines a field.
Definition field.f90:34

gather_scatter
Gather-scatter.
Definition gather_scatter.f90:34

interpolation
Routines to interpolate between different spaces.
Definition interpolation.f90:34

math
Definition math.f90:60

math::invcol2
subroutine, public invcol2(a, b, n)
Vector division $a = a / b$.
Definition math.f90:715

math::sub3
subroutine, public sub3(a, b, c, n)
Vector subtraction $a = b - c$.
Definition math.f90:642

math::add2
subroutine, public add2(a, b, n)
Vector addition $a = a + b$.
Definition math.f90:587

math::addcol3
subroutine, public addcol3(a, b, c, n)
Returns $a = a + b \cdot c$.
Definition math.f90:801

math::col2
subroutine, public col2(a, b, n)
Vector multiplication $a = a \cdot b$.
Definition math.f90:729

math::copy
subroutine, public copy(a, b, n)
Copy a vector $a = b$.
Definition math.f90:239

math::col3
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors $a = b \cdot c$.
Definition math.f90:742

math::rzero
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:195

mathops
Collection of vector field operations operating on  and . Note that in general the indices  and ....
Definition mathops.f90:65

mathops::opcolv
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition mathops.f90:97

mesh
Defines a mesh.
Definition mesh.f90:34

mxm_wrapper
Wrapper for all matrix-matrix product implementations.
Definition mxm_wrapper.F90:2

mxm_wrapper::mxm
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product $C = A \cdot B$ for contiguously packed matrices A, B, and C.
Definition mxm_wrapper.F90:29

num_types
Definition num_types.f90:1

num_types::i2
integer, parameter, public i2
Definition num_types.f90:5

num_types::rp
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12

opr_xsmm
Operators libxsmm backend.
Definition opr_xsmm.F90:61

opr_xsmm::local_grad3_xsmm
subroutine local_grad3_xsmm(ur, us, ut, u, n, d, dt)
Definition opr_xsmm.F90:203

opr_xsmm::opr_xsmm_conv1
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
Definition opr_xsmm.F90:297

opr_xsmm::opr_xsmm_cdtp
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition opr_xsmm.F90:242

opr_xsmm::opr_xsmm_curl
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_xsmm.F90:416

opr_xsmm::opr_xsmm_dudxyz
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition opr_xsmm.F90:92

opr_xsmm::opr_xsmm_opgrad
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition opr_xsmm.F90:146

opr_xsmm::opr_xsmm_set_convect_rst
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Definition opr_xsmm.F90:467

opr_xsmm::opr_xsmm_convect_scalar
subroutine, public opr_xsmm_convect_scalar(du, u, c, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Definition opr_xsmm.F90:371

opr_xsmm::local_grad2
subroutine local_grad2(ur, us, u, n, d, dt)
Definition opr_xsmm.F90:226

space
Defines a function space.
Definition space.f90:34

coefs::coef_t
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55

field::field_t
Definition field.f90:46

gather_scatter::gs_t
Definition gather_scatter.f90:58

interpolation::interpolator_t
Interpolation between two space::space_t.
Definition interpolation.f90:51

mesh::mesh_t
Definition mesh.f90:64

space::space_t
The function space for the SEM solution fields.
Definition space.f90:62