82    type(
mesh_t), 
intent(inout) :: msh
 
   83    type(
space_t), 
intent(inout) :: Xh
 
   84    type(
coef_t), 
intent(inout) :: coef
 
   85    real(kind=
rp), 
intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
 
   86    real(kind=
rp), 
intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
 
   88    real(kind=
rp) :: dudr(xh%lx,xh%ly,xh%lz)
 
   89    real(kind=
rp) :: duds(xh%lx,xh%ly,xh%lz)
 
   90    real(kind=
rp) :: dudt(xh%lx,xh%ly,xh%lz)
 
   91    real(kind=
rp) :: tmp1(xh%lx,xh%ly,xh%lz)
 
   92    real(kind=
rp) :: tmp2(xh%lx,xh%ly,xh%lz)
 
   93    real(kind=
rp) :: tmp3(xh%lx,xh%ly,xh%lz)
 
   94    real(kind=
rp) :: tm1(xh%lx,xh%ly,xh%lz)
 
   95    real(kind=
rp) :: tm2(xh%lx,xh%ly,xh%lz)
 
   96    real(kind=
rp) :: tm3(xh%lx,xh%ly,xh%lz)
 
   97    integer :: e, k, lxy, lxz, lyz, lxyz
 
   98    type(libxsmm_dmmfunction), 
save :: ax_helm_xmm1
 
   99    type(libxsmm_dmmfunction), 
save :: ax_helm_xmm2
 
  100    type(libxsmm_dmmfunction), 
save :: ax_helm_xmm3
 
  101    integer, 
save :: ax_helm_xsmm_lx = 0
 
  102    logical, 
save :: ax_helm_xsmm_init = .false.
 
  107    lxyz = xh%lx*xh%ly*xh%lz
 
  109    if (.not. ax_helm_xsmm_init .or. (ax_helm_xsmm_lx .ne. xh%lx)) 
then 
  110       call libxsmm_dispatch(ax_helm_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
 
  111            alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
 
  112       call libxsmm_dispatch(ax_helm_xmm2, xh%lx, xh%ly, xh%ly, &
 
  113            alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
 
  114       call libxsmm_dispatch(ax_helm_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
 
  115            alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
 
  116       ax_helm_xsmm_init = .true.
 
  117       ax_helm_xsmm_lx = xh%lx
 
  122       if(msh%gdim .eq. 2) 
then 
  123          call mxm(xh%dx, xh%lx,u(1,1,1,e), xh%lx, dudr, lyz)
 
  124          call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
 
  125          call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
 
  126          call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
 
  127          if (msh%dfrmd_el(e)) 
then 
  128             call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
 
  129             call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
 
  131          call col2(tmp1, coef%h1(1,1,1,e), lxyz)
 
  132          call col2(tmp2, coef%h1(1,1,1,e), lxyz)
 
  133          call mxm(xh%dxt, xh%lx, tmp1, xh%lx, tm1, lyz)
 
  134          call mxm(tmp2, xh%lx, xh%dy, xh%ly, tm2, xh%ly)
 
  135          call add3(w(1,1,1,e), tm1, tm2, lxyz)
 
  139          call libxsmm_mmcall(ax_helm_xmm1, xh%dx, u(1,1,1,e), dudr)
 
  141             call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), xh%dyt, duds(1,1,k))
 
  143          call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), xh%dzt, dudt)
 
  144          call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
 
  145          call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
 
  146          call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz)
 
  147          if (msh%dfrmd_el(e)) 
then 
  148             call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
 
  149             call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz)
 
  150             call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
 
  151             call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz)
 
  152             call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz)
 
  153             call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz)
 
  155          call col2(tmp1, coef%h1(1,1,1,e), lxyz)
 
  156          call col2(tmp2, coef%h1(1,1,1,e), lxyz)
 
  157          call col2(tmp3, coef%h1(1,1,1,e), lxyz)
 
  158          call libxsmm_mmcall(ax_helm_xmm1, xh%dxt, tmp1, tm1)
 
  160             call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), xh%dy, tm2(1,1,k))
 
  162          call libxsmm_mmcall(ax_helm_xmm3, tmp3, xh%dz, tm3)
 
  163          call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz)
 
  167    if (coef%ifh2) 
call addcol4 (w,coef%h2,coef%B,u,coef%dof%n_dofs)