69 use libxsmm, libxsmm_mmcall => libxsmm_dmmcall_abc
82 type(
mesh_t),
intent(inout) :: msh
83 type(
space_t),
intent(inout) :: Xh
84 type(
coef_t),
intent(inout) :: coef
85 real(kind=
rp),
intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
86 real(kind=
rp),
intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
88 real(kind=
rp) :: dudr(xh%lx,xh%ly,xh%lz)
89 real(kind=
rp) :: duds(xh%lx,xh%ly,xh%lz)
90 real(kind=
rp) :: dudt(xh%lx,xh%ly,xh%lz)
91 real(kind=
rp) :: tmp1(xh%lx,xh%ly,xh%lz)
92 real(kind=
rp) :: tmp2(xh%lx,xh%ly,xh%lz)
93 real(kind=
rp) :: tmp3(xh%lx,xh%ly,xh%lz)
94 real(kind=
rp) :: tm1(xh%lx,xh%ly,xh%lz)
95 real(kind=
rp) :: tm2(xh%lx,xh%ly,xh%lz)
96 real(kind=
rp) :: tm3(xh%lx,xh%ly,xh%lz)
97 integer :: e, k, lxy, lxz, lyz, lxyz
99 type(libxsmm_dmmfunction),
save :: ax_helm_xmm1
100 type(libxsmm_dmmfunction),
save :: ax_helm_xmm2
101 type(libxsmm_dmmfunction),
save :: ax_helm_xmm3
102 integer,
save :: ax_helm_xsmm_lx = 0
103 logical,
save :: ax_helm_xsmm_init = .false.
108 lxyz = xh%lx*xh%ly*xh%lz
110 if (.not. ax_helm_xsmm_init .or. (ax_helm_xsmm_lx .ne. xh%lx))
then
111 call libxsmm_dispatch(ax_helm_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
112 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
113 call libxsmm_dispatch(ax_helm_xmm2, xh%lx, xh%ly, xh%ly, &
114 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
115 call libxsmm_dispatch(ax_helm_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
116 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
117 ax_helm_xsmm_init = .true.
118 ax_helm_xsmm_lx = xh%lx
123 if(msh%gdim .eq. 2)
then
124 call mxm(xh%dx, xh%lx,u(1,1,1,e), xh%lx, dudr, lyz)
125 call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
126 call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
127 call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
128 if (msh%dfrmd_el(e))
then
129 call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
130 call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
132 call col2(tmp1, coef%h1(1,1,1,e), lxyz)
133 call col2(tmp2, coef%h1(1,1,1,e), lxyz)
134 call mxm(xh%dxt, xh%lx, tmp1, xh%lx, tm1, lyz)
135 call mxm(tmp2, xh%lx, xh%dy, xh%ly, tm2, xh%ly)
136 call add3(w(1,1,1,e), tm1, tm2, lxyz)
140 call libxsmm_mmcall(ax_helm_xmm1, xh%dx, u(1,1,1,e), dudr)
142 call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), xh%dyt, duds(1,1,k))
144 call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), xh%dzt, dudt)
145 call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
146 call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
147 call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz)
148 if (msh%dfrmd_el(e))
then
149 call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
150 call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz)
151 call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
152 call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz)
153 call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz)
154 call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz)
156 call col2(tmp1, coef%h1(1,1,1,e), lxyz)
157 call col2(tmp2, coef%h1(1,1,1,e), lxyz)
158 call col2(tmp3, coef%h1(1,1,1,e), lxyz)
159 call libxsmm_mmcall(ax_helm_xmm1, xh%dxt, tmp1, tm1)
161 call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), xh%dy, tm2(1,1,k))
163 call libxsmm_mmcall(ax_helm_xmm3, tmp3, xh%dz, tm3)
164 call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz)
168 if (coef%ifh2)
call addcol4 (w,coef%h2,coef%B,u,coef%dof%n_dofs)
subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh)
Defines a Matrix-vector product.
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Base type for a matrix-vector product providing .
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.