69 use libxsmm, libxsmm_mmcall => libxsmm_dmmcall_abc
82 type(
mesh_t),
intent(inout) :: msh
83 type(
space_t),
intent(inout) :: Xh
84 type(
coef_t),
intent(inout) :: coef
85 real(kind=
rp),
intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
86 real(kind=
rp),
intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
88 real(kind=
rp) :: dudr(xh%lx,xh%ly,xh%lz)
89 real(kind=
rp) :: duds(xh%lx,xh%ly,xh%lz)
90 real(kind=
rp) :: dudt(xh%lx,xh%ly,xh%lz)
91 real(kind=
rp) :: tmp1(xh%lx,xh%ly,xh%lz)
92 real(kind=
rp) :: tmp2(xh%lx,xh%ly,xh%lz)
93 real(kind=
rp) :: tmp3(xh%lx,xh%ly,xh%lz)
94 real(kind=
rp) :: tm1(xh%lx,xh%ly,xh%lz)
95 real(kind=
rp) :: tm2(xh%lx,xh%ly,xh%lz)
96 real(kind=
rp) :: tm3(xh%lx,xh%ly,xh%lz)
97 integer :: e, k, lxy, lxz, lyz, lxyz
98 type(libxsmm_dmmfunction),
save :: ax_helm_xmm1
99 type(libxsmm_dmmfunction),
save :: ax_helm_xmm2
100 type(libxsmm_dmmfunction),
save :: ax_helm_xmm3
101 integer,
save :: ax_helm_xsmm_lx = 0
102 logical,
save :: ax_helm_xsmm_init = .false.
107 lxyz = xh%lx*xh%ly*xh%lz
109 if (.not. ax_helm_xsmm_init .or. (ax_helm_xsmm_lx .ne. xh%lx))
then
110 call libxsmm_dispatch(ax_helm_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
111 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
112 call libxsmm_dispatch(ax_helm_xmm2, xh%lx, xh%ly, xh%ly, &
113 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
114 call libxsmm_dispatch(ax_helm_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
115 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
116 ax_helm_xsmm_init = .true.
117 ax_helm_xsmm_lx = xh%lx
122 if(msh%gdim .eq. 2)
then
123 call mxm(xh%dx, xh%lx,u(1,1,1,e), xh%lx, dudr, lyz)
124 call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
125 call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
126 call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
127 if (msh%dfrmd_el(e))
then
128 call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
129 call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
131 call col2(tmp1, coef%h1(1,1,1,e), lxyz)
132 call col2(tmp2, coef%h1(1,1,1,e), lxyz)
133 call mxm(xh%dxt, xh%lx, tmp1, xh%lx, tm1, lyz)
134 call mxm(tmp2, xh%lx, xh%dy, xh%ly, tm2, xh%ly)
135 call add3(w(1,1,1,e), tm1, tm2, lxyz)
139 call libxsmm_mmcall(ax_helm_xmm1, xh%dx, u(1,1,1,e), dudr)
141 call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), xh%dyt, duds(1,1,k))
143 call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), xh%dzt, dudt)
144 call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
145 call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
146 call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz)
147 if (msh%dfrmd_el(e))
then
148 call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
149 call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz)
150 call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
151 call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz)
152 call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz)
153 call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz)
155 call col2(tmp1, coef%h1(1,1,1,e), lxyz)
156 call col2(tmp2, coef%h1(1,1,1,e), lxyz)
157 call col2(tmp3, coef%h1(1,1,1,e), lxyz)
158 call libxsmm_mmcall(ax_helm_xmm1, xh%dxt, tmp1, tm1)
160 call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), xh%dy, tm2(1,1,k))
162 call libxsmm_mmcall(ax_helm_xmm3, tmp3, xh%dz, tm3)
163 call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz)
167 if (coef%ifh2)
call addcol4 (w,coef%h2,coef%B,u,coef%dof%n_dofs)
subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh)
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Matrix-vector product for a Helmholtz problem.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.