Neko  0.9.99
A portable framework for high-order spectral element flow simulations
ax_helm_xsmm.F90
Go to the documentation of this file.
1 ! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2 !
3 ! The UChicago Argonne, LLC as Operator of Argonne National
4 ! Laboratory holds copyright in the Software. The copyright holder
5 ! reserves all rights except those expressly granted to licensees,
6 ! and U.S. Government license rights.
7 !
8 ! Redistribution and use in source and binary forms, with or without
9 ! modification, are permitted provided that the following conditions
10 ! are met:
11 !
12 ! 1. Redistributions of source code must retain the above copyright
13 ! notice, this list of conditions and the disclaimer below.
14 !
15 ! 2. Redistributions in binary form must reproduce the above copyright
16 ! notice, this list of conditions and the disclaimer (as noted below)
17 ! in the documentation and/or other materials provided with the
18 ! distribution.
19 !
20 ! 3. Neither the name of ANL nor the names of its contributors
21 ! may be used to endorse or promote products derived from this software
22 ! without specific prior written permission.
23 !
24 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28 ! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29 ! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 ! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31 ! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 ! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 ! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 ! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 !
37 ! Additional BSD Notice
38 ! ---------------------
39 ! 1. This notice is required to be provided under our contract with
40 ! the U.S. Department of Energy (DOE). This work was produced at
41 ! Argonne National Laboratory under Contract
42 ! No. DE-AC02-06CH11357 with the DOE.
43 !
44 ! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45 ! LLC nor any of their employees, makes any warranty,
46 ! express or implied, or assumes any liability or responsibility for the
47 ! accuracy, completeness, or usefulness of any information, apparatus,
48 ! product, or process disclosed, or represents that its use would not
49 ! infringe privately-owned rights.
50 !
51 ! 3. Also, reference herein to any specific commercial products, process,
52 ! or services by trade name, trademark, manufacturer or otherwise does
53 ! not necessarily constitute or imply its endorsement, recommendation,
54 ! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55 ! The views and opinions of authors expressed
56 ! herein do not necessarily state or reflect those of the United States
57 ! Government or UCHICAGO ARGONNE, LLC, and shall
58 ! not be used for advertising or product endorsement purposes.
59 !
61  use ax_helm, only : ax_helm_t
62  use num_types, only : rp
63  use coefs, only : coef_t
64  use space, only : space_t
65  use mesh, only : mesh_t
66  use mxm_wrapper
67  use num_types
68 #ifdef HAVE_LIBXSMM
69  use libxsmm, libxsmm_mmcall => libxsmm_dmmcall_abc
70 #endif
71  implicit none
72  private
73 
74  type, public, extends(ax_helm_t) :: ax_helm_xsmm_t
75  contains
76  procedure, nopass :: compute => ax_helm_xsmm_compute
77  end type ax_helm_xsmm_t
78 
79 contains
80 
81  subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh)
82  type(mesh_t), intent(inout) :: msh
83  type(space_t), intent(inout) :: Xh
84  type(coef_t), intent(inout) :: coef
85  real(kind=rp), intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
86  real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
87 #ifdef HAVE_LIBXSMM
88  real(kind=rp) :: dudr(xh%lx,xh%ly,xh%lz)
89  real(kind=rp) :: duds(xh%lx,xh%ly,xh%lz)
90  real(kind=rp) :: dudt(xh%lx,xh%ly,xh%lz)
91  real(kind=rp) :: tmp1(xh%lx,xh%ly,xh%lz)
92  real(kind=rp) :: tmp2(xh%lx,xh%ly,xh%lz)
93  real(kind=rp) :: tmp3(xh%lx,xh%ly,xh%lz)
94  real(kind=rp) :: tm1(xh%lx,xh%ly,xh%lz)
95  real(kind=rp) :: tm2(xh%lx,xh%ly,xh%lz)
96  real(kind=rp) :: tm3(xh%lx,xh%ly,xh%lz)
97  integer :: e, k, lxy, lxz, lyz, lxyz
98  type(libxsmm_dmmfunction), save :: ax_helm_xmm1
99  type(libxsmm_dmmfunction), save :: ax_helm_xmm2
100  type(libxsmm_dmmfunction), save :: ax_helm_xmm3
101  integer, save :: ax_helm_xsmm_lx = 0
102  logical, save :: ax_helm_xsmm_init = .false.
103 
104  lxy = xh%lx*xh%ly
105  lxz = xh%lx*xh%lz
106  lyz = xh%ly*xh%lz
107  lxyz = xh%lx*xh%ly*xh%lz
108 
109  if (.not. ax_helm_xsmm_init .or. (ax_helm_xsmm_lx .ne. xh%lx)) then
110  call libxsmm_dispatch(ax_helm_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
111  alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
112  call libxsmm_dispatch(ax_helm_xmm2, xh%lx, xh%ly, xh%ly, &
113  alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
114  call libxsmm_dispatch(ax_helm_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
115  alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
116  ax_helm_xsmm_init = .true.
117  ax_helm_xsmm_lx = xh%lx
118  end if
119 
120 
121  do e = 1, msh%nelv
122  if(msh%gdim .eq. 2) then
123  call mxm(xh%dx, xh%lx,u(1,1,1,e), xh%lx, dudr, lyz)
124  call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
125  call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
126  call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
127  if (msh%dfrmd_el(e)) then
128  call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
129  call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
130  end if
131  call col2(tmp1, coef%h1(1,1,1,e), lxyz)
132  call col2(tmp2, coef%h1(1,1,1,e), lxyz)
133  call mxm(xh%dxt, xh%lx, tmp1, xh%lx, tm1, lyz)
134  call mxm(tmp2, xh%lx, xh%dy, xh%ly, tm2, xh%ly)
135  call add3(w(1,1,1,e), tm1, tm2, lxyz)
136 
137  ! 3D evaluation!
138  else
139  call libxsmm_mmcall(ax_helm_xmm1, xh%dx, u(1,1,1,e), dudr)
140  do k = 1,xh%lz
141  call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), xh%dyt, duds(1,1,k))
142  end do
143  call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), xh%dzt, dudt)
144  call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
145  call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
146  call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz)
147  if (msh%dfrmd_el(e)) then
148  call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
149  call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz)
150  call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
151  call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz)
152  call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz)
153  call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz)
154  end if
155  call col2(tmp1, coef%h1(1,1,1,e), lxyz)
156  call col2(tmp2, coef%h1(1,1,1,e), lxyz)
157  call col2(tmp3, coef%h1(1,1,1,e), lxyz)
158  call libxsmm_mmcall(ax_helm_xmm1, xh%dxt, tmp1, tm1)
159  do k = 1,xh%lz
160  call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), xh%dy, tm2(1,1,k))
161  end do
162  call libxsmm_mmcall(ax_helm_xmm3, tmp3, xh%dz, tm3)
163  call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz)
164  end if
165  end do
166 
167  if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%n_dofs)
168 #endif
169 
170  end subroutine ax_helm_xsmm_compute
171 
172 end module ax_helm_xsmm
subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh)
Coefficients.
Definition: coef.f90:34
Defines a mesh.
Definition: mesh.f90:34
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Defines a function space.
Definition: space.f90:34
Matrix-vector product for a Helmholtz problem.
Definition: ax_helm.f90:44
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
The function space for the SEM solution fields.
Definition: space.f90:62