Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
ax_helm_xsmm.F90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61 use ax_helm, only : ax_helm_t
62 use num_types, only : rp
63 use coefs, only : coef_t
64 use space, only : space_t
65 use mesh, only : mesh_t
66 use mxm_wrapper
67 use num_types
68#ifdef HAVE_LIBXSMM
69 use libxsmm, libxsmm_mmcall => libxsmm_dmmcall_abc
70#endif
71 implicit none
72 private
73
74 type, public, extends(ax_helm_t) :: ax_helm_xsmm_t
75 contains
76 procedure, nopass :: compute => ax_helm_xsmm_compute
77 end type ax_helm_xsmm_t
78
79contains
80
81 subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh)
82 type(mesh_t), intent(inout) :: msh
83 type(space_t), intent(inout) :: Xh
84 type(coef_t), intent(inout) :: coef
85 real(kind=rp), intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
86 real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
87#ifdef HAVE_LIBXSMM
88 real(kind=rp) :: dudr(xh%lx,xh%ly,xh%lz)
89 real(kind=rp) :: duds(xh%lx,xh%ly,xh%lz)
90 real(kind=rp) :: dudt(xh%lx,xh%ly,xh%lz)
91 real(kind=rp) :: tmp1(xh%lx,xh%ly,xh%lz)
92 real(kind=rp) :: tmp2(xh%lx,xh%ly,xh%lz)
93 real(kind=rp) :: tmp3(xh%lx,xh%ly,xh%lz)
94 real(kind=rp) :: tm1(xh%lx,xh%ly,xh%lz)
95 real(kind=rp) :: tm2(xh%lx,xh%ly,xh%lz)
96 real(kind=rp) :: tm3(xh%lx,xh%ly,xh%lz)
97 integer :: e, k, lxy, lxz, lyz, lxyz
98 type(libxsmm_dmmfunction), save :: ax_helm_xmm1
99 type(libxsmm_dmmfunction), save :: ax_helm_xmm2
100 type(libxsmm_dmmfunction), save :: ax_helm_xmm3
101 integer, save :: ax_helm_xsmm_lx = 0
102 logical, save :: ax_helm_xsmm_init = .false.
103
104 lxy = xh%lx*xh%ly
105 lxz = xh%lx*xh%lz
106 lyz = xh%ly*xh%lz
107 lxyz = xh%lx*xh%ly*xh%lz
108
109 if (.not. ax_helm_xsmm_init .or. (ax_helm_xsmm_lx .ne. xh%lx)) then
110 call libxsmm_dispatch(ax_helm_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
111 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
112 call libxsmm_dispatch(ax_helm_xmm2, xh%lx, xh%ly, xh%ly, &
113 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
114 call libxsmm_dispatch(ax_helm_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
115 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
116 ax_helm_xsmm_init = .true.
117 ax_helm_xsmm_lx = xh%lx
118 end if
119
120
121 do e = 1, msh%nelv
122 if(msh%gdim .eq. 2) then
123 call mxm(xh%dx, xh%lx,u(1,1,1,e), xh%lx, dudr, lyz)
124 call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
125 call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
126 call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
127 if (msh%dfrmd_el(e)) then
128 call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
129 call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
130 end if
131 call col2(tmp1, coef%h1(1,1,1,e), lxyz)
132 call col2(tmp2, coef%h1(1,1,1,e), lxyz)
133 call mxm(xh%dxt, xh%lx, tmp1, xh%lx, tm1, lyz)
134 call mxm(tmp2, xh%lx, xh%dy, xh%ly, tm2, xh%ly)
135 call add3(w(1,1,1,e), tm1, tm2, lxyz)
136
137 ! 3D evaluation!
138 else
139 call libxsmm_mmcall(ax_helm_xmm1, xh%dx, u(1,1,1,e), dudr)
140 do k = 1,xh%lz
141 call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), xh%dyt, duds(1,1,k))
142 end do
143 call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), xh%dzt, dudt)
144 call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
145 call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
146 call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz)
147 if (msh%dfrmd_el(e)) then
148 call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
149 call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz)
150 call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
151 call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz)
152 call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz)
153 call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz)
154 end if
155 call col2(tmp1, coef%h1(1,1,1,e), lxyz)
156 call col2(tmp2, coef%h1(1,1,1,e), lxyz)
157 call col2(tmp3, coef%h1(1,1,1,e), lxyz)
158 call libxsmm_mmcall(ax_helm_xmm1, xh%dxt, tmp1, tm1)
159 do k = 1,xh%lz
160 call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), xh%dy, tm2(1,1,k))
161 end do
162 call libxsmm_mmcall(ax_helm_xmm3, tmp3, xh%dz, tm3)
163 call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz)
164 end if
165 end do
166
167 if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%n_dofs)
168#endif
169
170 end subroutine ax_helm_xsmm_compute
171
172end module ax_helm_xsmm
subroutine ax_helm_xsmm_compute(w, u, coef, msh, xh)
Coefficients.
Definition coef.f90:34
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Matrix-vector product for a Helmholtz problem.
Definition ax_helm.f90:44
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
The function space for the SEM solution fields.
Definition space.f90:62