dd/de0/ax__helm__xsmm_8F90_source.html

! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.

!

! The UChicago Argonne, LLC as Operator of Argonne National

! Laboratory holds copyright in the Software. The copyright holder

! reserves all rights except those expressly granted to licensees,

! and U.S. Government license rights.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

! 1. Redistributions of source code must retain the above copyright

! notice, this list of conditions and the disclaimer below.

!

! 2. Redistributions in binary form must reproduce the above copyright

! notice, this list of conditions and the disclaimer (as noted below)

! in the documentation and/or other materials provided with the

! distribution.

!

! 3. Neither the name of ANL nor the names of its contributors

! may be used to endorse or promote products derived from this software

! without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL

! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF

! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED

! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

!

! Additional BSD Notice

! ---------------------

! 1. This notice is required to be provided under our contract with

! the U.S. Department of Energy (DOE). This work was produced at

! Argonne National Laboratory under Contract

! No. DE-AC02-06CH11357 with the DOE.

!

! 2. Neither the United States Government nor UCHICAGO ARGONNE,

! LLC nor any of their employees, makes any warranty,

! express or implied, or assumes any liability or responsibility for the

! accuracy, completeness, or usefulness of any information, apparatus,

! product, or process disclosed, or represents that its use would not

! infringe privately-owned rights.

!

! 3. Also, reference herein to any specific commercial products, process,

! or services by trade name, trademark, manufacturer or otherwise does

! not necessarily constitute or imply its endorsement, recommendation,

! or favoring by the United States Government or UCHICAGO ARGONNE LLC.

! The views and opinions of authors expressed

! herein do not necessarily state or reflect those of the United States

! Government or UCHICAGO ARGONNE, LLC, and shall

! not be used for advertising or product endorsement purposes.

!

module ax_helm_xsmm

  use ax_helm, only : ax_helm_t

  use num_types, only : rp

  use coefs, only : coef_t

  use space, only : space_t

  use mesh, only : mesh_t

  use mxm_wrapper

  use num_types

#ifdef HAVE_LIBXSMM

  use libxsmm, libxsmm_mmcall => libxsmm_dmmcall_abc

#endif

  implicit none

  private


  !> Extension of ax_helm_t whose `compute` binding is implemented by
  !! ax_helm_xsmm_compute (declared `nopass`, so no passed-object argument).
  type, public, extends(ax_helm_t) :: ax_helm_xsmm_t
   contains
     procedure, nopass :: compute => ax_helm_xsmm_compute
  end type ax_helm_xsmm_t


contains


  !> Element-wise Helmholtz matrix-vector product using libxsmm kernels.
  !!
  !! For each of the `msh%nelv` elements the routine
  !!  1. multiplies `u` with `Xh%dx`, `Xh%dyt` (and `Xh%dzt` outside 2D) to
  !!     obtain `dudr`, `duds`, `dudt`,
  !!  2. combines them with `coef%G11/G22/G33` via `col3`, and, when
  !!     `msh%dfrmd_el(e)` is set, with the cross terms `G12/G13/G23` via
  !!     `addcol3`,
  !!  3. applies `coef%h1` via `col2`,
  !!  4. multiplies with `Xh%dxt`, `Xh%dy`, `Xh%dz` and hands the pieces to
  !!     `add3` / `add4` together with `w(:,:,:,e)`.
  !! If `coef%ifh2` is true, `addcol4` is finally called with `w`, `coef%h2`,
  !! `coef%B` and `u` over `coef%dof%n_dofs` entries.
  !!
  !! Elements of a mesh with `msh%gdim == 2` go through the generic `mxm`
  !! wrapper; every other case uses the dispatched libxsmm kernels.
  !!
  !! @note The whole body is guarded by `HAVE_LIBXSMM`. When that macro is
  !! not defined the routine returns immediately and `w` is left untouched.
  !! @note The kernels and their cache key are `save`d locals shared by all
  !! calls of this routine.
  !!
  !! @param w    Output field, dimensioned (lx, ly, lz, nelv).
  !! @param u    Input field, dimensioned (lx, ly, lz, nelv).
  !! @param coef Coefficients; G11..G33, G12, G13, G23, h1, h2, B, ifh2 and
  !!             dof%n_dofs are read.
  !! @param msh  Mesh; nelv, gdim and dfrmd_el are read.
  !! @param Xh   Function space; lx, ly, lz, dx, dy, dz, dxt, dyt, dzt are read.
  subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh)
    type(mesh_t), intent(in) :: msh
    type(space_t), intent(in) :: Xh
    type(coef_t), intent(in) :: coef
    real(kind=rp), intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
    real(kind=rp), intent(in) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
#ifdef HAVE_LIBXSMM
    ! Per-element work arrays.
    real(kind=rp) :: dudr(xh%lx,xh%ly,xh%lz)
    real(kind=rp) :: duds(xh%lx,xh%ly,xh%lz)
    real(kind=rp) :: dudt(xh%lx,xh%ly,xh%lz)
    real(kind=rp) :: tmp1(xh%lx,xh%ly,xh%lz)
    real(kind=rp) :: tmp2(xh%lx,xh%ly,xh%lz)
    real(kind=rp) :: tmp3(xh%lx,xh%ly,xh%lz)
    real(kind=rp) :: tm1(xh%lx,xh%ly,xh%lz)
    real(kind=rp) :: tm2(xh%lx,xh%ly,xh%lz)
    real(kind=rp) :: tm3(xh%lx,xh%ly,xh%lz)
    integer :: e, k, lyz, lxyz
    ! Dispatched kernels, kept between calls.
    type(libxsmm_dmmfunction), save :: ax_helm_xmm1
    type(libxsmm_dmmfunction), save :: ax_helm_xmm2
    type(libxsmm_dmmfunction), save :: ax_helm_xmm3
    ! Extents the kernels above were dispatched for.
    integer, save :: ax_helm_xsmm_lx = 0
    integer, save :: ax_helm_xsmm_ly = 0
    integer, save :: ax_helm_xsmm_lz = 0
    logical, save :: ax_helm_xsmm_init = .false.

    lyz = xh%ly*xh%lz
    lxyz = xh%lx*xh%ly*xh%lz

    ! The kernel shapes below depend on lx, ly and lz, so all three extents
    ! take part in the decision to (re-)dispatch.
    if (.not. ax_helm_xsmm_init .or. &
         (ax_helm_xsmm_lx .ne. xh%lx) .or. &
         (ax_helm_xsmm_ly .ne. xh%ly) .or. &
         (ax_helm_xsmm_lz .ne. xh%lz)) then
       ! (lx x lx) * (lx x ly*lz)
       call libxsmm_dispatch(ax_helm_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
            alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
       ! (lx x ly) * (ly x ly), used once per k-plane
       call libxsmm_dispatch(ax_helm_xmm2, xh%lx, xh%ly, xh%ly, &
            alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
       ! (lx*ly x lz) * (lz x lz)
       call libxsmm_dispatch(ax_helm_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
            alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
       ax_helm_xsmm_init = .true.
       ax_helm_xsmm_lx = xh%lx
       ax_helm_xsmm_ly = xh%ly
       ax_helm_xsmm_lz = xh%lz
    end if

    do e = 1, msh%nelv
       if(msh%gdim .eq. 2) then
          ! 2D evaluation: generic mxm wrapper, no libxsmm kernels.
          call mxm(xh%dx, xh%lx,u(1,1,1,e), xh%lx, dudr, lyz)
          call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
          call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
          call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
          ! Cross terms are only added for elements flagged in dfrmd_el.
          if (msh%dfrmd_el(e)) then
             call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
             call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
          end if
          call col2(tmp1, coef%h1(1,1,1,e), lxyz)
          call col2(tmp2, coef%h1(1,1,1,e), lxyz)
          call mxm(xh%dxt, xh%lx, tmp1, xh%lx, tm1, lyz)
          call mxm(tmp2, xh%lx, xh%dy, xh%ly, tm2, xh%ly)
          call add3(w(1,1,1,e), tm1, tm2, lxyz)

          ! 3D evaluation!
       else
          call libxsmm_mmcall(ax_helm_xmm1, xh%dx, u(1,1,1,e), dudr)
          ! The second direction is handled one k-plane at a time.
          do k = 1,xh%lz
             call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), xh%dyt, duds(1,1,k))
          end do
          call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), xh%dzt, dudt)
          call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
          call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
          call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz)
          ! Cross terms are only added for elements flagged in dfrmd_el.
          if (msh%dfrmd_el(e)) then
             call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
             call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz)
             call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
             call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz)
             call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz)
             call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz)
          end if
          call col2(tmp1, coef%h1(1,1,1,e), lxyz)
          call col2(tmp2, coef%h1(1,1,1,e), lxyz)
          call col2(tmp3, coef%h1(1,1,1,e), lxyz)
          call libxsmm_mmcall(ax_helm_xmm1, xh%dxt, tmp1, tm1)
          do k = 1,xh%lz
             call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), xh%dy, tm2(1,1,k))
          end do
          call libxsmm_mmcall(ax_helm_xmm3, tmp3, xh%dz, tm3)
          call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz)
       end if
    end do

    ! Optional h2 contribution over all degrees of freedom.
    if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%n_dofs)
#endif

  end subroutine ax_helm_xsmm_compute


end module ax_helm_xsmm

ax_helm_xsmm
Definition ax_helm_xsmm.F90:60

ax_helm_xsmm::ax_helm_xsmm_compute
subroutine ax_helm_xsmm_compute(w, u, coef, msh, xh)
Definition ax_helm_xsmm.F90:82

ax_helm
Definition ax_helm.f90:33

coefs
Coefficients.
Definition coef.f90:34

mesh
Defines a mesh.
Definition mesh.f90:34

mxm_wrapper
Wrapper for all matrix-matrix product implementations.
Definition mxm_wrapper.F90:2

mxm_wrapper::mxm
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product $C = A \cdot B$ for contiguously packed matrices A, B, and C.
Definition mxm_wrapper.F90:29

num_types
Definition num_types.f90:1

num_types::rp
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12

space
Defines a function space.
Definition space.f90:34

ax_helm::ax_helm_t
Matrix-vector product for a Helmholtz problem.
Definition ax_helm.f90:44

ax_helm_xsmm::ax_helm_xsmm_t
Definition ax_helm_xsmm.F90:74

coefs::coef_t
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55

mesh::mesh_t
Definition mesh.f90:64

space::space_t
The function space for the SEM solution fields.
Definition space.f90:62