d1/d85/tensor_8f90_source.html

! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.

!

! The UChicago Argonne, LLC as Operator of Argonne National

! Laboratory holds copyright in the Software. The copyright holder

! reserves all rights except those expressly granted to licensees,

! and U.S. Government license rights.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

! 1. Redistributions of source code must retain the above copyright

! notice, this list of conditions and the disclaimer below.

!

! 2. Redistributions in binary form must reproduce the above copyright

! notice, this list of conditions and the disclaimer (as noted below)

! in the documentation and/or other materials provided with the

! distribution.

!

! 3. Neither the name of ANL nor the names of its contributors

! may be used to endorse or promote products derived from this software

! without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL

! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF

! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED

! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

!

! Additional BSD Notice

! ---------------------

! 1. This notice is required to be provided under our contract with

! the U.S. Department of Energy (DOE). This work was produced at

! Argonne National Laboratory under Contract

! No. DE-AC02-06CH11357 with the DOE.

!

! 2. Neither the United States Government nor UCHICAGO ARGONNE,

! LLC nor any of their employees, makes any warranty,

! express or implied, or assumes any liability or responsibility for the

! accuracy, completeness, or usefulness of any information, apparatus,

! product, or process disclosed, or represents that its use would not

! infringe privately-owned rights.

!

! 3. Also, reference herein to any specific commercial products, process,

! or services by trade name, trademark, manufacturer or otherwise does

! not necessarily constitute or imply its endorsement, recommendation,

! or favoring by the United States Government or UCHICAGO ARGONNE LLC.

! The views and opinions of authors expressed

! herein do not necessarily state or reflect those of the United States

! Government or UCHICAGO ARGONNE, LLC, and shall

! not be used for advertising or product endorsement purposes.

!

module tensor

  use tensor_xsmm

  use tensor_cpu

  use tensor_sx

  use tensor_device

  use num_types, only : rp

  use mxm_wrapper

  use neko_config

  use device

  use, intrinsic :: iso_c_binding, only : c_ptr

  implicit none

  private


  !> Generic transpose. Resolves to `trsp` (out-of-place, rectangular,
  !! four arguments) or `trsp1` (in-place, square, two arguments).
  !!
  !! @note The name coincides with the intrinsic function `transpose`.
  !! Because the specifics here are subroutines, the intrinsic is not
  !! accessible wherever this generic is visible.
  interface transpose
     module procedure trsp, trsp1
  end interface transpose


  !> Generic triple tensor product. Resolves to the scalar-field
  !! version (one field `u`, scalar result) or the vector-field version
  !! (three fields `u1`, `u2`, `u3`, result of length 3) from the
  !! argument list.
  interface triple_tensor_product
     module procedure triple_tensor_product_scalar, triple_tensor_product_vector
  end interface triple_tensor_product


  ! Exported names. The module default is private, so the specifics
  ! behind the triple_tensor_product generic are reachable only through
  ! that generic, while trsp/trsp1 are exported both by name and via
  ! the transpose generic.
  public :: tensr3, transpose, trsp, trsp1, &

     tnsr2d_el, tnsr3d_el, tnsr3d, tnsr1_3d, addtnsr, &

     triple_tensor_product, tnsr3d_el_list


contains


  !> Tensor product \f$ v = (C \otimes B \otimes A) u \f$ on one 3D
  !! element, applied one direction at a time through `mxm`.
  !!
  !! The description below assumes `mxm(a, n1, b, n2, c, n3)` forms the
  !! (n1 x n3) product of an (n1 x n2) and an (n2 x n3) matrix, all
  !! stored contiguously.
  !!
  !! @param v  Result, `nv**3` entries. Its leading `nv*nu**2` entries
  !!           also hold the intermediate of the first stage, so the
  !!           routine requires `nu <= nv`.
  !! @param nv Number of points per direction in `v`.
  !! @param u  Input field, `nu**3` entries.
  !! @param nu Number of points per direction in `u`.
  !! @param A  Operator for the first index, `nv` x `nu`.
  !! @param Bt Transposed operator for the second index, `nu` x `nv`.
  !! @param Ct Transposed operator for the third index, `nu` x `nv`.
  !! @param w  Work array. The second stage writes `nu` slabs of
  !!           `nv*nv` entries, so `nv*nv*nu` entries are needed (the
  !!           previous declaration, `nu*nu*nv`, was too small whenever
  !!           `nv > nu`).
  subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w)
    integer, intent(in) :: nv
    integer, intent(in) :: nu
    real(kind=rp), intent(inout) :: v(nv, nv, nv)
    real(kind=rp), intent(inout) :: u(nu, nu, nu)
    real(kind=rp), intent(inout) :: w(nv*nv*nu)
    real(kind=rp), intent(inout) :: a(nv, nu)
    real(kind=rp), intent(inout) :: bt(nu, nv)
    real(kind=rp), intent(inout) :: ct(nu, nv)
    integer :: j, q, l, nunu, nvnv

    nunu = nu**2
    nvnv = nv**2

    ! Stage 1, first index: (nv x nu) * (nu x nu**2) -> front of v.
    call mxm(a, nv, u, nu, v, nunu)

    ! Stage 2, second index: slab j of the intermediate is an
    ! (nv x nu) matrix starting nv*nu*(j-1) entries into v.
    do j = 1, nu
       ! q = number of length-nv columns of v that precede slab j.
       ! Splitting q over the 2nd/3rd subscripts addresses the same
       ! element as the former v(1 + (j-1)*nu*nv, 1, 1) while keeping
       ! every subscript inside the declared bounds (given nu <= nv).
       q = (j - 1)*nu
       l = 1 + (j - 1)*nvnv
       call mxm(v(1, mod(q, nv) + 1, q/nv + 1), nv, bt, nu, w(l), nv)
    end do

    ! Stage 3, third index: (nv**2 x nu) * (nu x nv) -> v.
    call mxm(w, nvnv, ct, nu, v, nv)

  end subroutine tensr3


  !> Out-of-place transpose of a rectangular matrix, \f$ A = B^T \f$.
  !!
  !! @param a   Destination, `lda` x `ldb`; every entry is overwritten.
  !! @param lda First extent of `a` (second extent of `b`).
  !! @param b   Source, `ldb` x `lda`.
  !! @param ldb First extent of `b` (second extent of `a`).
  subroutine trsp(a, lda, b, ldb)
    integer, intent(in) :: lda
    integer, intent(in) :: ldb
    real(kind=rp), intent(inout) :: a(lda, ldb)
    real(kind=rp), intent(in) :: b(ldb, lda)
    integer :: col

    ! Column `col` of a is row `col` of b. Written with sections rather
    ! than the intrinsic transpose, which is hidden by this module's
    ! own generic of the same name.
    do col = 1, ldb
       a(:, col) = b(col, :)
    end do

  end subroutine trsp


  !> In-place transpose of a square matrix.
  !!
  !! @param a Matrix, `n` x `n`; replaced by its transpose.
  !! @param n Extent of `a` in both directions.
  subroutine trsp1(a, n)
    integer, intent(in) :: n
    real(kind=rp), intent(inout) :: a(n, n)
    real(kind=rp) :: held(n)
    integer :: d, m

    ! For each diagonal entry d, exchange the part of column d below
    ! the diagonal with the part of row d to the right of it. The
    ! diagonal itself never moves.
    do d = 1, n - 1
       m = n - d
       held(1:m) = a(d+1:n, d)
       a(d+1:n, d) = a(d, d+1:n)
       a(d, d+1:n) = held(1:m)
    end do

  end subroutine trsp1


  !> 2D tensor product on a single element, forwarded to the kernel of
  !! the backend selected in `neko_config`.
  !!
  !! Selection order: SX first, then libxsmm; every other configuration
  !! uses the CPU kernel (there is no device branch in this routine).
  !!
  !! NOTE(review): the argument shapes suggest \f$ v = A u B^T \f$, but
  !! the arithmetic lives in the backend kernels -- confirm there.
  !!
  !! @param v  Result, `nv*nv` entries.
  !! @param nv Points per direction in `v`.
  !! @param u  Input, `nu*nu` entries.
  !! @param nu Points per direction in `u`.
  !! @param A  Operator, `nv` x `nu`.
  !! @param Bt Operator, `nu` x `nv`.
  subroutine tnsr2d_el(v, nv, u, nu, A, Bt)
    integer, intent(in) :: nv, nu
    real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
    real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)

    if (neko_bcknd_sx == 1) then
       call tnsr2d_el_sx(v, nv, u, nu, a, bt)
       return
    end if

    if (neko_bcknd_xsmm == 1) then
       call tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
       return
    end if

    ! Default: plain CPU kernel.
    call tnsr2d_el_cpu(v, nv, u, nu, a, bt)

  end subroutine tnsr2d_el


  !> 3D tensor product on a single element, forwarded to the kernel of
  !! the backend selected in `neko_config`.
  !!
  !! Selection order: SX first, then libxsmm; every other configuration
  !! uses the CPU kernel (there is no device branch in this routine).
  !!
  !! NOTE(review): presumably \f$ v = (C \otimes B \otimes A) u \f$;
  !! the arithmetic lives in the backend kernels -- confirm there.
  !!
  !! @param v  Result, `nv**3` entries.
  !! @param nv Points per direction in `v`.
  !! @param u  Input, `nu**3` entries.
  !! @param nu Points per direction in `u`.
  !! @param A  Operator, `nv` x `nu`.
  !! @param Bt Operator, `nu` x `nv`.
  !! @param Ct Operator, `nu` x `nv`.
  subroutine tnsr3d_el(v, nv, u, nu, A, Bt, Ct)
    integer, intent(in) :: nv, nu
    real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
    real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)

    if (neko_bcknd_sx == 1) then
       call tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
       return
    end if

    if (neko_bcknd_xsmm == 1) then
       call tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
       return
    end if

    ! Default: plain CPU kernel.
    call tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)

  end subroutine tnsr3d_el


  !> 3D tensor product evaluated for a list of points, each with its
  !! own operators and its own source element.
  !!
  !! For point `i` the single-element kernel is applied to the element
  !! of `u` named by `el_list(i)`, using `A(:,:,i)`, `Bt(:,:,i)` and
  !! `Ct(:,:,i)`, and the result is stored in `v(:,i)`. On the device
  !! backend the whole list is handed to one device routine instead.
  !!
  !! @param v       Results, `nv**3` entries per point.
  !! @param nv      Points per direction in each result.
  !! @param u       Source field, `nu**3` entries per element. Declared
  !!                with a trailing extent of 1 but indexed by element
  !!                number. NOTE(review): this relies on the caller
  !!                passing the full field; it is left as is because
  !!                the whole array is also given to `device_get_ptr`.
  !! @param nu      Points per direction in each element of `u`.
  !! @param A       Per-point operators, `nv` x `nu` x `n_pt`.
  !! @param Bt      Per-point operators, `nu` x `nv` x `n_pt`.
  !! @param Ct      Per-point operators, `nu` x `nv` x `n_pt`.
  !! @param el_list Zero-based element index of every point (it comes
  !!                from the gslib C interface).
  !! @param n_pt    Number of points.
  subroutine tnsr3d_el_list(v, nv, u, nu, A, Bt, Ct, el_list, n_pt)
    integer, intent(in) :: nv, nu, n_pt, el_list(n_pt)
    real(kind=rp), intent(inout) :: v(nv*nv*nv, n_pt), u(nu*nu*nu,1)
    real(kind=rp), intent(inout) :: a(nv,nu,n_pt),bt(nu, nv,n_pt),ct(nu,nv,n_pt)
    type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d, el_list_d
    integer :: i, e

    ! Nothing to evaluate. This also keeps the device branch from
    ! looking up device pointers for empty arrays.
    if (n_pt .lt. 1) return

    if (neko_bcknd_sx .eq. 1) then
       do i = 1, n_pt
          ! el_list is zero-based; shift to the Fortran element index
          ! exactly as the CPU branch does.
          e = el_list(i) + 1
          call tnsr3d_el_sx(v(1,i), nv, u(1,e), nu, &
               a(1,1,i), bt(1,1,i), ct(1,1,i))
       end do
    else if (neko_bcknd_xsmm .eq. 1) then
       do i = 1, n_pt
          e = el_list(i) + 1
          call tnsr3d_el_xsmm(v(1,i), nv, u(1,e), nu, &
               a(1,1,i), bt(1,1,i), ct(1,1,i))
       end do
    else if (neko_bcknd_device .eq. 1) then
       ! The device routine receives el_list unchanged.
       v_d = device_get_ptr(v)
       u_d = device_get_ptr(u)
       a_d = device_get_ptr(a)
       bt_d = device_get_ptr(bt)
       ct_d = device_get_ptr(ct)
       el_list_d = device_get_ptr(el_list)
       call tnsr3d_el_list_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, &
            el_list_d, n_pt)
    else
       do i = 1, n_pt
          ! Note the use of el_list(i) + 1, because of the gslib C interface
          e = el_list(i) + 1
          call tnsr3d_el_cpu(v(1,i), nv, u(1,e), nu, &
               a(1,1,i), bt(1,1,i), ct(1,1,i))
       end do
    end if

  end subroutine tnsr3d_el_list


  !> 3D tensor product on `nelv` elements with one shared set of
  !! operators, forwarded to the backend selected in `neko_config`.
  !!
  !! Selection order: SX, libxsmm, device, and finally the CPU kernel.
  !! On the device backend the routine looks up the device pointers
  !! associated with the host arrays and passes those on.
  !!
  !! NOTE(review): presumably \f$ v = (C \otimes B \otimes A) u \f$ per
  !! element; the arithmetic lives in the backend kernels.
  !!
  !! @param v    Results, `nv**3` entries per element.
  !! @param nv   Points per direction in `v`.
  !! @param u    Input, `nu**3` entries per element.
  !! @param nu   Points per direction in `u`.
  !! @param A    Operator, `nv` x `nu`.
  !! @param Bt   Operator, `nu` x `nv`.
  !! @param Ct   Operator, `nu` x `nv`.
  !! @param nelv Number of elements.
  subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv)
    integer, intent(in) :: nv, nu, nelv
    real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
    real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
    real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
    type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d

    if (neko_bcknd_sx == 1) then
       call tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
       return
    end if

    if (neko_bcknd_xsmm == 1) then
       call tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
       return
    end if

    if (neko_bcknd_device == 1) then
       ! Translate every host array into its device counterpart.
       v_d = device_get_ptr(v)
       u_d = device_get_ptr(u)
       a_d = device_get_ptr(a)
       bt_d = device_get_ptr(bt)
       ct_d = device_get_ptr(ct)
       call tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
       return
    end if

    ! Default: plain CPU kernel.
    call tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)

  end subroutine tnsr3d


  !> In-place 3D tensor product on `nelv` elements: `v` is both input
  !! and output. Forwarded to the backend selected in `neko_config`.
  !!
  !! Selection order: SX first, then libxsmm; every other configuration
  !! uses the CPU kernel (there is no device branch in this routine).
  !!
  !! NOTE(review): the sizes are declared `intent(inout)`. Nothing in
  !! this routine modifies them; the intent is kept as found because
  !! the backend interfaces are not visible from here.
  !!
  !! @param v    Field, dimensioned `nv**3 * nelv`.
  !! @param nv   Size forwarded to the backend.
  !! @param nu   Size forwarded to the backend.
  !! @param A    Operator, `nv` x `nu`.
  !! @param Bt   Operator, `nu` x `nv`.
  !! @param Ct   Operator, `nu` x `nv`.
  !! @param nelv Number of elements.
  subroutine tnsr1_3d(v, nv, nu, A, Bt, Ct, nelv)
    integer, intent(inout) :: nv, nu, nelv
    real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
    real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)

    if (neko_bcknd_sx == 1) then
       call tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
       return
    end if

    if (neko_bcknd_xsmm == 1) then
       call tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
       return
    end if

    ! Default: plain CPU kernel.
    call tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)

  end subroutine tnsr1_3d


  !> Adds the rank-one tensor product of three 1D functions to `s`:
  !! `s(i,j,k) = s(i,j,k) + h1(i)*h2(j)*h3(k)`, for a single element.
  !!
  !! @param s  Field updated in place, `nx` x `ny` x `nz`.
  !! @param h1 Factor along the first index, `nx` entries.
  !! @param h2 Factor along the second index, `ny` entries.
  !! @param h3 Factor along the third index, `nz` entries.
  !! @param nx Extent of the first index.
  !! @param ny Extent of the second index.
  !! @param nz Extent of the third index.
  subroutine addtnsr(s, h1, h2, h3, nx, ny, nz)
    integer, intent(in) :: nx, ny, nz
    real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz)
    real(kind=rp), intent(inout) ::  s(nx, ny, nz)
    real(kind=rp) :: w23
    integer :: j, k

    do k = 1, nz
       do j = 1, ny
          ! The h2*h3 weight is constant along the first index, so it
          ! is formed once per (j, k) line.
          w23 = h2(j)*h3(k)
          s(:, j, k) = s(:, j, k) + w23*h1
       end do
    end do

  end subroutine addtnsr


  !> Triple tensor product of a scalar field with three 1D weight
  !! vectors, yielding a single value.
  !!
  !! Implemented as a call to `tnsr3d_el` with an output size of 1, so
  !! `Hr`, `Hs` and `Ht` play the roles of its `A`, `Bt` and `Ct`.
  !!
  !! @param v  Result. Its value on entry is not forwarded to
  !!           `tnsr3d_el`.
  !! @param u  Field on one element, `nu` x `nu` x `nu`.
  !! @param nu Points per direction in `u`.
  !! @param Hr Weights for the first direction, `nu` entries.
  !! @param Hs Weights for the second direction, `nu` entries.
  !! @param Ht Weights for the third direction, `nu` entries.
  subroutine triple_tensor_product_scalar(v, u, nu, Hr, Hs, Ht)
    real(kind=rp), intent(inout) :: v
    integer, intent(in) :: nu
    real(kind=rp), intent(inout) :: u(nu,nu,nu)
    real(kind=rp), intent(inout) :: hr(nu)
    real(kind=rp), intent(inout) :: hs(nu)
    real(kind=rp), intent(inout) :: ht(nu)
    ! tnsr3d_el expects an array result, so the scalar is returned
    ! through a one-element buffer.
    real(kind=rp) :: point_val(1)

    call tnsr3d_el(point_val, 1, u, nu, hr, hs, ht)
    v = point_val(1)

  end subroutine triple_tensor_product_scalar


  !> Triple tensor product of a three-component vector field: the
  !! scalar version is applied to each component with the same weights.
  !!
  !! @param v  Result, one value per component.
  !! @param u1 First component on one element, `nu` x `nu` x `nu`.
  !! @param u2 Second component, same shape.
  !! @param u3 Third component, same shape.
  !! @param nu Points per direction in each component.
  !! @param Hr Weights for the first direction, `nu` entries.
  !! @param Hs Weights for the second direction, `nu` entries.
  !! @param Ht Weights for the third direction, `nu` entries.
  subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, Hr, Hs, Ht)
    real(kind=rp), intent(inout) :: v(3)
    integer, intent(in) :: nu
    real(kind=rp), intent(inout) :: u1(nu,nu,nu)
    real(kind=rp), intent(inout) :: u2(nu,nu,nu)
    real(kind=rp), intent(inout) :: u3(nu,nu,nu)
    real(kind=rp), intent(inout) :: hr(nu)
    real(kind=rp), intent(inout) :: hs(nu)
    real(kind=rp), intent(inout) :: ht(nu)

    call triple_tensor_product_scalar(v = v(1), u = u1, nu = nu, &
         Hr = hr, Hs = hs, Ht = ht)
    call triple_tensor_product_scalar(v = v(2), u = u2, nu = nu, &
         Hr = hr, Hs = hs, Ht = ht)
    call triple_tensor_product_scalar(v = v(3), u = u3, nu = nu, &
         Hr = hr, Hs = hs, Ht = ht)

  end subroutine triple_tensor_product_vector


end module tensor

device::device_get_ptr
Return the device pointer for an associated Fortran array.
Definition device.F90:95

tensor::transpose
Definition tensor.f90:74

tensor::triple_tensor_product
Definition tensor.f90:78

device
Device abstraction, common interface for various accelerators.
Definition device.F90:34

mxm_wrapper
Wrapper for all matrix-matrix product implementations.
Definition mxm_wrapper.F90:2

mxm_wrapper::mxm
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product $C = A \cdot B$ for contiguously packed matrices A, B, and C.
Definition mxm_wrapper.F90:29

neko_config
Build configurations.
Definition neko_config.f90:34

neko_config::neko_bcknd_sx
integer, parameter neko_bcknd_sx
Definition neko_config.f90:39

neko_config::neko_bcknd_device
integer, parameter neko_bcknd_device
Definition neko_config.f90:44

neko_config::neko_bcknd_xsmm
integer, parameter neko_bcknd_xsmm
Definition neko_config.f90:40

num_types
Definition num_types.f90:1

num_types::rp
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12

tensor_cpu
Definition tensor_cpu.f90:1

tensor_cpu::tnsr3d_cpu
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_cpu.f90:878

tensor_cpu::tnsr2d_el_cpu
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
Definition tensor_cpu.f90:12

tensor_cpu::tnsr1_3d_cpu
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
Definition tensor_cpu.f90:1047

tensor_cpu::tnsr3d_el_cpu
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
Definition tensor_cpu.f90:23

tensor_device
Definition tensor_device.F90:33

tensor_device::tnsr3d_el_list_device
subroutine, public tnsr3d_el_list_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, elements, n_points)
Definition tensor_device.F90:110

tensor_device::tnsr3d_device
subroutine, public tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
Definition tensor_device.F90:96

tensor_sx
Tensor operations SX-Aurora backend.
Definition tensor_sx.f90:2

tensor_sx::tnsr1_3d_sx
subroutine, public tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:255

tensor_sx::tnsr3d_sx
subroutine, public tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:75

tensor_sx::tnsr2d_el_sx
subroutine, public tnsr2d_el_sx(v, nv, u, nu, a, bt)
Definition tensor_sx.f90:13

tensor_sx::tnsr3d_el_sx
subroutine, public tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
Definition tensor_sx.f90:24

tensor_xsmm
Tensor operations libxsmm backend.
Definition tensor_xsmm.F90:61

tensor_xsmm::tnsr3d_el_xsmm
subroutine, public tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
Definition tensor_xsmm.F90:83

tensor_xsmm::tnsr3d_xsmm
subroutine, public tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_xsmm.F90:102

tensor_xsmm::tnsr2d_el_xsmm
subroutine, public tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
Definition tensor_xsmm.F90:72

tensor_xsmm::tnsr1_3d_xsmm
subroutine, public tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
Definition tensor_xsmm.F90:124

tensor
Tensor operations.
Definition tensor.f90:61

tensor::tnsr3d_el_list
subroutine, public tnsr3d_el_list(v, nv, u, nu, a, bt, ct, el_list, n_pt)
Tensor product $v = (C \otimes B \otimes A) u$ performed on a subset of the elements.
Definition tensor.f90:188

tensor::tnsr3d_el
subroutine, public tnsr3d_el(v, nv, u, nu, a, bt, ct)
Tensor product $v = (C \otimes B \otimes A) u$ performed on a single element.
Definition tensor.f90:171

tensor::trsp1
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:137

tensor::addtnsr
subroutine, public addtnsr(s, h1, h2, h3, nx, ny, nz)
Maps and adds to S a tensor product form of the three functions H1,H2,H3. This is a single element ro...
Definition tensor.f90:266

tensor::trsp
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor, $A = B^T$.
Definition tensor.f90:121

tensor::triple_tensor_product_scalar
subroutine triple_tensor_product_scalar(v, u, nu, hr, hs, ht)
Computes the tensor product $v = (H_t \otimes H_s \otimes H_r) u$. This operation is usually performed for spectral interpolation of a sca...
Definition tensor.f90:299

tensor::tnsr3d
subroutine, public tnsr3d(v, nv, u, nu, a, bt, ct, nelv)
Tensor product $v = (C \otimes B \otimes A) u$ performed on nelv elements.
Definition tensor.f90:223

tensor::tnsr1_3d
subroutine, public tnsr1_3d(v, nv, nu, a, bt, ct, nelv)
In place tensor product $v = (C \otimes B \otimes A) v$.
Definition tensor.f90:249

tensor::tensr3
subroutine, public tensr3(v, nv, u, nu, a, bt, ct, w)
Tensor product $v = (C \otimes B \otimes A) u$.
Definition tensor.f90:91

tensor::triple_tensor_product_vector
subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, hr, hs, ht)
Computes the tensor product on a vector field $\mathbf{v} = (H_t \otimes H_s \otimes H_r) \mathbf{u}$. This operation is usually performed for spectral inte...
Definition tensor.f90:335

tensor::tnsr2d_el
subroutine, public tnsr2d_el(v, nv, u, nu, a, bt)
Computes $v = A u B^T$.
Definition tensor.f90:154