d0/da4/space_8f90_source.html

! Copyright (c) 2019-2022, The Neko Authors

! All rights reserved.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

!   * Redistributions of source code must retain the above copyright

!     notice, this list of conditions and the following disclaimer.

!

!   * Redistributions in binary form must reproduce the above

!     copyright notice, this list of conditions and the following

!     disclaimer in the documentation and/or other materials provided

!     with the distribution.

!

!   * Neither the name of the authors nor the names of its

!     contributors may be used to endorse or promote products derived

!     from this software without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN

! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

! POSSIBILITY OF SUCH DAMAGE.

!

module space

  use neko_config

  use num_types, only : rp

  use speclib

  use device

  use utils, only : neko_error

  use fast3d, only : setup_intp

  use math

  use tensor, only : trsp1

  use mxm_wrapper, only: mxm

  use, intrinsic :: iso_c_binding

  implicit none

  private


  ! Integer identifiers for the quadrature rule stored in space_t%t.
  ! space_init builds a space only for gl (nodes/weights from zwgl) and
  ! gll (nodes/weights from zwgll); any other value, including gj, is
  ! reported as "Invalid quadrature rule".
  ! NOTE(review): gl/gll/gj presumably stand for Gauss-Legendre,
  ! Gauss-Lobatto-Legendre and Gauss-Jacobi - confirm.
  integer, public, parameter :: gl = 0, gll = 1, gj = 2


  !> The function space for the SEM solution fields.
  !! Holds the quadrature nodes and weights, the differentiation matrices
  !! and the Legendre transform matrices of a space, together with the
  !! device pointers that mirror the host arrays when a device backend
  !! is enabled.
  type, public :: space_t
     integer :: t    !< Quadrature rule used to build the space (gl or gll)
     integer :: lx   !< Polynomial dimension in the first direction
     integer :: ly   !< Polynomial dimension in the second direction
     integer :: lz   !< Polynomial dimension in the third direction (1 in 2D)
     integer :: lxy  !< lx * ly
     integer :: lyz  !< ly * lz
     integer :: lxz  !< lx * lz
     integer :: lxyz !< lx * ly * lz

     !> Quadrature nodes, allocated as (lx, 3); column k holds the nodes
     !! of direction k (column 3 is zero when lz = 1)
     real(kind=rp), allocatable :: zg(:,:)

     ! Reciprocal node spacing per direction (see space_compute_dist)
     real(kind=rp), allocatable :: dr_inv(:) !< Size lx, first direction
     real(kind=rp), allocatable :: ds_inv(:) !< Size ly, second direction
     real(kind=rp), allocatable :: dt_inv(:) !< Size lz, third direction

     ! Quadrature weights per direction
     real(kind=rp), allocatable :: wx(:) !< Size lx
     real(kind=rp), allocatable :: wy(:) !< Size ly
     real(kind=rp), allocatable :: wz(:) !< Size lz (set to 1 when lz = 1)

     !> Tensor product of the weights, w3(i,j,k) = wx(i) * wy(j) * wz(k)
     real(kind=rp), allocatable :: w3(:,:,:)

     ! Matrices filled by dgll (gll) or setup_intp with derivative = 1 (gl)
     real(kind=rp), allocatable :: dx(:,:) !< (lx, lx), first direction
     real(kind=rp), allocatable :: dy(:,:) !< (ly, ly), second direction
     real(kind=rp), allocatable :: dz(:,:) !< (lz, lz), zero when lz = 1

     ! Second output of the same routines
     ! NOTE(review): presumably the transposes of dx, dy, dz - confirm
     real(kind=rp), allocatable :: dxt(:,:)
     real(kind=rp), allocatable :: dyt(:,:)
     real(kind=rp), allocatable :: dzt(:,:)

     ! Transform matrices, all (lx, lx), filled by
     ! space_generate_transformation_matrices
     real(kind=rp), allocatable :: v(:,:)     !< Scaled Legendre transform matrix
     real(kind=rp), allocatable :: vt(:,:)    !< Transpose of v
     real(kind=rp), allocatable :: vinv(:,:)  !< Product vt * w
     real(kind=rp), allocatable :: vinvt(:,:) !< Transpose of vinv
     real(kind=rp), allocatable :: w(:,:)     !< Diagonal matrix holding wx

     !
     ! Device pointers (if present)
     !
     type(c_ptr) :: dr_inv_d = c_null_ptr
     type(c_ptr) :: ds_inv_d = c_null_ptr
     type(c_ptr) :: dt_inv_d = c_null_ptr
     type(c_ptr) :: dxt_d = c_null_ptr
     type(c_ptr) :: dyt_d = c_null_ptr
     type(c_ptr) :: dzt_d = c_null_ptr
     type(c_ptr) :: dx_d = c_null_ptr
     type(c_ptr) :: dy_d = c_null_ptr
     type(c_ptr) :: dz_d = c_null_ptr
     type(c_ptr) :: wx_d = c_null_ptr
     type(c_ptr) :: wy_d = c_null_ptr
     type(c_ptr) :: wz_d = c_null_ptr
     type(c_ptr) :: zg_d = c_null_ptr
     type(c_ptr) :: w3_d = c_null_ptr
     type(c_ptr) :: v_d = c_null_ptr
     type(c_ptr) :: vt_d = c_null_ptr
     type(c_ptr) :: vinv_d = c_null_ptr
     type(c_ptr) :: vinvt_d = c_null_ptr
     type(c_ptr) :: w_d = c_null_ptr
   contains
     procedure, pass(s) :: init => space_init !< Build the space
     procedure, pass(s) :: free => space_free !< Release host and device data
  end type space_t


  !> Equality test between two function spaces, implemented by space_eq.
  interface operator(.eq.)
     module procedure space_eq
  end interface operator(.eq.)


  !> Inequality test between two function spaces, implemented by space_ne.
  interface operator(.ne.)
     module procedure space_ne
  end interface operator(.ne.)


  public :: operator(.eq.), operator(.ne.)


contains


  !> Initialize a function space @a s with given polynomial dimensions.
  !! Any data already held by @a s is released first. The routine then
  !! computes the quadrature nodes and weights, the tensor-product weights,
  !! the differentiation matrices and the reciprocal node spacings, maps
  !! and copies them to the device when a device backend is enabled, and
  !! finally builds the Legendre transform matrices.
  !! @param s   Space to initialize.
  !! @param t   Quadrature rule, gl or gll. Any other value is reported
  !!            through neko_error ("Invalid quadrature rule").
  !! @param lx  Polynomial dimension in the first direction.
  !! @param ly  Polynomial dimension in the second direction; must equal lx.
  !! @param lz  Optional polynomial dimension in the third direction. When
  !!            absent or equal to 1 the space is two-dimensional and
  !!            s%lz = 1; otherwise it must equal lx and ly.
  subroutine space_init(s, t, lx, ly, lz)
    class(space_t), intent(inout) :: s
    integer, intent(in) :: t
    integer, intent(in) :: lx
    integer, intent(in) :: ly
    integer, optional, intent(in) :: lz
    integer :: ix, iy, iz
    integer :: n_dx, n_dy, n_dz, n_zg

    call space_free(s)

    s%t = t
    s%lx = lx
    s%ly = ly

    ! An absent lz and an explicit lz = 1 both describe a 2D space. Assign
    ! lz unconditionally so that it is never left undefined.
    s%lz = 1
    if (present(lz)) s%lz = lz

    ! Only equal dimensions in all active directions are supported
    if (lx .ne. ly .or. (s%lz .ne. 1 .and. lx .ne. s%lz)) then
       call neko_error("Unsupported polynomial dimension")
    end if

    s%lxy = s%ly*s%lx
    s%lyz = s%ly*s%lz
    s%lxz = s%lx*s%lz
    s%lxyz = s%lx*s%ly*s%lz

    allocate(s%zg(lx, 3))

    allocate(s%wx(s%lx))
    allocate(s%wy(s%ly))
    allocate(s%wz(s%lz))

    allocate(s%dr_inv(s%lx))
    allocate(s%ds_inv(s%ly))
    allocate(s%dt_inv(s%lz))

    allocate(s%w3(s%lx, s%ly, s%lz))

    allocate(s%dx(s%lx, s%lx))
    allocate(s%dy(s%ly, s%ly))
    allocate(s%dz(s%lz, s%lz))

    allocate(s%dxt(s%lx, s%lx))
    allocate(s%dyt(s%ly, s%ly))
    allocate(s%dzt(s%lz, s%lz))

    allocate(s%v(s%lx, s%lx))
    allocate(s%vt(s%lx, s%lx))
    allocate(s%vinv(s%lx, s%lx))
    allocate(s%vinvt(s%lx, s%lx))
    allocate(s%w(s%lx, s%lx))

    ! Call low-level routines to compute nodes and quadrature weights
    select case (t)
    case (gll)
       call zwgll(s%zg(1,1), s%wx, s%lx)
       call zwgll(s%zg(1,2), s%wy, s%ly)
       if (s%lz .gt. 1) then
          call zwgll(s%zg(1,3), s%wz, s%lz)
       else
          s%zg(:,3) = 0.0_rp
          s%wz = 1.0_rp
       end if
    case (gl)
       call zwgl(s%zg(1,1), s%wx, s%lx)
       call zwgl(s%zg(1,2), s%wy, s%ly)
       if (s%lz .gt. 1) then
          call zwgl(s%zg(1,3), s%wz, s%lz)
       else
          s%zg(:,3) = 0.0_rp
          s%wz = 1.0_rp
       end if
    case default
       call neko_error("Invalid quadrature rule")
    end select

    ! Tensor product of the one-dimensional weights
    do iz = 1, s%lz
       do iy = 1, s%ly
          do ix = 1, s%lx
             s%w3(ix, iy, iz) = s%wx(ix) * s%wy(iy) * s%wz(iz)
          end do
       end do
    end do

    ! Differentiation matrices; the third direction is zeroed in 2D
    select case (t)
    case (gll)
       call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx)
       call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly)
       if (s%lz .gt. 1) then
          call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz)
       else
          s%dz = 0.0_rp
          s%dzt = 0.0_rp
       end if
    case (gl)
       call setup_intp(s%dx, s%dxt, s%zg(1,1), s%zg(1,1), s%lx, s%lx, 1)
       call setup_intp(s%dy, s%dyt, s%zg(1,2), s%zg(1,2), s%ly, s%ly, 1)
       if (s%lz .gt. 1) then
          call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1)
       else
          s%dz = 0.0_rp
          s%dzt = 0.0_rp
       end if
    case default
       call neko_error("Invalid quadrature rule")
    end select

    ! Reciprocal node spacing in each direction
    call space_compute_dist(s%dr_inv, s%zg(1,1), s%lx)
    call space_compute_dist(s%ds_inv, s%zg(1,2), s%ly)
    if (s%lz .gt. 1) then
       call space_compute_dist(s%dt_inv, s%zg(1,3), s%lz)
    else
       s%dt_inv = 0.0_rp
    end if

    if (neko_bcknd_device .eq. 1) then
       ! Element counts follow the allocated extent of each host array, so
       ! the lz-sized arrays are not over-read when lz = 1.
       n_dx = s%lx * s%lx
       n_dy = s%ly * s%ly
       n_dz = s%lz * s%lz
       n_zg = s%lx * 3

       call device_map(s%dr_inv, s%dr_inv_d, s%lx)
       call device_map(s%ds_inv, s%ds_inv_d, s%ly)
       call device_map(s%dt_inv, s%dt_inv_d, s%lz)
       call device_map(s%wx, s%wx_d, s%lx)
       call device_map(s%wy, s%wy_d, s%ly)
       call device_map(s%wz, s%wz_d, s%lz)
       call device_map(s%dx, s%dx_d, n_dx)
       call device_map(s%dy, s%dy_d, n_dy)
       call device_map(s%dz, s%dz_d, n_dz)
       call device_map(s%dxt, s%dxt_d, n_dx)
       call device_map(s%dyt, s%dyt_d, n_dy)
       call device_map(s%dzt, s%dzt_d, n_dz)
       call device_map(s%w3, s%w3_d, s%lxyz)
       call device_map(s%zg, s%zg_d, n_zg)

       ! The transform matrices are (lx, lx); lxy equals lx * lx because
       ! lx = ly is enforced above. Their contents are copied to the device
       ! by space_generate_transformation_matrices.
       call device_map(s%v, s%v_d, s%lxy)
       call device_map(s%vt, s%vt_d, s%lxy)
       call device_map(s%vinv, s%vinv_d, s%lxy)
       call device_map(s%vinvt, s%vinvt_d, s%lxy)
       call device_map(s%w, s%w_d, s%lxy)

       call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, &
                          host_to_device, sync=.false.)
       call device_memcpy(s%ds_inv, s%ds_inv_d, s%ly, &
                          host_to_device, sync=.false.)
       call device_memcpy(s%dt_inv, s%dt_inv_d, s%lz, &
                          host_to_device, sync=.false.)
       call device_memcpy(s%wx, s%wx_d, s%lx, host_to_device, sync=.false.)
       call device_memcpy(s%wy, s%wy_d, s%ly, host_to_device, sync=.false.)
       call device_memcpy(s%wz, s%wz_d, s%lz, host_to_device, sync=.false.)
       call device_memcpy(s%dx, s%dx_d, n_dx, host_to_device, sync=.false.)
       call device_memcpy(s%dy, s%dy_d, n_dy, host_to_device, sync=.false.)
       call device_memcpy(s%dz, s%dz_d, n_dz, host_to_device, sync=.false.)
       call device_memcpy(s%dxt, s%dxt_d, n_dx, host_to_device, sync=.false.)
       call device_memcpy(s%dyt, s%dyt_d, n_dy, host_to_device, sync=.false.)
       call device_memcpy(s%dzt, s%dzt_d, n_dz, host_to_device, sync=.false.)
       call device_memcpy(s%w3, s%w3_d, s%lxyz, host_to_device, sync=.false.)
       call device_memcpy(s%zg, s%zg_d, n_zg, host_to_device, sync=.false.)
    end if

    call space_generate_transformation_matrices(s)

  end subroutine space_init


  !> Deallocate a space @a s.
  !! Releases every allocated host array and every associated device
  !! pointer of the space. Components that are not allocated/associated
  !! are skipped, so the routine may be called on an uninitialized space.
  !! @param s  Space whose storage is released.
  subroutine space_free(s)
    class(space_t), intent(inout) :: s

    ! Nodes and quadrature weights
    if (allocated(s%zg)) deallocate(s%zg)
    if (allocated(s%wx)) deallocate(s%wx)
    if (allocated(s%wy)) deallocate(s%wy)
    if (allocated(s%wz)) deallocate(s%wz)
    if (allocated(s%w3)) deallocate(s%w3)

    ! Differentiation matrices and their companions
    if (allocated(s%dx)) deallocate(s%dx)
    if (allocated(s%dy)) deallocate(s%dy)
    if (allocated(s%dz)) deallocate(s%dz)
    if (allocated(s%dxt)) deallocate(s%dxt)
    if (allocated(s%dyt)) deallocate(s%dyt)
    if (allocated(s%dzt)) deallocate(s%dzt)

    ! Reciprocal node spacings
    if (allocated(s%dr_inv)) deallocate(s%dr_inv)
    if (allocated(s%ds_inv)) deallocate(s%ds_inv)
    if (allocated(s%dt_inv)) deallocate(s%dt_inv)

    ! Transform matrices
    if (allocated(s%v)) deallocate(s%v)
    if (allocated(s%vt)) deallocate(s%vt)
    if (allocated(s%vinv)) deallocate(s%vinv)
    if (allocated(s%vinvt)) deallocate(s%vinvt)
    if (allocated(s%w)) deallocate(s%w)

    !
    ! Cleanup the device (if present)
    !
    if (c_associated(s%dr_inv_d)) call device_free(s%dr_inv_d)
    if (c_associated(s%ds_inv_d)) call device_free(s%ds_inv_d)
    if (c_associated(s%dt_inv_d)) call device_free(s%dt_inv_d)
    if (c_associated(s%dxt_d)) call device_free(s%dxt_d)
    if (c_associated(s%dyt_d)) call device_free(s%dyt_d)
    if (c_associated(s%dzt_d)) call device_free(s%dzt_d)
    if (c_associated(s%dx_d)) call device_free(s%dx_d)
    if (c_associated(s%dy_d)) call device_free(s%dy_d)
    if (c_associated(s%dz_d)) call device_free(s%dz_d)
    if (c_associated(s%wx_d)) call device_free(s%wx_d)
    if (c_associated(s%wy_d)) call device_free(s%wy_d)
    if (c_associated(s%wz_d)) call device_free(s%wz_d)
    if (c_associated(s%w3_d)) call device_free(s%w3_d)
    if (c_associated(s%zg_d)) call device_free(s%zg_d)
    if (c_associated(s%v_d)) call device_free(s%v_d)
    if (c_associated(s%vt_d)) call device_free(s%vt_d)
    if (c_associated(s%vinv_d)) call device_free(s%vinv_d)
    if (c_associated(s%vinvt_d)) call device_free(s%vinvt_d)
    if (c_associated(s%w_d)) call device_free(s%w_d)

  end subroutine space_free


  !> Check if two spaces have identical polynomial dimensions.
  !! Only lx, ly and lz are compared; the quadrature type t and the
  !! array contents do not take part in the comparison.
  !! @param Xh  First space.
  !! @param Yh  Second space.
  !! @return .true. when lx, ly and lz all match, .false. otherwise.
  pure function space_eq(Xh, Yh) result(res)
    type(space_t), intent(in) :: Xh
    type(space_t), intent(in) :: Yh
    logical :: res

    res = (Xh%lx .eq. Yh%lx) .and. &
          (Xh%ly .eq. Yh%ly) .and. &
          (Xh%lz .eq. Yh%lz)

  end function space_eq


  !> Check if two spaces differ in at least one polynomial dimension.
  !! Only lx, ly and lz are compared; the quadrature type t and the
  !! array contents do not take part in the comparison.
  !! @param Xh  First space.
  !! @param Yh  Second space.
  !! @return .true. when any of lx, ly, lz differ, .false. otherwise.
  pure function space_ne(Xh, Yh) result(res)
    type(space_t), intent(in) :: Xh
    type(space_t), intent(in) :: Yh
    logical :: res

    res = (Xh%lx .ne. Yh%lx) .or. &
          (Xh%ly .ne. Yh%ly) .or. &
          (Xh%lz .ne. Yh%lz)

  end function space_ne


  !> Compute the reciprocal of the local spacing of a set of points.
  !! The spacing at the two end points is the distance to the single
  !! neighbour; at interior points it is half the distance between the
  !! two neighbours. The result stored in @a dx is one over that spacing.
  !! @param dx  On return, reciprocal spacing at each point.
  !! @param x   Point coordinates (read only).
  !! @param lx  Number of points; must be at least 2 because x(2) and
  !!            x(lx-1) are referenced.
  subroutine space_compute_dist(dx, x, lx)
    integer, intent(in) :: lx
    real(kind=rp), intent(inout) :: dx(lx)
    real(kind=rp), intent(in) :: x(lx)
    integer :: i

    ! One-sided difference at the first point
    dx(1) = x(2) - x(1)

    ! Half the distance between the neighbours at interior points
    do i = 2, lx - 1
       dx(i) = 0.5_rp * (x(i+1) - x(i-1))
    end do

    ! One-sided difference at the last point
    dx(lx) = x(lx) - x(lx-1)

    ! Store the reciprocal
    dx = 1.0_rp / dx

  end subroutine space_compute_dist


  !> Generate spectral transform matrices.
  !! Fills v, vt, vinv, vinvt and w of @a Xh from the nodes zg(:,1) and the
  !! weights wx of the first direction, then copies them to the device when
  !! a HIP, CUDA or OpenCL backend is enabled.
  !!
  !! Steps: evaluate the Legendre polynomials of degree 0..lx-1 at every
  !! node, scale each polynomial, form the diagonal weight matrix w, and set
  !! vinv = vt * w.
  !! @param Xh  Space whose transform matrices are (re)computed. Its zg, wx
  !!            and the (lx, lx) arrays v, vt, vinv, vinvt, w must already
  !!            be allocated; lx must be at least 2 since l(1) is always
  !!            assigned and 2 / (lx - 1) is evaluated.
  subroutine space_generate_transformation_matrices(Xh)
    type(space_t), intent(inout) :: Xh

    real(kind=rp) :: l(0:Xh%lx-1)
    real(kind=rp) :: delta(Xh%lx)
    integer :: i, j, k

    associate(v => Xh%v, vt => Xh%vt, &
         vinv => Xh%vinv, vinvt => Xh%vinvt, w => Xh%w)

      ! Get the Legendre polynomials for each point using the three-term
      ! recurrence, then store them as column j (one column per node).
      do j = 1, Xh%lx
         l(0) = 1.0_rp
         l(1) = Xh%zg(j,1)
         do k = 2, Xh%lx - 1
            l(k) = ( (2*k-1) * Xh%zg(j,1) * l(k-1) &
                  - (k-1) * l(k-2) ) / k
         end do
         v(:, j) = l
      end do

      ! Transpose the matrix so that row = node, column = polynomial degree
      call trsp1(v, Xh%lx)

      ! Calculate the nominal scaling factors
      do i = 1, Xh%lx
         delta(i) = 2.0_rp / (2*(i-1)+1)
      end do

      ! Modify last entry
      ! NOTE(review): this correction looks specific to a Lobatto rule;
      ! confirm it is also intended for spaces built with t = gl.
      delta(Xh%lx) = 2.0_rp / (Xh%lx-1)

      ! Calculate the inverse to multiply the matrix
      delta = sqrt(1.0_rp / delta)

      ! Scale column j (polynomial of degree j-1) by its factor
      do j = 1, Xh%lx
         v(:, j) = v(:, j) * delta(j) ! orthogonal wrt weights
      end do

      ! Get the transposed
      call copy(vt, v, Xh%lx * Xh%lx)
      call trsp1(vt, Xh%lx)

      ! Populate the mass matrix: diagonal holds the weights wx
      w = 0.0_rp
      do i = 1, Xh%lx
         w(i, i) = Xh%wx(i)
      end do

      ! Get the inverse of the transform matrix
      call mxm(vt, Xh%lx, w, Xh%lx, vinv, Xh%lx)

      ! Get the transposed of the inverse
      call copy(vinvt, vinv, Xh%lx * Xh%lx)
      call trsp1(vinvt, Xh%lx)

    end associate

    ! Copy the data to the device
    if ((neko_bcknd_hip .eq. 1) .or. (neko_bcknd_cuda .eq. 1) .or. &
         (neko_bcknd_opencl .eq. 1)) then

       call device_memcpy(Xh%v, Xh%v_d, Xh%lxy, &
                          host_to_device, sync=.false.)
       call device_memcpy(Xh%vt, Xh%vt_d, Xh%lxy, &
                          host_to_device, sync=.false.)
       call device_memcpy(Xh%vinv, Xh%vinv_d, Xh%lxy, &
                          host_to_device, sync=.false.)
       call device_memcpy(Xh%vinvt, Xh%vinvt_d, Xh%lxy, &
                          host_to_device, sync=.false.)
       call device_memcpy(Xh%w, Xh%w_d, Xh%lxy, &
                          host_to_device, sync=.false.)

    end if

  end subroutine space_generate_transformation_matrices


end module space

device::device_map
Map a Fortran array to a device (allocate and associate)
Definition device.F90:71

device::device_memcpy
Copy data between host and device (or device and device)
Definition device.F90:65

utils::neko_error
Definition utils.f90:42

device
Device abstraction, common interface for various accelerators.
Definition device.F90:34

device::host_to_device
integer, parameter, public host_to_device
Definition device.F90:46

device::device_free
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:200

fast3d
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61

fast3d::setup_intp
subroutine, public setup_intp(jh, jht, z_to, z_from, n_to, n_from, derivative)
Compute interpolation weights for points z_to using values at points z_from.
Definition fast3d.f90:243

math
Definition math.f90:60

math::copy
subroutine, public copy(a, b, n)
Copy a vector \( a = b \).
Definition math.f90:238

mxm_wrapper
Wrapper for all matrix-matrix product implementations.
Definition mxm_wrapper.F90:2

mxm_wrapper::mxm
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product \( C = A \cdot B \) for contiguously packed matrices A, B, and C.
Definition mxm_wrapper.F90:29

neko_config
Build configurations.
Definition neko_config.f90:34

neko_config::neko_bcknd_hip
integer, parameter neko_bcknd_hip
Definition neko_config.f90:42

neko_config::neko_bcknd_device
integer, parameter neko_bcknd_device
Definition neko_config.f90:44

neko_config::neko_bcknd_opencl
integer, parameter neko_bcknd_opencl
Definition neko_config.f90:43

neko_config::neko_bcknd_cuda
integer, parameter neko_bcknd_cuda
Definition neko_config.f90:41

num_types
Definition num_types.f90:1

num_types::rp
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12

space
Defines a function space.
Definition space.f90:34

space::space_ne
pure logical function space_ne(xh, yh)
Check if \( X_h \ne Y_h \).
Definition space.f90:490

space::space_eq
pure logical function space_eq(xh, yh)
Check if \( X_h = Y_h \).
Definition space.f90:473

space::gll
integer, parameter, public gll
Definition space.f90:48

space::space_compute_dist
subroutine space_compute_dist(dx, x, lx)
Definition space.f90:505

space::gj
integer, parameter, public gj
Definition space.f90:48

space::space_free
subroutine space_free(s)
Deallocate a space s.
Definition space.f90:310

space::space_init
subroutine space_init(s, t, lx, ly, lz)
Initialize a function space s with given polynomial dimensions.
Definition space.f90:148

space::gl
integer, parameter, public gl
Definition space.f90:48

space::space_generate_transformation_matrices
subroutine space_generate_transformation_matrices(xh)
Generate spectral transform matrices.
Definition space.f90:522

speclib
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition speclib.f90:148

speclib::dgll
subroutine dgll(d, dt, z, nz, nzd)
Definition speclib.f90:865

speclib::zwgll
subroutine zwgll(z, w, np)
Definition speclib.f90:169

speclib::zwgl
subroutine zwgl(z, w, np)
Generate NP Gauss Legendre points Z and weights W associated with Jacobi polynomial ....
Definition speclib.f90:161

tensor
Tensor operations.
Definition tensor.f90:61

tensor::trsp1
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:137

utils
Utilities.
Definition utils.f90:35

space::space_t
The function space for the SEM solution fields.
Definition space.f90:62