! d0/da2/fdm_8f90_source.html

! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.

!

! The UChicago Argonne, LLC as Operator of Argonne National

! Laboratory holds copyright in the Software. The copyright holder

! reserves all rights except those expressly granted to licensees,

! and U.S. Government license rights.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

! 1. Redistributions of source code must retain the above copyright

! notice, this list of conditions and the disclaimer below.

!

! 2. Redistributions in binary form must reproduce the above copyright

! notice, this list of conditions and the disclaimer (as noted below)

! in the documentation and/or other materials provided with the

! distribution.

!

! 3. Neither the name of ANL nor the names of its contributors

! may be used to endorse or promote products derived from this software

! without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL

! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF

! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED

! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

!

! Additional BSD Notice

! ---------------------

! 1. This notice is required to be provided under our contract with

! the U.S. Department of Energy (DOE). This work was produced at

! Argonne National Laboratory under Contract

! No. DE-AC02-06CH11357 with the DOE.

!

! 2. Neither the United States Government nor UCHICAGO ARGONNE,

! LLC nor any of their employees, makes any warranty,

! express or implied, or assumes any liability or responsibility for the

! accuracy, completeness, or usefulness of any information, apparatus,

! product, or process disclosed, or represents that its use would not

! infringe privately-owned rights.

!

! 3. Also, reference herein to any specific commercial products, process,

! or services by trade name, trademark, manufacturer or otherwise does

! not necessarily constitute or imply its endorsement, recommendation,

! or favoring by the United States Government or UCHICAGO ARGONNE LLC.

! The views and opinions of authors expressed

! herein do not necessarily state or reflect those of the United States

! Government or UCHICAGO ARGONNE, LLC, and shall

! not be used for advertising or product endorsement purposes.

!

module fdm

  use neko_config

  use num_types

  use speclib

  use math

  use mesh

  use space

  use dofmap

  use gather_scatter

  use fast3d

  use tensor

  use fdm_sx

  use fdm_xsmm

  use fdm_cpu

  use fdm_device

  use device

  use utils

  use comm, only : pe_rank

  use, intrinsic :: iso_c_binding

  implicit none

  private


  !> Type for the Fast Diagonalization connected with the Schwarz
  !! overlapping solves.
  type, public :: fdm_t
     !> 1D operators per direction and element, allocated in fdm_init as
     !! (nl*nl, 2, gdim, nelv) with nl = lx + 2; filled by fdm_setup_fast1d.
     real(kind=rp), allocatable :: s(:,:,:,:)
     !> Per-element diagonal, allocated as (nl**3, nelv): 1/(sum of the 1D
     !! eigenvalues) or zero when that sum does not exceed the tolerance.
     real(kind=rp), allocatable :: d(:,:)
     !> Device pointer mapped to s (only mapped when neko_bcknd_device == 1).
     type(c_ptr) :: s_d = c_null_ptr
     !> Device pointer mapped to d (only mapped when neko_bcknd_device == 1).
     type(c_ptr) :: d_d = c_null_ptr
     !> Per-element lengths obtained from the exchange in swap_lengths for the
     !! "left" side in the r, s and t directions.
     real(kind=rp), allocatable :: len_lr(:), len_ls(:), len_lt(:)
     !> Per-element lengths of the element itself (computed by plane_space).
     real(kind=rp), allocatable :: len_mr(:), len_ms(:), len_mt(:)
     !> Per-element lengths obtained from the exchange in swap_lengths for the
     !! "right" side in the r, s and t directions.
     real(kind=rp), allocatable :: len_rr(:), len_rs(:), len_rt(:)
     !> Work field (lx, lx, lx, nelv) used to exchange lengths through
     !! gather-scatter.
     real(kind=rp), allocatable :: swplen(:,:,:,:)
     !> Device pointer mapped to swplen.
     type(c_ptr) :: swplen_d = c_null_ptr
     ! Non-owning references set in fdm_init and nullified in fdm_free.
     ! Default-initialised so that their association status is always defined.
     type(space_t), pointer :: xh => null()
     type(dofmap_t), pointer :: dof => null()
     type(gs_t), pointer :: gs_h => null()
     type(mesh_t), pointer :: msh => null()
   contains
     procedure, pass(this) :: init => fdm_init
     procedure, pass(this) :: free => fdm_free
     procedure, pass(this) :: compute => fdm_compute
  end type fdm_t


  !> Generic name for the symmetric generalized eigenvalue wrappers; the
  !! specific routine is selected by the real kind of the arguments.
  interface sygv
     module procedure sp_sygv
     module procedure dp_sygv
     module procedure qp_sygv
  end interface sygv


contains


  !> Set up the fast diagonalization data for the space Xh, the dofmap dm
  !! and the gather-scatter handle gs_h. Host data held from a previous
  !! initialisation is released first through fdm_free.
  subroutine fdm_init(this, Xh, dm, gs_h)
    class(fdm_t), intent(inout) :: this
    type(space_t), target, intent(inout) :: Xh
    type(dofmap_t), target, intent(inout) :: dm
    type(gs_t), target, intent(inout) :: gs_h
    ! Output buffers for semhat; only ah and bh are used afterwards
    real(kind=rp), dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
    real(kind=rp), dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
    integer :: nl, n, nelv, gdim
    integer :: s_size, d_size, swplen_size

    n = Xh%lx - 1        ! Polynomial degree
    nl = Xh%lx + 2       ! Schwarz!
    nelv = dm%msh%nelv
    gdim = dm%msh%gdim

    ! Element counts used for the device mapping and the transfers below
    s_size = nl*nl*2*gdim*nelv
    d_size = nl**gdim*nelv
    swplen_size = Xh%lxyz*nelv

    call fdm_free(this)

    allocate(this%s(nl*nl, 2, gdim, nelv))
    allocate(this%d(nl**3, nelv))
    allocate(this%swplen(Xh%lx, Xh%lx, Xh%lx, nelv))
    allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
    allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
    allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))

    ! Zeroing here enables easier debugging since then
    ! MPI messages in GS are deterministic
    call rzero(this%swplen, swplen_size)

    if (neko_bcknd_device .eq. 1) then
       call device_map(this%s, this%s_d, s_size)
       call device_map(this%d, this%d_d, d_size)
       call device_map(this%swplen, this%swplen_d, swplen_size)
    end if

    call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)

    ! The helpers below reach the space, dofmap, gs and mesh through this
    this%Xh => Xh
    this%dof => dm
    this%gs_h => gs_h
    this%msh => dm%msh

    call swap_lengths(this, dm%x, dm%y, dm%z, nelv, gdim)

    call fdm_setup_fast(this, ah, bh, nl, n)

    if (neko_bcknd_device .eq. 1) then
       call device_memcpy(this%s, this%s_d, s_size, &
            host_to_device, sync=.false.)
       call device_memcpy(this%d, this%d_d, d_size, &
            host_to_device, sync=.false.)
       call device_memcpy(this%swplen, this%swplen_d, swplen_size, &
            host_to_device, sync=.false.)
    end if
  end subroutine fdm_init


  !> Compute the element lengths in each direction (len_m*) and obtain the
  !! corresponding "left" (len_l*) and "right" (len_r*) lengths through a
  !! gather-scatter exchange.
  !!
  !! The element's own length is written on the interior nodes of each
  !! face of the work field swplen, gs_op_add is applied, and the own
  !! length is subtracted again from one node per face.
  !! NOTE(review): this presumably leaves the length of the neighbouring
  !! element (or zero without a neighbour) - confirm against gs_op_add.
  !!
  !! In 2D only the r and s lengths are set; len_lt/len_rt are untouched.
  subroutine swap_lengths(this, x, y, z, nelv, gdim)
    type(fdm_t), intent(inout) :: this
    integer, intent(in) :: gdim, nelv
    real(kind=rp), dimension(this%Xh%lxyz,nelv) , intent(in):: x, y, z
    integer :: j, k, e, n2, nz0, nzn, nx, lx1, n

    associate(l => this%swplen, xh =>this%Xh, &
         llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
         lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
         lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
      lx1 = this%Xh%lx
      n2 = lx1 - 1
      nx  = lx1 - 2
      if (gdim .eq. 3) then
         nz0 = 0
         nzn = n2
      else
         nz0 = 1
         nzn = 1
      end if
      call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z,&
           nx, n2, nz0, nzn, nelv, gdim)
      n = n2 + 1

      ! Step 1: write the element's own length on the face-interior nodes
      if (gdim .eq. 3) then
         do e = 1,nelv
            do j = 2,n2
               do k = 2,n2
                  l(1,k,j,e) = lmr(e)
                  l(n,k,j,e) = lmr(e)
                  l(k,1,j,e) = lms(e)
                  l(k,n,j,e) = lms(e)
                  l(k,j,1,e) = lmt(e)
                  l(k,j,n,e) = lmt(e)
               end do
            end do
         end do
      else
         do e = 1,nelv
            do j = 2,n2
               l(1,j,1,e) = lmr(e)
               l(n,j,1,e) = lmr(e)
               l(j,1,1,e) = lms(e)
               l(j,n,1,e) = lms(e)
            end do
         end do
      end if

      ! Step 2: gather-scatter add (identical for 2D and 3D)
      if (neko_bcknd_device .eq. 1) then
         call device_memcpy(l, this%swplen_d, this%dof%size(), &
                            host_to_device, sync=.false.)
         call this%gs_h%op(l, this%dof%size(), gs_op_add)
         ! The host loop below reads l right away, so this copy must be
         ! complete on return: request a synchronous transfer.
         call device_memcpy(l, this%swplen_d, this%dof%size(), &
                            device_to_host, sync=.true.)
      else
         call this%gs_h%op(l, this%dof%size(), gs_op_add)
      end if

      ! Step 3: remove the own contribution to get the left/right lengths
      if (gdim .eq. 3) then
         do e = 1,nelv
            llr(e) = l(1,2,2,e) - lmr(e)
            lrr(e) = l(n,2,2,e) - lmr(e)
            lls(e) = l(2,1,2,e) - lms(e)
            lrs(e) = l(2,n,2,e) - lms(e)
            llt(e) = l(2,2,1,e) - lmt(e)
            lrt(e) = l(2,2,n,e) - lmt(e)
         end do
      else
         do e = 1,nelv
            llr(e) = l(1,2,1,e) - lmr(e)
            lrr(e) = l(n,2,1,e) - lmr(e)
            lls(e) = l(2,1,1,e) - lms(e)
            lrs(e) = l(2,n,1,e) - lms(e)
         end do
      end if
    end associate
  end subroutine swap_lengths


  !> Compute, for every element, a representative spacing between the
  !! index planes i1 and i2 in each direction.
  !!
  !! For each direction the result is 1/sqrt(m), where m is the weighted
  !! mean (weights w) of the inverse squared distance between matching
  !! points on the two planes, i.e. the spacing is based on a harmonic mean.
  !!
  !! @param lr, ls, lt  Resulting spacing per element in r, s and t.
  !!                    lt is left untouched when gdim /= 3.
  !! @param i1, i2      Indices of the two planes (same in all directions).
  !! @param w           Weights, entries 1..nx are used.
  !! @param x, y, z     Coordinates, indexed from 0 in the first two
  !!                    dimensions and from nz0 in the third.
  !! @param nx          Number of points averaged over per direction.
  subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, &
                         nx, nxn, nz0, nzn, nelv, gdim)
    integer, intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
    real(kind=rp), intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
    ! w is only read here
    real(kind=rp), intent(in) :: w(nx)
    real(kind=rp), intent(in) :: x(0:nxn,0:nxn,nz0:nzn,nelv)
    real(kind=rp), intent(in) :: y(0:nxn,0:nxn,nz0:nzn,nelv)
    real(kind=rp), intent(in) :: z(0:nxn,0:nxn,nz0:nzn,nelv)
    real(kind=rp) ::  lr2, ls2, lt2, weight, wsum
    integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie

    ny = nx
    nz = nx
    j1 = i1
    k1 = i1
    j2 = i2
    k2 = i2
    !   Now, for each element, compute lr,ls,lt between specified planes
    do ie = 1,nelv
       if (gdim .eq. 3) then
          ! r direction
          lr2  = 0.0_rp
          wsum = 0.0_rp
          do k = 1,nz
             do j = 1,ny
                weight = w(j)*w(k)
                lr2  = lr2  +   weight /&
                     ( (x(i2,j,k,ie)-x(i1,j,k,ie))**2&
                     +   (y(i2,j,k,ie)-y(i1,j,k,ie))**2&
                     +   (z(i2,j,k,ie)-z(i1,j,k,ie))**2 )
                wsum = wsum + weight
             end do
          end do
          lr2     = lr2/wsum
          lr(ie)  = 1.0_rp/sqrt(lr2)

          ! s direction
          ls2 = 0.0_rp
          wsum = 0.0_rp
          do k = 1,nz
             do i = 1,nx
                weight = w(i)*w(k)
                ls2  = ls2  +   weight / &
                     ( (x(i,j2,k,ie)-x(i,j1,k,ie))**2 &
                     +   (y(i,j2,k,ie)-y(i,j1,k,ie))**2 &
                     +   (z(i,j2,k,ie)-z(i,j1,k,ie))**2 )
                wsum = wsum + weight
             end do
          end do
          ls2     = ls2/wsum
          ls(ie)  = 1.0_rp/sqrt(ls2)

          ! t direction
          lt2 = 0.0_rp
          wsum = 0.0_rp
          do j=1,ny
             do i=1,nx
                weight = w(i)*w(j)
                lt2  = lt2  +   weight / &
                     ( (x(i,j,k2,ie)-x(i,j,k1,ie))**2 &
                     +   (y(i,j,k2,ie)-y(i,j,k1,ie))**2 &
                     +   (z(i,j,k2,ie)-z(i,j,k1,ie))**2 )
                wsum = wsum + weight
             end do
          end do
          lt2     = lt2/wsum
          lt(ie)  = 1.0_rp/sqrt(lt2)
       else              ! 2D
          ! r direction
          lr2 = 0.0_rp
          wsum = 0.0_rp
          do j=1,ny
             weight = w(j)
             lr2  = lr2  + weight / &
                          ( (x(i2,j,1,ie)-x(i1,j,1,ie))**2 &
                          + (y(i2,j,1,ie)-y(i1,j,1,ie))**2 )
             wsum = wsum + weight
          enddo
          lr2     = lr2/wsum
          lr(ie)  = 1.0_rp/sqrt(lr2)

          ! s direction
          ls2 = 0.0_rp
          wsum = 0.0_rp
          do i=1,nx
             weight = w(i)
             ls2  = ls2  + weight / &
                          ( (x(i,j2,1,ie)-x(i,j1,1,ie))**2 &
                        +   (y(i,j2,1,ie)-y(i,j1,1,ie))**2 )
             wsum = wsum + weight
          enddo
          ls2     = ls2/wsum
          ls(ie)  = 1.0_rp/sqrt(ls2)
       endif
    enddo
  end subroutine plane_space


  !> Setup the arrays s, d needed for the fast evaluation of the system.
  !!
  !! For every element the 1D problems are set up per direction with
  !! fdm_setup_fast1d (filling s and the 1D eigenvalues). d then holds the
  !! reciprocal of the sum of the 1D eigenvalues, or zero when that sum
  !! does not exceed a relative tolerance.
  subroutine fdm_setup_fast(this, ah, bh, nl, n)
    integer, intent(in) :: nl, n
    type(fdm_t), intent(inout) :: this
    real(kind=rp), intent(inout) ::  ah(n+1,n+1), bh(n+1)
    ! 1D eigenvalues in the r, s and t directions
    real(kind=rp), dimension(2*this%Xh%lx + 4) :: lam_r, lam_s, lam_t
    integer :: ir, is, it, iel, idx
    integer :: bc_r0, bc_r1, bc_s0, bc_s1, bc_t0, bc_t1
    real(kind=rp) :: tol, lam_sum
    logical :: is_3d

    is_3d = this%dof%msh%gdim .eq. 3

    associate(s => this%s, d => this%d, &
              llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
              lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
              lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
      do iel = 1, this%dof%msh%nelv
         ! Facet types of the six faces, ordered as two per direction
         bc_r0 = this%dof%msh%facet_type(1, iel)
         bc_r1 = this%dof%msh%facet_type(2, iel)
         bc_s0 = this%dof%msh%facet_type(3, iel)
         bc_s1 = this%dof%msh%facet_type(4, iel)
         bc_t0 = this%dof%msh%facet_type(5, iel)
         bc_t1 = this%dof%msh%facet_type(6, iel)

         call fdm_setup_fast1d(s(1,1,1,iel), lam_r, nl, bc_r0, bc_r1, &
              llr(iel), lmr(iel), lrr(iel), ah, bh, n)
         call fdm_setup_fast1d(s(1,1,2,iel), lam_s, nl, bc_s0, bc_s1, &
              lls(iel), lms(iel), lrs(iel), ah, bh, n)

         if (is_3d) then
            call fdm_setup_fast1d(s(1,1,3,iel), lam_t, nl, bc_t0, bc_t1, &
                 llt(iel), lmt(iel), lrt(iel), ah, bh, n)

            ! Tolerance relative to the largest eigenvalues, skipping the
            ! first and last entry in each direction
            tol = 1d-5 * (vlmax(lam_r(2), nl-2) + &
                 vlmax(lam_s(2), nl-2) + vlmax(lam_t(2), nl-2))
            do it = 1, nl
               do is = 1, nl
                  do ir = 1, nl
                     idx = ir + nl*(is - 1) + nl*nl*(it - 1)
                     lam_sum = lam_r(ir) + lam_s(is) + lam_t(it)
                     d(idx,iel) = 0.0_rp
                     if (lam_sum .gt. tol) d(idx,iel) = 1.0_rp / lam_sum
                  end do
               end do
            end do
         else
            tol = 1d-5 * (vlmax(lam_r(2), nl-2) + vlmax(lam_s(2), nl-2))
            do is = 1, nl
               do ir = 1, nl
                  idx = ir + nl*(is - 1)
                  lam_sum = lam_r(ir) + lam_s(is)
                  d(idx,iel) = 0.0_rp
                  if (lam_sum .gt. tol) d(idx,iel) = 1.0_rp / lam_sum
               end do
            end do
         end if
      end do
    end associate
  end subroutine fdm_setup_fast


  !> Set up and solve the 1D generalized eigenvalue problem for one
  !! direction of one element.
  !!
  !! The matrices are assembled by fdm_setup_fast1d_a / fdm_setup_fast1d_b,
  !! generalev overwrites s(:,:,1) and returns the eigenvalues in lam.
  !! Rows of s(:,:,1) are then zeroed depending on lbc/rbc and the result
  !! of trsp is stored in s(:,:,2).
  subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
    integer, intent(in)  :: nl, lbc, rbc, n
    real(kind=rp), intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
    real(kind=rp), intent(inout) ::  ah(0:n, 0:n), bh(0:n)
    integer :: npts
    ! Storage for the right-hand side matrix of the eigenproblem
    real(kind=rp) :: bmat(2*(n+3)**2)

    npts = n + 1

    call fdm_setup_fast1d_a(s, lbc, rbc, ll, lm, lr, ah, n)
    call fdm_setup_fast1d_b(bmat, lbc, rbc, ll, lm, lr, bh, n)
    call generalev(s, bmat, lam, nl, npts)

    ! Left end: first row for any positive type, second row as well
    ! when the type is exactly 1
    if (lbc .gt. 0) then
       call row_zero(s, nl, nl, 1)
       if (lbc .eq. 1) call row_zero(s, nl, nl, 2)
    end if

    ! Right end: same treatment for the last two rows
    if (rbc .gt. 0) then
       call row_zero(s, nl, nl, nl)
       if (rbc .eq. 1) call row_zero(s, nl, nl, nl-1)
    end if

    call trsp(s(1,1,2), nl, s, nl)
  end subroutine fdm_setup_fast1d


  !> Solve the generalized eigenvalue problem A x = lam B x through the
  !! generic sygv wrapper, providing the workspace it needs.
  subroutine generalev(a, b, lam, n, lx)
    integer, intent(in) :: n, lx
    real(kind=rp), intent(inout) :: a(n,n), b(n,n), lam(n)
    ! Workspace handed to the LAPACK driver
    real(kind=rp) :: work(4*(lx+2)**3)

    call sygv(a, b, lam, n, lx, work, size(work))
  end subroutine generalev


  !> Single precision specific of sygv: calls LAPACK ssygv (itype = 1,
  !! eigenvalues and eigenvectors, upper triangles) on the n x n pair (a, b).
  !! On exit a and lam are overwritten by ssygv; bw is workspace of
  !! length lbw.
  subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
    integer, intent(in) :: n, lx, lbw
    real(kind=sp), intent(inout) :: a(n,n), b(n,n), lam(n)
    real(kind=sp), intent(inout) :: bw(4*(lx+2)**3)
    ! Assigned below rather than initialised in the declaration, which
    ! would give the variable the SAVE attribute
    integer :: info
    character(len=80) :: msg

    info = 0
    call ssygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)

    ! Report a failed factorisation instead of silently using the output
    if (info .ne. 0) then
       write(msg, '(a,i0)') 'ssygv failed in fdm setup, info = ', info
       call neko_warning(trim(msg))
    end if
  end subroutine sp_sygv


  !> Double precision specific of sygv: calls LAPACK dsygv (itype = 1,
  !! eigenvalues and eigenvectors, upper triangles) on the n x n pair (a, b).
  !! On exit a and lam are overwritten by dsygv; bw is workspace of
  !! length lbw.
  subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
    integer, intent(in) :: n, lx, lbw
    real(kind=dp), intent(inout) :: a(n,n), b(n,n), lam(n)
    real(kind=dp), intent(inout) :: bw(4*(lx+2)**3)
    ! Assigned below rather than initialised in the declaration, which
    ! would give the variable the SAVE attribute
    integer :: info
    character(len=80) :: msg

    info = 0
    call dsygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)

    ! Report a failed factorisation instead of silently using the output
    if (info .ne. 0) then
       write(msg, '(a,i0)') 'dsygv failed in fdm setup, info = ', info
       call neko_warning(trim(msg))
    end if
  end subroutine dp_sygv


  !> Quad precision specific of sygv. The inputs are converted to double
  !! precision, LAPACK dsygv is called on the copies, and the results are
  !! converted back. A warning is issued on rank 0 at every call.
  !! The quad precision workspace bw is not used; a double precision
  !! workspace of the same length is used instead.
  subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
    integer, intent(in) :: n, lx, lbw
    real(kind=qp), intent(inout) :: a(n,n), b(n,n), lam(n)
    real(kind=qp), intent(inout) :: bw(4*(lx+2)**3)
    real(kind=dp) :: a2(n,n), b2(n,n), lam2(n)
    real(kind=dp) :: bw2(4*(lx+2)**3)
    ! Assigned below rather than initialised in the declaration, which
    ! would give the variable the SAVE attribute
    integer :: info
    character(len=80) :: msg

    info = 0
    a2 = real(a, dp)
    b2 = real(b, dp)
    lam2 = real(lam, dp)
    call dsygv(1, 'V', 'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
    a = real(a2, qp)
    b = real(b2, qp)
    lam = real(lam2, qp)

    ! Report a failed factorisation instead of silently using the output
    if (info .ne. 0) then
       write(msg, '(a,i0)') 'dsygv failed in fdm setup, info = ', info
       call neko_warning(trim(msg))
    end if

    if (pe_rank .eq. 0) then
       call neko_warning('Real precision choice not supported for fdm, treating it as double')
    end if
  end subroutine qp_sygv


  !> Assemble the (n+3) x (n+3) left-hand side matrix a of the 1D
  !! eigenproblem on the extended element.
  !!
  !! Rows/columns 1..n+1 hold the element's own operator, 2/lm * ah.
  !! Row/column 0 and n+2 are the extra points on the left and right side.
  !! When lbc (rbc) is 0 the 2 x 2 corner of ah scaled by 2/ll (2/lr) is
  !! added on that side; otherwise the extra point gets a unit diagonal.
  !! When lbc (rbc) is 1 the first (last) point of the element itself is
  !! excluded from the middle block and keeps a unit diagonal.
  !! NOTE(review): the meaning of the facet type values 0 and 1 is not
  !! visible here - confirm against the mesh facet_type definition.
  subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
    integer, intent(in) ::lbc, rbc, n
    real(kind=rp), intent(inout) :: a(0:n+2,0:n+2), ll, lm, lr
    real(kind=rp), intent(inout) :: ah(0:n,0:n)
    real(kind=rp) :: fac
    integer :: i, j, i0, i1

    ! Index range of ah copied into the middle block
    i0 = 0
    if(lbc .eq. 1) i0 = 1
    i1 = n
    if(rbc .eq. 1) i1 = n - 1

    call rzero(a, (n+3) * (n+3))

    fac = 2.0_rp / lm
    ! Unit diagonal at both element end points; overwritten by the loop
    ! below unless that end point is excluded (lbc/rbc equal to 1)
    a(1,1) = 1.0_rp
    a(n+1,n+1) = 1.0_rp

    do j = i0, i1
       do i = i0, i1
          a(i+1,j+1) = fac * ah(i,j)
       enddo
    enddo

    ! Left side
    if(lbc .eq. 0) then
       fac = 2.0_rp / ll
       a(0,0) = fac * ah(n-1,n-1)
       a(1,0) = fac * ah(n  ,n-1)
       a(0,1) = fac * ah(n-1,n  )
       a(1,1) = a(1,1) + fac * ah(n,n)
    else
       a(0,0) = 1.0_rp
    endif

    ! Right side
    if(rbc .eq. 0) then
       fac = 2.0_rp / lr
       a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0)
       a(n+2,n+1) = fac * ah(1,0)
       a(n+1,n+2) = fac * ah(0,1)
       a(n+2,n+2) = fac * ah(1,1)
    else
       a(n+2,n+2) = 1.0_rp
    endif
  end subroutine fdm_setup_fast1d_a


  !> Assemble the diagonal (n+3) x (n+3) right-hand side matrix b of the
  !! 1D eigenproblem on the extended element.
  !!
  !! Diagonal entries 1..n+1 hold lm/2 * bh for the element itself.
  !! When lbc (rbc) is 0 the contributions ll/2 * bh (lr/2 * bh) of the
  !! two outermost points on that side are added; otherwise the extra
  !! point gets a unit diagonal. When lbc (rbc) is 1 the first (last)
  !! point of the element is skipped and keeps a unit diagonal.
  subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
    integer, intent(in) :: lbc, rbc, n
    real(kind=rp), intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
    real(kind=rp), intent(inout) :: bh(0:n)
    real(kind=rp) :: half_len
    integer :: k, first, last

    ! Index range of bh copied onto the diagonal
    first = merge(1, 0, lbc .eq. 1)
    last = merge(n - 1, n, rbc .eq. 1)

    call rzero(b, (n + 3) * (n + 3))

    ! Unit diagonal at both element end points, replaced below unless
    ! the end point is skipped
    b(1,1) = 1.0_rp
    b(n+1,n+1) = 1.0_rp

    half_len = 0.5_rp * lm
    do k = first, last
       b(k+1,k+1) = half_len * bh(k)
    end do

    ! Left side
    if (lbc .ne. 0) then
       b(0,0) = 1.0_rp
    else
       half_len = 0.5_rp * ll
       b(0,0) = half_len * bh(n-1)
       b(1,1) = b(1,1) + half_len * bh(n)
    end if

    ! Right side
    if (rbc .ne. 0) then
       b(n+2,n+2) = 1.0_rp
    else
       half_len = 0.5_rp * lr
       b(n+1,n+1) = b(n+1,n+1) + half_len * bh(0)
       b(n+2,n+2) = half_len * bh(1)
    end if
  end subroutine fdm_setup_fast1d_b


  !> Release the host arrays held by the object and drop the references
  !! to the space, dofmap, gather-scatter handle and mesh.
  !! NOTE(review): the device pointers s_d, d_d and swplen_d mapped in
  !! fdm_init are not released or reset here - confirm whether that is
  !! handled elsewhere.
  subroutine fdm_free(this)
    class(fdm_t), intent(inout) :: this

    ! Operators
    if (allocated(this%s)) deallocate(this%s)
    if (allocated(this%d)) deallocate(this%d)

    ! Left, middle and right lengths
    if (allocated(this%len_lr)) deallocate(this%len_lr)
    if (allocated(this%len_ls)) deallocate(this%len_ls)
    if (allocated(this%len_lt)) deallocate(this%len_lt)

    if (allocated(this%len_mr)) deallocate(this%len_mr)
    if (allocated(this%len_ms)) deallocate(this%len_ms)
    if (allocated(this%len_mt)) deallocate(this%len_mt)

    if (allocated(this%len_rr)) deallocate(this%len_rr)
    if (allocated(this%len_rs)) deallocate(this%len_rs)
    if (allocated(this%len_rt)) deallocate(this%len_rt)

    ! Exchange work field
    if (allocated(this%swplen)) deallocate(this%swplen)

    ! Non-owning references
    nullify(this%Xh)
    nullify(this%dof)
    nullify(this%gs_h)
    nullify(this%msh)
  end subroutine fdm_free


  !> Apply the fast diagonalization to r, storing the result in e, using
  !! the backend selected at build time (SX, libxsmm, device or CPU, in
  !! that order of precedence).
  !! @param stream  Optional stream/queue, forwarded to the device backend
  !!                only; glb_cmd_queue is used when it is absent.
  subroutine fdm_compute(this, e, r, stream)
    class(fdm_t), intent(inout) :: this
    real(kind=rp), dimension((this%Xh%lx+2)**3, this%msh%nelv), intent(inout) :: e, r
    type(c_ptr), optional :: stream
    type(c_ptr) :: strm
    integer :: nl

    strm = glb_cmd_queue
    if (present(stream)) strm = stream

    ! Points per direction on the extended element
    nl = this%Xh%lx + 2

    associate(s => this%s, d => this%d, &
              gdim => this%msh%gdim, nelv => this%msh%nelv)
      if (neko_bcknd_sx .eq. 1) then
         call fdm_do_fast_sx(e, r, s, d, nl, gdim, nelv)
      else if (neko_bcknd_xsmm .eq. 1) then
         call fdm_do_fast_xsmm(e, r, s, d, nl, gdim, nelv)
      else if (neko_bcknd_device .eq. 1) then
         call fdm_do_fast_device(e, r, s, d, nl, gdim, nelv, strm)
      else
         call fdm_do_fast_cpu(e, r, s, d, nl, gdim, nelv)
      end if
    end associate
  end subroutine fdm_compute


end module fdm

real
double real
Definition device_config.h:12

device::device_map
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57

device::device_memcpy
Copy data between host and device (or device and device)
Definition device.F90:51

fdm::sygv
Definition fdm.f90:103

comm
Definition comm.F90:1

comm::pe_rank
integer pe_rank
MPI rank.
Definition comm.F90:28

device
Device abstraction, common interface for various accelerators.
Definition device.F90:34

device::host_to_device
integer, parameter, public host_to_device
Definition device.F90:47

device::device_to_host
integer, parameter, public device_to_host
Definition device.F90:47

dofmap
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35

fast3d
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61

fast3d::semhat
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
Definition fast3d.f90:168

fdm_cpu
Fast Diagonalization.
Definition fdm_cpu.f90:2

fdm_device
Definition fdm_device.F90:33

fdm_sx
Fast Diagonalization SX-Aurora backend.
Definition fdm_sx.f90:34

fdm_xsmm
Fast Diagonalization libxsmm backend.
Definition fdm_xsmm.f90:2

fdm
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61

fdm::swap_lengths
subroutine swap_lengths(this, x, y, z, nelv, gdim)
Definition fdm.f90:161

fdm::fdm_setup_fast
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
Definition fdm.f90:337

fdm::qp_sygv
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:460

fdm::fdm_compute
subroutine fdm_compute(this, e, r, stream)
Definition fdm.f90:625

fdm::fdm_free
subroutine fdm_free(this)
Definition fdm.f90:567

fdm::dp_sygv
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:452

fdm::generalev
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem $A x = \lambda B x$. A – symm. B – symm., pos....
Definition fdm.f90:432

fdm::fdm_setup_fast1d
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
Definition fdm.f90:407

fdm::fdm_setup_fast1d_b
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
Definition fdm.f90:527

fdm::plane_space
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
Definition fdm.f90:248

fdm::fdm_setup_fast1d_a
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
Definition fdm.f90:481

fdm::sp_sygv
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:444

fdm::fdm_init
subroutine fdm_init(this, xh, dm, gs_h)
Definition fdm.f90:110

gather_scatter
Gather-scatter.
Definition gather_scatter.f90:34

math
Definition math.f90:60

math::rzero
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:195

mesh
Defines a mesh.
Definition mesh.f90:34

neko_config
Build configurations.
Definition neko_config.f90:34

neko_config::neko_bcknd_device
integer, parameter neko_bcknd_device
Definition neko_config.f90:44

num_types
Definition num_types.f90:1

num_types::rp
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12

space
Defines a function space.
Definition space.f90:34

speclib
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition speclib.f90:148

tensor
Tensor operations.
Definition tensor.f90:61

utils
Utilities.
Definition utils.f90:35

dofmap::dofmap_t
Definition dofmap.f90:53

fdm::fdm_t
Definition fdm.f90:83

gather_scatter::gs_t
Definition gather_scatter.f90:58

mesh::mesh_t
Definition mesh.f90:64

space::space_t
The function space for the SEM solution fields.
Definition space.f90:62