80  use, 
intrinsic :: iso_c_binding
 
   85     real(kind=
rp), 
allocatable :: s(:,:,:,:)
 
   86     real(kind=
rp), 
allocatable :: d(:,:)
 
   87     type(c_ptr) :: s_d = c_null_ptr
 
   88     type(c_ptr) :: d_d = c_null_ptr
 
   89     real(kind=
rp), 
allocatable :: len_lr(:), len_ls(:), len_lt(:)
 
   90     real(kind=
rp), 
allocatable :: len_mr(:), len_ms(:), len_mt(:)
 
   91     real(kind=
rp), 
allocatable :: len_rr(:), len_rs(:), len_rt(:)
 
   92     real(kind=
rp), 
allocatable :: swplen(:,:,:,:)
 
   93     type(c_ptr) :: swplen_d = c_null_ptr
 
   96     type(
gs_t), 
pointer :: gs_h => null()
 
   97     type(
mesh_t), 
pointer :: msh => null()
 
 
  111    class(
fdm_t), 
intent(inout) :: this
 
  112    type(
space_t), 
target, 
intent(inout) :: Xh
 
  113    type(
dofmap_t), 
target, 
intent(in) :: dof
 
  114    type(
gs_t), 
target, 
intent(inout) :: gs_h
 
  116    real(kind=
rp), 
dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
 
  117    real(kind=
rp), 
dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
 
  118    integer :: nl, n, nelv
 
  124    allocate(this%s(nl*nl,2,dof%msh%gdim, dof%msh%nelv))
 
  125    allocate(this%d(nl**3,dof%msh%nelv))
 
  126    allocate(this%swplen(xh%lx, xh%lx, xh%lx,dof%msh%nelv))
 
  127    allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
 
  128    allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
 
  129    allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))
 
  133    call rzero(this%swplen, xh%lxyz * dof%msh%nelv)
 
  136       call device_map(this%s, this%s_d,nl*nl*2*dof%msh%gdim*dof%msh%nelv)
 
  137       call device_map(this%d, this%d_d,nl**dof%msh%gdim*dof%msh%nelv)
 
  138       call device_map(this%swplen,this%swplen_d, xh%lxyz*dof%msh%nelv)
 
  141    call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
 
  147    call swap_lengths(this, dof%x, dof%y, dof%z, dof%msh%nelv, dof%msh%gdim)
 
 
  162    type(
fdm_t), 
intent(inout) :: this
 
  163    integer, 
intent(in) :: gdim, nelv
 
  164    real(kind=
rp), 
dimension(this%Xh%lxyz,nelv) , 
intent(in):: x, y, z
 
  165    integer :: j, k, e, n2, nz0, nzn, nx, lx1, n
 
  167    associate(l => this%swplen, xh =>this%Xh, &
 
  168         llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
 
  169         lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
 
  170         lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
 
  176      if (gdim .eq. 3) 
then 
  180      call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z,&
 
  181           nx, n2, nz0, nzn, nelv, gdim)
 
  183      if (gdim .eq. 3) 
then 
  199            call this%gs_h%op(l, this%dof%size(), gs_op_add)
 
  203            call this%gs_h%op(l, this%dof%size(), gs_op_add)
 
  207            llr(e) = l(1,2,2,e) - lmr(e)
 
  208            lrr(e) = l(n,2,2,e) - lmr(e)
 
  209            lls(e) = l(2,1,2,e) - lms(e)
 
  210            lrs(e) = l(2,n,2,e) - lms(e)
 
  211            llt(e) = l(2,2,1,e) - lmt(e)
 
  212            lrt(e) = l(2,2,n,e) - lmt(e)
 
  227            call this%gs_h%op(l, this%dof%size(), gs_op_add)
 
  231            call this%gs_h%op(l, this%dof%size(), gs_op_add)
 
  235            llr(e) = l(1,2,1,e) - lmr(e)
 
  236            lrr(e) = l(n,2,1,e) - lmr(e)
 
  237            lls(e) = l(2,1,1,e) - lms(e)
 
  238            lrs(e) = l(2,n,1,e) - lms(e)
 
 
  248       nx, nxn, nz0, nzn, nelv, gdim)
 
  249    integer, 
intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
 
  250    real(kind=rp), 
intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
 
  251    real(kind=rp), 
intent(inout) :: w(nx)
 
  252    real(kind=rp), 
intent(in) :: x(0:nxn,0:nxn,nz0:nzn,nelv)
 
  253    real(kind=rp), 
intent(in) :: y(0:nxn,0:nxn,nz0:nzn,nelv)
 
  254    real(kind=rp), 
intent(in) :: z(0:nxn,0:nxn,nz0:nzn,nelv)
 
  255    real(kind=rp) :: lr2, ls2, lt2, weight, wsum
 
  256    integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie
 
  265       if (gdim .eq. 3) 
then 
  271                lr2 = lr2 + weight /&
 
  272                     ( (x(i2,j,k,ie)-x(i1,j,k,ie))**2&
 
  273                     + (y(i2,j,k,ie)-y(i1,j,k,ie))**2&
 
  274                     + (z(i2,j,k,ie)-z(i1,j,k,ie))**2 )
 
  279          lr(ie) = 1d0/sqrt(lr2)
 
  285                ls2 = ls2 + weight / &
 
  286                     ( (x(i,j2,k,ie)-x(i,j1,k,ie))**2 &
 
  287                     + (y(i,j2,k,ie)-y(i,j1,k,ie))**2 &
 
  288                     + (z(i,j2,k,ie)-z(i,j1,k,ie))**2 )
 
  293          ls(ie) = 1d0/sqrt(ls2)
 
  299                lt2 = lt2 + weight / &
 
  300                     ( (x(i,j,k2,ie)-x(i,j,k1,ie))**2 &
 
  301                     + (y(i,j,k2,ie)-y(i,j,k1,ie))**2 &
 
  302                     + (z(i,j,k2,ie)-z(i,j,k1,ie))**2 )
 
  307          lt(ie) = 1d0/sqrt(lt2)
 
  313             lr2 = lr2 + weight / &
 
  314                  ( (x(i2,j,1,ie)-x(i1,j,1,ie))**2 &
 
  315                  + (y(i2,j,1,ie)-y(i1,j,1,ie))**2 )
 
  319          lr(ie) = 1d0/sqrt(lr2)
 
  324             ls2 = ls2 + weight / &
 
  325                  ( (x(i,j2,1,ie)-x(i,j1,1,ie))**2 &
 
  326                  + (y(i,j2,1,ie)-y(i,j1,1,ie))**2 )
 
  330          ls(ie) = 1d0/sqrt(ls2)
 
 
  338    integer, 
intent(in) :: nl, n
 
  339    type(
fdm_t), 
intent(inout) :: this
 
  340    real(kind=rp), 
intent(inout) :: ah(n+1,n+1), bh(n+1)
 
  341    real(kind=rp), 
dimension(2*this%Xh%lx + 4) :: lr, ls, lt
 
  343    integer :: ie, il, nr, ns, nt
 
  344    integer :: lbr, rbr, lbs, rbs, lbt, rbt
 
  345    real(kind=rp) :: eps, diag
 
  347    associate(s => this%s, d => this%d, &
 
  348         llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
 
  349         lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
 
  350         lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
 
  351      do ie=1,this%dof%msh%nelv
 
  352         lbr = this%dof%msh%facet_type(1, ie)
 
  353         rbr = this%dof%msh%facet_type(2, ie)
 
  354         lbs = this%dof%msh%facet_type(3, ie)
 
  355         rbs = this%dof%msh%facet_type(4, ie)
 
  356         lbt = this%dof%msh%facet_type(5, ie)
 
  357         rbt = this%dof%msh%facet_type(6, ie)
 
  363              llr(ie), lmr(ie), lrr(ie), ah, bh, n)
 
  365              lls(ie), lms(ie), lrs(ie), ah, bh, n)
 
  366         if(this%dof%msh%gdim .eq. 3) 
then 
  368                 llt(ie), lmt(ie), lrt(ie), ah, bh, n)
 
  372         if(.not. this%dof%msh%gdim .eq. 3) 
then 
  373            eps = 1d-5 * (vlmax(lr(2), nr-2) + vlmax(ls(2), ns-2))
 
  377                  if (diag .gt. eps) 
then 
  378                     d(il,ie) = 1.0_rp / diag
 
  386            eps = 1d-5 * (vlmax(lr(2), nr-2) + &
 
  387                 vlmax(ls(2),ns-2) + vlmax(lt(2), nt-2))
 
  391                     diag = lr(i) + ls(j) + lt(k)
 
  392                     if (diag .gt. eps) 
then 
  393                        d(il,ie) = 1.0_rp / diag
 
 
  407  subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
 
  408    integer, 
intent(in) :: nl, lbc, rbc, n
 
  409    real(kind=rp), 
intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
 
  410    real(kind=rp), 
intent(inout) :: ah(0:n, 0:n), bh(0:n)
 
  412    real(kind=rp) :: b(2*(n+3)**2)
 
  420    if(lbc .gt. 0) 
call row_zero(s, nl, nl, 1)
 
  421    if(lbc .eq. 1) 
call row_zero(s, nl, nl, 2)
 
  422    if(rbc .gt. 0) 
call row_zero(s, nl, nl, nl)
 
  423    if(rbc .eq. 1) 
call row_zero(s, nl, nl, nl-1)
 
  425    call trsp(s(1,1,2), nl, s, nl)
 
 
  433    integer, 
intent(in) :: n, lx
 
  434    real(kind=rp), 
intent(inout) :: a(n,n), b(n,n), lam(n)
 
  436    real(kind=rp) :: bw(4*(lx+2)**3)
 
  440    call sygv(a, b, lam, n, lx, bw, lbw)
 
 
  445    integer, 
intent(in) :: n, lx, lbw
 
  446    real(kind=sp), 
intent(inout) :: a(n,n), b(n,n), lam(n)
 
  447    real(kind=sp) :: bw(4*(lx+2)**3)
 
  449    call ssygv(1, 
'V', 
'U', n, a, n, b, n, lam, bw, lbw, info)
 
 
  453    integer, 
intent(in) :: n, lx, lbw
 
  454    real(kind=dp), 
intent(inout) :: a(n,n), b(n,n), lam(n)
 
  455    real(kind=dp) :: bw(4*(lx+2)**3)
 
  457    call dsygv(1, 
'V', 
'U', n, a, n, b, n, lam, bw, lbw, info)
 
 
  461    integer, 
intent(in) :: n, lx, lbw
 
  462    real(kind=qp), 
intent(inout) :: a(n,n), b(n,n), lam(n)
 
  463    real(kind=dp) :: a2(n,n), b2(n,n), lam2(n)
 
  464    real(kind=qp) :: bw(4*(lx+2)**3)
 
  465    real(kind=dp) :: bw2(4*(lx+2)**3)
 
  471    call dsygv(1, 
'V', 
'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
 
  475    if (pe_rank .eq. 0) 
then 
  476       call neko_warning(
'Real precision choice not supported for fdm, treating it as double')
 
 
  482    integer, 
intent(in) ::lbc, rbc, n
 
  483    real(kind=rp), 
intent(inout) :: a(0:n+2,0:n+2), ll, lm, lr
 
  484    real(kind=rp), 
intent(inout) :: ah(0:n,0:n)
 
  486    integer :: i, j, i0, i1
 
  489    if(lbc .eq. 1) i0 = 1
 
  491    if(rbc .eq. 1) i1 = n - 1
 
  493    call rzero(a, (n+3) * (n+3))
 
  501          a(i+1,j+1) = fac * ah(i,j)
 
  507       a(0,0) = fac * ah(n-1,n-1)
 
  508       a(1,0) = fac * ah(n ,n-1)
 
  509       a(0,1) = fac * ah(n-1,n )
 
  510       a(1,1) = a(1,1) + fac * ah(n,n)
 
  517       a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0)
 
  518       a(n+2,n+1) = fac * ah(1,0)
 
  519       a(n+1,n+2) = fac * ah(0,1)
 
  520       a(n+2,n+2) = fac * ah(1,1)
 
 
  528    integer, 
intent(in) :: lbc, rbc, n
 
  529    real(kind=rp), 
intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
 
  530    real(kind=rp), 
intent(inout) :: bh(0:n)
 
  535    if(lbc .eq. 1) i0 = 1
 
  537    if(rbc .eq. 1) i1 = n - 1
 
  539    call rzero(b, (n + 3) * (n + 3))
 
  546       b(i+1,i+1) = fac * bh(i)
 
  551       b(0,0) = fac * bh(n-1)
 
  552       b(1,1) = b(1,1) + fac * bh(n)
 
  559       b(n+1,n+1) = b(n+1,n+1) + fac * bh(0)
 
  560       b(n+2,n+2) = fac * bh(1)
 
 
  568    class(
fdm_t), 
intent(inout) :: this
 
  570    if(
allocated(this%s)) 
then 
  574    if(
allocated(this%d)) 
then 
  578    if(
allocated(this%len_lr)) 
then 
  579       deallocate(this%len_lr)
 
  582    if(
allocated(this%len_ls)) 
then 
  583       deallocate(this%len_ls)
 
  586    if(
allocated(this%len_lt)) 
then 
  587       deallocate(this%len_lt)
 
  590    if(
allocated(this%len_mr)) 
then 
  591       deallocate(this%len_mr)
 
  594    if(
allocated(this%len_ms)) 
then 
  595       deallocate(this%len_ms)
 
  598    if(
allocated(this%len_mt)) 
then 
  599       deallocate(this%len_mt)
 
  602    if(
allocated(this%len_rr)) 
then 
  603       deallocate(this%len_rr)
 
  606    if(
allocated(this%len_rs)) 
then 
  607       deallocate(this%len_rs)
 
  610    if(
allocated(this%len_rt)) 
then 
  611       deallocate(this%len_rt)
 
  614    if(
allocated(this%swplen)) 
then 
  615       deallocate(this%swplen)
 
  623    if (c_associated(this%s_d)) 
then 
  624       call device_free(this%s_d)
 
  626    if (c_associated(this%d_d)) 
then 
  627       call device_free(this%d_d)
 
  629    if (c_associated(this%swplen_d)) 
then 
  630       call device_free(this%swplen_d)
 
 
  636    class(
fdm_t), 
intent(inout) :: this
 
  637    real(kind=rp), 
dimension((this%Xh%lx+2)**3, this%msh%nelv), 
intent(inout) :: e, r
 
  638    type(c_ptr), 
optional :: stream
 
  641    if (
present(stream)) 
then 
  647    if (neko_bcknd_sx .eq. 1) 
then 
  648       call fdm_do_fast_sx(e, r, this%s, this%d, &
 
  649            this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
 
  650    else if (neko_bcknd_xsmm .eq. 1) 
then 
  651       call fdm_do_fast_xsmm(e, r, this%s, this%d, &
 
  652            this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
 
  653    else if (neko_bcknd_device .eq. 1) 
then 
  654       call fdm_do_fast_device(e, r, this%s, this%d, &
 
  655            this%Xh%lx+2, this%msh%gdim, this%msh%nelv, strm)
 
  657       call fdm_do_fast_cpu(e, r, this%s, this%d, &
 
  658            this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
 
 
Map a Fortran array to a device (allocate and associate)
 
Copy data between host and device (or device and device)
 
integer, public pe_rank
MPI rank.
 
Device abstraction, common interface for various accelerators.
 
integer, parameter, public host_to_device
 
subroutine, public device_free(x_d)
Deallocate memory on the device.
 
integer, parameter, public device_to_host
 
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
 
Defines a mapping of the degrees of freedom.
 
Fast diagonalization methods from NEKTON.
 
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
 
subroutine, public fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv)
 
subroutine, public fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream)
 
Fast Diagonalization SX-Aurora backend.
 
subroutine, public fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv)
 
Fast Diagonalization libxsmm backend.
 
subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv)
 
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
 
subroutine swap_lengths(this, x, y, z, nelv, gdim)
 
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
 
subroutine fdm_init(this, xh, dof, gs_h)
 
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
 
subroutine fdm_compute(this, e, r, stream)
 
subroutine fdm_free(this)
 
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
 
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem /$ A x = lam B x/$ A – symm. B – symm., pos....
 
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
 
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
 
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
 
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
 
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
 
subroutine, public row_zero(a, m, n, e)
Sets row e to 0 in matrix a.
 
subroutine, public rzero(a, n)
Zero a real vector.
 
real(kind=rp) function, public vlmax(vec, n)
maximum value of a vector of length n
 
integer, parameter neko_bcknd_device
 
integer, parameter, public qp
 
integer, parameter, public dp
 
integer, parameter, public sp
 
integer, parameter, public rp
Global precision used in computations.
 
Defines a function space.
 
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
 
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
 
The function space for the SEM solution fields.