80 use,
intrinsic :: iso_c_binding
85 real(kind=
rp),
allocatable :: s(:,:,:,:)
86 real(kind=
rp),
allocatable :: d(:,:)
87 type(c_ptr) :: s_d = c_null_ptr
88 type(c_ptr) :: d_d = c_null_ptr
89 real(kind=
rp),
allocatable :: len_lr(:), len_ls(:), len_lt(:)
90 real(kind=
rp),
allocatable :: len_mr(:), len_ms(:), len_mt(:)
91 real(kind=
rp),
allocatable :: len_rr(:), len_rs(:), len_rt(:)
92 real(kind=
rp),
allocatable :: swplen(:,:,:,:)
93 type(c_ptr) :: swplen_d = c_null_ptr
96 type(
gs_t),
pointer :: gs_h => null()
97 type(
mesh_t),
pointer :: msh => null()
111 class(
fdm_t),
intent(inout) :: this
112 type(
space_t),
target,
intent(inout) :: Xh
113 type(
dofmap_t),
target,
intent(in) :: dof
114 type(
gs_t),
target,
intent(inout) :: gs_h
116 real(kind=
rp),
dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
117 real(kind=
rp),
dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
118 integer :: nl, n, nelv
124 allocate(this%s(nl*nl,2,dof%msh%gdim, dof%msh%nelv))
125 allocate(this%d(nl**3,dof%msh%nelv))
126 allocate(this%swplen(xh%lx, xh%lx, xh%lx,dof%msh%nelv))
127 allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
128 allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
129 allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))
133 call rzero(this%swplen, xh%lxyz * dof%msh%nelv)
136 call device_map(this%s, this%s_d,nl*nl*2*dof%msh%gdim*dof%msh%nelv)
137 call device_map(this%d, this%d_d,nl**dof%msh%gdim*dof%msh%nelv)
138 call device_map(this%swplen,this%swplen_d, xh%lxyz*dof%msh%nelv)
141 call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
147 call swap_lengths(this, dof%x, dof%y, dof%z, dof%msh%nelv, dof%msh%gdim)
162 type(
fdm_t),
intent(inout) :: this
163 integer,
intent(in) :: gdim, nelv
164 real(kind=
rp),
dimension(this%Xh%lxyz,nelv) ,
intent(in):: x, y, z
165 integer :: j, k, e, n2, nz0, nzn, nx, lx1, n
167 associate(l => this%swplen, xh =>this%Xh, &
168 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
169 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
170 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
176 if (gdim .eq. 3)
then
180 call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z,&
181 nx, n2, nz0, nzn, nelv, gdim)
183 if (gdim .eq. 3)
then
199 call this%gs_h%op(l, this%dof%size(), gs_op_add)
203 call this%gs_h%op(l, this%dof%size(), gs_op_add)
207 llr(e) = l(1,2,2,e) - lmr(e)
208 lrr(e) = l(n,2,2,e) - lmr(e)
209 lls(e) = l(2,1,2,e) - lms(e)
210 lrs(e) = l(2,n,2,e) - lms(e)
211 llt(e) = l(2,2,1,e) - lmt(e)
212 lrt(e) = l(2,2,n,e) - lmt(e)
227 call this%gs_h%op(l, this%dof%size(), gs_op_add)
231 call this%gs_h%op(l, this%dof%size(), gs_op_add)
235 llr(e) = l(1,2,1,e) - lmr(e)
236 lrr(e) = l(n,2,1,e) - lmr(e)
237 lls(e) = l(2,1,1,e) - lms(e)
238 lrs(e) = l(2,n,1,e) - lms(e)
248 nx, nxn, nz0, nzn, nelv, gdim)
249 integer,
intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
250 real(kind=rp),
intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
251 real(kind=rp),
intent(inout) :: w(nx)
252 real(kind=rp),
intent(in) :: x(0:nxn,0:nxn,nz0:nzn,nelv)
253 real(kind=rp),
intent(in) :: y(0:nxn,0:nxn,nz0:nzn,nelv)
254 real(kind=rp),
intent(in) :: z(0:nxn,0:nxn,nz0:nzn,nelv)
255 real(kind=rp) :: lr2, ls2, lt2, weight, wsum
256 integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie
265 if (gdim .eq. 3)
then
271 lr2 = lr2 + weight /&
272 ( (x(i2,j,k,ie)-x(i1,j,k,ie))**2&
273 + (y(i2,j,k,ie)-y(i1,j,k,ie))**2&
274 + (z(i2,j,k,ie)-z(i1,j,k,ie))**2 )
279 lr(ie) = 1d0/sqrt(lr2)
285 ls2 = ls2 + weight / &
286 ( (x(i,j2,k,ie)-x(i,j1,k,ie))**2 &
287 + (y(i,j2,k,ie)-y(i,j1,k,ie))**2 &
288 + (z(i,j2,k,ie)-z(i,j1,k,ie))**2 )
293 ls(ie) = 1d0/sqrt(ls2)
299 lt2 = lt2 + weight / &
300 ( (x(i,j,k2,ie)-x(i,j,k1,ie))**2 &
301 + (y(i,j,k2,ie)-y(i,j,k1,ie))**2 &
302 + (z(i,j,k2,ie)-z(i,j,k1,ie))**2 )
307 lt(ie) = 1d0/sqrt(lt2)
313 lr2 = lr2 + weight / &
314 ( (x(i2,j,1,ie)-x(i1,j,1,ie))**2 &
315 + (y(i2,j,1,ie)-y(i1,j,1,ie))**2 )
319 lr(ie) = 1d0/sqrt(lr2)
324 ls2 = ls2 + weight / &
325 ( (x(i,j2,1,ie)-x(i,j1,1,ie))**2 &
326 + (y(i,j2,1,ie)-y(i,j1,1,ie))**2 )
330 ls(ie) = 1d0/sqrt(ls2)
338 integer,
intent(in) :: nl, n
339 type(
fdm_t),
intent(inout) :: this
340 real(kind=rp),
intent(inout) :: ah(n+1,n+1), bh(n+1)
341 real(kind=rp),
dimension(2*this%Xh%lx + 4) :: lr, ls, lt
343 integer :: ie, il, nr, ns, nt
344 integer :: lbr, rbr, lbs, rbs, lbt, rbt
345 real(kind=rp) :: eps, diag
347 associate(s => this%s, d => this%d, &
348 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
349 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
350 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
351 do ie=1,this%dof%msh%nelv
352 lbr = this%dof%msh%facet_type(1, ie)
353 rbr = this%dof%msh%facet_type(2, ie)
354 lbs = this%dof%msh%facet_type(3, ie)
355 rbs = this%dof%msh%facet_type(4, ie)
356 lbt = this%dof%msh%facet_type(5, ie)
357 rbt = this%dof%msh%facet_type(6, ie)
363 llr(ie), lmr(ie), lrr(ie), ah, bh, n)
365 lls(ie), lms(ie), lrs(ie), ah, bh, n)
366 if(this%dof%msh%gdim .eq. 3)
then
368 llt(ie), lmt(ie), lrt(ie), ah, bh, n)
372 if(.not. this%dof%msh%gdim .eq. 3)
then
373 eps = 1d-5 * (vlmax(lr(2), nr-2) + vlmax(ls(2), ns-2))
377 if (diag .gt. eps)
then
378 d(il,ie) = 1.0_rp / diag
386 eps = 1d-5 * (vlmax(lr(2), nr-2) + &
387 vlmax(ls(2),ns-2) + vlmax(lt(2), nt-2))
391 diag = lr(i) + ls(j) + lt(k)
392 if (diag .gt. eps)
then
393 d(il,ie) = 1.0_rp / diag
407 subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
408 integer,
intent(in) :: nl, lbc, rbc, n
409 real(kind=rp),
intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
410 real(kind=rp),
intent(inout) :: ah(0:n, 0:n), bh(0:n)
412 real(kind=rp) :: b(2*(n+3)**2)
420 if(lbc .gt. 0)
call row_zero(s, nl, nl, 1)
421 if(lbc .eq. 1)
call row_zero(s, nl, nl, 2)
422 if(rbc .gt. 0)
call row_zero(s, nl, nl, nl)
423 if(rbc .eq. 1)
call row_zero(s, nl, nl, nl-1)
425 call trsp(s(1,1,2), nl, s, nl)
433 integer,
intent(in) :: n, lx
434 real(kind=rp),
intent(inout) :: a(n,n), b(n,n), lam(n)
436 real(kind=rp) :: bw(4*(lx+2)**3)
440 call sygv(a, b, lam, n, lx, bw, lbw)
445 integer,
intent(in) :: n, lx, lbw
446 real(kind=sp),
intent(inout) :: a(n,n), b(n,n), lam(n)
447 real(kind=sp) :: bw(4*(lx+2)**3)
449 call ssygv(1,
'V',
'U', n, a, n, b, n, lam, bw, lbw, info)
453 integer,
intent(in) :: n, lx, lbw
454 real(kind=dp),
intent(inout) :: a(n,n), b(n,n), lam(n)
455 real(kind=dp) :: bw(4*(lx+2)**3)
457 call dsygv(1,
'V',
'U', n, a, n, b, n, lam, bw, lbw, info)
461 integer,
intent(in) :: n, lx, lbw
462 real(kind=qp),
intent(inout) :: a(n,n), b(n,n), lam(n)
463 real(kind=dp) :: a2(n,n), b2(n,n), lam2(n)
464 real(kind=qp) :: bw(4*(lx+2)**3)
465 real(kind=dp) :: bw2(4*(lx+2)**3)
471 call dsygv(1,
'V',
'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
475 if (pe_rank .eq. 0)
then
476 call neko_warning(
'Real precision choice not supported for fdm, treating it as double')
482 integer,
intent(in) ::lbc, rbc, n
483 real(kind=rp),
intent(inout) :: a(0:n+2,0:n+2), ll, lm, lr
484 real(kind=rp),
intent(inout) :: ah(0:n,0:n)
486 integer :: i, j, i0, i1
489 if(lbc .eq. 1) i0 = 1
491 if(rbc .eq. 1) i1 = n - 1
493 call rzero(a, (n+3) * (n+3))
501 a(i+1,j+1) = fac * ah(i,j)
507 a(0,0) = fac * ah(n-1,n-1)
508 a(1,0) = fac * ah(n ,n-1)
509 a(0,1) = fac * ah(n-1,n )
510 a(1,1) = a(1,1) + fac * ah(n,n)
517 a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0)
518 a(n+2,n+1) = fac * ah(1,0)
519 a(n+1,n+2) = fac * ah(0,1)
520 a(n+2,n+2) = fac * ah(1,1)
528 integer,
intent(in) :: lbc, rbc, n
529 real(kind=rp),
intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
530 real(kind=rp),
intent(inout) :: bh(0:n)
535 if(lbc .eq. 1) i0 = 1
537 if(rbc .eq. 1) i1 = n - 1
539 call rzero(b, (n + 3) * (n + 3))
546 b(i+1,i+1) = fac * bh(i)
551 b(0,0) = fac * bh(n-1)
552 b(1,1) = b(1,1) + fac * bh(n)
559 b(n+1,n+1) = b(n+1,n+1) + fac * bh(0)
560 b(n+2,n+2) = fac * bh(1)
568 class(
fdm_t),
intent(inout) :: this
570 if(
allocated(this%s))
then
574 if(
allocated(this%d))
then
578 if(
allocated(this%len_lr))
then
579 deallocate(this%len_lr)
582 if(
allocated(this%len_ls))
then
583 deallocate(this%len_ls)
586 if(
allocated(this%len_lt))
then
587 deallocate(this%len_lt)
590 if(
allocated(this%len_mr))
then
591 deallocate(this%len_mr)
594 if(
allocated(this%len_ms))
then
595 deallocate(this%len_ms)
598 if(
allocated(this%len_mt))
then
599 deallocate(this%len_mt)
602 if(
allocated(this%len_rr))
then
603 deallocate(this%len_rr)
606 if(
allocated(this%len_rs))
then
607 deallocate(this%len_rs)
610 if(
allocated(this%len_rt))
then
611 deallocate(this%len_rt)
614 if(
allocated(this%swplen))
then
615 deallocate(this%swplen)
626 class(
fdm_t),
intent(inout) :: this
627 real(kind=rp),
dimension((this%Xh%lx+2)**3, this%msh%nelv),
intent(inout) :: e, r
628 type(c_ptr),
optional :: stream
631 if (
present(stream))
then
637 if (neko_bcknd_sx .eq. 1)
then
638 call fdm_do_fast_sx(e, r, this%s, this%d, &
639 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
640 else if (neko_bcknd_xsmm .eq. 1)
then
641 call fdm_do_fast_xsmm(e, r, this%s, this%d, &
642 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
643 else if (neko_bcknd_device .eq. 1)
then
644 call fdm_do_fast_device(e, r, this%s, this%d, &
645 this%Xh%lx+2, this%msh%gdim, this%msh%nelv, strm)
647 call fdm_do_fast_cpu(e, r, this%s, this%d, &
648 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
integer, public pe_rank
MPI rank.
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
integer, parameter, public device_to_host
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Defines a mapping of the degrees of freedom.
Fast diagonalization methods from NEKTON.
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
subroutine, public fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv)
subroutine, public fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream)
Fast Diagonalization SX-Aurora backend.
subroutine, public fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv)
Fast Diagonalization libxsmm backend.
subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv)
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
subroutine swap_lengths(this, x, y, z, nelv, gdim)
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
subroutine fdm_init(this, xh, dof, gs_h)
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine fdm_compute(this, e, r, stream)
subroutine fdm_free(this)
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem /$ A x = lam B x/$ A – symm. B – symm., pos....
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine, public row_zero(a, m, n, e)
Sets row e to 0 in matrix a.
subroutine, public rzero(a, n)
Zero a real vector.
real(kind=rp) function, public vlmax(vec, n)
maximum value of a vector of length n
integer, parameter neko_bcknd_device
integer, parameter, public qp
integer, parameter, public dp
integer, parameter, public sp
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
The function space for the SEM solution fields.