79 use,
intrinsic :: iso_c_binding
84 real(kind=
rp),
allocatable :: s(:,:,:,:)
85 real(kind=
rp),
allocatable :: d(:,:)
86 type(c_ptr) :: s_d = c_null_ptr
87 type(c_ptr) :: d_d = c_null_ptr
88 real(kind=
rp),
allocatable :: len_lr(:), len_ls(:), len_lt(:)
89 real(kind=
rp),
allocatable :: len_mr(:), len_ms(:), len_mt(:)
90 real(kind=
rp),
allocatable :: len_rr(:), len_rs(:), len_rt(:)
91 real(kind=
rp),
allocatable :: swplen(:,:,:,:)
92 type(c_ptr) :: swplen_d = c_null_ptr
95 type(
gs_t),
pointer :: gs_h
110 class(
fdm_t),
intent(inout) :: this
111 type(
space_t),
target,
intent(inout) :: Xh
112 type(
dofmap_t),
target,
intent(inout) :: dm
113 type(
gs_t),
target,
intent(inout) :: gs_h
115 real(kind=
rp),
dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
116 real(kind=
rp),
dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
117 integer :: nl, n, nelv
123 allocate(this%s(nl*nl,2,dm%msh%gdim, dm%msh%nelv))
124 allocate(this%d(nl**3,dm%msh%nelv))
125 allocate(this%swplen(xh%lx, xh%lx, xh%lx,dm%msh%nelv))
126 allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
127 allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
128 allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))
132 call rzero(this%swplen, xh%lxyz * dm%msh%nelv)
135 call device_map(this%s, this%s_d,nl*nl*2*dm%msh%gdim*dm%msh%nelv)
136 call device_map(this%d, this%d_d,nl**dm%msh%gdim*dm%msh%nelv)
137 call device_map(this%swplen,this%swplen_d, xh%lxyz*dm%msh%nelv)
140 call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
146 call swap_lengths(this, dm%x, dm%y, dm%z, dm%msh%nelv, dm%msh%gdim)
161 type(
fdm_t),
intent(inout) :: this
162 integer,
intent(in) :: gdim, nelv
163 real(kind=
rp),
dimension(this%Xh%lxyz,nelv) ,
intent(in):: x, y, z
164 integer :: j, k, e, n2, nz0, nzn, nx, lx1, n
166 associate(l => this%swplen, xh =>this%Xh, &
167 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
168 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
169 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
175 if (gdim .eq. 3)
then
179 call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z,&
180 nx, n2, nz0, nzn, nelv, gdim)
182 if (gdim .eq. 3)
then
198 call this%gs_h%op(l, this%dof%size(), gs_op_add)
202 call this%gs_h%op(l, this%dof%size(), gs_op_add)
206 llr(e) = l(1,2,2,e) - lmr(e)
207 lrr(e) = l(n,2,2,e) - lmr(e)
208 lls(e) = l(2,1,2,e) - lms(e)
209 lrs(e) = l(2,n,2,e) - lms(e)
210 llt(e) = l(2,2,1,e) - lmt(e)
211 lrt(e) = l(2,2,n,e) - lmt(e)
226 call this%gs_h%op(l, this%dof%size(), gs_op_add)
230 call this%gs_h%op(l, this%dof%size(), gs_op_add)
234 llr(e) = l(1,2,1,e) - lmr(e)
235 lrr(e) = l(n,2,1,e) - lmr(e)
236 lls(e) = l(2,1,1,e) - lms(e)
237 lrs(e) = l(2,n,1,e) - lms(e)
247 nx, nxn, nz0, nzn, nelv, gdim)
248 integer,
intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
249 real(kind=rp),
intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
250 real(kind=rp),
intent(inout) :: w(nx)
251 real(kind=rp),
intent(in) :: x(0:nxn,0:nxn,nz0:nzn,nelv)
252 real(kind=rp),
intent(in) :: y(0:nxn,0:nxn,nz0:nzn,nelv)
253 real(kind=rp),
intent(in) :: z(0:nxn,0:nxn,nz0:nzn,nelv)
254 real(kind=rp) :: lr2, ls2, lt2, weight, wsum
255 integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie
264 if (gdim .eq. 3)
then
270 lr2 = lr2 + weight /&
271 ( (x(i2,j,k,ie)-x(i1,j,k,ie))**2&
272 + (y(i2,j,k,ie)-y(i1,j,k,ie))**2&
273 + (z(i2,j,k,ie)-z(i1,j,k,ie))**2 )
278 lr(ie) = 1d0/sqrt(lr2)
284 ls2 = ls2 + weight / &
285 ( (x(i,j2,k,ie)-x(i,j1,k,ie))**2 &
286 + (y(i,j2,k,ie)-y(i,j1,k,ie))**2 &
287 + (z(i,j2,k,ie)-z(i,j1,k,ie))**2 )
292 ls(ie) = 1d0/sqrt(ls2)
298 lt2 = lt2 + weight / &
299 ( (x(i,j,k2,ie)-x(i,j,k1,ie))**2 &
300 + (y(i,j,k2,ie)-y(i,j,k1,ie))**2 &
301 + (z(i,j,k2,ie)-z(i,j,k1,ie))**2 )
306 lt(ie) = 1d0/sqrt(lt2)
312 lr2 = lr2 + weight / &
313 ( (x(i2,j,1,ie)-x(i1,j,1,ie))**2 &
314 + (y(i2,j,1,ie)-y(i1,j,1,ie))**2 )
318 lr(ie) = 1d0/sqrt(lr2)
323 ls2 = ls2 + weight / &
324 ( (x(i,j2,1,ie)-x(i,j1,1,ie))**2 &
325 + (y(i,j2,1,ie)-y(i,j1,1,ie))**2 )
329 ls(ie) = 1d0/sqrt(ls2)
337 integer,
intent(in) :: nl, n
338 type(
fdm_t),
intent(inout) :: this
339 real(kind=rp),
intent(inout) :: ah(n+1,n+1), bh(n+1)
340 real(kind=rp),
dimension(2*this%Xh%lx + 4) :: lr, ls, lt
342 integer :: ie, il, nr, ns, nt
343 integer :: lbr, rbr, lbs, rbs, lbt, rbt
344 real(kind=rp) :: eps, diag
346 associate(s => this%s, d => this%d, &
347 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
348 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
349 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
350 do ie=1,this%dof%msh%nelv
351 lbr = this%dof%msh%facet_type(1, ie)
352 rbr = this%dof%msh%facet_type(2, ie)
353 lbs = this%dof%msh%facet_type(3, ie)
354 rbs = this%dof%msh%facet_type(4, ie)
355 lbt = this%dof%msh%facet_type(5, ie)
356 rbt = this%dof%msh%facet_type(6, ie)
362 llr(ie), lmr(ie), lrr(ie), ah, bh, n)
364 lls(ie), lms(ie), lrs(ie), ah, bh, n)
365 if(this%dof%msh%gdim .eq. 3)
then
367 llt(ie), lmt(ie), lrt(ie), ah, bh, n)
371 if(.not. this%dof%msh%gdim .eq. 3)
then
372 eps = 1d-5 * (vlmax(lr(2), nr-2) + vlmax(ls(2), ns-2))
376 if (diag .gt. eps)
then
377 d(il,ie) = 1.0_rp / diag
385 eps = 1d-5 * (vlmax(lr(2), nr-2) + &
386 vlmax(ls(2),ns-2) + vlmax(lt(2), nt-2))
390 diag = lr(i) + ls(j) + lt(k)
391 if (diag .gt. eps)
then
392 d(il,ie) = 1.0_rp / diag
406 subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
407 integer,
intent(in) :: nl, lbc, rbc, n
408 real(kind=rp),
intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
409 real(kind=rp),
intent(inout) :: ah(0:n, 0:n), bh(0:n)
411 real(kind=rp) :: b(2*(n+3)**2)
419 if(lbc .gt. 0)
call row_zero(s, nl, nl, 1)
420 if(lbc .eq. 1)
call row_zero(s, nl, nl, 2)
421 if(rbc .gt. 0)
call row_zero(s, nl, nl, nl)
422 if(rbc .eq. 1)
call row_zero(s, nl, nl, nl-1)
424 call trsp(s(1,1,2), nl, s, nl)
432 integer,
intent(in) :: n, lx
433 real(kind=rp),
intent(inout) :: a(n,n), b(n,n), lam(n)
435 real(kind=rp) :: bw(4*(lx+2)**3)
439 call sygv(a, b, lam, n, lx, bw, lbw)
444 integer,
intent(in) :: n, lx, lbw
445 real(kind=sp),
intent(inout) :: a(n,n), b(n,n), lam(n)
446 real(kind=sp) :: bw(4*(lx+2)**3)
448 call ssygv(1,
'V',
'U', n, a, n, b, n, lam, bw, lbw, info)
452 integer,
intent(in) :: n, lx, lbw
453 real(kind=dp),
intent(inout) :: a(n,n), b(n,n), lam(n)
454 real(kind=dp) :: bw(4*(lx+2)**3)
456 call dsygv(1,
'V',
'U', n, a, n, b, n, lam, bw, lbw, info)
460 integer,
intent(in) :: n, lx, lbw
461 real(kind=qp),
intent(inout) :: a(n,n), b(n,n), lam(n)
462 real(kind=dp) :: a2(n,n), b2(n,n), lam2(n)
463 real(kind=qp) :: bw(4*(lx+2)**3)
464 real(kind=dp) :: bw2(4*(lx+2)**3)
470 call dsygv(1,
'V',
'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
474 if (pe_rank .eq. 0)
then
475 call neko_warning(
'Real precision choice not supported for fdm, treating it as double')
481 integer,
intent(in) ::lbc, rbc, n
482 real(kind=rp),
intent(inout) :: a(0:n+2,0:n+2), ll, lm, lr
483 real(kind=rp),
intent(inout) :: ah(0:n,0:n)
485 integer :: i, j, i0, i1
488 if(lbc .eq. 1) i0 = 1
490 if(rbc .eq. 1) i1 = n - 1
492 call rzero(a, (n+3) * (n+3))
500 a(i+1,j+1) = fac * ah(i,j)
506 a(0,0) = fac * ah(n-1,n-1)
507 a(1,0) = fac * ah(n ,n-1)
508 a(0,1) = fac * ah(n-1,n )
509 a(1,1) = a(1,1) + fac * ah(n,n)
516 a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0)
517 a(n+2,n+1) = fac * ah(1,0)
518 a(n+1,n+2) = fac * ah(0,1)
519 a(n+2,n+2) = fac * ah(1,1)
527 integer,
intent(in) :: lbc, rbc, n
528 real(kind=rp),
intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
529 real(kind=rp),
intent(inout) :: bh(0:n)
534 if(lbc .eq. 1) i0 = 1
536 if(rbc .eq. 1) i1 = n - 1
538 call rzero(b, (n + 3) * (n + 3))
545 b(i+1,i+1) = fac * bh(i)
550 b(0,0) = fac * bh(n-1)
551 b(1,1) = b(1,1) + fac * bh(n)
558 b(n+1,n+1) = b(n+1,n+1) + fac * bh(0)
559 b(n+2,n+2) = fac * bh(1)
567 class(
fdm_t),
intent(inout) :: this
569 if(
allocated(this%s))
then
573 if(
allocated(this%d))
then
577 if(
allocated(this%len_lr))
then
578 deallocate(this%len_lr)
581 if(
allocated(this%len_ls))
then
582 deallocate(this%len_ls)
585 if(
allocated(this%len_lt))
then
586 deallocate(this%len_lt)
589 if(
allocated(this%len_mr))
then
590 deallocate(this%len_mr)
593 if(
allocated(this%len_ms))
then
594 deallocate(this%len_ms)
597 if(
allocated(this%len_mt))
then
598 deallocate(this%len_mt)
601 if(
allocated(this%len_rr))
then
602 deallocate(this%len_rr)
605 if(
allocated(this%len_rs))
then
606 deallocate(this%len_rs)
609 if(
allocated(this%len_rt))
then
610 deallocate(this%len_rt)
613 if(
allocated(this%swplen))
then
614 deallocate(this%swplen)
625 class(
fdm_t),
intent(inout) :: this
626 real(kind=rp),
dimension((this%Xh%lx+2)**3, this%msh%nelv),
intent(inout) :: e, r
627 type(c_ptr),
optional :: stream
630 if (
present(stream))
then
636 if (neko_bcknd_sx .eq. 1)
then
637 call fdm_do_fast_sx(e, r, this%s, this%d, &
638 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
639 else if (neko_bcknd_xsmm .eq. 1)
then
640 call fdm_do_fast_xsmm(e, r, this%s, this%d, &
641 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
642 else if (neko_bcknd_device .eq. 1)
then
643 call fdm_do_fast_device(e, r, this%s, this%d, &
644 this%Xh%lx+2, this%msh%gdim, this%msh%nelv, strm)
646 call fdm_do_fast_cpu(e, r, this%s, this%d, &
647 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
integer, parameter, public device_to_host
Defines a mapping of the degrees of freedom.
Fast diagonalization methods from NEKTON.
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
Fast Diagonalization SX-Aurora backend.
Fast Diagonalization libxsmm backend.
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
subroutine swap_lengths(this, x, y, z, nelv, gdim)
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine fdm_compute(this, e, r, stream)
subroutine fdm_free(this)
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem /$ A x = lam B x/$ A – symm. B – symm., pos....
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine fdm_init(this, Xh, dm, gs_h)
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
LIBRARY ROUTINES FOR SPECTRAL METHODS.
The function space for the SEM solution fields.