80 use,
intrinsic :: iso_c_binding
85 real(kind=
rp),
allocatable :: s(:,:,:,:)
86 real(kind=
rp),
allocatable :: d(:,:)
87 type(c_ptr) :: s_d = c_null_ptr
88 type(c_ptr) :: d_d = c_null_ptr
89 real(kind=
rp),
allocatable :: len_lr(:), len_ls(:), len_lt(:)
90 real(kind=
rp),
allocatable :: len_mr(:), len_ms(:), len_mt(:)
91 real(kind=
rp),
allocatable :: len_rr(:), len_rs(:), len_rt(:)
92 real(kind=
rp),
allocatable :: swplen(:,:,:,:)
93 type(c_ptr) :: swplen_d = c_null_ptr
96 type(
gs_t),
pointer :: gs_h => null()
97 type(
mesh_t),
pointer :: msh => null()
111 class(
fdm_t),
intent(inout) :: this
112 type(
space_t),
target,
intent(inout) :: Xh
113 type(
dofmap_t),
target,
intent(in) :: dof
114 type(
gs_t),
target,
intent(inout) :: gs_h
116 real(kind=
rp),
dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
117 real(kind=
rp),
dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
118 integer :: nl, n, nelv
124 allocate(this%s(nl*nl, 2, dof%msh%gdim, dof%msh%nelv))
125 allocate(this%d(nl**3, dof%msh%nelv))
126 allocate(this%swplen(xh%lx, xh%lx, xh%lx, dof%msh%nelv))
127 allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
128 allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
129 allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))
133 call rzero(this%swplen, xh%lxyz * dof%msh%nelv)
136 call device_map(this%s, this%s_d, nl*nl*2*dof%msh%gdim*dof%msh%nelv)
137 call device_map(this%d, this%d_d, nl**dof%msh%gdim*dof%msh%nelv)
138 call device_map(this%swplen, this%swplen_d, xh%lxyz*dof%msh%nelv)
141 call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
147 call swap_lengths(this, dof%x, dof%y, dof%z, dof%msh%nelv, dof%msh%gdim)
153 nl*nl*2*dof%msh%gdim*dof%msh%nelv,
host_to_device, sync = .false.)
162 type(
fdm_t),
intent(inout) :: this
163 integer,
intent(in) :: gdim, nelv
164 real(kind=
rp),
dimension(this%Xh%lxyz, nelv),
intent(in) :: x, y, z
165 integer :: j, k, e, n2, nz0, nzn, nx, lx1, n
167 associate(l => this%swplen, xh => this%Xh, &
168 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
169 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
170 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
176 if (gdim .eq. 3)
then
180 call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z, &
181 nx, n2, nz0, nzn, nelv, gdim)
183 if (gdim .eq. 3)
then
187 l(1, k, j, e) = lmr(e)
188 l(n, k, j, e) = lmr(e)
189 l(k, 1, j, e) = lms(e)
190 l(k, n, j, e) = lms(e)
191 l(k, j, 1, e) = lmt(e)
192 l(k, j, n, e) = lmt(e)
199 call this%gs_h%op(l, this%dof%size(), gs_op_add)
203 call this%gs_h%op(l, this%dof%size(), gs_op_add)
207 llr(e) = l(1, 2, 2, e) - lmr(e)
208 lrr(e) = l(n, 2, 2, e) - lmr(e)
209 lls(e) = l(2, 1, 2, e) - lms(e)
210 lrs(e) = l(2, n, 2, e) - lms(e)
211 llt(e) = l(2, 2, 1, e) - lmt(e)
212 lrt(e) = l(2, 2, n, e) - lmt(e)
217 l(1, j, 1, e) = lmr(e)
218 l(n, j, 1, e) = lmr(e)
219 l(j, 1, 1, e) = lms(e)
220 l(j, n, 1, e) = lms(e)
227 call this%gs_h%op(l, this%dof%size(), gs_op_add)
231 call this%gs_h%op(l, this%dof%size(), gs_op_add)
235 llr(e) = l(1, 2, 1, e) - lmr(e)
236 lrr(e) = l(n, 2, 1, e) - lmr(e)
237 lls(e) = l(2, 1, 1, e) - lms(e)
238 lrs(e) = l(2, n, 1, e) - lms(e)
248 nx, nxn, nz0, nzn, nelv, gdim)
249 integer,
intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
250 real(kind=rp),
intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
251 real(kind=rp),
intent(inout) :: w(nx)
252 real(kind=rp),
intent(in) :: x(0:nxn, 0:nxn, nz0:nzn, nelv)
253 real(kind=rp),
intent(in) :: y(0:nxn, 0:nxn, nz0:nzn, nelv)
254 real(kind=rp),
intent(in) :: z(0:nxn, 0:nxn, nz0:nzn, nelv)
255 real(kind=rp) :: lr2, ls2, lt2, weight, wsum
256 integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie
265 if (gdim .eq. 3)
then
271 lr2 = lr2 + weight /&
272 ( (x(i2, j, k, ie) - x(i1, j, k, ie))**2&
273 + (y(i2, j, k, ie) - y(i1, j, k, ie))**2&
274 + (z(i2, j, k, ie) - z(i1, j, k, ie))**2 )
279 lr(ie) = 1d0/sqrt(lr2)
285 ls2 = ls2 + weight / &
286 ( (x(i, j2, k, ie) - x(i, j1, k, ie))**2 &
287 + (y(i, j2, k, ie) - y(i, j1, k, ie))**2 &
288 + (z(i, j2, k, ie) - z(i, j1, k, ie))**2 )
293 ls(ie) = 1d0/sqrt(ls2)
299 lt2 = lt2 + weight / &
300 ( (x(i, j, k2, ie) - x(i, j, k1, ie))**2 &
301 + (y(i, j, k2, ie) - y(i, j, k1, ie))**2 &
302 + (z(i, j, k2, ie) - z(i, j, k1, ie))**2 )
307 lt(ie) = 1d0/sqrt(lt2)
313 lr2 = lr2 + weight / &
314 ( (x(i2, j, 1, ie) - x(i1, j, 1, ie))**2 &
315 + (y(i2, j, 1, ie) - y(i1, j, 1, ie))**2 )
319 lr(ie) = 1d0/sqrt(lr2)
324 ls2 = ls2 + weight / &
325 ( (x(i, j2, 1, ie) - x(i, j1, 1, ie))**2 &
326 + (y(i, j2, 1, ie) - y(i, j1, 1, ie))**2 )
330 ls(ie) = 1d0/sqrt(ls2)
338 integer,
intent(in) :: nl, n
339 type(
fdm_t),
intent(inout) :: this
340 real(kind=rp),
intent(inout) :: ah(n+1, n+1), bh(n+1)
341 real(kind=rp),
dimension(2*this%Xh%lx + 4) :: lr, ls, lt
343 integer :: ie, il, nr, ns, nt
344 integer :: lbr, rbr, lbs, rbs, lbt, rbt
345 real(kind=rp) :: eps, diag
347 associate(s => this%s, d => this%d, &
348 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
349 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
350 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
351 do ie = 1, this%dof%msh%nelv
352 lbr = this%dof%msh%facet_type(1, ie)
353 rbr = this%dof%msh%facet_type(2, ie)
354 lbs = this%dof%msh%facet_type(3, ie)
355 rbs = this%dof%msh%facet_type(4, ie)
356 lbt = this%dof%msh%facet_type(5, ie)
357 rbt = this%dof%msh%facet_type(6, ie)
363 llr(ie), lmr(ie), lrr(ie), ah, bh, n)
365 lls(ie), lms(ie), lrs(ie), ah, bh, n)
366 if (this%dof%msh%gdim .eq. 3)
then
368 llt(ie), lmt(ie), lrt(ie), ah, bh, n)
372 if (.not. this%dof%msh%gdim .eq. 3)
then
373 eps = 1d-5 * (vlmax(lr(2), nr - 2) + vlmax(ls(2), ns - 2))
377 if (diag .gt. eps)
then
378 d(il, ie) = 1.0_rp / diag
386 eps = 1d-5 * (vlmax(lr(2), nr - 2) + &
387 vlmax(ls(2), ns - 2) + vlmax(lt(2), nt - 2))
391 diag = lr(i) + ls(j) + lt(k)
392 if (diag .gt. eps)
then
393 d(il, ie) = 1.0_rp / diag
407 subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
408 integer,
intent(in) :: nl, lbc, rbc, n
409 real(kind=rp),
intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
410 real(kind=rp),
intent(inout) :: ah(0:n, 0:n), bh(0:n)
412 real(kind=rp) :: b(2*(n+3)**2)
420 if (lbc .gt. 0)
call row_zero(s, nl, nl, 1)
421 if (lbc .eq. 1)
call row_zero(s, nl, nl, 2)
422 if (rbc .gt. 0)
call row_zero(s, nl, nl, nl)
423 if (rbc .eq. 1)
call row_zero(s, nl, nl, nl-1)
425 call trsp(s(1, 1, 2), nl, s, nl)
433 integer,
intent(in) :: n, lx
434 real(kind=rp),
intent(inout) :: a(n, n), b(n, n), lam(n)
436 real(kind=rp) :: bw(4*(lx+2)**3)
440 call sygv(a, b, lam, n, lx, bw, lbw)
445 integer,
intent(in) :: n, lx, lbw
446 real(kind=sp),
intent(inout) :: a(n, n), b(n, n), lam(n)
447 real(kind=sp) :: bw(4*(lx+2)**3)
449 call ssygv(1,
'V',
'U', n, a, n, b, n, lam, bw, lbw, info)
453 integer,
intent(in) :: n, lx, lbw
454 real(kind=dp),
intent(inout) :: a(n, n), b(n, n), lam(n)
455 real(kind=dp) :: bw(4*(lx+2)**3)
457 call dsygv(1,
'V',
'U', n, a, n, b, n, lam, bw, lbw, info)
461 integer,
intent(in) :: n, lx, lbw
462 real(kind=qp),
intent(inout) :: a(n, n), b(n, n), lam(n)
463 real(kind=dp) :: a2(n, n), b2(n, n), lam2(n)
464 real(kind=qp) :: bw(4*(lx+2)**3)
465 real(kind=dp) :: bw2(4*(lx+2)**3)
471 call dsygv(1,
'V',
'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
475 if (pe_rank .eq. 0)
then
476 call neko_warning(
'Real precision choice not supported for fdm, ' // &
477 'treating it as double')
483 integer,
intent(in) ::lbc, rbc, n
484 real(kind=rp),
intent(inout) :: a(0:n+2, 0:n+2), ll, lm, lr
485 real(kind=rp),
intent(inout) :: ah(0:n, 0:n)
487 integer :: i, j, i0, i1
490 if (lbc .eq. 1) i0 = 1
492 if (rbc .eq. 1) i1 = n - 1
494 call rzero(a, (n+3) * (n+3))
502 a(i+1, j+1) = fac * ah(i, j)
508 a(0, 0) = fac * ah(n-1, n-1)
509 a(1, 0) = fac * ah(n, n-1)
510 a(0, 1) = fac * ah(n-1, n)
511 a(1, 1) = a(1, 1) + fac * ah(n, n)
518 a(n+1, n+1) = a(n+1, n+1) + fac * ah(0, 0)
519 a(n+2, n+1) = fac * ah(1, 0)
520 a(n+1, n+2) = fac * ah(0, 1)
521 a(n+2, n+2) = fac * ah(1, 1)
529 integer,
intent(in) :: lbc, rbc, n
530 real(kind=rp),
intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
531 real(kind=rp),
intent(inout) :: bh(0:n)
536 if (lbc .eq. 1) i0 = 1
538 if (rbc .eq. 1) i1 = n - 1
540 call rzero(b, (n + 3) * (n + 3))
547 b(i+1, i+1) = fac * bh(i)
552 b(0, 0) = fac * bh(n-1)
553 b(1, 1) = b(1, 1) + fac * bh(n)
560 b(n+1, n+1) = b(n+1, n+1) + fac * bh(0)
561 b(n+2, n+2) = fac * bh(1)
569 class(
fdm_t),
intent(inout) :: this
571 if (
allocated(this%s))
then
575 if (
allocated(this%d))
then
579 if (
allocated(this%len_lr))
then
580 deallocate(this%len_lr)
583 if (
allocated(this%len_ls))
then
584 deallocate(this%len_ls)
587 if (
allocated(this%len_lt))
then
588 deallocate(this%len_lt)
591 if (
allocated(this%len_mr))
then
592 deallocate(this%len_mr)
595 if (
allocated(this%len_ms))
then
596 deallocate(this%len_ms)
599 if (
allocated(this%len_mt))
then
600 deallocate(this%len_mt)
603 if (
allocated(this%len_rr))
then
604 deallocate(this%len_rr)
607 if (
allocated(this%len_rs))
then
608 deallocate(this%len_rs)
611 if (
allocated(this%len_rt))
then
612 deallocate(this%len_rt)
615 if (
allocated(this%swplen))
then
616 deallocate(this%swplen)
624 if (c_associated(this%s_d))
then
625 call device_free(this%s_d)
627 if (c_associated(this%d_d))
then
628 call device_free(this%d_d)
630 if (c_associated(this%swplen_d))
then
631 call device_free(this%swplen_d)
637 class(
fdm_t),
intent(inout) :: this
638 real(kind=rp),
dimension((this%Xh%lx + 2)**3, this%msh%nelv), &
639 intent(inout) :: e, r
640 type(c_ptr),
optional :: stream
643 if (
present(stream))
then
649 if (neko_bcknd_sx .eq. 1)
then
650 call fdm_do_fast_sx(e, r, this%s, this%d, &
651 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
652 else if (neko_bcknd_xsmm .eq. 1)
then
653 call fdm_do_fast_xsmm(e, r, this%s, this%d, &
654 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
655 else if (neko_bcknd_device .eq. 1)
then
656 call fdm_do_fast_device(e, r, this%s, this%d, &
657 this%Xh%lx+2, this%msh%gdim, this%msh%nelv, strm)
659 call fdm_do_fast_cpu(e, r, this%s, this%d, &
660 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
integer, public pe_rank
MPI rank.
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
integer, parameter, public device_to_host
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Defines a mapping of the degrees of freedom.
Fast diagonalization methods from NEKTON.
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
subroutine, public fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv)
subroutine, public fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream)
Fast Diagonalization SX-Aurora backend.
subroutine, public fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv)
Fast Diagonalization libxsmm backend.
subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv)
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
subroutine swap_lengths(this, x, y, z, nelv, gdim)
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
subroutine fdm_init(this, xh, dof, gs_h)
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine fdm_compute(this, e, r, stream)
subroutine fdm_free(this)
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem /$ A x = lam B x/$ A – symm. B – symm., pos....
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
subroutine, public row_zero(a, m, n, e)
Sets row e to 0 in matrix a.
subroutine, public rzero(a, n)
Zero a real vector.
real(kind=rp) function, public vlmax(vec, n)
maximum value of a vector of length n
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_xsmm
integer, parameter, public qp
integer, parameter, public dp
integer, parameter, public sp
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
The function space for the SEM solution fields.