Neko  0.8.99
A portable framework for high-order spectral element flow simulations
fdm.f90
Go to the documentation of this file.
1 ! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2 !
3 ! The UChicago Argonne, LLC as Operator of Argonne National
4 ! Laboratory holds copyright in the Software. The copyright holder
5 ! reserves all rights except those expressly granted to licensees,
6 ! and U.S. Government license rights.
7 !
8 ! Redistribution and use in source and binary forms, with or without
9 ! modification, are permitted provided that the following conditions
10 ! are met:
11 !
12 ! 1. Redistributions of source code must retain the above copyright
13 ! notice, this list of conditions and the disclaimer below.
14 !
15 ! 2. Redistributions in binary form must reproduce the above copyright
16 ! notice, this list of conditions and the disclaimer (as noted below)
17 ! in the documentation and/or other materials provided with the
18 ! distribution.
19 !
20 ! 3. Neither the name of ANL nor the names of its contributors
21 ! may be used to endorse or promote products derived from this software
22 ! without specific prior written permission.
23 !
24 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28 ! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29 ! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 ! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31 ! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 ! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 ! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 ! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 !
37 ! Additional BSD Notice
38 ! ---------------------
39 ! 1. This notice is required to be provided under our contract with
40 ! the U.S. Department of Energy (DOE). This work was produced at
41 ! Argonne National Laboratory under Contract
42 ! No. DE-AC02-06CH11357 with the DOE.
43 !
44 ! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45 ! LLC nor any of their employees, makes any warranty,
46 ! express or implied, or assumes any liability or responsibility for the
47 ! accuracy, completeness, or usefulness of any information, apparatus,
48 ! product, or process disclosed, or represents that its use would not
49 ! infringe privately-owned rights.
50 !
51 ! 3. Also, reference herein to any specific commercial products, process,
52 ! or services by trade name, trademark, manufacturer or otherwise does
53 ! not necessarily constitute or imply its endorsement, recommendation,
54 ! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55 ! The views and opinions of authors expressed
56 ! herein do not necessarily state or reflect those of the United States
57 ! Government or UCHICAGO ARGONNE, LLC, and shall
58 ! not be used for advertising or product endorsement purposes.
59 !
61 module fdm
62  use neko_config
63  use num_types
64  use speclib
65  use math
66  use mesh
67  use space
68  use dofmap
69  use gather_scatter
70  use fast3d
71  use tensor
72  use fdm_sx
73  use fdm_xsmm
74  use fdm_cpu
75  use fdm_device
76  use device
77  use utils
78  use comm, only : pe_rank
79  use, intrinsic :: iso_c_binding
80  implicit none
81  private
82 
83  type, public :: fdm_t
84  real(kind=rp), allocatable :: s(:,:,:,:)
85  real(kind=rp), allocatable :: d(:,:)
86  type(c_ptr) :: s_d = c_null_ptr
87  type(c_ptr) :: d_d = c_null_ptr
88  real(kind=rp), allocatable :: len_lr(:), len_ls(:), len_lt(:)
89  real(kind=rp), allocatable :: len_mr(:), len_ms(:), len_mt(:)
90  real(kind=rp), allocatable :: len_rr(:), len_rs(:), len_rt(:)
91  real(kind=rp), allocatable :: swplen(:,:,:,:)
92  type(c_ptr) :: swplen_d = c_null_ptr
93  type(space_t), pointer :: xh
94  type(dofmap_t), pointer :: dof
95  type(gs_t), pointer :: gs_h
96  type(mesh_t), pointer :: msh
97  contains
98  procedure, pass(this) :: init => fdm_init
99  procedure, pass(this) :: free => fdm_free
100  procedure, pass(this) :: compute => fdm_compute
101  end type fdm_t
102 
103  interface sygv
104  module procedure sp_sygv, dp_sygv, qp_sygv
105  end interface sygv
106 
107 contains
108 
109  subroutine fdm_init(this, Xh, dm, gs_h)
110  class(fdm_t), intent(inout) :: this
111  type(space_t), target, intent(inout) :: Xh
112  type(dofmap_t), target, intent(inout) :: dm
113  type(gs_t), target, intent(inout) :: gs_h
114  !We only really use ah, bh
115  real(kind=rp), dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
116  real(kind=rp), dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
117  integer :: nl, n, nelv
118 
119  n = xh%lx -1 !Polynomnial degree
120  nl = xh%lx + 2 !Schwarz!
121  nelv = dm%msh%nelv
122  call fdm_free(this)
123  allocate(this%s(nl*nl,2,dm%msh%gdim, dm%msh%nelv))
124  allocate(this%d(nl**3,dm%msh%nelv))
125  allocate(this%swplen(xh%lx, xh%lx, xh%lx,dm%msh%nelv))
126  allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
127  allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
128  allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))
129 
130  ! Zeroing here enables easier debugging since then
131  ! MPI messages in GS are deterministic
132  call rzero(this%swplen, xh%lxyz * dm%msh%nelv)
133 
134  if (neko_bcknd_device .eq. 1) then
135  call device_map(this%s, this%s_d,nl*nl*2*dm%msh%gdim*dm%msh%nelv)
136  call device_map(this%d, this%d_d,nl**dm%msh%gdim*dm%msh%nelv)
137  call device_map(this%swplen,this%swplen_d, xh%lxyz*dm%msh%nelv)
138  end if
139 
140  call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
141  this%Xh => xh
142  this%dof => dm
143  this%gs_h => gs_h
144  this%msh => dm%msh
145 
146  call swap_lengths(this, dm%x, dm%y, dm%z, dm%msh%nelv, dm%msh%gdim)
147 
148  call fdm_setup_fast(this, ah, bh, nl, n)
149 
150  if (neko_bcknd_device .eq. 1) then
151  call device_memcpy(this%s, this%s_d, &
152  nl*nl*2*dm%msh%gdim*dm%msh%nelv, host_to_device, sync=.false.)
153  call device_memcpy(this%d, this%d_d, &
154  nl**dm%msh%gdim*dm%msh%nelv, host_to_device, sync=.false.)
155  call device_memcpy(this%swplen, this%swplen_d, &
156  xh%lxyz*dm%msh%nelv, host_to_device, sync=.false.)
157  end if
158  end subroutine fdm_init
159 
160  subroutine swap_lengths(this, x, y, z, nelv, gdim)
161  type(fdm_t), intent(inout) :: this
162  integer, intent(in) :: gdim, nelv
163  real(kind=rp), dimension(this%Xh%lxyz,nelv) , intent(in):: x, y, z
164  integer :: j, k, e, n2, nz0, nzn, nx, lx1, n
165 
166  associate(l => this%swplen, xh =>this%Xh, &
167  llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
168  lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
169  lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
170  lx1 = this%Xh%lx
171  n2 = lx1 - 1
172  nz0 = 1
173  nzn = 1
174  nx = lx1 - 2
175  if (gdim .eq. 3) then
176  nz0 = 0
177  nzn = n2
178  end if
179  call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z,&
180  nx, n2, nz0, nzn, nelv, gdim)
181  n = n2 + 1
182  if (gdim .eq. 3) then
183  do e = 1,nelv
184  do j = 2,n2
185  do k = 2,n2
186  l(1,k,j,e) = lmr(e)
187  l(n,k,j,e) = lmr(e)
188  l(k,1,j,e) = lms(e)
189  l(k,n,j,e) = lms(e)
190  l(k,j,1,e) = lmt(e)
191  l(k,j,n,e) = lmt(e)
192  end do
193  end do
194  end do
195  if (neko_bcknd_device .eq. 1) then
196  call device_memcpy(l, this%swplen_d, this%dof%size(), &
197  host_to_device, sync=.false.)
198  call this%gs_h%op(l, this%dof%size(), gs_op_add)
199  call device_memcpy(l, this%swplen_d, this%dof%size(), &
200  device_to_host, sync=.false.)
201  else
202  call this%gs_h%op(l, this%dof%size(), gs_op_add)
203  end if
204 
205  do e = 1,nelv
206  llr(e) = l(1,2,2,e) - lmr(e)
207  lrr(e) = l(n,2,2,e) - lmr(e)
208  lls(e) = l(2,1,2,e) - lms(e)
209  lrs(e) = l(2,n,2,e) - lms(e)
210  llt(e) = l(2,2,1,e) - lmt(e)
211  lrt(e) = l(2,2,n,e) - lmt(e)
212  end do
213  else
214  do e = 1,nelv
215  do j = 2,n2
216  l(1,j,1,e) = lmr(e)
217  l(n,j,1,e) = lmr(e)
218  l(j,1,1,e) = lms(e)
219  l(j,n,1,e) = lms(e)
220  end do
221  end do
222 
223  if (neko_bcknd_device .eq. 1) then
224  call device_memcpy(l, this%swplen_d, this%dof%size(), &
225  host_to_device, sync=.false.)
226  call this%gs_h%op(l, this%dof%size(), gs_op_add)
227  call device_memcpy(l, this%swplen_d, this%dof%size(), &
228  device_to_host, sync=.true.)
229  else
230  call this%gs_h%op(l, this%dof%size(), gs_op_add)
231  end if
232 
233  do e = 1,nelv
234  llr(e) = l(1,2,1,e) - lmr(e)
235  lrr(e) = l(n,2,1,e) - lmr(e)
236  lls(e) = l(2,1,1,e) - lms(e)
237  lrs(e) = l(2,n,1,e) - lms(e)
238  end do
239  end if
240  end associate
241  end subroutine swap_lengths
242 
246  subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, &
247  nx, nxn, nz0, nzn, nelv, gdim)
248  integer, intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
249  real(kind=rp), intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
250  real(kind=rp), intent(inout) :: w(nx)
251  real(kind=rp), intent(in) :: x(0:nxn,0:nxn,nz0:nzn,nelv)
252  real(kind=rp), intent(in) :: y(0:nxn,0:nxn,nz0:nzn,nelv)
253  real(kind=rp), intent(in) :: z(0:nxn,0:nxn,nz0:nzn,nelv)
254  real(kind=rp) :: lr2, ls2, lt2, weight, wsum
255  integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie
256  ny = nx
257  nz = nx
258  j1 = i1
259  k1 = i1
260  j2 = i2
261  k2 = i2
262  ! Now, for each element, compute lr,ls,lt between specified planes
263  do ie = 1,nelv
264  if (gdim .eq. 3) then
265  lr2 = 0d0
266  wsum = 0d0
267  do k = 1,nz
268  do j = 1,ny
269  weight = w(j)*w(k)
270  lr2 = lr2 + weight /&
271  ( (x(i2,j,k,ie)-x(i1,j,k,ie))**2&
272  + (y(i2,j,k,ie)-y(i1,j,k,ie))**2&
273  + (z(i2,j,k,ie)-z(i1,j,k,ie))**2 )
274  wsum = wsum + weight
275  end do
276  end do
277  lr2 = lr2/wsum
278  lr(ie) = 1d0/sqrt(lr2)
279  ls2 = 0d0
280  wsum = 0d0
281  do k = 1,nz
282  do i = 1,nx
283  weight = w(i)*w(k)
284  ls2 = ls2 + weight / &
285  ( (x(i,j2,k,ie)-x(i,j1,k,ie))**2 &
286  + (y(i,j2,k,ie)-y(i,j1,k,ie))**2 &
287  + (z(i,j2,k,ie)-z(i,j1,k,ie))**2 )
288  wsum = wsum + weight
289  end do
290  end do
291  ls2 = ls2/wsum
292  ls(ie) = 1d0/sqrt(ls2)
293  lt2 = 0d0
294  wsum = 0d0
295  do j=1,ny
296  do i=1,nx
297  weight = w(i)*w(j)
298  lt2 = lt2 + weight / &
299  ( (x(i,j,k2,ie)-x(i,j,k1,ie))**2 &
300  + (y(i,j,k2,ie)-y(i,j,k1,ie))**2 &
301  + (z(i,j,k2,ie)-z(i,j,k1,ie))**2 )
302  wsum = wsum + weight
303  end do
304  end do
305  lt2 = lt2/wsum
306  lt(ie) = 1d0/sqrt(lt2)
307  else ! 2D
308  lr2 = 0d0
309  wsum = 0d0
310  do j=1,ny
311  weight = w(j)
312  lr2 = lr2 + weight / &
313  ( (x(i2,j,1,ie)-x(i1,j,1,ie))**2 &
314  + (y(i2,j,1,ie)-y(i1,j,1,ie))**2 )
315  wsum = wsum + weight
316  enddo
317  lr2 = lr2/wsum
318  lr(ie) = 1d0/sqrt(lr2)
319  ls2 = 0d0
320  wsum = 0d0
321  do i=1,nx
322  weight = w(i)
323  ls2 = ls2 + weight / &
324  ( (x(i,j2,1,ie)-x(i,j1,1,ie))**2 &
325  + (y(i,j2,1,ie)-y(i,j1,1,ie))**2 )
326  wsum = wsum + weight
327  enddo
328  ls2 = ls2/wsum
329  ls(ie) = 1d0/sqrt(ls2)
330  endif
331  enddo
332  ie = 1014
333  end subroutine plane_space
334 
336  subroutine fdm_setup_fast(this, ah, bh, nl, n)
337  integer, intent(in) :: nl, n
338  type(fdm_t), intent(inout) :: this
339  real(kind=rp), intent(inout) :: ah(n+1,n+1), bh(n+1)
340  real(kind=rp), dimension(2*this%Xh%lx + 4) :: lr, ls, lt
341  integer :: i, j, k
342  integer :: ie, il, nr, ns, nt
343  integer :: lbr, rbr, lbs, rbs, lbt, rbt
344  real(kind=rp) :: eps, diag
345 
346  associate(s => this%s, d => this%d, &
347  llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
348  lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
349  lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
350  do ie=1,this%dof%msh%nelv
351  lbr = this%dof%msh%facet_type(1, ie)
352  rbr = this%dof%msh%facet_type(2, ie)
353  lbs = this%dof%msh%facet_type(3, ie)
354  rbs = this%dof%msh%facet_type(4, ie)
355  lbt = this%dof%msh%facet_type(5, ie)
356  rbt = this%dof%msh%facet_type(6, ie)
357 
358  nr = nl
359  ns = nl
360  nt = nl
361  call fdm_setup_fast1d(s(1,1,1,ie), lr, nr, lbr, rbr, &
362  llr(ie), lmr(ie), lrr(ie), ah, bh, n)
363  call fdm_setup_fast1d(s(1,1,2,ie), ls, ns, lbs, rbs, &
364  lls(ie), lms(ie), lrs(ie), ah, bh, n)
365  if(this%dof%msh%gdim .eq. 3) then
366  call fdm_setup_fast1d(s(1,1,3,ie), lt, nt, lbt, rbt, &
367  llt(ie), lmt(ie), lrt(ie), ah, bh, n)
368  end if
369 
370  il = 1
371  if(.not. this%dof%msh%gdim .eq. 3) then
372  eps = 1d-5 * (vlmax(lr(2), nr-2) + vlmax(ls(2), ns-2))
373  do j = 1, ns
374  do i = 1, nr
375  diag = lr(i) + ls(j)
376  if (diag .gt. eps) then
377  d(il,ie) = 1.0_rp / diag
378  else
379  d(il,ie) = 0.0_rp
380  endif
381  il = il + 1
382  end do
383  end do
384  else
385  eps = 1d-5 * (vlmax(lr(2), nr-2) + &
386  vlmax(ls(2),ns-2) + vlmax(lt(2), nt-2))
387  do k = 1, nt
388  do j = 1, ns
389  do i = 1, nr
390  diag = lr(i) + ls(j) + lt(k)
391  if (diag .gt. eps) then
392  d(il,ie) = 1.0_rp / diag
393  else
394  d(il,ie) = 0.0_rp
395  endif
396  il = il + 1
397  end do
398  end do
399  end do
400  endif
401  end do
402  end associate
403 
404  end subroutine fdm_setup_fast
405 
406  subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
407  integer, intent(in) :: nl, lbc, rbc, n
408  real(kind=rp), intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
409  real(kind=rp), intent(inout) :: ah(0:n, 0:n), bh(0:n)
410  integer :: lx1, lxm
411  real(kind=rp) :: b(2*(n+3)**2)
412 
413  lx1 = n + 1
414  lxm = lx1 + 2
415 
416  call fdm_setup_fast1d_a(s, lbc, rbc, ll, lm, lr, ah, n)
417  call fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
418  call generalev(s, b, lam, nl, lx1)
419  if(lbc .gt. 0) call row_zero(s, nl, nl, 1)
420  if(lbc .eq. 1) call row_zero(s, nl, nl, 2)
421  if(rbc .gt. 0) call row_zero(s, nl, nl, nl)
422  if(rbc .eq. 1) call row_zero(s, nl, nl, nl-1)
423 
424  call trsp(s(1,1,2), nl, s, nl)
425 
426  end subroutine fdm_setup_fast1d
427 
431  subroutine generalev(a, b, lam, n, lx)
432  integer, intent(in) :: n, lx
433  real(kind=rp), intent(inout) :: a(n,n), b(n,n), lam(n)
434  integer :: lbw, lw
435  real(kind=rp) :: bw(4*(lx+2)**3)
436 
437  lbw = 4*(lx+2)**3
438  lw = n*n
439  call sygv(a, b, lam, n, lx, bw, lbw)
440 
441  end subroutine generalev
442 
443  subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
444  integer, intent(in) :: n, lx, lbw
445  real(kind=sp), intent(inout) :: a(n,n), b(n,n), lam(n)
446  real(kind=sp) :: bw(4*(lx+2)**3)
447  integer :: info = 0
448  call ssygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)
449  end subroutine sp_sygv
450 
451  subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
452  integer, intent(in) :: n, lx, lbw
453  real(kind=dp), intent(inout) :: a(n,n), b(n,n), lam(n)
454  real(kind=dp) :: bw(4*(lx+2)**3)
455  integer :: info = 0
456  call dsygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)
457  end subroutine dp_sygv
458 
459  subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
460  integer, intent(in) :: n, lx, lbw
461  real(kind=qp), intent(inout) :: a(n,n), b(n,n), lam(n)
462  real(kind=dp) :: a2(n,n), b2(n,n), lam2(n)
463  real(kind=qp) :: bw(4*(lx+2)**3)
464  real(kind=dp) :: bw2(4*(lx+2)**3)
465  integer :: info = 0
466 
467  a2 = real(a, dp)
468  b2 = real(b, dp)
469  lam2 = real(lam, dp)
470  call dsygv(1, 'V', 'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
471  a = real(a2, qp)
472  b = real(b2, qp)
473  lam = real(lam2, qp)
474  if (pe_rank .eq. 0) then
475  call neko_warning('Real precision choice not supported for fdm, treating it as double')
476  end if
477 
478  end subroutine qp_sygv
479 
480  subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
481  integer, intent(in) ::lbc, rbc, n
482  real(kind=rp), intent(inout) :: a(0:n+2,0:n+2), ll, lm, lr
483  real(kind=rp), intent(inout) :: ah(0:n,0:n)
484  real(kind=rp) :: fac
485  integer :: i, j, i0, i1
486 
487  i0 = 0
488  if(lbc .eq. 1) i0 = 1
489  i1 = n
490  if(rbc .eq. 1) i1 = n - 1
491 
492  call rzero(a, (n+3) * (n+3))
493 
494  fac = 2.0_rp / lm
495  a(1,1) = 1.0_rp
496  a(n+1,n+1) = 1.0-rp
497 
498  do j = i0, i1
499  do i = i0, i1
500  a(i+1,j+1) = fac * ah(i,j)
501  enddo
502  enddo
503 
504  if(lbc .eq. 0) then
505  fac = 2.0_rp / ll
506  a(0,0) = fac * ah(n-1,n-1)
507  a(1,0) = fac * ah(n ,n-1)
508  a(0,1) = fac * ah(n-1,n )
509  a(1,1) = a(1,1) + fac * ah(n,n)
510  else
511  a(0,0) = 1.0_rp
512  endif
513 
514  if(rbc .eq. 0) then
515  fac = 2.0_rp / lr
516  a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0)
517  a(n+2,n+1) = fac * ah(1,0)
518  a(n+1,n+2) = fac * ah(0,1)
519  a(n+2,n+2) = fac * ah(1,1)
520  else
521  a(n+2,n+2) = 1.0_rp
522  endif
523 
524  end subroutine fdm_setup_fast1d_a
525 
526  subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
527  integer, intent(in) :: lbc, rbc, n
528  real(kind=rp), intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
529  real(kind=rp), intent(inout) :: bh(0:n)
530  real(kind=rp) :: fac
531  integer :: i, i0, i1
532 
533  i0 = 0
534  if(lbc .eq. 1) i0 = 1
535  i1 = n
536  if(rbc .eq. 1) i1 = n - 1
537 
538  call rzero(b, (n + 3) * (n + 3))
539 
540  fac = 0.5_rp * lm
541  b(1,1) = 1.0_rp
542  b(n+1,n+1) = 1.0_rp
543 
544  do i = i0, i1
545  b(i+1,i+1) = fac * bh(i)
546  end do
547 
548  if(lbc .eq. 0) then
549  fac = 0.5_rp * ll
550  b(0,0) = fac * bh(n-1)
551  b(1,1) = b(1,1) + fac * bh(n)
552  else
553  b(0,0) = 1.0_rp
554  end if
555 
556  if(rbc .eq. 0) then
557  fac = 0.5_rp * lr
558  b(n+1,n+1) = b(n+1,n+1) + fac * bh(0)
559  b(n+2,n+2) = fac * bh(1)
560  else
561  b(n+2,n+2) = 1.0_rp
562  end if
563 
564  end subroutine fdm_setup_fast1d_b
565 
566  subroutine fdm_free(this)
567  class(fdm_t), intent(inout) :: this
568 
569  if(allocated(this%s)) then
570  deallocate(this%s)
571  end if
572 
573  if(allocated(this%d)) then
574  deallocate(this%d)
575  end if
576 
577  if(allocated(this%len_lr)) then
578  deallocate(this%len_lr)
579  end if
580 
581  if(allocated(this%len_ls)) then
582  deallocate(this%len_ls)
583  end if
584 
585  if(allocated(this%len_lt)) then
586  deallocate(this%len_lt)
587  end if
588 
589  if(allocated(this%len_mr)) then
590  deallocate(this%len_mr)
591  end if
592 
593  if(allocated(this%len_ms)) then
594  deallocate(this%len_ms)
595  end if
596 
597  if(allocated(this%len_mt)) then
598  deallocate(this%len_mt)
599  end if
600 
601  if(allocated(this%len_rr)) then
602  deallocate(this%len_rr)
603  end if
604 
605  if(allocated(this%len_rs)) then
606  deallocate(this%len_rs)
607  end if
608 
609  if(allocated(this%len_rt)) then
610  deallocate(this%len_rt)
611  end if
612 
613  if(allocated(this%swplen)) then
614  deallocate(this%swplen)
615  end if
616 
617  nullify(this%Xh)
618  nullify(this%dof)
619  nullify(this%gs_h)
620  nullify(this%msh)
621 
622  end subroutine fdm_free
623 
624  subroutine fdm_compute(this, e, r, stream)
625  class(fdm_t), intent(inout) :: this
626  real(kind=rp), dimension((this%Xh%lx+2)**3, this%msh%nelv), intent(inout) :: e, r
627  type(c_ptr), optional :: stream
628  type(c_ptr) :: strm
629 
630  if (present(stream)) then
631  strm = stream
632  else
633  strm = glb_cmd_queue
634  end if
635 
636  if (neko_bcknd_sx .eq. 1) then
637  call fdm_do_fast_sx(e, r, this%s, this%d, &
638  this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
639  else if (neko_bcknd_xsmm .eq. 1) then
640  call fdm_do_fast_xsmm(e, r, this%s, this%d, &
641  this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
642  else if (neko_bcknd_device .eq. 1) then
643  call fdm_do_fast_device(e, r, this%s, this%d, &
644  this%Xh%lx+2, this%msh%gdim, this%msh%nelv, strm)
645  else
646  call fdm_do_fast_cpu(e, r, this%s, this%d, &
647  this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
648  end if
649 
650  end subroutine fdm_compute
651 
652 
653 end module fdm
double real
Definition: device_config.h:12
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Copy data between host and device (or device and device)
Definition: device.F90:51
Definition: comm.F90:1
integer pe_rank
MPI rank.
Definition: comm.F90:26
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
integer, parameter, public host_to_device
Definition: device.F90:47
integer, parameter, public device_to_host
Definition: device.F90:47
Defines a mapping of the degrees of freedom.
Definition: dofmap.f90:35
Fast diagonalization methods from NEKTON.
Definition: fast3d.f90:61
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
Definition: fast3d.f90:168
Fast Diagonalization.
Definition: fdm_cpu.f90:2
Fast Diagonalization SX-Aurora backend.
Definition: fdm_sx.f90:34
Fast Diagonalization libxsmm backend.
Definition: fdm_xsmm.f90:2
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition: fdm.f90:61
subroutine swap_lengths(this, x, y, z, nelv, gdim)
Definition: fdm.f90:161
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
Definition: fdm.f90:337
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
Definition: fdm.f90:460
subroutine fdm_compute(this, e, r, stream)
Definition: fdm.f90:625
subroutine fdm_free(this)
Definition: fdm.f90:567
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
Definition: fdm.f90:452
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem /$ A x = lam B x/$ A – symm. B – symm., pos....
Definition: fdm.f90:432
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
Definition: fdm.f90:407
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
Definition: fdm.f90:527
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
Definition: fdm.f90:248
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
Definition: fdm.f90:481
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
Definition: fdm.f90:444
subroutine fdm_init(this, Xh, dm, gs_h)
Definition: fdm.f90:110
Gather-scatter.
Definition: math.f90:60
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:184
Defines a mesh.
Definition: mesh.f90:34
Build configurations.
Definition: neko_config.f90:34
integer, parameter neko_bcknd_device
Definition: neko_config.f90:44
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Defines a function space.
Definition: space.f90:34
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition: speclib.f90:148
Tensor operations.
Definition: tensor.f90:61
Utilities.
Definition: utils.f90:35
The function space for the SEM solution fields.
Definition: space.f90:62