Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
fdm.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module fdm
62 use neko_config
63 use num_types
64 use speclib
65 use math
66 use mesh
67 use space
68 use dofmap
70 use fast3d
71 use tensor
72 use fdm_sx
73 use fdm_xsmm
74 use fdm_cpu
75 use fdm_device
76 use device
77 use utils
78 use comm, only : pe_rank
79 use, intrinsic :: iso_c_binding
80 implicit none
81 private
82
83 type, public :: fdm_t
84 real(kind=rp), allocatable :: s(:,:,:,:)
85 real(kind=rp), allocatable :: d(:,:)
86 type(c_ptr) :: s_d = c_null_ptr
87 type(c_ptr) :: d_d = c_null_ptr
88 real(kind=rp), allocatable :: len_lr(:), len_ls(:), len_lt(:)
89 real(kind=rp), allocatable :: len_mr(:), len_ms(:), len_mt(:)
90 real(kind=rp), allocatable :: len_rr(:), len_rs(:), len_rt(:)
91 real(kind=rp), allocatable :: swplen(:,:,:,:)
92 type(c_ptr) :: swplen_d = c_null_ptr
93 type(space_t), pointer :: xh
94 type(dofmap_t), pointer :: dof
95 type(gs_t), pointer :: gs_h
96 type(mesh_t), pointer :: msh
97 contains
98 procedure, pass(this) :: init => fdm_init
99 procedure, pass(this) :: free => fdm_free
100 procedure, pass(this) :: compute => fdm_compute
101 end type fdm_t
102
103 interface sygv
104 module procedure sp_sygv, dp_sygv, qp_sygv
105 end interface sygv
106
107contains
108
109 subroutine fdm_init(this, Xh, dof, gs_h)
110 class(fdm_t), intent(inout) :: this
111 type(space_t), target, intent(inout) :: Xh
112 type(dofmap_t), target, intent(in) :: dof
113 type(gs_t), target, intent(inout) :: gs_h
114 !We only really use ah, bh
115 real(kind=rp), dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
116 real(kind=rp), dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
117 integer :: nl, n, nelv
118
119 n = xh%lx -1 !Polynomnial degree
120 nl = xh%lx + 2 !Schwarz!
121 nelv = dof%msh%nelv
122 call fdm_free(this)
123 allocate(this%s(nl*nl,2,dof%msh%gdim, dof%msh%nelv))
124 allocate(this%d(nl**3,dof%msh%nelv))
125 allocate(this%swplen(xh%lx, xh%lx, xh%lx,dof%msh%nelv))
126 allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
127 allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
128 allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))
129
130 ! Zeroing here enables easier debugging since then
131 ! MPI messages in GS are deterministic
132 call rzero(this%swplen, xh%lxyz * dof%msh%nelv)
133
134 if (neko_bcknd_device .eq. 1) then
135 call device_map(this%s, this%s_d,nl*nl*2*dof%msh%gdim*dof%msh%nelv)
136 call device_map(this%d, this%d_d,nl**dof%msh%gdim*dof%msh%nelv)
137 call device_map(this%swplen,this%swplen_d, xh%lxyz*dof%msh%nelv)
138 end if
139
140 call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
141 this%Xh => xh
142 this%dof => dof
143 this%gs_h => gs_h
144 this%msh => dof%msh
145
146 call swap_lengths(this, dof%x, dof%y, dof%z, dof%msh%nelv, dof%msh%gdim)
147
148 call fdm_setup_fast(this, ah, bh, nl, n)
149
150 if (neko_bcknd_device .eq. 1) then
151 call device_memcpy(this%s, this%s_d, &
152 nl*nl*2*dof%msh%gdim*dof%msh%nelv, host_to_device, sync=.false.)
153 call device_memcpy(this%d, this%d_d, &
154 nl**dof%msh%gdim*dof%msh%nelv, host_to_device, sync=.false.)
155 call device_memcpy(this%swplen, this%swplen_d, &
156 xh%lxyz*dof%msh%nelv, host_to_device, sync=.false.)
157 end if
158 end subroutine fdm_init
159
160 subroutine swap_lengths(this, x, y, z, nelv, gdim)
161 type(fdm_t), intent(inout) :: this
162 integer, intent(in) :: gdim, nelv
163 real(kind=rp), dimension(this%Xh%lxyz,nelv) , intent(in):: x, y, z
164 integer :: j, k, e, n2, nz0, nzn, nx, lx1, n
165
166 associate(l => this%swplen, xh =>this%Xh, &
167 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
168 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
169 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
170 lx1 = this%Xh%lx
171 n2 = lx1 - 1
172 nz0 = 1
173 nzn = 1
174 nx = lx1 - 2
175 if (gdim .eq. 3) then
176 nz0 = 0
177 nzn = n2
178 end if
179 call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z,&
180 nx, n2, nz0, nzn, nelv, gdim)
181 n = n2 + 1
182 if (gdim .eq. 3) then
183 do e = 1,nelv
184 do j = 2,n2
185 do k = 2,n2
186 l(1,k,j,e) = lmr(e)
187 l(n,k,j,e) = lmr(e)
188 l(k,1,j,e) = lms(e)
189 l(k,n,j,e) = lms(e)
190 l(k,j,1,e) = lmt(e)
191 l(k,j,n,e) = lmt(e)
192 end do
193 end do
194 end do
195 if (neko_bcknd_device .eq. 1) then
196 call device_memcpy(l, this%swplen_d, this%dof%size(), &
197 host_to_device, sync=.false.)
198 call this%gs_h%op(l, this%dof%size(), gs_op_add)
199 call device_memcpy(l, this%swplen_d, this%dof%size(), &
200 device_to_host, sync=.false.)
201 else
202 call this%gs_h%op(l, this%dof%size(), gs_op_add)
203 end if
204
205 do e = 1,nelv
206 llr(e) = l(1,2,2,e) - lmr(e)
207 lrr(e) = l(n,2,2,e) - lmr(e)
208 lls(e) = l(2,1,2,e) - lms(e)
209 lrs(e) = l(2,n,2,e) - lms(e)
210 llt(e) = l(2,2,1,e) - lmt(e)
211 lrt(e) = l(2,2,n,e) - lmt(e)
212 end do
213 else
214 do e = 1,nelv
215 do j = 2,n2
216 l(1,j,1,e) = lmr(e)
217 l(n,j,1,e) = lmr(e)
218 l(j,1,1,e) = lms(e)
219 l(j,n,1,e) = lms(e)
220 end do
221 end do
222
223 if (neko_bcknd_device .eq. 1) then
224 call device_memcpy(l, this%swplen_d, this%dof%size(), &
225 host_to_device, sync=.false.)
226 call this%gs_h%op(l, this%dof%size(), gs_op_add)
227 call device_memcpy(l, this%swplen_d, this%dof%size(), &
228 device_to_host, sync=.true.)
229 else
230 call this%gs_h%op(l, this%dof%size(), gs_op_add)
231 end if
232
233 do e = 1,nelv
234 llr(e) = l(1,2,1,e) - lmr(e)
235 lrr(e) = l(n,2,1,e) - lmr(e)
236 lls(e) = l(2,1,1,e) - lms(e)
237 lrs(e) = l(2,n,1,e) - lms(e)
238 end do
239 end if
240 end associate
241 end subroutine swap_lengths
242
246 subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, &
247 nx, nxn, nz0, nzn, nelv, gdim)
248 integer, intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
249 real(kind=rp), intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
250 real(kind=rp), intent(inout) :: w(nx)
251 real(kind=rp), intent(in) :: x(0:nxn,0:nxn,nz0:nzn,nelv)
252 real(kind=rp), intent(in) :: y(0:nxn,0:nxn,nz0:nzn,nelv)
253 real(kind=rp), intent(in) :: z(0:nxn,0:nxn,nz0:nzn,nelv)
254 real(kind=rp) :: lr2, ls2, lt2, weight, wsum
255 integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie
256 ny = nx
257 nz = nx
258 j1 = i1
259 k1 = i1
260 j2 = i2
261 k2 = i2
262 ! Now, for each element, compute lr,ls,lt between specified planes
263 do ie = 1,nelv
264 if (gdim .eq. 3) then
265 lr2 = 0d0
266 wsum = 0d0
267 do k = 1,nz
268 do j = 1,ny
269 weight = w(j)*w(k)
270 lr2 = lr2 + weight /&
271 ( (x(i2,j,k,ie)-x(i1,j,k,ie))**2&
272 + (y(i2,j,k,ie)-y(i1,j,k,ie))**2&
273 + (z(i2,j,k,ie)-z(i1,j,k,ie))**2 )
274 wsum = wsum + weight
275 end do
276 end do
277 lr2 = lr2/wsum
278 lr(ie) = 1d0/sqrt(lr2)
279 ls2 = 0d0
280 wsum = 0d0
281 do k = 1,nz
282 do i = 1,nx
283 weight = w(i)*w(k)
284 ls2 = ls2 + weight / &
285 ( (x(i,j2,k,ie)-x(i,j1,k,ie))**2 &
286 + (y(i,j2,k,ie)-y(i,j1,k,ie))**2 &
287 + (z(i,j2,k,ie)-z(i,j1,k,ie))**2 )
288 wsum = wsum + weight
289 end do
290 end do
291 ls2 = ls2/wsum
292 ls(ie) = 1d0/sqrt(ls2)
293 lt2 = 0d0
294 wsum = 0d0
295 do j=1,ny
296 do i=1,nx
297 weight = w(i)*w(j)
298 lt2 = lt2 + weight / &
299 ( (x(i,j,k2,ie)-x(i,j,k1,ie))**2 &
300 + (y(i,j,k2,ie)-y(i,j,k1,ie))**2 &
301 + (z(i,j,k2,ie)-z(i,j,k1,ie))**2 )
302 wsum = wsum + weight
303 end do
304 end do
305 lt2 = lt2/wsum
306 lt(ie) = 1d0/sqrt(lt2)
307 else ! 2D
308 lr2 = 0d0
309 wsum = 0d0
310 do j=1,ny
311 weight = w(j)
312 lr2 = lr2 + weight / &
313 ( (x(i2,j,1,ie)-x(i1,j,1,ie))**2 &
314 + (y(i2,j,1,ie)-y(i1,j,1,ie))**2 )
315 wsum = wsum + weight
316 enddo
317 lr2 = lr2/wsum
318 lr(ie) = 1d0/sqrt(lr2)
319 ls2 = 0d0
320 wsum = 0d0
321 do i=1,nx
322 weight = w(i)
323 ls2 = ls2 + weight / &
324 ( (x(i,j2,1,ie)-x(i,j1,1,ie))**2 &
325 + (y(i,j2,1,ie)-y(i,j1,1,ie))**2 )
326 wsum = wsum + weight
327 enddo
328 ls2 = ls2/wsum
329 ls(ie) = 1d0/sqrt(ls2)
330 endif
331 enddo
332 ie = 1014
333 end subroutine plane_space
334
336 subroutine fdm_setup_fast(this, ah, bh, nl, n)
337 integer, intent(in) :: nl, n
338 type(fdm_t), intent(inout) :: this
339 real(kind=rp), intent(inout) :: ah(n+1,n+1), bh(n+1)
340 real(kind=rp), dimension(2*this%Xh%lx + 4) :: lr, ls, lt
341 integer :: i, j, k
342 integer :: ie, il, nr, ns, nt
343 integer :: lbr, rbr, lbs, rbs, lbt, rbt
344 real(kind=rp) :: eps, diag
345
346 associate(s => this%s, d => this%d, &
347 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
348 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
349 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
350 do ie=1,this%dof%msh%nelv
351 lbr = this%dof%msh%facet_type(1, ie)
352 rbr = this%dof%msh%facet_type(2, ie)
353 lbs = this%dof%msh%facet_type(3, ie)
354 rbs = this%dof%msh%facet_type(4, ie)
355 lbt = this%dof%msh%facet_type(5, ie)
356 rbt = this%dof%msh%facet_type(6, ie)
357
358 nr = nl
359 ns = nl
360 nt = nl
361 call fdm_setup_fast1d(s(1,1,1,ie), lr, nr, lbr, rbr, &
362 llr(ie), lmr(ie), lrr(ie), ah, bh, n)
363 call fdm_setup_fast1d(s(1,1,2,ie), ls, ns, lbs, rbs, &
364 lls(ie), lms(ie), lrs(ie), ah, bh, n)
365 if(this%dof%msh%gdim .eq. 3) then
366 call fdm_setup_fast1d(s(1,1,3,ie), lt, nt, lbt, rbt, &
367 llt(ie), lmt(ie), lrt(ie), ah, bh, n)
368 end if
369
370 il = 1
371 if(.not. this%dof%msh%gdim .eq. 3) then
372 eps = 1d-5 * (vlmax(lr(2), nr-2) + vlmax(ls(2), ns-2))
373 do j = 1, ns
374 do i = 1, nr
375 diag = lr(i) + ls(j)
376 if (diag .gt. eps) then
377 d(il,ie) = 1.0_rp / diag
378 else
379 d(il,ie) = 0.0_rp
380 endif
381 il = il + 1
382 end do
383 end do
384 else
385 eps = 1d-5 * (vlmax(lr(2), nr-2) + &
386 vlmax(ls(2),ns-2) + vlmax(lt(2), nt-2))
387 do k = 1, nt
388 do j = 1, ns
389 do i = 1, nr
390 diag = lr(i) + ls(j) + lt(k)
391 if (diag .gt. eps) then
392 d(il,ie) = 1.0_rp / diag
393 else
394 d(il,ie) = 0.0_rp
395 endif
396 il = il + 1
397 end do
398 end do
399 end do
400 endif
401 end do
402 end associate
403
404 end subroutine fdm_setup_fast
405
406 subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
407 integer, intent(in) :: nl, lbc, rbc, n
408 real(kind=rp), intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
409 real(kind=rp), intent(inout) :: ah(0:n, 0:n), bh(0:n)
410 integer :: lx1, lxm
411 real(kind=rp) :: b(2*(n+3)**2)
412
413 lx1 = n + 1
414 lxm = lx1 + 2
415
416 call fdm_setup_fast1d_a(s, lbc, rbc, ll, lm, lr, ah, n)
417 call fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
418 call generalev(s, b, lam, nl, lx1)
419 if(lbc .gt. 0) call row_zero(s, nl, nl, 1)
420 if(lbc .eq. 1) call row_zero(s, nl, nl, 2)
421 if(rbc .gt. 0) call row_zero(s, nl, nl, nl)
422 if(rbc .eq. 1) call row_zero(s, nl, nl, nl-1)
423
424 call trsp(s(1,1,2), nl, s, nl)
425
426 end subroutine fdm_setup_fast1d
427
431 subroutine generalev(a, b, lam, n, lx)
432 integer, intent(in) :: n, lx
433 real(kind=rp), intent(inout) :: a(n,n), b(n,n), lam(n)
434 integer :: lbw, lw
435 real(kind=rp) :: bw(4*(lx+2)**3)
436
437 lbw = 4*(lx+2)**3
438 lw = n*n
439 call sygv(a, b, lam, n, lx, bw, lbw)
440
441 end subroutine generalev
442
443 subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
444 integer, intent(in) :: n, lx, lbw
445 real(kind=sp), intent(inout) :: a(n,n), b(n,n), lam(n)
446 real(kind=sp) :: bw(4*(lx+2)**3)
447 integer :: info = 0
448 call ssygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)
449 end subroutine sp_sygv
450
451 subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
452 integer, intent(in) :: n, lx, lbw
453 real(kind=dp), intent(inout) :: a(n,n), b(n,n), lam(n)
454 real(kind=dp) :: bw(4*(lx+2)**3)
455 integer :: info = 0
456 call dsygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)
457 end subroutine dp_sygv
458
459 subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
460 integer, intent(in) :: n, lx, lbw
461 real(kind=qp), intent(inout) :: a(n,n), b(n,n), lam(n)
462 real(kind=dp) :: a2(n,n), b2(n,n), lam2(n)
463 real(kind=qp) :: bw(4*(lx+2)**3)
464 real(kind=dp) :: bw2(4*(lx+2)**3)
465 integer :: info = 0
466
467 a2 = real(a, dp)
468 b2 = real(b, dp)
469 lam2 = real(lam, dp)
470 call dsygv(1, 'V', 'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
471 a = real(a2, qp)
472 b = real(b2, qp)
473 lam = real(lam2, qp)
474 if (pe_rank .eq. 0) then
475 call neko_warning('Real precision choice not supported for fdm, treating it as double')
476 end if
477
478 end subroutine qp_sygv
479
480 subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
481 integer, intent(in) ::lbc, rbc, n
482 real(kind=rp), intent(inout) :: a(0:n+2,0:n+2), ll, lm, lr
483 real(kind=rp), intent(inout) :: ah(0:n,0:n)
484 real(kind=rp) :: fac
485 integer :: i, j, i0, i1
486
487 i0 = 0
488 if(lbc .eq. 1) i0 = 1
489 i1 = n
490 if(rbc .eq. 1) i1 = n - 1
491
492 call rzero(a, (n+3) * (n+3))
493
494 fac = 2.0_rp / lm
495 a(1,1) = 1.0_rp
496 a(n+1,n+1) = 1.0-rp
497
498 do j = i0, i1
499 do i = i0, i1
500 a(i+1,j+1) = fac * ah(i,j)
501 enddo
502 enddo
503
504 if(lbc .eq. 0) then
505 fac = 2.0_rp / ll
506 a(0,0) = fac * ah(n-1,n-1)
507 a(1,0) = fac * ah(n ,n-1)
508 a(0,1) = fac * ah(n-1,n )
509 a(1,1) = a(1,1) + fac * ah(n,n)
510 else
511 a(0,0) = 1.0_rp
512 endif
513
514 if(rbc .eq. 0) then
515 fac = 2.0_rp / lr
516 a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0)
517 a(n+2,n+1) = fac * ah(1,0)
518 a(n+1,n+2) = fac * ah(0,1)
519 a(n+2,n+2) = fac * ah(1,1)
520 else
521 a(n+2,n+2) = 1.0_rp
522 endif
523
524 end subroutine fdm_setup_fast1d_a
525
526 subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
527 integer, intent(in) :: lbc, rbc, n
528 real(kind=rp), intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
529 real(kind=rp), intent(inout) :: bh(0:n)
530 real(kind=rp) :: fac
531 integer :: i, i0, i1
532
533 i0 = 0
534 if(lbc .eq. 1) i0 = 1
535 i1 = n
536 if(rbc .eq. 1) i1 = n - 1
537
538 call rzero(b, (n + 3) * (n + 3))
539
540 fac = 0.5_rp * lm
541 b(1,1) = 1.0_rp
542 b(n+1,n+1) = 1.0_rp
543
544 do i = i0, i1
545 b(i+1,i+1) = fac * bh(i)
546 end do
547
548 if(lbc .eq. 0) then
549 fac = 0.5_rp * ll
550 b(0,0) = fac * bh(n-1)
551 b(1,1) = b(1,1) + fac * bh(n)
552 else
553 b(0,0) = 1.0_rp
554 end if
555
556 if(rbc .eq. 0) then
557 fac = 0.5_rp * lr
558 b(n+1,n+1) = b(n+1,n+1) + fac * bh(0)
559 b(n+2,n+2) = fac * bh(1)
560 else
561 b(n+2,n+2) = 1.0_rp
562 end if
563
564 end subroutine fdm_setup_fast1d_b
565
566 subroutine fdm_free(this)
567 class(fdm_t), intent(inout) :: this
568
569 if(allocated(this%s)) then
570 deallocate(this%s)
571 end if
572
573 if(allocated(this%d)) then
574 deallocate(this%d)
575 end if
576
577 if(allocated(this%len_lr)) then
578 deallocate(this%len_lr)
579 end if
580
581 if(allocated(this%len_ls)) then
582 deallocate(this%len_ls)
583 end if
584
585 if(allocated(this%len_lt)) then
586 deallocate(this%len_lt)
587 end if
588
589 if(allocated(this%len_mr)) then
590 deallocate(this%len_mr)
591 end if
592
593 if(allocated(this%len_ms)) then
594 deallocate(this%len_ms)
595 end if
596
597 if(allocated(this%len_mt)) then
598 deallocate(this%len_mt)
599 end if
600
601 if(allocated(this%len_rr)) then
602 deallocate(this%len_rr)
603 end if
604
605 if(allocated(this%len_rs)) then
606 deallocate(this%len_rs)
607 end if
608
609 if(allocated(this%len_rt)) then
610 deallocate(this%len_rt)
611 end if
612
613 if(allocated(this%swplen)) then
614 deallocate(this%swplen)
615 end if
616
617 nullify(this%Xh)
618 nullify(this%dof)
619 nullify(this%gs_h)
620 nullify(this%msh)
621
622 end subroutine fdm_free
623
624 subroutine fdm_compute(this, e, r, stream)
625 class(fdm_t), intent(inout) :: this
626 real(kind=rp), dimension((this%Xh%lx+2)**3, this%msh%nelv), intent(inout) :: e, r
627 type(c_ptr), optional :: stream
628 type(c_ptr) :: strm
629
630 if (present(stream)) then
631 strm = stream
632 else
633 strm = glb_cmd_queue
634 end if
635
636 if (neko_bcknd_sx .eq. 1) then
637 call fdm_do_fast_sx(e, r, this%s, this%d, &
638 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
639 else if (neko_bcknd_xsmm .eq. 1) then
640 call fdm_do_fast_xsmm(e, r, this%s, this%d, &
641 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
642 else if (neko_bcknd_device .eq. 1) then
643 call fdm_do_fast_device(e, r, this%s, this%d, &
644 this%Xh%lx+2, this%msh%gdim, this%msh%nelv, strm)
645 else
646 call fdm_do_fast_cpu(e, r, this%s, this%d, &
647 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
648 end if
649
650 end subroutine fdm_compute
651
652
653end module fdm
double real
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Definition comm.F90:1
integer pe_rank
MPI rank.
Definition comm.F90:28
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
integer, parameter, public device_to_host
Definition device.F90:47
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
Definition fast3d.f90:168
Fast Diagonalization.
Definition fdm_cpu.f90:2
Fast Diagonalization SX-Aurora backend.
Definition fdm_sx.f90:34
Fast Diagonalization libxsmm backend.
Definition fdm_xsmm.f90:2
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
subroutine swap_lengths(this, x, y, z, nelv, gdim)
Definition fdm.f90:161
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
Definition fdm.f90:337
subroutine fdm_init(this, xh, dof, gs_h)
Definition fdm.f90:110
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:460
subroutine fdm_compute(this, e, r, stream)
Definition fdm.f90:625
subroutine fdm_free(this)
Definition fdm.f90:567
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:452
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem /$ A x = lam B x/$ A – symm. B – symm., pos....
Definition fdm.f90:432
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
Definition fdm.f90:407
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
Definition fdm.f90:527
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
Definition fdm.f90:248
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
Definition fdm.f90:481
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:444
Gather-scatter.
Definition math.f90:60
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition speclib.f90:148
Tensor operations.
Definition tensor.f90:61
Utilities.
Definition utils.f90:35
The function space for the SEM solution fields.
Definition space.f90:62