Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
fdm.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module fdm
62 use neko_config
63 use num_types, only : rp, sp, dp, qp
64 use mesh, only : mesh_t
65 use space, only : space_t
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
68 use fdm_cpu, only : fdm_do_fast_cpu
70 use fdm_sx, only : fdm_do_fast_sx
71 use fdm_xsmm, only : fdm_do_fast_xsmm
72 use utils, only : neko_error, neko_warning
73 use comm, only : pe_rank
74 use math, only : vlmax
77 use fast3d, only : semhat
78 use tensor, only : trsp
79 use math, only : rzero, row_zero
80 use, intrinsic :: iso_c_binding
81 implicit none
82 private
83
84 type, public :: fdm_t
85 real(kind=rp), allocatable :: s(:,:,:,:)
86 real(kind=rp), allocatable :: d(:,:)
87 type(c_ptr) :: s_d = c_null_ptr
88 type(c_ptr) :: d_d = c_null_ptr
89 real(kind=rp), allocatable :: len_lr(:), len_ls(:), len_lt(:)
90 real(kind=rp), allocatable :: len_mr(:), len_ms(:), len_mt(:)
91 real(kind=rp), allocatable :: len_rr(:), len_rs(:), len_rt(:)
92 real(kind=rp), allocatable :: swplen(:,:,:,:)
93 type(c_ptr) :: swplen_d = c_null_ptr
94 type(space_t), pointer :: xh => null()
95 type(dofmap_t), pointer :: dof => null()
96 type(gs_t), pointer :: gs_h => null()
97 type(mesh_t), pointer :: msh => null()
98 contains
99 procedure, pass(this) :: init => fdm_init
100 procedure, pass(this) :: free => fdm_free
101 procedure, pass(this) :: compute => fdm_compute
102 end type fdm_t
103
104 interface sygv
105 module procedure sp_sygv, dp_sygv, qp_sygv
106 end interface sygv
107
108contains
109
110 subroutine fdm_init(this, Xh, dof, gs_h)
111 class(fdm_t), intent(inout) :: this
112 type(space_t), target, intent(inout) :: Xh
113 type(dofmap_t), target, intent(in) :: dof
114 type(gs_t), target, intent(inout) :: gs_h
115 !We only really use ah, bh
116 real(kind=rp), dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
117 real(kind=rp), dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
118 integer :: nl, n, nelv
119
120 n = xh%lx -1 !Polynomnial degree
121 nl = xh%lx + 2 !Schwarz!
122 nelv = dof%msh%nelv
123 call fdm_free(this)
124 allocate(this%s(nl*nl,2,dof%msh%gdim, dof%msh%nelv))
125 allocate(this%d(nl**3,dof%msh%nelv))
126 allocate(this%swplen(xh%lx, xh%lx, xh%lx,dof%msh%nelv))
127 allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
128 allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
129 allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))
130
131 ! Zeroing here enables easier debugging since then
132 ! MPI messages in GS are deterministic
133 call rzero(this%swplen, xh%lxyz * dof%msh%nelv)
134
135 if (neko_bcknd_device .eq. 1) then
136 call device_map(this%s, this%s_d,nl*nl*2*dof%msh%gdim*dof%msh%nelv)
137 call device_map(this%d, this%d_d,nl**dof%msh%gdim*dof%msh%nelv)
138 call device_map(this%swplen,this%swplen_d, xh%lxyz*dof%msh%nelv)
139 end if
140
141 call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
142 this%Xh => xh
143 this%dof => dof
144 this%gs_h => gs_h
145 this%msh => dof%msh
146
147 call swap_lengths(this, dof%x, dof%y, dof%z, dof%msh%nelv, dof%msh%gdim)
148
149 call fdm_setup_fast(this, ah, bh, nl, n)
150
151 if (neko_bcknd_device .eq. 1) then
152 call device_memcpy(this%s, this%s_d, &
153 nl*nl*2*dof%msh%gdim*dof%msh%nelv, host_to_device, sync=.false.)
154 call device_memcpy(this%d, this%d_d, &
155 nl**dof%msh%gdim*dof%msh%nelv, host_to_device, sync=.false.)
156 call device_memcpy(this%swplen, this%swplen_d, &
157 xh%lxyz*dof%msh%nelv, host_to_device, sync=.false.)
158 end if
159 end subroutine fdm_init
160
161 subroutine swap_lengths(this, x, y, z, nelv, gdim)
162 type(fdm_t), intent(inout) :: this
163 integer, intent(in) :: gdim, nelv
164 real(kind=rp), dimension(this%Xh%lxyz,nelv) , intent(in):: x, y, z
165 integer :: j, k, e, n2, nz0, nzn, nx, lx1, n
166
167 associate(l => this%swplen, xh =>this%Xh, &
168 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
169 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
170 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
171 lx1 = this%Xh%lx
172 n2 = lx1 - 1
173 nz0 = 1
174 nzn = 1
175 nx = lx1 - 2
176 if (gdim .eq. 3) then
177 nz0 = 0
178 nzn = n2
179 end if
180 call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z,&
181 nx, n2, nz0, nzn, nelv, gdim)
182 n = n2 + 1
183 if (gdim .eq. 3) then
184 do e = 1,nelv
185 do j = 2,n2
186 do k = 2,n2
187 l(1,k,j,e) = lmr(e)
188 l(n,k,j,e) = lmr(e)
189 l(k,1,j,e) = lms(e)
190 l(k,n,j,e) = lms(e)
191 l(k,j,1,e) = lmt(e)
192 l(k,j,n,e) = lmt(e)
193 end do
194 end do
195 end do
196 if (neko_bcknd_device .eq. 1) then
197 call device_memcpy(l, this%swplen_d, this%dof%size(), &
198 host_to_device, sync=.false.)
199 call this%gs_h%op(l, this%dof%size(), gs_op_add)
200 call device_memcpy(l, this%swplen_d, this%dof%size(), &
201 device_to_host, sync=.true.)
202 else
203 call this%gs_h%op(l, this%dof%size(), gs_op_add)
204 end if
205
206 do e = 1,nelv
207 llr(e) = l(1,2,2,e) - lmr(e)
208 lrr(e) = l(n,2,2,e) - lmr(e)
209 lls(e) = l(2,1,2,e) - lms(e)
210 lrs(e) = l(2,n,2,e) - lms(e)
211 llt(e) = l(2,2,1,e) - lmt(e)
212 lrt(e) = l(2,2,n,e) - lmt(e)
213 end do
214 else
215 do e = 1,nelv
216 do j = 2,n2
217 l(1,j,1,e) = lmr(e)
218 l(n,j,1,e) = lmr(e)
219 l(j,1,1,e) = lms(e)
220 l(j,n,1,e) = lms(e)
221 end do
222 end do
223
224 if (neko_bcknd_device .eq. 1) then
225 call device_memcpy(l, this%swplen_d, this%dof%size(), &
226 host_to_device, sync=.false.)
227 call this%gs_h%op(l, this%dof%size(), gs_op_add)
228 call device_memcpy(l, this%swplen_d, this%dof%size(), &
229 device_to_host, sync=.true.)
230 else
231 call this%gs_h%op(l, this%dof%size(), gs_op_add)
232 end if
233
234 do e = 1,nelv
235 llr(e) = l(1,2,1,e) - lmr(e)
236 lrr(e) = l(n,2,1,e) - lmr(e)
237 lls(e) = l(2,1,1,e) - lms(e)
238 lrs(e) = l(2,n,1,e) - lms(e)
239 end do
240 end if
241 end associate
242 end subroutine swap_lengths
243
247 subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, &
248 nx, nxn, nz0, nzn, nelv, gdim)
249 integer, intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
250 real(kind=rp), intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
251 real(kind=rp), intent(inout) :: w(nx)
252 real(kind=rp), intent(in) :: x(0:nxn,0:nxn,nz0:nzn,nelv)
253 real(kind=rp), intent(in) :: y(0:nxn,0:nxn,nz0:nzn,nelv)
254 real(kind=rp), intent(in) :: z(0:nxn,0:nxn,nz0:nzn,nelv)
255 real(kind=rp) :: lr2, ls2, lt2, weight, wsum
256 integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie
257 ny = nx
258 nz = nx
259 j1 = i1
260 k1 = i1
261 j2 = i2
262 k2 = i2
263 ! Now, for each element, compute lr,ls,lt between specified planes
264 do ie = 1,nelv
265 if (gdim .eq. 3) then
266 lr2 = 0d0
267 wsum = 0d0
268 do k = 1,nz
269 do j = 1,ny
270 weight = w(j)*w(k)
271 lr2 = lr2 + weight /&
272 ( (x(i2,j,k,ie)-x(i1,j,k,ie))**2&
273 + (y(i2,j,k,ie)-y(i1,j,k,ie))**2&
274 + (z(i2,j,k,ie)-z(i1,j,k,ie))**2 )
275 wsum = wsum + weight
276 end do
277 end do
278 lr2 = lr2/wsum
279 lr(ie) = 1d0/sqrt(lr2)
280 ls2 = 0d0
281 wsum = 0d0
282 do k = 1,nz
283 do i = 1,nx
284 weight = w(i)*w(k)
285 ls2 = ls2 + weight / &
286 ( (x(i,j2,k,ie)-x(i,j1,k,ie))**2 &
287 + (y(i,j2,k,ie)-y(i,j1,k,ie))**2 &
288 + (z(i,j2,k,ie)-z(i,j1,k,ie))**2 )
289 wsum = wsum + weight
290 end do
291 end do
292 ls2 = ls2/wsum
293 ls(ie) = 1d0/sqrt(ls2)
294 lt2 = 0d0
295 wsum = 0d0
296 do j=1,ny
297 do i=1,nx
298 weight = w(i)*w(j)
299 lt2 = lt2 + weight / &
300 ( (x(i,j,k2,ie)-x(i,j,k1,ie))**2 &
301 + (y(i,j,k2,ie)-y(i,j,k1,ie))**2 &
302 + (z(i,j,k2,ie)-z(i,j,k1,ie))**2 )
303 wsum = wsum + weight
304 end do
305 end do
306 lt2 = lt2/wsum
307 lt(ie) = 1d0/sqrt(lt2)
308 else ! 2D
309 lr2 = 0d0
310 wsum = 0d0
311 do j=1,ny
312 weight = w(j)
313 lr2 = lr2 + weight / &
314 ( (x(i2,j,1,ie)-x(i1,j,1,ie))**2 &
315 + (y(i2,j,1,ie)-y(i1,j,1,ie))**2 )
316 wsum = wsum + weight
317 enddo
318 lr2 = lr2/wsum
319 lr(ie) = 1d0/sqrt(lr2)
320 ls2 = 0d0
321 wsum = 0d0
322 do i=1,nx
323 weight = w(i)
324 ls2 = ls2 + weight / &
325 ( (x(i,j2,1,ie)-x(i,j1,1,ie))**2 &
326 + (y(i,j2,1,ie)-y(i,j1,1,ie))**2 )
327 wsum = wsum + weight
328 enddo
329 ls2 = ls2/wsum
330 ls(ie) = 1d0/sqrt(ls2)
331 endif
332 enddo
333 ie = 1014
334 end subroutine plane_space
335
337 subroutine fdm_setup_fast(this, ah, bh, nl, n)
338 integer, intent(in) :: nl, n
339 type(fdm_t), intent(inout) :: this
340 real(kind=rp), intent(inout) :: ah(n+1,n+1), bh(n+1)
341 real(kind=rp), dimension(2*this%Xh%lx + 4) :: lr, ls, lt
342 integer :: i, j, k
343 integer :: ie, il, nr, ns, nt
344 integer :: lbr, rbr, lbs, rbs, lbt, rbt
345 real(kind=rp) :: eps, diag
346
347 associate(s => this%s, d => this%d, &
348 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
349 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
350 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
351 do ie=1,this%dof%msh%nelv
352 lbr = this%dof%msh%facet_type(1, ie)
353 rbr = this%dof%msh%facet_type(2, ie)
354 lbs = this%dof%msh%facet_type(3, ie)
355 rbs = this%dof%msh%facet_type(4, ie)
356 lbt = this%dof%msh%facet_type(5, ie)
357 rbt = this%dof%msh%facet_type(6, ie)
358
359 nr = nl
360 ns = nl
361 nt = nl
362 call fdm_setup_fast1d(s(1,1,1,ie), lr, nr, lbr, rbr, &
363 llr(ie), lmr(ie), lrr(ie), ah, bh, n)
364 call fdm_setup_fast1d(s(1,1,2,ie), ls, ns, lbs, rbs, &
365 lls(ie), lms(ie), lrs(ie), ah, bh, n)
366 if(this%dof%msh%gdim .eq. 3) then
367 call fdm_setup_fast1d(s(1,1,3,ie), lt, nt, lbt, rbt, &
368 llt(ie), lmt(ie), lrt(ie), ah, bh, n)
369 end if
370
371 il = 1
372 if(.not. this%dof%msh%gdim .eq. 3) then
373 eps = 1d-5 * (vlmax(lr(2), nr-2) + vlmax(ls(2), ns-2))
374 do j = 1, ns
375 do i = 1, nr
376 diag = lr(i) + ls(j)
377 if (diag .gt. eps) then
378 d(il,ie) = 1.0_rp / diag
379 else
380 d(il,ie) = 0.0_rp
381 endif
382 il = il + 1
383 end do
384 end do
385 else
386 eps = 1d-5 * (vlmax(lr(2), nr-2) + &
387 vlmax(ls(2),ns-2) + vlmax(lt(2), nt-2))
388 do k = 1, nt
389 do j = 1, ns
390 do i = 1, nr
391 diag = lr(i) + ls(j) + lt(k)
392 if (diag .gt. eps) then
393 d(il,ie) = 1.0_rp / diag
394 else
395 d(il,ie) = 0.0_rp
396 endif
397 il = il + 1
398 end do
399 end do
400 end do
401 endif
402 end do
403 end associate
404
405 end subroutine fdm_setup_fast
406
407 subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
408 integer, intent(in) :: nl, lbc, rbc, n
409 real(kind=rp), intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
410 real(kind=rp), intent(inout) :: ah(0:n, 0:n), bh(0:n)
411 integer :: lx1, lxm
412 real(kind=rp) :: b(2*(n+3)**2)
413
414 lx1 = n + 1
415 lxm = lx1 + 2
416
417 call fdm_setup_fast1d_a(s, lbc, rbc, ll, lm, lr, ah, n)
418 call fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
419 call generalev(s, b, lam, nl, lx1)
420 if(lbc .gt. 0) call row_zero(s, nl, nl, 1)
421 if(lbc .eq. 1) call row_zero(s, nl, nl, 2)
422 if(rbc .gt. 0) call row_zero(s, nl, nl, nl)
423 if(rbc .eq. 1) call row_zero(s, nl, nl, nl-1)
424
425 call trsp(s(1,1,2), nl, s, nl)
426
427 end subroutine fdm_setup_fast1d
428
432 subroutine generalev(a, b, lam, n, lx)
433 integer, intent(in) :: n, lx
434 real(kind=rp), intent(inout) :: a(n,n), b(n,n), lam(n)
435 integer :: lbw, lw
436 real(kind=rp) :: bw(4*(lx+2)**3)
437
438 lbw = 4*(lx+2)**3
439 lw = n*n
440 call sygv(a, b, lam, n, lx, bw, lbw)
441
442 end subroutine generalev
443
444 subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
445 integer, intent(in) :: n, lx, lbw
446 real(kind=sp), intent(inout) :: a(n,n), b(n,n), lam(n)
447 real(kind=sp) :: bw(4*(lx+2)**3)
448 integer :: info = 0
449 call ssygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)
450 end subroutine sp_sygv
451
452 subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
453 integer, intent(in) :: n, lx, lbw
454 real(kind=dp), intent(inout) :: a(n,n), b(n,n), lam(n)
455 real(kind=dp) :: bw(4*(lx+2)**3)
456 integer :: info = 0
457 call dsygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)
458 end subroutine dp_sygv
459
460 subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
461 integer, intent(in) :: n, lx, lbw
462 real(kind=qp), intent(inout) :: a(n,n), b(n,n), lam(n)
463 real(kind=dp) :: a2(n,n), b2(n,n), lam2(n)
464 real(kind=qp) :: bw(4*(lx+2)**3)
465 real(kind=dp) :: bw2(4*(lx+2)**3)
466 integer :: info = 0
467
468 a2 = real(a, dp)
469 b2 = real(b, dp)
470 lam2 = real(lam, dp)
471 call dsygv(1, 'V', 'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
472 a = real(a2, qp)
473 b = real(b2, qp)
474 lam = real(lam2, qp)
475 if (pe_rank .eq. 0) then
476 call neko_warning('Real precision choice not supported for fdm, treating it as double')
477 end if
478
479 end subroutine qp_sygv
480
481 subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
482 integer, intent(in) ::lbc, rbc, n
483 real(kind=rp), intent(inout) :: a(0:n+2,0:n+2), ll, lm, lr
484 real(kind=rp), intent(inout) :: ah(0:n,0:n)
485 real(kind=rp) :: fac
486 integer :: i, j, i0, i1
487
488 i0 = 0
489 if(lbc .eq. 1) i0 = 1
490 i1 = n
491 if(rbc .eq. 1) i1 = n - 1
492
493 call rzero(a, (n+3) * (n+3))
494
495 fac = 2.0_rp / lm
496 a(1,1) = 1.0_rp
497 a(n+1,n+1) = 1.0-rp
498
499 do j = i0, i1
500 do i = i0, i1
501 a(i+1,j+1) = fac * ah(i,j)
502 enddo
503 enddo
504
505 if(lbc .eq. 0) then
506 fac = 2.0_rp / ll
507 a(0,0) = fac * ah(n-1,n-1)
508 a(1,0) = fac * ah(n ,n-1)
509 a(0,1) = fac * ah(n-1,n )
510 a(1,1) = a(1,1) + fac * ah(n,n)
511 else
512 a(0,0) = 1.0_rp
513 endif
514
515 if(rbc .eq. 0) then
516 fac = 2.0_rp / lr
517 a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0)
518 a(n+2,n+1) = fac * ah(1,0)
519 a(n+1,n+2) = fac * ah(0,1)
520 a(n+2,n+2) = fac * ah(1,1)
521 else
522 a(n+2,n+2) = 1.0_rp
523 endif
524
525 end subroutine fdm_setup_fast1d_a
526
527 subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
528 integer, intent(in) :: lbc, rbc, n
529 real(kind=rp), intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
530 real(kind=rp), intent(inout) :: bh(0:n)
531 real(kind=rp) :: fac
532 integer :: i, i0, i1
533
534 i0 = 0
535 if(lbc .eq. 1) i0 = 1
536 i1 = n
537 if(rbc .eq. 1) i1 = n - 1
538
539 call rzero(b, (n + 3) * (n + 3))
540
541 fac = 0.5_rp * lm
542 b(1,1) = 1.0_rp
543 b(n+1,n+1) = 1.0_rp
544
545 do i = i0, i1
546 b(i+1,i+1) = fac * bh(i)
547 end do
548
549 if(lbc .eq. 0) then
550 fac = 0.5_rp * ll
551 b(0,0) = fac * bh(n-1)
552 b(1,1) = b(1,1) + fac * bh(n)
553 else
554 b(0,0) = 1.0_rp
555 end if
556
557 if(rbc .eq. 0) then
558 fac = 0.5_rp * lr
559 b(n+1,n+1) = b(n+1,n+1) + fac * bh(0)
560 b(n+2,n+2) = fac * bh(1)
561 else
562 b(n+2,n+2) = 1.0_rp
563 end if
564
565 end subroutine fdm_setup_fast1d_b
566
567 subroutine fdm_free(this)
568 class(fdm_t), intent(inout) :: this
569
570 if(allocated(this%s)) then
571 deallocate(this%s)
572 end if
573
574 if(allocated(this%d)) then
575 deallocate(this%d)
576 end if
577
578 if(allocated(this%len_lr)) then
579 deallocate(this%len_lr)
580 end if
581
582 if(allocated(this%len_ls)) then
583 deallocate(this%len_ls)
584 end if
585
586 if(allocated(this%len_lt)) then
587 deallocate(this%len_lt)
588 end if
589
590 if(allocated(this%len_mr)) then
591 deallocate(this%len_mr)
592 end if
593
594 if(allocated(this%len_ms)) then
595 deallocate(this%len_ms)
596 end if
597
598 if(allocated(this%len_mt)) then
599 deallocate(this%len_mt)
600 end if
601
602 if(allocated(this%len_rr)) then
603 deallocate(this%len_rr)
604 end if
605
606 if(allocated(this%len_rs)) then
607 deallocate(this%len_rs)
608 end if
609
610 if(allocated(this%len_rt)) then
611 deallocate(this%len_rt)
612 end if
613
614 if(allocated(this%swplen)) then
615 deallocate(this%swplen)
616 end if
617
618 nullify(this%Xh)
619 nullify(this%dof)
620 nullify(this%gs_h)
621 nullify(this%msh)
622
623 end subroutine fdm_free
624
625 subroutine fdm_compute(this, e, r, stream)
626 class(fdm_t), intent(inout) :: this
627 real(kind=rp), dimension((this%Xh%lx+2)**3, this%msh%nelv), intent(inout) :: e, r
628 type(c_ptr), optional :: stream
629 type(c_ptr) :: strm
630
631 if (present(stream)) then
632 strm = stream
633 else
634 strm = glb_cmd_queue
635 end if
636
637 if (neko_bcknd_sx .eq. 1) then
638 call fdm_do_fast_sx(e, r, this%s, this%d, &
639 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
640 else if (neko_bcknd_xsmm .eq. 1) then
641 call fdm_do_fast_xsmm(e, r, this%s, this%d, &
642 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
643 else if (neko_bcknd_device .eq. 1) then
644 call fdm_do_fast_device(e, r, this%s, this%d, &
645 this%Xh%lx+2, this%msh%gdim, this%msh%nelv, strm)
646 else
647 call fdm_do_fast_cpu(e, r, this%s, this%d, &
648 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
649 end if
650
651 end subroutine fdm_compute
652
653
654end module fdm
double real
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Copy data between host and device (or device and device)
Definition device.F90:66
Definition comm.F90:1
integer, public pe_rank
MPI rank.
Definition comm.F90:55
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
integer, parameter, public device_to_host
Definition device.F90:47
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
Definition fast3d.f90:168
Fast Diagonalization.
Definition fdm_cpu.f90:2
subroutine, public fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv)
Definition fdm_cpu.f90:13
subroutine, public fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream)
Fast Diagonalization SX-Aurora backend.
Definition fdm_sx.f90:34
subroutine, public fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv)
Definition fdm_sx.f90:45
Fast Diagonalization libxsmm backend.
Definition fdm_xsmm.f90:2
subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv)
Definition fdm_xsmm.f90:10
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
subroutine swap_lengths(this, x, y, z, nelv, gdim)
Definition fdm.f90:162
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
Definition fdm.f90:338
subroutine fdm_init(this, xh, dof, gs_h)
Definition fdm.f90:111
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:461
subroutine fdm_compute(this, e, r, stream)
Definition fdm.f90:626
subroutine fdm_free(this)
Definition fdm.f90:568
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:453
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem /$ A x = lam B x/$ A – symm. B – symm., pos....
Definition fdm.f90:433
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
Definition fdm.f90:408
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
Definition fdm.f90:528
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
Definition fdm.f90:249
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
Definition fdm.f90:482
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:445
Gather-scatter.
Definition math.f90:60
subroutine, public row_zero(a, m, n, e)
Sets row e to 0 in matrix a.
Definition math.f90:233
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:211
real(kind=rp) function, public vlmax(vec, n)
maximum value of a vector of length n
Definition math.f90:597
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public qp
Definition num_types.f90:10
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public sp
Definition num_types.f90:8
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Tensor operations.
Definition tensor.f90:61
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
Definition tensor.f90:121
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:284
The function space for the SEM solution fields.
Definition space.f90:62