Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
fdm.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module fdm
63 use num_types, only : rp, sp, dp, qp
64 use mesh, only : mesh_t
65 use space, only : space_t
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
68 use fdm_cpu, only : fdm_do_fast_cpu
70 use fdm_sx, only : fdm_do_fast_sx
71 use fdm_xsmm, only : fdm_do_fast_xsmm
72 use utils, only : neko_error, neko_warning
73 use comm, only : pe_rank
74 use math, only : vlmax
77 use fast3d, only : semhat
78 use tensor, only : trsp
79 use math, only : rzero, row_zero
80 use, intrinsic :: iso_c_binding
81 implicit none
82 private
83
84 type, public :: fdm_t
85 real(kind=rp), allocatable :: s(:,:,:,:)
86 real(kind=rp), allocatable :: d(:,:)
87 type(c_ptr) :: s_d = c_null_ptr
88 type(c_ptr) :: d_d = c_null_ptr
89 real(kind=rp), allocatable :: len_lr(:), len_ls(:), len_lt(:)
90 real(kind=rp), allocatable :: len_mr(:), len_ms(:), len_mt(:)
91 real(kind=rp), allocatable :: len_rr(:), len_rs(:), len_rt(:)
92 real(kind=rp), allocatable :: swplen(:,:,:,:)
93 type(c_ptr) :: swplen_d = c_null_ptr
94 type(space_t), pointer :: xh => null()
95 type(dofmap_t), pointer :: dof => null()
96 type(gs_t), pointer :: gs_h => null()
97 type(mesh_t), pointer :: msh => null()
98 contains
99 procedure, pass(this) :: init => fdm_init
100 procedure, pass(this) :: free => fdm_free
101 procedure, pass(this) :: compute => fdm_compute
102 end type fdm_t
103
104 interface sygv
105 module procedure sp_sygv, dp_sygv, qp_sygv
106 end interface sygv
107
108contains
109
110 subroutine fdm_init(this, Xh, dof, gs_h)
111 class(fdm_t), intent(inout) :: this
112 type(space_t), target, intent(inout) :: Xh
113 type(dofmap_t), target, intent(in) :: dof
114 type(gs_t), target, intent(inout) :: gs_h
115 !We only really use ah, bh
116 real(kind=rp), dimension((Xh%lx)**2) :: ah, bh, ch, dh, zh
117 real(kind=rp), dimension((Xh%lx)**2) :: dph, jph, bgl, zglhat, dgl, jgl, wh
118 integer :: nl, n, nelv
119
120 n = xh%lx - 1 !Polynomnial degree
121 nl = xh%lx + 2 !Schwarz!
122 nelv = dof%msh%nelv
123 call fdm_free(this)
124 allocate(this%s(nl*nl, 2, dof%msh%gdim, dof%msh%nelv))
125 allocate(this%d(nl**3, dof%msh%nelv))
126 allocate(this%swplen(xh%lx, xh%lx, xh%lx, dof%msh%nelv))
127 allocate(this%len_lr(nelv), this%len_ls(nelv), this%len_lt(nelv))
128 allocate(this%len_mr(nelv), this%len_ms(nelv), this%len_mt(nelv))
129 allocate(this%len_rr(nelv), this%len_rs(nelv), this%len_rt(nelv))
130
131 ! Zeroing here enables easier debugging since then
132 ! MPI messages in GS are deterministic
133 call rzero(this%swplen, xh%lxyz * dof%msh%nelv)
134
135 if (neko_bcknd_device .eq. 1) then
136 call device_map(this%s, this%s_d, nl*nl*2*dof%msh%gdim*dof%msh%nelv)
137 call device_map(this%d, this%d_d, nl**dof%msh%gdim*dof%msh%nelv)
138 call device_map(this%swplen, this%swplen_d, xh%lxyz*dof%msh%nelv)
139 end if
140
141 call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
142 this%Xh => xh
143 this%dof => dof
144 this%gs_h => gs_h
145 this%msh => dof%msh
146
147 call swap_lengths(this, dof%x, dof%y, dof%z, dof%msh%nelv, dof%msh%gdim)
148
149 call fdm_setup_fast(this, ah, bh, nl, n)
150
151 if (neko_bcknd_device .eq. 1) then
152 call device_memcpy(this%s, this%s_d, &
153 nl*nl*2*dof%msh%gdim*dof%msh%nelv, host_to_device, sync = .false.)
154 call device_memcpy(this%d, this%d_d, &
155 nl**dof%msh%gdim*dof%msh%nelv, host_to_device, sync = .false.)
156 call device_memcpy(this%swplen, this%swplen_d, &
157 xh%lxyz*dof%msh%nelv, host_to_device, sync = .false.)
158 end if
159 end subroutine fdm_init
160
161 subroutine swap_lengths(this, x, y, z, nelv, gdim)
162 type(fdm_t), intent(inout) :: this
163 integer, intent(in) :: gdim, nelv
164 real(kind=rp), dimension(this%Xh%lxyz, nelv), intent(in) :: x, y, z
165 integer :: j, k, e, n2, nz0, nzn, nx, lx1, n
166
167 associate(l => this%swplen, xh => this%Xh, &
168 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
169 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
170 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
171 lx1 = this%Xh%lx
172 n2 = lx1 - 1
173 nz0 = 1
174 nzn = 1
175 nx = lx1 - 2
176 if (gdim .eq. 3) then
177 nz0 = 0
178 nzn = n2
179 end if
180 call plane_space(lmr, lms, lmt, 0, n2, xh%wx, x, y, z, &
181 nx, n2, nz0, nzn, nelv, gdim)
182 n = n2 + 1
183 if (gdim .eq. 3) then
184 do e = 1, nelv
185 do j = 2, n2
186 do k = 2, n2
187 l(1, k, j, e) = lmr(e)
188 l(n, k, j, e) = lmr(e)
189 l(k, 1, j, e) = lms(e)
190 l(k, n, j, e) = lms(e)
191 l(k, j, 1, e) = lmt(e)
192 l(k, j, n, e) = lmt(e)
193 end do
194 end do
195 end do
196 if (neko_bcknd_device .eq. 1) then
197 call device_memcpy(l, this%swplen_d, this%dof%size(), &
198 host_to_device, sync = .false.)
199 call this%gs_h%op(l, this%dof%size(), gs_op_add)
200 call device_memcpy(l, this%swplen_d, this%dof%size(), &
201 device_to_host, sync = .true.)
202 else
203 call this%gs_h%op(l, this%dof%size(), gs_op_add)
204 end if
205
206 do e = 1, nelv
207 llr(e) = l(1, 2, 2, e) - lmr(e)
208 lrr(e) = l(n, 2, 2, e) - lmr(e)
209 lls(e) = l(2, 1, 2, e) - lms(e)
210 lrs(e) = l(2, n, 2, e) - lms(e)
211 llt(e) = l(2, 2, 1, e) - lmt(e)
212 lrt(e) = l(2, 2, n, e) - lmt(e)
213 end do
214 else
215 do e = 1, nelv
216 do j = 2, n2
217 l(1, j, 1, e) = lmr(e)
218 l(n, j, 1, e) = lmr(e)
219 l(j, 1, 1, e) = lms(e)
220 l(j, n, 1, e) = lms(e)
221 end do
222 end do
223
224 if (neko_bcknd_device .eq. 1) then
225 call device_memcpy(l, this%swplen_d, this%dof%size(), &
226 host_to_device, sync = .false.)
227 call this%gs_h%op(l, this%dof%size(), gs_op_add)
228 call device_memcpy(l, this%swplen_d, this%dof%size(), &
229 device_to_host, sync = .true.)
230 else
231 call this%gs_h%op(l, this%dof%size(), gs_op_add)
232 end if
233
234 do e = 1, nelv
235 llr(e) = l(1, 2, 1, e) - lmr(e)
236 lrr(e) = l(n, 2, 1, e) - lmr(e)
237 lls(e) = l(2, 1, 1, e) - lms(e)
238 lrs(e) = l(2, n, 1, e) - lms(e)
239 end do
240 end if
241 end associate
242 end subroutine swap_lengths
243
247 subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, &
248 nx, nxn, nz0, nzn, nelv, gdim)
249 integer, intent(in) :: nxn, nzn, i1, i2, nelv, gdim, nx, nz0
250 real(kind=rp), intent(inout) :: lr(nelv), ls(nelv), lt(nelv)
251 real(kind=rp), intent(inout) :: w(nx)
252 real(kind=rp), intent(in) :: x(0:nxn, 0:nxn, nz0:nzn, nelv)
253 real(kind=rp), intent(in) :: y(0:nxn, 0:nxn, nz0:nzn, nelv)
254 real(kind=rp), intent(in) :: z(0:nxn, 0:nxn, nz0:nzn, nelv)
255 real(kind=rp) :: lr2, ls2, lt2, weight, wsum
256 integer :: ny, nz, j1, k1, j2, k2, i, j, k, ie
257 ny = nx
258 nz = nx
259 j1 = i1
260 k1 = i1
261 j2 = i2
262 k2 = i2
263 ! Now, for each element, compute lr,ls,lt between specified planes
264 do ie = 1, nelv
265 if (gdim .eq. 3) then
266 lr2 = 0d0
267 wsum = 0d0
268 do k = 1, nz
269 do j = 1, ny
270 weight = w(j)*w(k)
271 lr2 = lr2 + weight /&
272 ( (x(i2, j, k, ie) - x(i1, j, k, ie))**2&
273 + (y(i2, j, k, ie) - y(i1, j, k, ie))**2&
274 + (z(i2, j, k, ie) - z(i1, j, k, ie))**2 )
275 wsum = wsum + weight
276 end do
277 end do
278 lr2 = lr2/wsum
279 lr(ie) = 1d0/sqrt(lr2)
280 ls2 = 0d0
281 wsum = 0d0
282 do k = 1, nz
283 do i = 1, nx
284 weight = w(i)*w(k)
285 ls2 = ls2 + weight / &
286 ( (x(i, j2, k, ie) - x(i, j1, k, ie))**2 &
287 + (y(i, j2, k, ie) - y(i, j1, k, ie))**2 &
288 + (z(i, j2, k, ie) - z(i, j1, k, ie))**2 )
289 wsum = wsum + weight
290 end do
291 end do
292 ls2 = ls2/wsum
293 ls(ie) = 1d0/sqrt(ls2)
294 lt2 = 0d0
295 wsum = 0d0
296 do j = 1, ny
297 do i = 1, nx
298 weight = w(i)*w(j)
299 lt2 = lt2 + weight / &
300 ( (x(i, j, k2, ie) - x(i, j, k1, ie))**2 &
301 + (y(i, j, k2, ie) - y(i, j, k1, ie))**2 &
302 + (z(i, j, k2, ie) - z(i, j, k1, ie))**2 )
303 wsum = wsum + weight
304 end do
305 end do
306 lt2 = lt2/wsum
307 lt(ie) = 1d0/sqrt(lt2)
308 else ! 2D
309 lr2 = 0d0
310 wsum = 0d0
311 do j = 1, ny
312 weight = w(j)
313 lr2 = lr2 + weight / &
314 ( (x(i2, j, 1, ie) - x(i1, j, 1, ie))**2 &
315 + (y(i2, j, 1, ie) - y(i1, j, 1, ie))**2 )
316 wsum = wsum + weight
317 end do
318 lr2 = lr2/wsum
319 lr(ie) = 1d0/sqrt(lr2)
320 ls2 = 0d0
321 wsum = 0d0
322 do i = 1, nx
323 weight = w(i)
324 ls2 = ls2 + weight / &
325 ( (x(i, j2, 1, ie) - x(i, j1, 1, ie))**2 &
326 + (y(i, j2, 1, ie) - y(i, j1, 1, ie))**2 )
327 wsum = wsum + weight
328 end do
329 ls2 = ls2/wsum
330 ls(ie) = 1d0/sqrt(ls2)
331 end if
332 end do
333 ie = 1014
334 end subroutine plane_space
335
337 subroutine fdm_setup_fast(this, ah, bh, nl, n)
338 integer, intent(in) :: nl, n
339 type(fdm_t), intent(inout) :: this
340 real(kind=rp), intent(inout) :: ah(n+1, n+1), bh(n+1)
341 real(kind=rp), dimension(2*this%Xh%lx + 4) :: lr, ls, lt
342 integer :: i, j, k
343 integer :: ie, il, nr, ns, nt
344 integer :: lbr, rbr, lbs, rbs, lbt, rbt
345 real(kind=rp) :: eps, diag
346
347 associate(s => this%s, d => this%d, &
348 llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
349 lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
350 lrr => this%len_rr, lrs => this%len_rs, lrt => this%len_rt)
351 do ie = 1, this%dof%msh%nelv
352 lbr = this%dof%msh%facet_type(1, ie)
353 rbr = this%dof%msh%facet_type(2, ie)
354 lbs = this%dof%msh%facet_type(3, ie)
355 rbs = this%dof%msh%facet_type(4, ie)
356 lbt = this%dof%msh%facet_type(5, ie)
357 rbt = this%dof%msh%facet_type(6, ie)
358
359 nr = nl
360 ns = nl
361 nt = nl
362 call fdm_setup_fast1d(s(1, 1, 1, ie), lr, nr, lbr, rbr, &
363 llr(ie), lmr(ie), lrr(ie), ah, bh, n)
364 call fdm_setup_fast1d(s(1, 1, 2, ie), ls, ns, lbs, rbs, &
365 lls(ie), lms(ie), lrs(ie), ah, bh, n)
366 if (this%dof%msh%gdim .eq. 3) then
367 call fdm_setup_fast1d(s(1, 1, 3, ie), lt, nt, lbt, rbt, &
368 llt(ie), lmt(ie), lrt(ie), ah, bh, n)
369 end if
370
371 il = 1
372 if (.not. this%dof%msh%gdim .eq. 3) then
373 eps = 1d-5 * (vlmax(lr(2), nr - 2) + vlmax(ls(2), ns - 2))
374 do j = 1, ns
375 do i = 1, nr
376 diag = lr(i) + ls(j)
377 if (diag .gt. eps) then
378 d(il, ie) = 1.0_rp / diag
379 else
380 d(il, ie) = 0.0_rp
381 end if
382 il = il + 1
383 end do
384 end do
385 else
386 eps = 1d-5 * (vlmax(lr(2), nr - 2) + &
387 vlmax(ls(2), ns - 2) + vlmax(lt(2), nt - 2))
388 do k = 1, nt
389 do j = 1, ns
390 do i = 1, nr
391 diag = lr(i) + ls(j) + lt(k)
392 if (diag .gt. eps) then
393 d(il, ie) = 1.0_rp / diag
394 else
395 d(il, ie) = 0.0_rp
396 end if
397 il = il + 1
398 end do
399 end do
400 end do
401 end if
402 end do
403 end associate
404
405 end subroutine fdm_setup_fast
406
407 subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
408 integer, intent(in) :: nl, lbc, rbc, n
409 real(kind=rp), intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
410 real(kind=rp), intent(inout) :: ah(0:n, 0:n), bh(0:n)
411 integer :: lx1, lxm
412 real(kind=rp) :: b(2*(n+3)**2)
413
414 lx1 = n + 1
415 lxm = lx1 + 2
416
417 call fdm_setup_fast1d_a(s, lbc, rbc, ll, lm, lr, ah, n)
418 call fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
419 call generalev(s, b, lam, nl, lx1)
420 if (lbc .gt. 0) call row_zero(s, nl, nl, 1)
421 if (lbc .eq. 1) call row_zero(s, nl, nl, 2)
422 if (rbc .gt. 0) call row_zero(s, nl, nl, nl)
423 if (rbc .eq. 1) call row_zero(s, nl, nl, nl-1)
424
425 call trsp(s(1, 1, 2), nl, s, nl)
426
427 end subroutine fdm_setup_fast1d
428
432 subroutine generalev(a, b, lam, n, lx)
433 integer, intent(in) :: n, lx
434 real(kind=rp), intent(inout) :: a(n, n), b(n, n), lam(n)
435 integer :: lbw, lw
436 real(kind=rp) :: bw(4*(lx+2)**3)
437
438 lbw = 4*(lx+2)**3
439 lw = n*n
440 call sygv(a, b, lam, n, lx, bw, lbw)
441
442 end subroutine generalev
443
444 subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
445 integer, intent(in) :: n, lx, lbw
446 real(kind=sp), intent(inout) :: a(n, n), b(n, n), lam(n)
447 real(kind=sp) :: bw(4*(lx+2)**3)
448 integer :: info = 0
449 call ssygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)
450 end subroutine sp_sygv
451
452 subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
453 integer, intent(in) :: n, lx, lbw
454 real(kind=dp), intent(inout) :: a(n, n), b(n, n), lam(n)
455 real(kind=dp) :: bw(4*(lx+2)**3)
456 integer :: info = 0
457 call dsygv(1, 'V', 'U', n, a, n, b, n, lam, bw, lbw, info)
458 end subroutine dp_sygv
459
460 subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
461 integer, intent(in) :: n, lx, lbw
462 real(kind=qp), intent(inout) :: a(n, n), b(n, n), lam(n)
463 real(kind=dp) :: a2(n, n), b2(n, n), lam2(n)
464 real(kind=qp) :: bw(4*(lx+2)**3)
465 real(kind=dp) :: bw2(4*(lx+2)**3)
466 integer :: info = 0
467
468 a2 = real(a, dp)
469 b2 = real(b, dp)
470 lam2 = real(lam, dp)
471 call dsygv(1, 'V', 'U', n, a2, n, b2, n, lam2, bw2, lbw, info)
472 a = real(a2, qp)
473 b = real(b2, qp)
474 lam = real(lam2, qp)
475 if (pe_rank .eq. 0) then
476 call neko_warning('Real precision choice not supported for fdm, ' // &
477 'treating it as double')
478 end if
479
480 end subroutine qp_sygv
481
482 subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
483 integer, intent(in) ::lbc, rbc, n
484 real(kind=rp), intent(inout) :: a(0:n+2, 0:n+2), ll, lm, lr
485 real(kind=rp), intent(inout) :: ah(0:n, 0:n)
486 real(kind=rp) :: fac
487 integer :: i, j, i0, i1
488
489 i0 = 0
490 if (lbc .eq. 1) i0 = 1
491 i1 = n
492 if (rbc .eq. 1) i1 = n - 1
493
494 call rzero(a, (n+3) * (n+3))
495
496 fac = 2.0_rp / lm
497 a(1, 1) = 1.0_rp
498 a(n+1, n+1) = 1.0-rp
499
500 do j = i0, i1
501 do i = i0, i1
502 a(i+1, j+1) = fac * ah(i, j)
503 end do
504 end do
505
506 if (lbc .eq. 0) then
507 fac = 2.0_rp / ll
508 a(0, 0) = fac * ah(n-1, n-1)
509 a(1, 0) = fac * ah(n, n-1)
510 a(0, 1) = fac * ah(n-1, n)
511 a(1, 1) = a(1, 1) + fac * ah(n, n)
512 else
513 a(0, 0) = 1.0_rp
514 end if
515
516 if (rbc .eq. 0) then
517 fac = 2.0_rp / lr
518 a(n+1, n+1) = a(n+1, n+1) + fac * ah(0, 0)
519 a(n+2, n+1) = fac * ah(1, 0)
520 a(n+1, n+2) = fac * ah(0, 1)
521 a(n+2, n+2) = fac * ah(1, 1)
522 else
523 a(n+2, n+2) = 1.0_rp
524 end if
525
526 end subroutine fdm_setup_fast1d_a
527
528 subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
529 integer, intent(in) :: lbc, rbc, n
530 real(kind=rp), intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
531 real(kind=rp), intent(inout) :: bh(0:n)
532 real(kind=rp) :: fac
533 integer :: i, i0, i1
534
535 i0 = 0
536 if (lbc .eq. 1) i0 = 1
537 i1 = n
538 if (rbc .eq. 1) i1 = n - 1
539
540 call rzero(b, (n + 3) * (n + 3))
541
542 fac = 0.5_rp * lm
543 b(1, 1) = 1.0_rp
544 b(n+1, n+1) = 1.0_rp
545
546 do i = i0, i1
547 b(i+1, i+1) = fac * bh(i)
548 end do
549
550 if (lbc .eq. 0) then
551 fac = 0.5_rp * ll
552 b(0, 0) = fac * bh(n-1)
553 b(1, 1) = b(1, 1) + fac * bh(n)
554 else
555 b(0, 0) = 1.0_rp
556 end if
557
558 if (rbc .eq. 0) then
559 fac = 0.5_rp * lr
560 b(n+1, n+1) = b(n+1, n+1) + fac * bh(0)
561 b(n+2, n+2) = fac * bh(1)
562 else
563 b(n+2, n+2) = 1.0_rp
564 end if
565
566 end subroutine fdm_setup_fast1d_b
567
568 subroutine fdm_free(this)
569 class(fdm_t), intent(inout) :: this
570
571 if (allocated(this%s)) then
572 deallocate(this%s)
573 end if
574
575 if (allocated(this%d)) then
576 deallocate(this%d)
577 end if
578
579 if (allocated(this%len_lr)) then
580 deallocate(this%len_lr)
581 end if
582
583 if (allocated(this%len_ls)) then
584 deallocate(this%len_ls)
585 end if
586
587 if (allocated(this%len_lt)) then
588 deallocate(this%len_lt)
589 end if
590
591 if (allocated(this%len_mr)) then
592 deallocate(this%len_mr)
593 end if
594
595 if (allocated(this%len_ms)) then
596 deallocate(this%len_ms)
597 end if
598
599 if (allocated(this%len_mt)) then
600 deallocate(this%len_mt)
601 end if
602
603 if (allocated(this%len_rr)) then
604 deallocate(this%len_rr)
605 end if
606
607 if (allocated(this%len_rs)) then
608 deallocate(this%len_rs)
609 end if
610
611 if (allocated(this%len_rt)) then
612 deallocate(this%len_rt)
613 end if
614
615 if (allocated(this%swplen)) then
616 deallocate(this%swplen)
617 end if
618
619 nullify(this%Xh)
620 nullify(this%dof)
621 nullify(this%gs_h)
622 nullify(this%msh)
623
624 if (c_associated(this%s_d)) then
625 call device_free(this%s_d)
626 end if
627 if (c_associated(this%d_d)) then
628 call device_free(this%d_d)
629 end if
630 if (c_associated(this%swplen_d)) then
631 call device_free(this%swplen_d)
632 end if
633
634 end subroutine fdm_free
635
636 subroutine fdm_compute(this, e, r, stream)
637 class(fdm_t), intent(inout) :: this
638 real(kind=rp), dimension((this%Xh%lx + 2)**3, this%msh%nelv), &
639 intent(inout) :: e, r
640 type(c_ptr), optional :: stream
641 type(c_ptr) :: strm
642
643 if (present(stream)) then
644 strm = stream
645 else
646 strm = glb_cmd_queue
647 end if
648
649 if (neko_bcknd_sx .eq. 1) then
650 call fdm_do_fast_sx(e, r, this%s, this%d, &
651 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
652 else if (neko_bcknd_xsmm .eq. 1) then
653 call fdm_do_fast_xsmm(e, r, this%s, this%d, &
654 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
655 else if (neko_bcknd_device .eq. 1) then
656 call fdm_do_fast_device(e, r, this%s, this%d, &
657 this%Xh%lx+2, this%msh%gdim, this%msh%nelv, strm)
658 else
659 call fdm_do_fast_cpu(e, r, this%s, this%d, &
660 this%Xh%lx+2, this%msh%gdim, this%msh%nelv)
661 end if
662
663 end subroutine fdm_compute
664
665
666end module fdm
double real
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Definition comm.F90:1
integer, public pe_rank
MPI rank.
Definition comm.F90:56
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61
subroutine, public semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators: a = Laplacian b = diagonal mass matrix c = convec...
Definition fast3d.f90:169
Fast Diagonalization.
Definition fdm_cpu.f90:2
subroutine, public fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv)
Definition fdm_cpu.f90:13
subroutine, public fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream)
Fast Diagonalization SX-Aurora backend.
Definition fdm_sx.f90:34
subroutine, public fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv)
Definition fdm_sx.f90:45
Fast Diagonalization libxsmm backend.
Definition fdm_xsmm.f90:2
subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv)
Definition fdm_xsmm.f90:10
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
subroutine swap_lengths(this, x, y, z, nelv, gdim)
Definition fdm.f90:162
subroutine fdm_setup_fast(this, ah, bh, nl, n)
Setup the arrays s, d needed for the fast evaluation of the system.
Definition fdm.f90:338
subroutine fdm_init(this, xh, dof, gs_h)
Definition fdm.f90:111
subroutine qp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:461
subroutine fdm_compute(this, e, r, stream)
Definition fdm.f90:637
subroutine fdm_free(this)
Definition fdm.f90:569
subroutine dp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:453
subroutine generalev(a, b, lam, n, lx)
Solve the generalized eigenvalue problem /$ A x = lam B x/$ A – symm. B – symm., pos....
Definition fdm.f90:433
subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
Definition fdm.f90:408
subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
Definition fdm.f90:529
subroutine plane_space(lr, ls, lt, i1, i2, w, x, y, z, nx, nxn, nz0, nzn, nelv, gdim)
Here, spacing is based on harmonic mean. pff 2/10/07 We no longer base this on the finest grid,...
Definition fdm.f90:249
subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
Definition fdm.f90:483
subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
Definition fdm.f90:445
Gather-scatter.
Definition math.f90:60
subroutine, public row_zero(a, m, n, e)
Sets row e to 0 in matrix a.
Definition math.f90:227
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
real(kind=rp) function, public vlmax(vec, n)
maximum value of a vector of length n
Definition math.f90:591
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_xsmm
integer, parameter, public qp
Definition num_types.f90:10
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public sp
Definition num_types.f90:8
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Tensor operations.
Definition tensor.f90:61
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
Definition tensor.f90:124
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
The function space for the SEM solution fields.
Definition space.f90:63