Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
space.f90
Go to the documentation of this file.
1! Copyright (c) 2019-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module space
36 use num_types, only : rp
37 use speclib, only : zwgll, zwgl, dgll, legendre_poly
38 use device
39 use matrix, only : matrix_t
40 use utils, only : neko_error
41 use fast3d, only : setup_intp
42 use tensor, only : trsp1
43 use mxm_wrapper, only : mxm
44 use math, only : copy
45 use, intrinsic :: iso_c_binding
46 implicit none
47 private
48
49 integer, public, parameter :: gl = 0, gll = 1, gj = 2
50
63 type, public :: space_t
64 integer :: t
65 integer :: lx
66 integer :: ly
67 integer :: lz
68 integer :: lxy
69 integer :: lyz
70 integer :: lxz
71 integer :: lxyz
72
73 real(kind=rp), allocatable :: zg(:,:)
74
75 real(kind=rp), allocatable :: dr_inv(:)
76 real(kind=rp), allocatable :: ds_inv(:)
77 real(kind=rp), allocatable :: dt_inv(:)
78
79 real(kind=rp), allocatable :: wx(:)
80 real(kind=rp), allocatable :: wy(:)
81 real(kind=rp), allocatable :: wz(:)
82
83 real(kind=rp), allocatable :: w3(:,:,:)
84
86 real(kind=rp), allocatable :: dx(:,:)
88 real(kind=rp), allocatable :: dy(:,:)
90 real(kind=rp), allocatable :: dz(:,:)
91
93 real(kind=rp), allocatable :: dxt(:,:)
95 real(kind=rp), allocatable :: dyt(:,:)
97 real(kind=rp), allocatable :: dzt(:,:)
98
100 real(kind=rp), allocatable :: v(:,:)
101 real(kind=rp), allocatable :: vt(:,:)
102 real(kind=rp), allocatable :: vinv(:,:)
103 real(kind=rp), allocatable :: vinvt(:,:)
105 real(kind=rp), allocatable :: w(:,:)
106
107 !
108 ! Device pointers (if present)
109 !
110 type(c_ptr) :: dr_inv_d = c_null_ptr
111 type(c_ptr) :: ds_inv_d = c_null_ptr
112 type(c_ptr) :: dt_inv_d = c_null_ptr
113 type(c_ptr) :: dxt_d = c_null_ptr
114 type(c_ptr) :: dyt_d = c_null_ptr
115 type(c_ptr) :: dzt_d = c_null_ptr
116 type(c_ptr) :: dx_d = c_null_ptr
117 type(c_ptr) :: dy_d = c_null_ptr
118 type(c_ptr) :: dz_d = c_null_ptr
119 type(c_ptr) :: wx_d = c_null_ptr
120 type(c_ptr) :: wy_d = c_null_ptr
121 type(c_ptr) :: wz_d = c_null_ptr
122 type(c_ptr) :: zg_d = c_null_ptr
123 type(c_ptr) :: w3_d = c_null_ptr
124 type(c_ptr) :: v_d = c_null_ptr
125 type(c_ptr) :: vt_d = c_null_ptr
126 type(c_ptr) :: vinv_d = c_null_ptr
127 type(c_ptr) :: vinvt_d = c_null_ptr
128 type(c_ptr) :: w_d = c_null_ptr
129 contains
130 procedure, pass(s) :: init => space_init
131 procedure, pass(s) :: free => space_free
132
133 end type space_t
134
135 interface operator(.eq.)
136 module procedure space_eq
137 end interface operator(.eq.)
138
139 interface operator(.ne.)
140 module procedure space_ne
141 end interface operator(.ne.)
142
143 public :: operator(.eq.), operator(.ne.)
144
145contains
146
148 subroutine space_init(s, t, lx, ly, lz)
149 class(space_t), intent(inout) :: s
150 integer, intent(in) :: t
151 integer, intent(in) :: lx
152 integer, intent(in) :: ly
153 integer, optional, intent(in) :: lz
154 integer :: ix, iy, iz
155
156 call space_free(s)
157
158 s%lx = lx
159 s%ly = ly
160 s%t = t
161 if (present(lz)) then
162 if (lz .ne. 1) then
163 s%lz = lz
164 if (lx .ne. ly .or. lx .ne. lz) then
165 call neko_error("Unsupported polynomial dimension")
166 end if
167 end if
168 else
169 if (lx .ne. ly) then
170 call neko_error("Unsupported polynomial dimension")
171 end if
172 s%lz = 1
173 end if
174 s%lxy = s%ly*s%lx
175 s%lyz = s%ly*s%lz
176 s%lxz = s%lx*s%lz
177 s%lxyz = s%lx*s%ly*s%lz
178
179 allocate(s%zg(lx, 3))
180
181 allocate(s%wx(s%lx))
182 allocate(s%wy(s%ly))
183 allocate(s%wz(s%lz))
184
185 allocate(s%dr_inv(s%lx))
186 allocate(s%ds_inv(s%ly))
187 allocate(s%dt_inv(s%lz))
188
189 allocate(s%w3(s%lx, s%ly, s%lz))
190
191 allocate(s%dx(s%lx, s%lx))
192 allocate(s%dy(s%ly, s%ly))
193 allocate(s%dz(s%lz, s%lz))
194
195 allocate(s%dxt(s%lx, s%lx))
196 allocate(s%dyt(s%ly, s%ly))
197 allocate(s%dzt(s%lz, s%lz))
198
199 allocate(s%v(s%lx, s%lx))
200 allocate(s%vt(s%lx, s%lx))
201 allocate(s%vinv(s%lx, s%lx))
202 allocate(s%vinvt(s%lx, s%lx))
203 allocate(s%w(s%lx, s%lx))
204
205 ! Call low-level routines to compute nodes and quadrature weights
206 if (t .eq. gll) then
207 call zwgll(s%zg(1,1), s%wx, s%lx)
208 call zwgll(s%zg(1,2), s%wy, s%ly)
209 if (s%lz .gt. 1) then
210 call zwgll(s%zg(1,3), s%wz, s%lz)
211 else
212 s%zg(:,3) = 0d0
213 s%wz = 1d0
214 end if
215 else if (t .eq. gl) then
216 call zwgl(s%zg(1,1), s%wx, s%lx)
217 call zwgl(s%zg(1,2), s%wy, s%ly)
218 if (s%lz .gt. 1) then
219 call zwgl(s%zg(1,3), s%wz, s%lz)
220 else
221 s%zg(:,3) = 0d0
222 s%wz = 1d0
223 end if
224 else
225 call neko_error("Invalid quadrature rule")
226 end if
227
228 do iz = 1, s%lz
229 do iy = 1, s%ly
230 do ix = 1, s%lx
231 s%w3(ix, iy, iz) = s%wx(ix) * s%wy(iy) * s%wz(iz)
232 end do
233 end do
234 end do
236 if (t .eq. gll) then
237 call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx)
238 call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly)
239 if (s%lz .gt. 1) then
240 call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz)
241 else
242 s%dz = 0d0
243 s%dzt = 0d0
244 end if
245 else if (t .eq. gl) then
246 call setup_intp(s%dx, s%dxt, s%zg(1,1), s%zg(1,1), s%lx, s%lx,1)
247 call setup_intp(s%dy, s%dyt, s%zg(1,2), s%zg(1,2), s%ly, s%ly,1)
248 if (s%lz .gt. 1) then
249 call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1)
250 else
251 s%dz = 0d0
252 s%dzt = 0d0
253 end if
254 else
255 call neko_error("Invalid quadrature rule")
256 end if
257
258 call space_compute_dist(s%dr_inv, s%zg(1,1), s%lx)
259 call space_compute_dist(s%ds_inv, s%zg(1,2), s%ly)
260 if (s%lz .gt. 1) then
261 call space_compute_dist(s%dt_inv, s%zg(1,3), s%lz)
262 else
263 s%dt_inv = 0d0
264 end if
266
267 if (neko_bcknd_device .eq. 1) then
268 call device_map(s%dr_inv, s%dr_inv_d, s%lx)
269 call device_map(s%ds_inv, s%ds_inv_d, s%lx)
270 call device_map(s%dt_inv, s%dt_inv_d, s%lx)
271 call device_map(s%wx, s%wx_d, s%lx)
272 call device_map(s%wy, s%wy_d, s%lx)
273 call device_map(s%wz, s%wz_d, s%lx)
274 call device_map(s%dx, s%dx_d, s%lxy)
275 call device_map(s%dy, s%dy_d, s%lxy)
276 call device_map(s%dz, s%dz_d, s%lxy)
277 call device_map(s%dxt, s%dxt_d, s%lxy)
278 call device_map(s%dyt, s%dyt_d, s%lxy)
279 call device_map(s%dzt, s%dzt_d, s%lxy)
280 call device_map(s%w3, s%w3_d, s%lxyz)
281 call device_map(s%v, s%v_d, s%lxy)
282 call device_map(s%vt, s%vt_d, s%lxy)
283 call device_map(s%vinv, s%vinv_d, s%lxy)
284 call device_map(s%vinvt, s%vinvt_d, s%lxy)
285 call device_map(s%w, s%w_d, s%lxy)
286
287 call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, host_to_device, &
288 sync = .false.)
289 call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, host_to_device, &
290 sync = .false.)
291 call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, host_to_device, &
292 sync = .false.)
293 call device_memcpy(s%wx, s%wx_d, s%lx, host_to_device, sync = .false.)
294 call device_memcpy(s%wy, s%wy_d, s%lx, host_to_device, sync = .false.)
295 call device_memcpy(s%wz, s%wz_d, s%lx, host_to_device, sync = .false.)
296 call device_memcpy(s%dx, s%dx_d, s%lxy, host_to_device, sync = .false.)
297 call device_memcpy(s%dy, s%dy_d, s%lxy, host_to_device, sync = .false.)
298 call device_memcpy(s%dz, s%dz_d, s%lxy, host_to_device, sync = .false.)
299 call device_memcpy(s%dxt, s%dxt_d, s%lxy, host_to_device, sync = .false.)
300 call device_memcpy(s%dyt, s%dyt_d, s%lxy, host_to_device, sync = .false.)
301 call device_memcpy(s%dzt, s%dzt_d, s%lxy, host_to_device, sync = .false.)
302 call device_memcpy(s%w3, s%w3_d, s%lxyz, host_to_device, sync = .false.)
303 call device_memcpy(s%v, s%v_d, s%lxy, host_to_device, sync = .false.)
304 call device_memcpy(s%vt, s%vt_d, s%lxy, host_to_device, sync = .false.)
305 call device_memcpy(s%vinv, s%vinv_d, s%lxy, host_to_device, &
306 sync = .false.)
307 call device_memcpy(s%vinvt, s%vinvt_d, s%lxy, host_to_device, &
308 sync = .false.)
309 call device_memcpy(s%w, s%w_d, s%lxy, host_to_device, sync = .false.)
310
311 ix = s%lx * 3
312 call device_map(s%zg, s%zg_d, ix)
313 call device_memcpy(s%zg, s%zg_d, ix, host_to_device, sync = .true.)
314 end if
315
316
317 call device_sync()
318
319 end subroutine space_init
320
322 subroutine space_free(s)
323 class(space_t), intent(inout) :: s
324
325 if (allocated(s%zg)) then
326 if (neko_bcknd_device .eq. 1) call device_unmap(s%zg, s%zg_d)
327 deallocate(s%zg)
328 end if
329
330 if (allocated(s%wx)) then
331 if (neko_bcknd_device .eq. 1) call device_unmap(s%wx, s%wx_d)
332 deallocate(s%wx)
333 end if
334
335 if (allocated(s%wy)) then
336 if (neko_bcknd_device .eq. 1) call device_unmap(s%wy, s%wy_d)
337 deallocate(s%wy)
338 end if
339
340 if (allocated(s%wz)) then
341 if (neko_bcknd_device .eq. 1) call device_unmap(s%wz, s%wz_d)
342 deallocate(s%wz)
343 end if
344
345 if (allocated(s%w3)) then
346 if (neko_bcknd_device .eq. 1) call device_unmap(s%w3, s%w3_d)
347 deallocate(s%w3)
348 end if
349
350 if (allocated(s%dx)) then
351 if (neko_bcknd_device .eq. 1) call device_unmap(s%dx, s%dx_d)
352 deallocate(s%dx)
353 end if
354
355 if (allocated(s%dy)) then
356 if (neko_bcknd_device .eq. 1) call device_unmap(s%dy, s%dy_d)
357 deallocate(s%dy)
358 end if
359
360 if (allocated(s%dz)) then
361 if (neko_bcknd_device .eq. 1) call device_unmap(s%dz, s%dz_d)
362 deallocate(s%dz)
363 end if
364
365 if (allocated(s%dxt)) then
366 if (neko_bcknd_device .eq. 1) call device_unmap(s%dxt, s%dxt_d)
367 deallocate(s%dxt)
368 end if
369
370 if (allocated(s%dyt)) then
371 if (neko_bcknd_device .eq. 1) call device_unmap(s%dyt, s%dyt_d)
372 deallocate(s%dyt)
373 end if
374
375 if (allocated(s%dzt)) then
376 if (neko_bcknd_device .eq. 1) call device_unmap(s%dzt, s%dzt_d)
377 deallocate(s%dzt)
378 end if
379
380 if (allocated(s%dr_inv)) then
381 if (neko_bcknd_device .eq. 1) call device_unmap(s%dr_inv, s%dr_inv_d)
382 deallocate(s%dr_inv)
383 end if
384
385 if (allocated(s%ds_inv)) then
386 if (neko_bcknd_device .eq. 1) call device_unmap(s%ds_inv, s%ds_inv_d)
387 deallocate(s%ds_inv)
388 end if
389
390 if (allocated(s%dt_inv)) then
391 if (neko_bcknd_device .eq. 1) call device_unmap(s%dt_inv, s%dt_inv_d)
392 deallocate(s%dt_inv)
393 end if
394
395 if (allocated(s%v)) then
396 if (neko_bcknd_device .eq. 1) call device_unmap(s%v, s%v_d)
397 deallocate(s%v)
398 end if
399
400 if (allocated(s%vt)) then
401 if (neko_bcknd_device .eq. 1) call device_unmap(s%vt, s%vt_d)
402 deallocate(s%vt)
403 end if
404
405 if (allocated(s%vinv)) then
406 if (neko_bcknd_device .eq. 1) call device_unmap(s%vinv, s%vinv_d)
407 deallocate(s%vinv)
408 end if
409
410 if (allocated(s%vinvt)) then
411 if (neko_bcknd_device .eq. 1) call device_unmap(s%vinvt, s%vinvt_d)
412 deallocate(s%vinvt)
413 end if
414
415 if (allocated(s%w)) then
416 if (neko_bcknd_device .eq. 1) call device_unmap(s%w, s%w_d)
417 deallocate(s%w)
418 end if
419
420 end subroutine space_free
421
424 pure function space_eq(Xh, Yh) result(res)
425 type(space_t), intent(in) :: xh
426 type(space_t), intent(in) :: yh
427 logical :: res
428
429 if ( (xh%lx .eq. yh%lx) .and. &
430 (xh%ly .eq. yh%ly) .and. &
431 (xh%lz .eq. yh%lz) ) then
432 res = .true.
433 else
434 res = .false.
435 end if
436
437 end function space_eq
438
441 pure function space_ne(Xh, Yh) result(res)
442 type(space_t), intent(in) :: xh
443 type(space_t), intent(in) :: yh
444 logical :: res
445
446 if ( (xh%lx .eq. yh%lx) .and. &
447 (xh%ly .eq. yh%ly) .and. &
448 (xh%lz .eq. yh%lz) ) then
449 res = .false.
450 else
451 res = .true.
452 end if
453
454 end function space_ne
455
456 subroutine space_compute_dist(dx, x, lx)
457 integer, intent(in) :: lx
458 real(kind=rp), intent(inout) :: dx(lx), x(lx)
459 integer :: i
460 dx(1) = x(2) - x(1)
461 do i = 2, lx - 1
462 dx(i) = 0.5*(x(i+1) - x(i-1))
463 end do
464 dx(lx) = x(lx) - x(lx-1)
465 do i = 1, lx
466 dx(i) = 1.0_rp / dx(i)
467 end do
468 end subroutine space_compute_dist
469
470
474 type(space_t), intent(inout) :: Xh
475
476 real(kind=rp) :: l(0:xh%lx-1)
477 real(kind=rp) :: delta(xh%lx)
478 integer :: i, kj, j, kk
479 type(matrix_t) :: m
480 logical :: scaled = .false.
481
482 associate(v=> xh%v, vt => xh%vt, &
483 vinv => xh%vinv, vinvt => xh%vinvt, w => xh%w)
484 ! Get the Legendre polynomials for each point
485 ! Then proceed to compose the transform matrix
486 kj = 0
487 do j = 1, xh%lx
488 call legendre_poly(l, xh%zg(j, 1), xh%lx - 1)
489 do kk = 1, xh%lx
490 kj = kj+1
491 v(kj,1) = l(kk-1)
492 end do
493 end do
494
495 ! transpose the matrix
496 call trsp1(v, xh%lx)
497
498 if (scaled) then
499
500 ! Calculate the nominal scaling factors
501 do i = 1, xh%lx
502 delta(i) = 2.0_rp / (2*(i-1)+1)
503 end do
504 ! modify last entry
505 delta(xh%lx) = 2.0_rp / (xh%lx-1)
506
507 ! calculate the inverse to multiply the matrix
508 do i = 1, xh%lx
509 delta(i) = sqrt(1.0_rp / delta(i))
510 end do
511 ! scale the matrix
512 do i = 1, xh%lx
513 do j = 1, xh%lx
514 v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights
515 end do
516 end do
517
518 ! get the trasposed
519 call copy(vt, v, xh%lx * xh%lx)
520 call trsp1(vt, xh%lx)
521
522 !populate the mass matrix
523 kk = 1
524 do i = 1, xh%lx
525 do j = 1, xh%lx
526 if (i .eq. j) then
527 w(i,j) = xh%wx(kk)
528 kk = kk+1
529 else
530 w(i,j) = 0
531 end if
532 end do
533 end do
534
535 !Get the inverse of the transform matrix
536 call mxm(vt, xh%lx, w, xh%lx, vinv, xh%lx)
537
538 !get the transposed of the inverse
539 call copy(vinvt, vinv, xh%lx * xh%lx)
540 call trsp1(vinvt, xh%lx)
541 else
542 call copy(vt, v, xh%lxy)
543 call trsp1(vt, xh%lx)
544 call m%init(xh%lx, xh%lx)
545 call copy(m%x, v, xh%lxy)
546 call m%inverse_on_host()
547 call copy(vinv, m%x, xh%lxy)
548 call copy(vinvt, vinv, xh%lx * xh%lx)
549 call trsp1(vinvt, xh%lx)
550 call m%free()
551 end if
552 end associate
553
555
556end module space
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Copy data between host and device (or device and device)
Definition device.F90:72
Synchronize a device or stream.
Definition device.F90:114
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:48
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61
subroutine, public setup_intp(jh, jht, z_to, z_from, n_to, n_from, derivative)
Compute interpolation weights for points z_to using values at points z_from.
Definition fast3d.f90:244
Definition math.f90:60
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:289
Defines a matrix.
Definition matrix.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
pure logical function space_ne(xh, yh)
Check if .
Definition space.f90:442
pure logical function space_eq(xh, yh)
Check if .
Definition space.f90:425
integer, parameter, public gll
Definition space.f90:49
subroutine space_compute_dist(dx, x, lx)
Definition space.f90:457
integer, parameter, public gj
Definition space.f90:49
subroutine space_free(s)
Deallocate a space s.
Definition space.f90:323
subroutine space_init(s, t, lx, ly, lz)
Initialize a function space s with given polynomial dimensions.
Definition space.f90:149
integer, parameter, public gl
Definition space.f90:49
subroutine space_generate_transformation_matrices(xh)
Generate spectral tranform matrices.
Definition space.f90:474
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition speclib.f90:148
subroutine dgll(d, dt, z, nz, nzd)
Compute the derivative matrix D and its transpose DT associated with the Nth order Lagrangian interpo...
Definition speclib.f90:880
subroutine zwgll(z, w, np)
Generate NP Gauss-Lobatto Legendre points (Z) and weights (W) associated with Jacobi polynomial P(N)(...
Definition speclib.f90:179
subroutine legendre_poly(l, x, n)
Evaluate Legendre polynomials of degrees 0-N at point x and store in array L.
Definition speclib.f90:1003
subroutine zwgl(z, w, np)
Generate NP Gauss Legendre points Z and weights W associated with Jacobi polynomial ....
Definition speclib.f90:164
Tensor operations.
Definition tensor.f90:61
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:140
Utilities.
Definition utils.f90:35
The function space for the SEM solution fields.
Definition space.f90:63