Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
space.f90
Go to the documentation of this file.
1! Copyright (c) 2019-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module space
35 use neko_config
36 use num_types, only : rp
37 use speclib, only : zwgll, zwgl, dgll, legendre_poly
38 use device
39 use matrix, only : matrix_t
40 use utils, only : neko_error
41 use fast3d, only : setup_intp
42 use tensor, only : trsp1
43 use mxm_wrapper, only: mxm
44 use math, only : copy
45 use, intrinsic :: iso_c_binding
46 implicit none
47 private
48
49 integer, public, parameter :: gl = 0, gll = 1, gj = 2
50
63 type, public :: space_t
64 integer :: t
65 integer :: lx
66 integer :: ly
67 integer :: lz
68 integer :: lxy
69 integer :: lyz
70 integer :: lxz
71 integer :: lxyz
72
73 real(kind=rp), allocatable :: zg(:,:)
74
75 real(kind=rp), allocatable :: dr_inv(:)
76 real(kind=rp), allocatable :: ds_inv(:)
77 real(kind=rp), allocatable :: dt_inv(:)
78
79 real(kind=rp), allocatable :: wx(:)
80 real(kind=rp), allocatable :: wy(:)
81 real(kind=rp), allocatable :: wz(:)
82
83 real(kind=rp), allocatable :: w3(:,:,:)
84
86 real(kind=rp), allocatable :: dx(:,:)
88 real(kind=rp), allocatable :: dy(:,:)
90 real(kind=rp), allocatable :: dz(:,:)
91
93 real(kind=rp), allocatable :: dxt(:,:)
95 real(kind=rp), allocatable :: dyt(:,:)
97 real(kind=rp), allocatable :: dzt(:,:)
98
100 real(kind=rp), allocatable :: v(:,:)
101 real(kind=rp), allocatable :: vt(:,:)
102 real(kind=rp), allocatable :: vinv(:,:)
103 real(kind=rp), allocatable :: vinvt(:,:)
105 real(kind=rp), allocatable :: w(:,:)
106
107 !
108 ! Device pointers (if present)
109 !
110 type(c_ptr) :: dr_inv_d = c_null_ptr
111 type(c_ptr) :: ds_inv_d = c_null_ptr
112 type(c_ptr) :: dt_inv_d = c_null_ptr
113 type(c_ptr) :: dxt_d = c_null_ptr
114 type(c_ptr) :: dyt_d = c_null_ptr
115 type(c_ptr) :: dzt_d = c_null_ptr
116 type(c_ptr) :: dx_d = c_null_ptr
117 type(c_ptr) :: dy_d = c_null_ptr
118 type(c_ptr) :: dz_d = c_null_ptr
119 type(c_ptr) :: wx_d = c_null_ptr
120 type(c_ptr) :: wy_d = c_null_ptr
121 type(c_ptr) :: wz_d = c_null_ptr
122 type(c_ptr) :: zg_d = c_null_ptr
123 type(c_ptr) :: w3_d = c_null_ptr
124 type(c_ptr) :: v_d = c_null_ptr
125 type(c_ptr) :: vt_d = c_null_ptr
126 type(c_ptr) :: vinv_d = c_null_ptr
127 type(c_ptr) :: vinvt_d = c_null_ptr
128 type(c_ptr) :: w_d = c_null_ptr
129 contains
130 procedure, pass(s) :: init => space_init
131 procedure, pass(s) :: free => space_free
132
133 end type space_t
134
135 interface operator(.eq.)
136 module procedure space_eq
137 end interface operator(.eq.)
138
139 interface operator(.ne.)
140 module procedure space_ne
141 end interface operator(.ne.)
142
143 public :: operator(.eq.), operator(.ne.)
144
145contains
146
148 subroutine space_init(s, t, lx, ly, lz)
149 class(space_t), intent(inout) :: s
150 integer, intent(in) :: t
151 integer, intent(in) :: lx
152 integer, intent(in) :: ly
153 integer, optional, intent(in) :: lz
154 integer :: ix, iy, iz
155
156 call space_free(s)
157
158 s%lx = lx
159 s%ly = ly
160 s%t = t
161 if (present(lz)) then
162 if (lz .ne. 1) then
163 s%lz = lz
164 if (lx .ne. ly .or. lx .ne. lz) then
165 call neko_error("Unsupported polynomial dimension")
166 end if
167 end if
168 else
169 if (lx .ne. ly) then
170 call neko_error("Unsupported polynomial dimension")
171 end if
172 s%lz = 1
173 end if
174 s%lxy = s%ly*s%lx
175 s%lyz = s%ly*s%lz
176 s%lxz = s%lx*s%lz
177 s%lxyz = s%lx*s%ly*s%lz
178
179 allocate(s%zg(lx, 3))
180
181 allocate(s%wx(s%lx))
182 allocate(s%wy(s%ly))
183 allocate(s%wz(s%lz))
184
185 allocate(s%dr_inv(s%lx))
186 allocate(s%ds_inv(s%ly))
187 allocate(s%dt_inv(s%lz))
188
189 allocate(s%w3(s%lx, s%ly, s%lz))
190
191 allocate(s%dx(s%lx, s%lx))
192 allocate(s%dy(s%ly, s%ly))
193 allocate(s%dz(s%lz, s%lz))
194
195 allocate(s%dxt(s%lx, s%lx))
196 allocate(s%dyt(s%ly, s%ly))
197 allocate(s%dzt(s%lz, s%lz))
198
199 allocate(s%v(s%lx, s%lx))
200 allocate(s%vt(s%lx, s%lx))
201 allocate(s%vinv(s%lx, s%lx))
202 allocate(s%vinvt(s%lx, s%lx))
203 allocate(s%w(s%lx, s%lx))
204
205 ! Call low-level routines to compute nodes and quadrature weights
206 if (t .eq. gll) then
207 call zwgll(s%zg(1,1), s%wx, s%lx)
208 call zwgll(s%zg(1,2), s%wy, s%ly)
209 if (s%lz .gt. 1) then
210 call zwgll(s%zg(1,3), s%wz, s%lz)
211 else
212 s%zg(:,3) = 0d0
213 s%wz = 1d0
214 end if
215 else if (t .eq. gl) then
216 call zwgl(s%zg(1,1), s%wx, s%lx)
217 call zwgl(s%zg(1,2), s%wy, s%ly)
218 if (s%lz .gt. 1) then
219 call zwgl(s%zg(1,3), s%wz, s%lz)
220 else
221 s%zg(:,3) = 0d0
222 s%wz = 1d0
223 end if
224 else
225 call neko_error("Invalid quadrature rule")
226 end if
227
228 do iz = 1, s%lz
229 do iy = 1, s%ly
230 do ix = 1, s%lx
231 s%w3(ix, iy, iz) = s%wx(ix) * s%wy(iy) * s%wz(iz)
232 end do
233 end do
234 end do
236 if (t .eq. gll) then
237 call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx)
238 call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly)
239 if (s%lz .gt. 1) then
240 call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz)
241 else
242 s%dz = 0d0
243 s%dzt = 0d0
244 end if
245 else if (t .eq. gl) then
246 call setup_intp(s%dx, s%dxt, s%zg(1,1), s%zg(1,1), s%lx, s%lx,1)
247 call setup_intp(s%dy, s%dyt, s%zg(1,2), s%zg(1,2), s%ly, s%ly,1)
248 if (s%lz .gt. 1) then
249 call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1)
250 else
251 s%dz = 0d0
252 s%dzt = 0d0
253 end if
254 else
255 call neko_error("Invalid quadrature rule")
256 end if
257
258 call space_compute_dist(s%dr_inv, s%zg(1,1), s%lx)
259 call space_compute_dist(s%ds_inv, s%zg(1,2), s%ly)
260 if (s%lz .gt. 1) then
261 call space_compute_dist(s%dt_inv, s%zg(1,3), s%lz)
262 else
263 s%dt_inv = 0d0
264 end if
266
267 if (neko_bcknd_device .eq. 1) then
268 call device_map(s%dr_inv, s%dr_inv_d, s%lx)
269 call device_map(s%ds_inv, s%ds_inv_d, s%lx)
270 call device_map(s%dt_inv, s%dt_inv_d, s%lx)
271 call device_map(s%wx, s%wx_d, s%lx)
272 call device_map(s%wy, s%wy_d, s%lx)
273 call device_map(s%wz, s%wz_d, s%lx)
274 call device_map(s%dx, s%dx_d, s%lxy)
275 call device_map(s%dy, s%dy_d, s%lxy)
276 call device_map(s%dz, s%dz_d, s%lxy)
277 call device_map(s%dxt, s%dxt_d, s%lxy)
278 call device_map(s%dyt, s%dyt_d, s%lxy)
279 call device_map(s%dzt, s%dzt_d, s%lxy)
280 call device_map(s%w3, s%w3_d, s%lxyz)
281 call device_map(s%v, s%v_d, s%lxy)
282 call device_map(s%vt, s%vt_d, s%lxy)
283 call device_map(s%vinv, s%vinv_d, s%lxy)
284 call device_map(s%vinvt, s%vinvt_d, s%lxy)
285 call device_map(s%w, s%w_d, s%lxy)
286
287 call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, host_to_device, sync=.false.)
288 call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, host_to_device, sync=.false.)
289 call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, host_to_device, sync=.false.)
290 call device_memcpy(s%wx, s%wx_d, s%lx, host_to_device, sync=.false.)
291 call device_memcpy(s%wy, s%wy_d, s%lx, host_to_device, sync=.false.)
292 call device_memcpy(s%wz, s%wz_d, s%lx, host_to_device, sync=.false.)
293 call device_memcpy(s%dx, s%dx_d, s%lxy, host_to_device, sync=.false.)
294 call device_memcpy(s%dy, s%dy_d, s%lxy, host_to_device, sync=.false.)
295 call device_memcpy(s%dz, s%dz_d, s%lxy, host_to_device, sync=.false.)
296 call device_memcpy(s%dxt, s%dxt_d, s%lxy, host_to_device, sync=.false.)
297 call device_memcpy(s%dyt, s%dyt_d, s%lxy, host_to_device, sync=.false.)
298 call device_memcpy(s%dzt, s%dzt_d, s%lxy, host_to_device, sync=.false.)
299 call device_memcpy(s%w3, s%w3_d, s%lxyz, host_to_device, sync=.false.)
300 call device_memcpy(s%v, s%v_d, s%lxy, host_to_device, sync = .false.)
301 call device_memcpy(s%vt, s%vt_d, s%lxy, host_to_device, sync = .false.)
302 call device_memcpy(s%vinv, s%vinv_d, s%lxy, host_to_device, &
303 sync = .false.)
304 call device_memcpy(s%vinvt, s%vinvt_d, s%lxy, host_to_device, &
305 sync = .false.)
306 call device_memcpy(s%w, s%w_d, s%lxy, host_to_device, sync = .false.)
307
308 ix = s%lx * 3
309 call device_map(s%zg, s%zg_d, ix)
310 call device_memcpy(s%zg, s%zg_d, ix, host_to_device, sync=.true.)
311 end if
312
313
314 call device_sync()
315
316 end subroutine space_init
317
319 subroutine space_free(s)
320 class(space_t), intent(inout) :: s
321
322 if (allocated(s%zg)) then
323 deallocate(s%zg)
324 end if
325
326 if (allocated(s%wx)) then
327 deallocate(s%wx)
328 end if
329
330 if (allocated(s%wy)) then
331 deallocate(s%wy)
332 end if
333
334 if (allocated(s%wz)) then
335 deallocate(s%wz)
336 end if
337
338 if (allocated(s%w3)) then
339 deallocate(s%w3)
340 end if
341
342 if (allocated(s%dx)) then
343 deallocate(s%dx)
344 end if
345
346 if (allocated(s%dy)) then
347 deallocate(s%dy)
348 end if
349
350 if (allocated(s%dz)) then
351 deallocate(s%dz)
352 end if
353
354 if (allocated(s%dxt)) then
355 deallocate(s%dxt)
356 end if
357
358 if (allocated(s%dyt)) then
359 deallocate(s%dyt)
360 end if
361
362 if (allocated(s%dzt)) then
363 deallocate(s%dzt)
364 end if
365
366 if (allocated(s%dr_inv)) then
367 deallocate(s%dr_inv)
368 end if
369
370 if (allocated(s%ds_inv)) then
371 deallocate(s%ds_inv)
372 end if
373
374 if (allocated(s%dt_inv)) then
375 deallocate(s%dt_inv)
376 end if
377
378 if(allocated(s%v)) then
379 deallocate(s%v)
380 end if
381
382 if(allocated(s%vt)) then
383 deallocate(s%vt)
384 end if
385
386 if(allocated(s%vinv)) then
387 deallocate(s%vinv)
388 end if
389
390 if(allocated(s%vinvt)) then
391 deallocate(s%vinvt)
392 end if
393
394 if(allocated(s%w)) then
395 deallocate(s%w)
396 end if
397
398 !
399 ! Cleanup the device (if present)
400 !
401
402 if (c_associated(s%dr_inv_d)) then
403 call device_free(s%dr_inv_d)
404 end if
405
406 if (c_associated(s%ds_inv_d)) then
407 call device_free(s%ds_inv_d)
408 end if
409
410 if (c_associated(s%dt_inv_d)) then
411 call device_free(s%dt_inv_d)
412 end if
413
414 if (c_associated(s%dxt_d)) then
415 call device_free(s%dxt_d)
416 end if
417
418 if (c_associated(s%dyt_d)) then
419 call device_free(s%dyt_d)
420 end if
421
422 if (c_associated(s%dzt_d)) then
423 call device_free(s%dzt_d)
424 end if
425
426 if (c_associated(s%dx_d)) then
427 call device_free(s%dx_d)
428 end if
429
430 if (c_associated(s%dy_d)) then
431 call device_free(s%dy_d)
432 end if
433
434 if (c_associated(s%dz_d)) then
435 call device_free(s%dz_d)
436 end if
437
438 if (c_associated(s%wx_d)) then
439 call device_free(s%wx_d)
440 end if
441
442 if (c_associated(s%wy_d)) then
443 call device_free(s%wy_d)
444 end if
445
446 if (c_associated(s%wz_d)) then
447 call device_free(s%wz_d)
448 end if
449
450 if (c_associated(s%w3_d)) then
451 call device_free(s%w3_d)
452 end if
453
454 if (c_associated(s%zg_d)) then
455 call device_free(s%zg_d)
456 end if
457
458 if (c_associated(s%v_d)) then
459 call device_free(s%v_d)
460 end if
461
462 if (c_associated(s%vt_d)) then
463 call device_free(s%vt_d)
464 end if
465
466 if (c_associated(s%vinv_d)) then
467 call device_free(s%vinv_d)
468 end if
469
470 if (c_associated(s%vinvt_d)) then
471 call device_free(s%vinvt_d)
472 end if
473
474 if (c_associated(s%w_d)) then
475 call device_free(s%w_d)
476 end if
477
478 end subroutine space_free
479
482 pure function space_eq(Xh, Yh) result(res)
483 type(space_t), intent(in) :: xh
484 type(space_t), intent(in) :: yh
485 logical :: res
486
487 if ( (xh%lx .eq. yh%lx) .and. &
488 (xh%ly .eq. yh%ly) .and. &
489 (xh%lz .eq. yh%lz) ) then
490 res = .true.
491 else
492 res = .false.
493 end if
494
495 end function space_eq
496
499 pure function space_ne(Xh, Yh) result(res)
500 type(space_t), intent(in) :: xh
501 type(space_t), intent(in) :: yh
502 logical :: res
503
504 if ( (xh%lx .eq. yh%lx) .and. &
505 (xh%ly .eq. yh%ly) .and. &
506 (xh%lz .eq. yh%lz) ) then
507 res = .false.
508 else
509 res = .true.
510 end if
511
512 end function space_ne
513
514 subroutine space_compute_dist(dx, x, lx)
515 integer, intent(in) :: lx
516 real(kind=rp), intent(inout) :: dx(lx), x(lx)
517 integer :: i
518 dx(1) = x(2) - x(1)
519 do i = 2, lx - 1
520 dx(i) = 0.5*(x(i+1) - x(i-1))
521 enddo
522 dx(lx) = x(lx) - x(lx-1)
523 do i = 1, lx
524 dx(i) = 1.0_rp / dx(i)
525 end do
526 end subroutine space_compute_dist
527
528
532 type(space_t), intent(inout) :: Xh
533
534 real(kind=rp) :: l(0:xh%lx-1)
535 real(kind=rp) :: delta(xh%lx)
536 integer :: i, kj, j, j2, kk
537 type(matrix_t) :: m
538 logical :: scaled = .false.
539
540 associate(v=> xh%v, vt => xh%vt, &
541 vinv => xh%vinv, vinvt => xh%vinvt, w => xh%w)
542 ! Get the Legendre polynomials for each point
543 ! Then proceed to compose the transform matrix
544 kj = 0
545 do j = 1, xh%lx
546 call legendre_poly(l, xh%zg(j, 1), xh%lx - 1)
547 do kk = 1, xh%lx
548 kj = kj+1
549 v(kj,1) = l(kk-1)
550 end do
551 end do
552
553 ! transpose the matrix
554 call trsp1(v, xh%lx)
555
556 if (scaled) then
557
558 ! Calculate the nominal scaling factors
559 do i = 1, xh%lx
560 delta(i) = 2.0_rp / (2*(i-1)+1)
561 end do
562 ! modify last entry
563 delta(xh%lx) = 2.0_rp / (xh%lx-1)
564
565 ! calculate the inverse to multiply the matrix
566 do i = 1, xh%lx
567 delta(i) = sqrt(1.0_rp / delta(i))
568 end do
569 ! scale the matrix
570 do i = 1, xh%lx
571 do j = 1, xh%lx
572 v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights
573 end do
574 end do
575
576 ! get the trasposed
577 call copy(vt, v, xh%lx * xh%lx)
578 call trsp1(vt, xh%lx)
579
580 !populate the mass matrix
581 kk = 1
582 do i = 1, xh%lx
583 do j = 1, xh%lx
584 if (i .eq. j) then
585 w(i,j) = xh%wx(kk)
586 kk = kk+1
587 else
588 w(i,j) = 0
589 end if
590 end do
591 end do
592
593 !Get the inverse of the transform matrix
594 call mxm(vt, xh%lx, w, xh%lx, vinv, xh%lx)
595
596 !get the transposed of the inverse
597 call copy(vinvt, vinv, xh%lx * xh%lx)
598 call trsp1(vinvt, xh%lx)
599 else
600 call copy(vt, v, xh%lxy)
601 call trsp1(vt, xh%lx)
602 call m%init(xh%lx, xh%lx)
603 call copy(m%x, v, xh%lxy)
604 call m%inverse_on_host()
605 call copy(vinv, m%x, xh%lxy)
606 call copy(vinvt, vinv, xh%lx * xh%lx)
607 call trsp1(vinvt, xh%lx)
608 call m%free()
609 end if
610 end associate
611
613
614end module space
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Synchronize a device or stream.
Definition device.F90:107
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61
subroutine, public setup_intp(jh, jht, z_to, z_from, n_to, n_from, derivative)
Compute interpolation weights for points z_to using values at points z_from.
Definition fast3d.f90:244
Definition math.f90:60
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
Defines a matrix.
Definition matrix.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
pure logical function space_ne(xh, yh)
Check if .
Definition space.f90:500
pure logical function space_eq(xh, yh)
Check if .
Definition space.f90:483
integer, parameter, public gll
Definition space.f90:49
subroutine space_compute_dist(dx, x, lx)
Definition space.f90:515
integer, parameter, public gj
Definition space.f90:49
subroutine space_free(s)
Deallocate a space s.
Definition space.f90:320
subroutine space_init(s, t, lx, ly, lz)
Initialize a function space s with given polynomial dimensions.
Definition space.f90:149
integer, parameter, public gl
Definition space.f90:49
subroutine space_generate_transformation_matrices(xh)
Generate spectral tranform matrices.
Definition space.f90:532
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition speclib.f90:148
subroutine dgll(d, dt, z, nz, nzd)
Compute the derivative matrix D and its transpose DT associated with the Nth order Lagrangian interpo...
Definition speclib.f90:880
subroutine zwgll(z, w, np)
Generate NP Gauss-Lobatto Legendre points (Z) and weights (W) associated with Jacobi polynomial P(N)(...
Definition speclib.f90:179
subroutine legendre_poly(l, x, n)
Evaluate Legendre polynomials of degrees 0-N at point x and store in array L.
Definition speclib.f90:1003
subroutine zwgl(z, w, np)
Generate NP Gauss Legendre points Z and weights W associated with Jacobi polynomial ....
Definition speclib.f90:164
Tensor operations.
Definition tensor.f90:61
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:140
Utilities.
Definition utils.f90:35
The function space for the SEM solution fields.
Definition space.f90:63