Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
space.f90
Go to the documentation of this file.
1! Copyright (c) 2019-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module space
35 use neko_config
36 use num_types, only : rp
37 use speclib
38 use device
39 use utils, only : neko_error
40 use fast3d, only : setup_intp
41 use math
42 use tensor, only : trsp1
43 use mxm_wrapper, only: mxm
44 use, intrinsic :: iso_c_binding
45 implicit none
46 private
47
48 integer, public, parameter :: gl = 0, gll = 1, gj = 2
49
62 type, public :: space_t
63 integer :: t
64 integer :: lx
65 integer :: ly
66 integer :: lz
67 integer :: lxy
68 integer :: lyz
69 integer :: lxz
70 integer :: lxyz
71
72 real(kind=rp), allocatable :: zg(:,:)
73
74 real(kind=rp), allocatable :: dr_inv(:)
75 real(kind=rp), allocatable :: ds_inv(:)
76 real(kind=rp), allocatable :: dt_inv(:)
77
78 real(kind=rp), allocatable :: wx(:)
79 real(kind=rp), allocatable :: wy(:)
80 real(kind=rp), allocatable :: wz(:)
81
82 real(kind=rp), allocatable :: w3(:,:,:)
83
85 real(kind=rp), allocatable :: dx(:,:)
87 real(kind=rp), allocatable :: dy(:,:)
89 real(kind=rp), allocatable :: dz(:,:)
90
92 real(kind=rp), allocatable :: dxt(:,:)
94 real(kind=rp), allocatable :: dyt(:,:)
96 real(kind=rp), allocatable :: dzt(:,:)
97
99 real(kind=rp), allocatable :: v(:,:)
100 real(kind=rp), allocatable :: vt(:,:)
101 real(kind=rp), allocatable :: vinv(:,:)
102 real(kind=rp), allocatable :: vinvt(:,:)
104 real(kind=rp), allocatable :: w(:,:)
105
106 !
107 ! Device pointers (if present)
108 !
109 type(c_ptr) :: dr_inv_d = c_null_ptr
110 type(c_ptr) :: ds_inv_d = c_null_ptr
111 type(c_ptr) :: dt_inv_d = c_null_ptr
112 type(c_ptr) :: dxt_d = c_null_ptr
113 type(c_ptr) :: dyt_d = c_null_ptr
114 type(c_ptr) :: dzt_d = c_null_ptr
115 type(c_ptr) :: dx_d = c_null_ptr
116 type(c_ptr) :: dy_d = c_null_ptr
117 type(c_ptr) :: dz_d = c_null_ptr
118 type(c_ptr) :: wx_d = c_null_ptr
119 type(c_ptr) :: wy_d = c_null_ptr
120 type(c_ptr) :: wz_d = c_null_ptr
121 type(c_ptr) :: zg_d = c_null_ptr
122 type(c_ptr) :: w3_d = c_null_ptr
123 type(c_ptr) :: v_d = c_null_ptr
124 type(c_ptr) :: vt_d = c_null_ptr
125 type(c_ptr) :: vinv_d = c_null_ptr
126 type(c_ptr) :: vinvt_d = c_null_ptr
127 type(c_ptr) :: w_d = c_null_ptr
128 contains
129 procedure, pass(s) :: init => space_init
130 procedure, pass(s) :: free => space_free
131
132 end type space_t
133
134 interface operator(.eq.)
135 module procedure space_eq
136 end interface operator(.eq.)
137
138 interface operator(.ne.)
139 module procedure space_ne
140 end interface operator(.ne.)
141
142 public :: operator(.eq.), operator(.ne.)
143
144contains
145
147 subroutine space_init(s, t, lx, ly, lz)
148 class(space_t), intent(inout) :: s
149 integer, intent(in) :: t
150 integer, intent(in) :: lx
151 integer, intent(in) :: ly
152 integer, optional, intent(in) :: lz
153 integer :: ix, iy, iz
154
155 call space_free(s)
156
157 s%lx = lx
158 s%ly = ly
159 s%t = t
160 if (present(lz)) then
161 if (lz .ne. 1) then
162 s%lz = lz
163 if (lx .ne. ly .or. lx .ne. lz) then
164 call neko_error("Unsupported polynomial dimension")
165 end if
166 end if
167 else
168 if (lx .ne. ly) then
169 call neko_error("Unsupported polynomial dimension")
170 end if
171 s%lz = 1
172 end if
173 s%lxy = s%ly*s%lx
174 s%lyz = s%ly*s%lz
175 s%lxz = s%lx*s%lz
176 s%lxyz = s%lx*s%ly*s%lz
177
178 allocate(s%zg(lx, 3))
179
180 allocate(s%wx(s%lx))
181 allocate(s%wy(s%ly))
182 allocate(s%wz(s%lz))
183
184 allocate(s%dr_inv(s%lx))
185 allocate(s%ds_inv(s%ly))
186 allocate(s%dt_inv(s%lz))
187
188 allocate(s%w3(s%lx, s%ly, s%lz))
189
190 allocate(s%dx(s%lx, s%lx))
191 allocate(s%dy(s%ly, s%ly))
192 allocate(s%dz(s%lz, s%lz))
193
194 allocate(s%dxt(s%lx, s%lx))
195 allocate(s%dyt(s%ly, s%ly))
196 allocate(s%dzt(s%lz, s%lz))
197
198 allocate(s%v(s%lx, s%lx))
199 allocate(s%vt(s%lx, s%lx))
200 allocate(s%vinv(s%lx, s%lx))
201 allocate(s%vinvt(s%lx, s%lx))
202 allocate(s%w(s%lx, s%lx))
203
204 ! Call low-level routines to compute nodes and quadrature weights
205 if (t .eq. gll) then
206 call zwgll(s%zg(1,1), s%wx, s%lx)
207 call zwgll(s%zg(1,2), s%wy, s%ly)
208 if (s%lz .gt. 1) then
209 call zwgll(s%zg(1,3), s%wz, s%lz)
210 else
211 s%zg(:,3) = 0d0
212 s%wz = 1d0
213 end if
214 else if (t .eq. gl) then
215 call zwgl(s%zg(1,1), s%wx, s%lx)
216 call zwgl(s%zg(1,2), s%wy, s%ly)
217 if (s%lz .gt. 1) then
218 call zwgl(s%zg(1,3), s%wz, s%lz)
219 else
220 s%zg(:,3) = 0d0
221 s%wz = 1d0
222 end if
223 else
224 call neko_error("Invalid quadrature rule")
225 end if
226
227 do iz = 1, s%lz
228 do iy = 1, s%ly
229 do ix = 1, s%lx
230 s%w3(ix, iy, iz) = s%wx(ix) * s%wy(iy) * s%wz(iz)
231 end do
232 end do
233 end do
235 if (t .eq. gll) then
236 call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx)
237 call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly)
238 if (s%lz .gt. 1) then
239 call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz)
240 else
241 s%dz = 0d0
242 s%dzt = 0d0
243 end if
244 else if (t .eq. gl) then
245 call setup_intp(s%dx, s%dxt, s%zg(1,1), s%zg(1,1), s%lx, s%lx,1)
246 call setup_intp(s%dy, s%dyt, s%zg(1,2), s%zg(1,2), s%ly, s%ly,1)
247 if (s%lz .gt. 1) then
248 call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1)
249 else
250 s%dz = 0d0
251 s%dzt = 0d0
252 end if
253 else
254 call neko_error("Invalid quadrature rule")
255 end if
256
257 call space_compute_dist(s%dr_inv, s%zg(1,1), s%lx)
258 call space_compute_dist(s%ds_inv, s%zg(1,2), s%ly)
259 if (s%lz .gt. 1) then
260 call space_compute_dist(s%dt_inv, s%zg(1,3), s%lz)
261 else
262 s%dt_inv = 0d0
263 end if
264
265 if (neko_bcknd_device .eq. 1) then
266 call device_map(s%dr_inv, s%dr_inv_d, s%lx)
267 call device_map(s%ds_inv, s%ds_inv_d, s%lx)
268 call device_map(s%dt_inv, s%dt_inv_d, s%lx)
269 call device_map(s%wx, s%wx_d, s%lx)
270 call device_map(s%wy, s%wy_d, s%lx)
271 call device_map(s%wz, s%wz_d, s%lx)
272 call device_map(s%dx, s%dx_d, s%lxy)
273 call device_map(s%dy, s%dy_d, s%lxy)
274 call device_map(s%dz, s%dz_d, s%lxy)
275 call device_map(s%dxt, s%dxt_d, s%lxy)
276 call device_map(s%dyt, s%dyt_d, s%lxy)
277 call device_map(s%dzt, s%dzt_d, s%lxy)
278 call device_map(s%w3, s%w3_d, s%lxyz)
279 call device_map(s%v, s%v_d, s%lxy)
280 call device_map(s%vt, s%vt_d, s%lxy)
281 call device_map(s%vinv, s%vinv_d, s%lxy)
282 call device_map(s%vinvt, s%vinvt_d, s%lxy)
283 call device_map(s%w, s%w_d, s%lxy)
284
285 call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, host_to_device, sync=.false.)
286 call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, host_to_device, sync=.false.)
287 call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, host_to_device, sync=.false.)
288 call device_memcpy(s%wx, s%wx_d, s%lx, host_to_device, sync=.false.)
289 call device_memcpy(s%wy, s%wy_d, s%lx, host_to_device, sync=.false.)
290 call device_memcpy(s%wz, s%wz_d, s%lx, host_to_device, sync=.false.)
291 call device_memcpy(s%dx, s%dx_d, s%lxy, host_to_device, sync=.false.)
292 call device_memcpy(s%dy, s%dy_d, s%lxy, host_to_device, sync=.false.)
293 call device_memcpy(s%dz, s%dz_d, s%lxy, host_to_device, sync=.false.)
294 call device_memcpy(s%dxt, s%dxt_d, s%lxy, host_to_device, sync=.false.)
295 call device_memcpy(s%dyt, s%dyt_d, s%lxy, host_to_device, sync=.false.)
296 call device_memcpy(s%dzt, s%dzt_d, s%lxy, host_to_device, sync=.false.)
297 call device_memcpy(s%w3, s%w3_d, s%lxyz, host_to_device, sync=.false.)
298
299 ix = s%lx * 3
300 call device_map(s%zg, s%zg_d, ix)
301 call device_memcpy(s%zg, s%zg_d, ix, host_to_device, sync=.false.)
302 end if
303
305
306 end subroutine space_init
307
309 subroutine space_free(s)
310 class(space_t), intent(inout) :: s
311
312 if (allocated(s%zg)) then
313 deallocate(s%zg)
314 end if
315
316 if (allocated(s%wx)) then
317 deallocate(s%wx)
318 end if
319
320 if (allocated(s%wy)) then
321 deallocate(s%wy)
322 end if
323
324 if (allocated(s%wz)) then
325 deallocate(s%wz)
326 end if
327
328 if (allocated(s%w3)) then
329 deallocate(s%w3)
330 end if
331
332 if (allocated(s%dx)) then
333 deallocate(s%dx)
334 end if
335
336 if (allocated(s%dy)) then
337 deallocate(s%dy)
338 end if
339
340 if (allocated(s%dz)) then
341 deallocate(s%dz)
342 end if
343
344 if (allocated(s%dxt)) then
345 deallocate(s%dxt)
346 end if
347
348 if (allocated(s%dyt)) then
349 deallocate(s%dyt)
350 end if
351
352 if (allocated(s%dzt)) then
353 deallocate(s%dzt)
354 end if
355
356 if (allocated(s%dr_inv)) then
357 deallocate(s%dr_inv)
358 end if
359
360 if (allocated(s%ds_inv)) then
361 deallocate(s%ds_inv)
362 end if
363
364 if (allocated(s%dt_inv)) then
365 deallocate(s%dt_inv)
366 end if
367
368 if(allocated(s%v)) then
369 deallocate(s%v)
370 end if
371
372 if(allocated(s%vt)) then
373 deallocate(s%vt)
374 end if
375
376 if(allocated(s%vinv)) then
377 deallocate(s%vinv)
378 end if
379
380 if(allocated(s%vinvt)) then
381 deallocate(s%vinvt)
382 end if
383
384 if(allocated(s%w)) then
385 deallocate(s%w)
386 end if
387
388 !
389 ! Cleanup the device (if present)
390 !
391
392 if (c_associated(s%dr_inv_d)) then
393 call device_free(s%dr_inv_d)
394 end if
395
396 if (c_associated(s%ds_inv_d)) then
397 call device_free(s%ds_inv_d)
398 end if
399
400 if (c_associated(s%dt_inv_d)) then
401 call device_free(s%dt_inv_d)
402 end if
403
404 if (c_associated(s%dxt_d)) then
405 call device_free(s%dxt_d)
406 end if
407
408 if (c_associated(s%dyt_d)) then
409 call device_free(s%dyt_d)
410 end if
411
412 if (c_associated(s%dzt_d)) then
413 call device_free(s%dzt_d)
414 end if
415
416 if (c_associated(s%dx_d)) then
417 call device_free(s%dx_d)
418 end if
419
420 if (c_associated(s%dy_d)) then
421 call device_free(s%dy_d)
422 end if
423
424 if (c_associated(s%dz_d)) then
425 call device_free(s%dz_d)
426 end if
427
428 if (c_associated(s%wx_d)) then
429 call device_free(s%wx_d)
430 end if
431
432 if (c_associated(s%wy_d)) then
433 call device_free(s%wy_d)
434 end if
435
436 if (c_associated(s%wz_d)) then
437 call device_free(s%wz_d)
438 end if
439
440 if (c_associated(s%w3_d)) then
441 call device_free(s%w3_d)
442 end if
443
444 if (c_associated(s%zg_d)) then
445 call device_free(s%zg_d)
446 end if
447
448 if (c_associated(s%v_d)) then
449 call device_free(s%v_d)
450 end if
451
452 if (c_associated(s%vt_d)) then
453 call device_free(s%vt_d)
454 end if
455
456 if (c_associated(s%vinv_d)) then
457 call device_free(s%vinv_d)
458 end if
459
460 if (c_associated(s%vinvt_d)) then
461 call device_free(s%vinvt_d)
462 end if
463
464 if (c_associated(s%w_d)) then
465 call device_free(s%w_d)
466 end if
467
468 end subroutine space_free
469
472 pure function space_eq(Xh, Yh) result(res)
473 type(space_t), intent(in) :: xh
474 type(space_t), intent(in) :: yh
475 logical :: res
476
477 if ( (xh%lx .eq. yh%lx) .and. &
478 (xh%ly .eq. yh%ly) .and. &
479 (xh%lz .eq. yh%lz) ) then
480 res = .true.
481 else
482 res = .false.
483 end if
484
485 end function space_eq
486
489 pure function space_ne(Xh, Yh) result(res)
490 type(space_t), intent(in) :: xh
491 type(space_t), intent(in) :: yh
492 logical :: res
493
494 if ( (xh%lx .eq. yh%lx) .and. &
495 (xh%ly .eq. yh%ly) .and. &
496 (xh%lz .eq. yh%lz) ) then
497 res = .false.
498 else
499 res = .true.
500 end if
501
502 end function space_ne
503
504 subroutine space_compute_dist(dx, x, lx)
505 integer, intent(in) :: lx
506 real(kind=rp), intent(inout) :: dx(lx), x(lx)
507 integer :: i
508 dx(1) = x(2) - x(1)
509 do i = 2, lx - 1
510 dx(i) = 0.5*(x(i+1) - x(i-1))
511 enddo
512 dx(lx) = x(lx) - x(lx-1)
513 do i = 1, lx
514 dx(i) = 1.0_rp / dx(i)
515 end do
516 end subroutine space_compute_dist
517
518
522 type(space_t), intent(inout) :: Xh
523
524 real(kind=rp) :: l(0:xh%lx-1)
525 real(kind=rp) :: delta(xh%lx)
526 integer :: i, kj, j, j2, kk
527
528 associate(v=> xh%v, vt => xh%vt, &
529 vinv => xh%vinv, vinvt => xh%vinvt, w => xh%w)
530 ! Get the Legendre polynomials for each point
531 ! Then proceed to compose the transform matrix
532 kj = 0
533 do j = 1, xh%lx
534 l(0) = 1.
535 l(1) = xh%zg(j,1)
536 do j2 = 2, xh%lx-1
537 l(j2) = ( (2*j2-1) * xh%zg(j,1) * l(j2-1) &
538 - (j2-1) * l(j2-2) ) / j2
539 end do
540 do kk = 1, xh%lx
541 kj = kj+1
542 v(kj,1) = l(kk-1)
543 end do
544 end do
545
546 ! transpose the matrix
547 call trsp1(v, xh%lx)
548
549 ! Calculate the nominal scaling factors
550 do i = 1, xh%lx
551 delta(i) = 2.0_rp / (2*(i-1)+1)
552 end do
553 ! modify last entry
554 delta(xh%lx) = 2.0_rp / (xh%lx-1)
555
556 ! calculate the inverse to multiply the matrix
557 do i = 1, xh%lx
558 delta(i) = sqrt(1.0_rp / delta(i))
559 end do
560 ! scale the matrix
561 do i = 1, xh%lx
562 do j = 1, xh%lx
563 v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights
564 end do
565 end do
566
567 ! get the trasposed
568 call copy(vt, v, xh%lx * xh%lx)
569 call trsp1(vt, xh%lx)
570
571 !populate the mass matrix
572 kk = 1
573 do i = 1, xh%lx
574 do j = 1, xh%lx
575 if (i .eq. j) then
576 w(i,j) = xh%wx(kk)
577 kk = kk+1
578 else
579 w(i,j) = 0
580 end if
581 end do
582 end do
583
584 !Get the inverse of the transform matrix
585 call mxm(vt, xh%lx, w, xh%lx, vinv, xh%lx)
586
587 !get the transposed of the inverse
588 call copy(vinvt, vinv, xh%lx * xh%lx)
589 call trsp1(vinvt, xh%lx)
590 end associate
591
592 ! Copy the data to the GPU
593 ! Move all this to space.f90 to for next version
594 if ((neko_bcknd_hip .eq. 1) .or. (neko_bcknd_cuda .eq. 1) .or. &
595 (neko_bcknd_opencl .eq. 1)) then
596
597 call device_memcpy(xh%v, xh%v_d, xh%lxy, &
598 host_to_device, sync=.false.)
599 call device_memcpy(xh%vt, xh%vt_d, xh%lxy, &
600 host_to_device, sync=.false.)
601 call device_memcpy(xh%vinv, xh%vinv_d, xh%lxy, &
602 host_to_device, sync=.false.)
603 call device_memcpy(xh%vinvt, xh%vinvt_d, xh%lxy, &
604 host_to_device, sync=.false.)
605 call device_memcpy(xh%w, xh%w_d, xh%lxy, &
606 host_to_device, sync=.false.)
607
608 end if
609
611
612end module space
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61
subroutine, public setup_intp(jh, jht, z_to, z_from, n_to, n_from, derivative)
Compute interpolation weights for points z_to using values at points z_from.
Definition fast3d.f90:243
Definition math.f90:60
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_hip
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
integer, parameter neko_bcknd_cuda
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
pure logical function space_ne(xh, yh)
Check if .
Definition space.f90:490
pure logical function space_eq(xh, yh)
Check if .
Definition space.f90:473
integer, parameter, public gll
Definition space.f90:48
subroutine space_compute_dist(dx, x, lx)
Definition space.f90:505
integer, parameter, public gj
Definition space.f90:48
subroutine space_free(s)
Deallocate a space s.
Definition space.f90:310
subroutine space_init(s, t, lx, ly, lz)
Initialize a function space s with given polynomial dimensions.
Definition space.f90:148
integer, parameter, public gl
Definition space.f90:48
subroutine space_generate_transformation_matrices(xh)
Generate spectral tranform matrices.
Definition space.f90:522
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition speclib.f90:148
subroutine dgll(d, dt, z, nz, nzd)
Definition speclib.f90:865
subroutine zwgll(z, w, np)
Definition speclib.f90:169
subroutine zwgl(z, w, np)
Generate NP Gauss Legendre points Z and weights W associated with Jacobi polynomial ....
Definition speclib.f90:161
Tensor operations.
Definition tensor.f90:61
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:137
Utilities.
Definition utils.f90:35
The function space for the SEM solution fields.
Definition space.f90:62