Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
space.f90
Go to the documentation of this file.
1! Copyright (c) 2019-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module space
35 use neko_config
36 use num_types, only : rp
37 use speclib, only : zwgll, zwgl, dgll
38 use device
39 use utils, only : neko_error
40 use fast3d, only : setup_intp
41 use tensor, only : trsp1
42 use mxm_wrapper, only: mxm
43 use math, only : copy
44 use, intrinsic :: iso_c_binding
45 implicit none
46 private
47
48 integer, public, parameter :: gl = 0, gll = 1, gj = 2
49
62 type, public :: space_t
63 integer :: t
64 integer :: lx
65 integer :: ly
66 integer :: lz
67 integer :: lxy
68 integer :: lyz
69 integer :: lxz
70 integer :: lxyz
71
72 real(kind=rp), allocatable :: zg(:,:)
73
74 real(kind=rp), allocatable :: dr_inv(:)
75 real(kind=rp), allocatable :: ds_inv(:)
76 real(kind=rp), allocatable :: dt_inv(:)
77
78 real(kind=rp), allocatable :: wx(:)
79 real(kind=rp), allocatable :: wy(:)
80 real(kind=rp), allocatable :: wz(:)
81
82 real(kind=rp), allocatable :: w3(:,:,:)
83
85 real(kind=rp), allocatable :: dx(:,:)
87 real(kind=rp), allocatable :: dy(:,:)
89 real(kind=rp), allocatable :: dz(:,:)
90
92 real(kind=rp), allocatable :: dxt(:,:)
94 real(kind=rp), allocatable :: dyt(:,:)
96 real(kind=rp), allocatable :: dzt(:,:)
97
99 real(kind=rp), allocatable :: v(:,:)
100 real(kind=rp), allocatable :: vt(:,:)
101 real(kind=rp), allocatable :: vinv(:,:)
102 real(kind=rp), allocatable :: vinvt(:,:)
104 real(kind=rp), allocatable :: w(:,:)
105
106 !
107 ! Device pointers (if present)
108 !
109 type(c_ptr) :: dr_inv_d = c_null_ptr
110 type(c_ptr) :: ds_inv_d = c_null_ptr
111 type(c_ptr) :: dt_inv_d = c_null_ptr
112 type(c_ptr) :: dxt_d = c_null_ptr
113 type(c_ptr) :: dyt_d = c_null_ptr
114 type(c_ptr) :: dzt_d = c_null_ptr
115 type(c_ptr) :: dx_d = c_null_ptr
116 type(c_ptr) :: dy_d = c_null_ptr
117 type(c_ptr) :: dz_d = c_null_ptr
118 type(c_ptr) :: wx_d = c_null_ptr
119 type(c_ptr) :: wy_d = c_null_ptr
120 type(c_ptr) :: wz_d = c_null_ptr
121 type(c_ptr) :: zg_d = c_null_ptr
122 type(c_ptr) :: w3_d = c_null_ptr
123 type(c_ptr) :: v_d = c_null_ptr
124 type(c_ptr) :: vt_d = c_null_ptr
125 type(c_ptr) :: vinv_d = c_null_ptr
126 type(c_ptr) :: vinvt_d = c_null_ptr
127 type(c_ptr) :: w_d = c_null_ptr
128 contains
129 procedure, pass(s) :: init => space_init
130 procedure, pass(s) :: free => space_free
131
132 end type space_t
133
134 interface operator(.eq.)
135 module procedure space_eq
136 end interface operator(.eq.)
137
138 interface operator(.ne.)
139 module procedure space_ne
140 end interface operator(.ne.)
141
142 public :: operator(.eq.), operator(.ne.)
143
144contains
145
147 subroutine space_init(s, t, lx, ly, lz)
148 class(space_t), intent(inout) :: s
149 integer, intent(in) :: t
150 integer, intent(in) :: lx
151 integer, intent(in) :: ly
152 integer, optional, intent(in) :: lz
153 integer :: ix, iy, iz
154
155 call space_free(s)
156
157 s%lx = lx
158 s%ly = ly
159 s%t = t
160 if (present(lz)) then
161 if (lz .ne. 1) then
162 s%lz = lz
163 if (lx .ne. ly .or. lx .ne. lz) then
164 call neko_error("Unsupported polynomial dimension")
165 end if
166 end if
167 else
168 if (lx .ne. ly) then
169 call neko_error("Unsupported polynomial dimension")
170 end if
171 s%lz = 1
172 end if
173 s%lxy = s%ly*s%lx
174 s%lyz = s%ly*s%lz
175 s%lxz = s%lx*s%lz
176 s%lxyz = s%lx*s%ly*s%lz
177
178 allocate(s%zg(lx, 3))
179
180 allocate(s%wx(s%lx))
181 allocate(s%wy(s%ly))
182 allocate(s%wz(s%lz))
183
184 allocate(s%dr_inv(s%lx))
185 allocate(s%ds_inv(s%ly))
186 allocate(s%dt_inv(s%lz))
187
188 allocate(s%w3(s%lx, s%ly, s%lz))
189
190 allocate(s%dx(s%lx, s%lx))
191 allocate(s%dy(s%ly, s%ly))
192 allocate(s%dz(s%lz, s%lz))
193
194 allocate(s%dxt(s%lx, s%lx))
195 allocate(s%dyt(s%ly, s%ly))
196 allocate(s%dzt(s%lz, s%lz))
197
198 allocate(s%v(s%lx, s%lx))
199 allocate(s%vt(s%lx, s%lx))
200 allocate(s%vinv(s%lx, s%lx))
201 allocate(s%vinvt(s%lx, s%lx))
202 allocate(s%w(s%lx, s%lx))
203
204 ! Call low-level routines to compute nodes and quadrature weights
205 if (t .eq. gll) then
206 call zwgll(s%zg(1,1), s%wx, s%lx)
207 call zwgll(s%zg(1,2), s%wy, s%ly)
208 if (s%lz .gt. 1) then
209 call zwgll(s%zg(1,3), s%wz, s%lz)
210 else
211 s%zg(:,3) = 0d0
212 s%wz = 1d0
213 end if
214 else if (t .eq. gl) then
215 call zwgl(s%zg(1,1), s%wx, s%lx)
216 call zwgl(s%zg(1,2), s%wy, s%ly)
217 if (s%lz .gt. 1) then
218 call zwgl(s%zg(1,3), s%wz, s%lz)
219 else
220 s%zg(:,3) = 0d0
221 s%wz = 1d0
222 end if
223 else
224 call neko_error("Invalid quadrature rule")
225 end if
226
227 do iz = 1, s%lz
228 do iy = 1, s%ly
229 do ix = 1, s%lx
230 s%w3(ix, iy, iz) = s%wx(ix) * s%wy(iy) * s%wz(iz)
231 end do
232 end do
233 end do
235 if (t .eq. gll) then
236 call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx)
237 call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly)
238 if (s%lz .gt. 1) then
239 call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz)
240 else
241 s%dz = 0d0
242 s%dzt = 0d0
243 end if
244 else if (t .eq. gl) then
245 call setup_intp(s%dx, s%dxt, s%zg(1,1), s%zg(1,1), s%lx, s%lx,1)
246 call setup_intp(s%dy, s%dyt, s%zg(1,2), s%zg(1,2), s%ly, s%ly,1)
247 if (s%lz .gt. 1) then
248 call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1)
249 else
250 s%dz = 0d0
251 s%dzt = 0d0
252 end if
253 else
254 call neko_error("Invalid quadrature rule")
255 end if
256
257 call space_compute_dist(s%dr_inv, s%zg(1,1), s%lx)
258 call space_compute_dist(s%ds_inv, s%zg(1,2), s%ly)
259 if (s%lz .gt. 1) then
260 call space_compute_dist(s%dt_inv, s%zg(1,3), s%lz)
261 else
262 s%dt_inv = 0d0
263 end if
264
265 if (neko_bcknd_device .eq. 1) then
266 call device_map(s%dr_inv, s%dr_inv_d, s%lx)
267 call device_map(s%ds_inv, s%ds_inv_d, s%lx)
268 call device_map(s%dt_inv, s%dt_inv_d, s%lx)
269 call device_map(s%wx, s%wx_d, s%lx)
270 call device_map(s%wy, s%wy_d, s%lx)
271 call device_map(s%wz, s%wz_d, s%lx)
272 call device_map(s%dx, s%dx_d, s%lxy)
273 call device_map(s%dy, s%dy_d, s%lxy)
274 call device_map(s%dz, s%dz_d, s%lxy)
275 call device_map(s%dxt, s%dxt_d, s%lxy)
276 call device_map(s%dyt, s%dyt_d, s%lxy)
277 call device_map(s%dzt, s%dzt_d, s%lxy)
278 call device_map(s%w3, s%w3_d, s%lxyz)
279 call device_map(s%v, s%v_d, s%lxy)
280 call device_map(s%vt, s%vt_d, s%lxy)
281 call device_map(s%vinv, s%vinv_d, s%lxy)
282 call device_map(s%vinvt, s%vinvt_d, s%lxy)
283 call device_map(s%w, s%w_d, s%lxy)
284
285 call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, host_to_device, sync=.false.)
286 call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, host_to_device, sync=.false.)
287 call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, host_to_device, sync=.false.)
288 call device_memcpy(s%wx, s%wx_d, s%lx, host_to_device, sync=.false.)
289 call device_memcpy(s%wy, s%wy_d, s%lx, host_to_device, sync=.false.)
290 call device_memcpy(s%wz, s%wz_d, s%lx, host_to_device, sync=.false.)
291 call device_memcpy(s%dx, s%dx_d, s%lxy, host_to_device, sync=.false.)
292 call device_memcpy(s%dy, s%dy_d, s%lxy, host_to_device, sync=.false.)
293 call device_memcpy(s%dz, s%dz_d, s%lxy, host_to_device, sync=.false.)
294 call device_memcpy(s%dxt, s%dxt_d, s%lxy, host_to_device, sync=.false.)
295 call device_memcpy(s%dyt, s%dyt_d, s%lxy, host_to_device, sync=.false.)
296 call device_memcpy(s%dzt, s%dzt_d, s%lxy, host_to_device, sync=.false.)
297 call device_memcpy(s%w3, s%w3_d, s%lxyz, host_to_device, sync=.false.)
298
299 ix = s%lx * 3
300 call device_map(s%zg, s%zg_d, ix)
301 call device_memcpy(s%zg, s%zg_d, ix, host_to_device, sync=.false.)
302 end if
303
305
306 call device_sync()
307
308 end subroutine space_init
309
311 subroutine space_free(s)
312 class(space_t), intent(inout) :: s
313
314 if (allocated(s%zg)) then
315 deallocate(s%zg)
316 end if
317
318 if (allocated(s%wx)) then
319 deallocate(s%wx)
320 end if
321
322 if (allocated(s%wy)) then
323 deallocate(s%wy)
324 end if
325
326 if (allocated(s%wz)) then
327 deallocate(s%wz)
328 end if
329
330 if (allocated(s%w3)) then
331 deallocate(s%w3)
332 end if
333
334 if (allocated(s%dx)) then
335 deallocate(s%dx)
336 end if
337
338 if (allocated(s%dy)) then
339 deallocate(s%dy)
340 end if
341
342 if (allocated(s%dz)) then
343 deallocate(s%dz)
344 end if
345
346 if (allocated(s%dxt)) then
347 deallocate(s%dxt)
348 end if
349
350 if (allocated(s%dyt)) then
351 deallocate(s%dyt)
352 end if
353
354 if (allocated(s%dzt)) then
355 deallocate(s%dzt)
356 end if
357
358 if (allocated(s%dr_inv)) then
359 deallocate(s%dr_inv)
360 end if
361
362 if (allocated(s%ds_inv)) then
363 deallocate(s%ds_inv)
364 end if
365
366 if (allocated(s%dt_inv)) then
367 deallocate(s%dt_inv)
368 end if
369
370 if(allocated(s%v)) then
371 deallocate(s%v)
372 end if
373
374 if(allocated(s%vt)) then
375 deallocate(s%vt)
376 end if
377
378 if(allocated(s%vinv)) then
379 deallocate(s%vinv)
380 end if
381
382 if(allocated(s%vinvt)) then
383 deallocate(s%vinvt)
384 end if
385
386 if(allocated(s%w)) then
387 deallocate(s%w)
388 end if
389
390 !
391 ! Cleanup the device (if present)
392 !
393
394 if (c_associated(s%dr_inv_d)) then
395 call device_free(s%dr_inv_d)
396 end if
397
398 if (c_associated(s%ds_inv_d)) then
399 call device_free(s%ds_inv_d)
400 end if
401
402 if (c_associated(s%dt_inv_d)) then
403 call device_free(s%dt_inv_d)
404 end if
405
406 if (c_associated(s%dxt_d)) then
407 call device_free(s%dxt_d)
408 end if
409
410 if (c_associated(s%dyt_d)) then
411 call device_free(s%dyt_d)
412 end if
413
414 if (c_associated(s%dzt_d)) then
415 call device_free(s%dzt_d)
416 end if
417
418 if (c_associated(s%dx_d)) then
419 call device_free(s%dx_d)
420 end if
421
422 if (c_associated(s%dy_d)) then
423 call device_free(s%dy_d)
424 end if
425
426 if (c_associated(s%dz_d)) then
427 call device_free(s%dz_d)
428 end if
429
430 if (c_associated(s%wx_d)) then
431 call device_free(s%wx_d)
432 end if
433
434 if (c_associated(s%wy_d)) then
435 call device_free(s%wy_d)
436 end if
437
438 if (c_associated(s%wz_d)) then
439 call device_free(s%wz_d)
440 end if
441
442 if (c_associated(s%w3_d)) then
443 call device_free(s%w3_d)
444 end if
445
446 if (c_associated(s%zg_d)) then
447 call device_free(s%zg_d)
448 end if
449
450 if (c_associated(s%v_d)) then
451 call device_free(s%v_d)
452 end if
453
454 if (c_associated(s%vt_d)) then
455 call device_free(s%vt_d)
456 end if
457
458 if (c_associated(s%vinv_d)) then
459 call device_free(s%vinv_d)
460 end if
461
462 if (c_associated(s%vinvt_d)) then
463 call device_free(s%vinvt_d)
464 end if
465
466 if (c_associated(s%w_d)) then
467 call device_free(s%w_d)
468 end if
469
470 end subroutine space_free
471
474 pure function space_eq(Xh, Yh) result(res)
475 type(space_t), intent(in) :: xh
476 type(space_t), intent(in) :: yh
477 logical :: res
478
479 if ( (xh%lx .eq. yh%lx) .and. &
480 (xh%ly .eq. yh%ly) .and. &
481 (xh%lz .eq. yh%lz) ) then
482 res = .true.
483 else
484 res = .false.
485 end if
486
487 end function space_eq
488
491 pure function space_ne(Xh, Yh) result(res)
492 type(space_t), intent(in) :: xh
493 type(space_t), intent(in) :: yh
494 logical :: res
495
496 if ( (xh%lx .eq. yh%lx) .and. &
497 (xh%ly .eq. yh%ly) .and. &
498 (xh%lz .eq. yh%lz) ) then
499 res = .false.
500 else
501 res = .true.
502 end if
503
504 end function space_ne
505
506 subroutine space_compute_dist(dx, x, lx)
507 integer, intent(in) :: lx
508 real(kind=rp), intent(inout) :: dx(lx), x(lx)
509 integer :: i
510 dx(1) = x(2) - x(1)
511 do i = 2, lx - 1
512 dx(i) = 0.5*(x(i+1) - x(i-1))
513 enddo
514 dx(lx) = x(lx) - x(lx-1)
515 do i = 1, lx
516 dx(i) = 1.0_rp / dx(i)
517 end do
518 end subroutine space_compute_dist
519
520
524 type(space_t), intent(inout) :: Xh
525
526 real(kind=rp) :: l(0:xh%lx-1)
527 real(kind=rp) :: delta(xh%lx)
528 integer :: i, kj, j, j2, kk
529
530 associate(v=> xh%v, vt => xh%vt, &
531 vinv => xh%vinv, vinvt => xh%vinvt, w => xh%w)
532 ! Get the Legendre polynomials for each point
533 ! Then proceed to compose the transform matrix
534 kj = 0
535 do j = 1, xh%lx
536 l(0) = 1.
537 l(1) = xh%zg(j,1)
538 do j2 = 2, xh%lx-1
539 l(j2) = ( (2*j2-1) * xh%zg(j,1) * l(j2-1) &
540 - (j2-1) * l(j2-2) ) / j2
541 end do
542 do kk = 1, xh%lx
543 kj = kj+1
544 v(kj,1) = l(kk-1)
545 end do
546 end do
547
548 ! transpose the matrix
549 call trsp1(v, xh%lx)
550
551 ! Calculate the nominal scaling factors
552 do i = 1, xh%lx
553 delta(i) = 2.0_rp / (2*(i-1)+1)
554 end do
555 ! modify last entry
556 delta(xh%lx) = 2.0_rp / (xh%lx-1)
557
558 ! calculate the inverse to multiply the matrix
559 do i = 1, xh%lx
560 delta(i) = sqrt(1.0_rp / delta(i))
561 end do
562 ! scale the matrix
563 do i = 1, xh%lx
564 do j = 1, xh%lx
565 v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights
566 end do
567 end do
568
569 ! get the trasposed
570 call copy(vt, v, xh%lx * xh%lx)
571 call trsp1(vt, xh%lx)
572
573 !populate the mass matrix
574 kk = 1
575 do i = 1, xh%lx
576 do j = 1, xh%lx
577 if (i .eq. j) then
578 w(i,j) = xh%wx(kk)
579 kk = kk+1
580 else
581 w(i,j) = 0
582 end if
583 end do
584 end do
585
586 !Get the inverse of the transform matrix
587 call mxm(vt, xh%lx, w, xh%lx, vinv, xh%lx)
588
589 !get the transposed of the inverse
590 call copy(vinvt, vinv, xh%lx * xh%lx)
591 call trsp1(vinvt, xh%lx)
592 end associate
593
594 ! Copy the data to the GPU
595 ! Move all this to space.f90 to for next version
596 if ((neko_bcknd_hip .eq. 1) .or. (neko_bcknd_cuda .eq. 1) .or. &
597 (neko_bcknd_opencl .eq. 1)) then
598
599 call device_memcpy(xh%v, xh%v_d, xh%lxy, &
600 host_to_device, sync=.false.)
601 call device_memcpy(xh%vt, xh%vt_d, xh%lxy, &
602 host_to_device, sync=.false.)
603 call device_memcpy(xh%vinv, xh%vinv_d, xh%lxy, &
604 host_to_device, sync=.false.)
605 call device_memcpy(xh%vinvt, xh%vinvt_d, xh%lxy, &
606 host_to_device, sync=.false.)
607 call device_memcpy(xh%w, xh%w_d, xh%lxy, &
608 host_to_device, sync=.false.)
609
610 end if
611
613
614end module space
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Copy data between host and device (or device and device)
Definition device.F90:66
Synchronize a device or stream.
Definition device.F90:102
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:214
Fast diagonalization methods from NEKTON.
Definition fast3d.f90:61
subroutine, public setup_intp(jh, jht, z_to, z_from, n_to, n_from, derivative)
Compute interpolation weights for points z_to using values at points z_from.
Definition fast3d.f90:243
Definition math.f90:60
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:255
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_hip
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
integer, parameter neko_bcknd_cuda
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
pure logical function space_ne(xh, yh)
Check if .
Definition space.f90:492
pure logical function space_eq(xh, yh)
Check if .
Definition space.f90:475
integer, parameter, public gll
Definition space.f90:48
subroutine space_compute_dist(dx, x, lx)
Definition space.f90:507
integer, parameter, public gj
Definition space.f90:48
subroutine space_free(s)
Deallocate a space s.
Definition space.f90:312
subroutine space_init(s, t, lx, ly, lz)
Initialize a function space s with given polynomial dimensions.
Definition space.f90:148
integer, parameter, public gl
Definition space.f90:48
subroutine space_generate_transformation_matrices(xh)
Generate spectral tranform matrices.
Definition space.f90:524
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition speclib.f90:148
subroutine dgll(d, dt, z, nz, nzd)
Compute the derivative matrix D and its transpose DT associated with the Nth order Lagrangian interpo...
Definition speclib.f90:880
subroutine zwgll(z, w, np)
Generate NP Gauss-Lobatto Legendre points (Z) and weights (W) associated with Jacobi polynomial P(N)(...
Definition speclib.f90:179
subroutine zwgl(z, w, np)
Generate NP Gauss Legendre points Z and weights W associated with Jacobi polynomial ....
Definition speclib.f90:164
Tensor operations.
Definition tensor.f90:61
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:137
Utilities.
Definition utils.f90:35
The function space for the SEM solution fields.
Definition space.f90:62