Neko  0.9.99
A portable framework for high-order spectral element flow simulations
space.f90
Go to the documentation of this file.
1 ! Copyright (c) 2019-2022, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module space
35  use neko_config
36  use num_types, only : rp
37  use speclib
38  use device
39  use utils, only : neko_error
40  use fast3d, only : setup_intp
41  use math
42  use tensor, only : trsp1
43  use mxm_wrapper, only: mxm
44  use, intrinsic :: iso_c_binding
45  implicit none
46  private
47 
48  integer, public, parameter :: gl = 0, gll = 1, gj = 2
49 
62  type, public :: space_t
63  integer :: t
64  integer :: lx
65  integer :: ly
66  integer :: lz
67  integer :: lxy
68  integer :: lyz
69  integer :: lxz
70  integer :: lxyz
71 
72  real(kind=rp), allocatable :: zg(:,:)
73 
74  real(kind=rp), allocatable :: dr_inv(:)
75  real(kind=rp), allocatable :: ds_inv(:)
76  real(kind=rp), allocatable :: dt_inv(:)
77 
78  real(kind=rp), allocatable :: wx(:)
79  real(kind=rp), allocatable :: wy(:)
80  real(kind=rp), allocatable :: wz(:)
81 
82  real(kind=rp), allocatable :: w3(:,:,:)
83 
85  real(kind=rp), allocatable :: dx(:,:)
87  real(kind=rp), allocatable :: dy(:,:)
89  real(kind=rp), allocatable :: dz(:,:)
90 
92  real(kind=rp), allocatable :: dxt(:,:)
94  real(kind=rp), allocatable :: dyt(:,:)
96  real(kind=rp), allocatable :: dzt(:,:)
97 
99  real(kind=rp), allocatable :: v(:,:)
100  real(kind=rp), allocatable :: vt(:,:)
101  real(kind=rp), allocatable :: vinv(:,:)
102  real(kind=rp), allocatable :: vinvt(:,:)
104  real(kind=rp), allocatable :: w(:,:)
105 
106  !
107  ! Device pointers (if present)
108  !
109  type(c_ptr) :: dr_inv_d = c_null_ptr
110  type(c_ptr) :: ds_inv_d = c_null_ptr
111  type(c_ptr) :: dt_inv_d = c_null_ptr
112  type(c_ptr) :: dxt_d = c_null_ptr
113  type(c_ptr) :: dyt_d = c_null_ptr
114  type(c_ptr) :: dzt_d = c_null_ptr
115  type(c_ptr) :: dx_d = c_null_ptr
116  type(c_ptr) :: dy_d = c_null_ptr
117  type(c_ptr) :: dz_d = c_null_ptr
118  type(c_ptr) :: wx_d = c_null_ptr
119  type(c_ptr) :: wy_d = c_null_ptr
120  type(c_ptr) :: wz_d = c_null_ptr
121  type(c_ptr) :: zg_d = c_null_ptr
122  type(c_ptr) :: w3_d = c_null_ptr
123  type(c_ptr) :: v_d = c_null_ptr
124  type(c_ptr) :: vt_d = c_null_ptr
125  type(c_ptr) :: vinv_d = c_null_ptr
126  type(c_ptr) :: vinvt_d = c_null_ptr
127  type(c_ptr) :: w_d = c_null_ptr
128  contains
129  procedure, pass(s) :: init => space_init
130  procedure, pass(s) :: free => space_free
131 
132  end type space_t
133 
134  interface operator(.eq.)
135  module procedure space_eq
136  end interface operator(.eq.)
137 
138  interface operator(.ne.)
139  module procedure space_ne
140  end interface operator(.ne.)
141 
142  public :: operator(.eq.), operator(.ne.)
143 
144 contains
145 
147  subroutine space_init(s, t, lx, ly, lz)
148  class(space_t), intent(inout) :: s
149  integer, intent(in) :: t
150  integer, intent(in) :: lx
151  integer, intent(in) :: ly
152  integer, optional, intent(in) :: lz
153  integer :: ix, iy, iz
154 
155  call space_free(s)
156 
157  s%lx = lx
158  s%ly = ly
159  s%t = t
160  if (present(lz)) then
161  if (lz .ne. 1) then
162  s%lz = lz
163  if (lx .ne. ly .or. lx .ne. lz) then
164  call neko_error("Unsupported polynomial dimension")
165  end if
166  end if
167  else
168  if (lx .ne. ly) then
169  call neko_error("Unsupported polynomial dimension")
170  end if
171  s%lz = 1
172  end if
173  s%lxy = s%ly*s%lx
174  s%lyz = s%ly*s%lz
175  s%lxz = s%lx*s%lz
176  s%lxyz = s%lx*s%ly*s%lz
177 
178  allocate(s%zg(lx, 3))
179 
180  allocate(s%wx(s%lx))
181  allocate(s%wy(s%ly))
182  allocate(s%wz(s%lz))
183 
184  allocate(s%dr_inv(s%lx))
185  allocate(s%ds_inv(s%ly))
186  allocate(s%dt_inv(s%lz))
187 
188  allocate(s%w3(s%lx, s%ly, s%lz))
189 
190  allocate(s%dx(s%lx, s%lx))
191  allocate(s%dy(s%ly, s%ly))
192  allocate(s%dz(s%lz, s%lz))
193 
194  allocate(s%dxt(s%lx, s%lx))
195  allocate(s%dyt(s%ly, s%ly))
196  allocate(s%dzt(s%lz, s%lz))
197 
198  allocate(s%v(s%lx, s%lx))
199  allocate(s%vt(s%lx, s%lx))
200  allocate(s%vinv(s%lx, s%lx))
201  allocate(s%vinvt(s%lx, s%lx))
202  allocate(s%w(s%lx, s%lx))
203 
204  ! Call low-level routines to compute nodes and quadrature weights
205  if (t .eq. gll) then
206  call zwgll(s%zg(1,1), s%wx, s%lx)
207  call zwgll(s%zg(1,2), s%wy, s%ly)
208  if (s%lz .gt. 1) then
209  call zwgll(s%zg(1,3), s%wz, s%lz)
210  else
211  s%zg(:,3) = 0d0
212  s%wz = 1d0
213  end if
214  else if (t .eq. gl) then
215  call zwgl(s%zg(1,1), s%wx, s%lx)
216  call zwgl(s%zg(1,2), s%wy, s%ly)
217  if (s%lz .gt. 1) then
218  call zwgl(s%zg(1,3), s%wz, s%lz)
219  else
220  s%zg(:,3) = 0d0
221  s%wz = 1d0
222  end if
223  else
224  call neko_error("Invalid quadrature rule")
225  end if
226 
227  do iz = 1, s%lz
228  do iy = 1, s%ly
229  do ix = 1, s%lx
230  s%w3(ix, iy, iz) = s%wx(ix) * s%wy(iy) * s%wz(iz)
231  end do
232  end do
233  end do
235  if (t .eq. gll) then
236  call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx)
237  call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly)
238  if (s%lz .gt. 1) then
239  call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz)
240  else
241  s%dz = 0d0
242  s%dzt = 0d0
243  end if
244  else if (t .eq. gl) then
245  call setup_intp(s%dx, s%dxt, s%zg(1,1), s%zg(1,1), s%lx, s%lx,1)
246  call setup_intp(s%dy, s%dyt, s%zg(1,2), s%zg(1,2), s%ly, s%ly,1)
247  if (s%lz .gt. 1) then
248  call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1)
249  else
250  s%dz = 0d0
251  s%dzt = 0d0
252  end if
253  else
254  call neko_error("Invalid quadrature rule")
255  end if
256 
257  call space_compute_dist(s%dr_inv, s%zg(1,1), s%lx)
258  call space_compute_dist(s%ds_inv, s%zg(1,2), s%ly)
259  if (s%lz .gt. 1) then
260  call space_compute_dist(s%dt_inv, s%zg(1,3), s%lz)
261  else
262  s%dt_inv = 0d0
263  end if
264 
265  if (neko_bcknd_device .eq. 1) then
266  call device_map(s%dr_inv, s%dr_inv_d, s%lx)
267  call device_map(s%ds_inv, s%ds_inv_d, s%lx)
268  call device_map(s%dt_inv, s%dt_inv_d, s%lx)
269  call device_map(s%wx, s%wx_d, s%lx)
270  call device_map(s%wy, s%wy_d, s%lx)
271  call device_map(s%wz, s%wz_d, s%lx)
272  call device_map(s%dx, s%dx_d, s%lxy)
273  call device_map(s%dy, s%dy_d, s%lxy)
274  call device_map(s%dz, s%dz_d, s%lxy)
275  call device_map(s%dxt, s%dxt_d, s%lxy)
276  call device_map(s%dyt, s%dyt_d, s%lxy)
277  call device_map(s%dzt, s%dzt_d, s%lxy)
278  call device_map(s%w3, s%w3_d, s%lxyz)
279  call device_map(s%v, s%v_d, s%lxy)
280  call device_map(s%vt, s%vt_d, s%lxy)
281  call device_map(s%vinv, s%vinv_d, s%lxy)
282  call device_map(s%vinvt, s%vinvt_d, s%lxy)
283  call device_map(s%w, s%w_d, s%lxy)
284 
285  call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, host_to_device, sync=.false.)
286  call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, host_to_device, sync=.false.)
287  call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, host_to_device, sync=.false.)
288  call device_memcpy(s%wx, s%wx_d, s%lx, host_to_device, sync=.false.)
289  call device_memcpy(s%wy, s%wy_d, s%lx, host_to_device, sync=.false.)
290  call device_memcpy(s%wz, s%wz_d, s%lx, host_to_device, sync=.false.)
291  call device_memcpy(s%dx, s%dx_d, s%lxy, host_to_device, sync=.false.)
292  call device_memcpy(s%dy, s%dy_d, s%lxy, host_to_device, sync=.false.)
293  call device_memcpy(s%dz, s%dz_d, s%lxy, host_to_device, sync=.false.)
294  call device_memcpy(s%dxt, s%dxt_d, s%lxy, host_to_device, sync=.false.)
295  call device_memcpy(s%dyt, s%dyt_d, s%lxy, host_to_device, sync=.false.)
296  call device_memcpy(s%dzt, s%dzt_d, s%lxy, host_to_device, sync=.false.)
297  call device_memcpy(s%w3, s%w3_d, s%lxyz, host_to_device, sync=.false.)
298 
299  ix = s%lx * 3
300  call device_map(s%zg, s%zg_d, ix)
301  call device_memcpy(s%zg, s%zg_d, ix, host_to_device, sync=.false.)
302  end if
303 
305 
306  end subroutine space_init
307 
309  subroutine space_free(s)
310  class(space_t), intent(inout) :: s
311 
312  if (allocated(s%zg)) then
313  deallocate(s%zg)
314  end if
315 
316  if (allocated(s%wx)) then
317  deallocate(s%wx)
318  end if
319 
320  if (allocated(s%wy)) then
321  deallocate(s%wy)
322  end if
323 
324  if (allocated(s%wz)) then
325  deallocate(s%wz)
326  end if
327 
328  if (allocated(s%w3)) then
329  deallocate(s%w3)
330  end if
331 
332  if (allocated(s%dx)) then
333  deallocate(s%dx)
334  end if
335 
336  if (allocated(s%dy)) then
337  deallocate(s%dy)
338  end if
339 
340  if (allocated(s%dz)) then
341  deallocate(s%dz)
342  end if
343 
344  if (allocated(s%dxt)) then
345  deallocate(s%dxt)
346  end if
347 
348  if (allocated(s%dyt)) then
349  deallocate(s%dyt)
350  end if
351 
352  if (allocated(s%dzt)) then
353  deallocate(s%dzt)
354  end if
355 
356  if (allocated(s%dr_inv)) then
357  deallocate(s%dr_inv)
358  end if
359 
360  if (allocated(s%ds_inv)) then
361  deallocate(s%ds_inv)
362  end if
363 
364  if (allocated(s%dt_inv)) then
365  deallocate(s%dt_inv)
366  end if
367 
368  if(allocated(s%v)) then
369  deallocate(s%v)
370  end if
371 
372  if(allocated(s%vt)) then
373  deallocate(s%vt)
374  end if
375 
376  if(allocated(s%vinv)) then
377  deallocate(s%vinv)
378  end if
379 
380  if(allocated(s%vinvt)) then
381  deallocate(s%vinvt)
382  end if
383 
384  if(allocated(s%w)) then
385  deallocate(s%w)
386  end if
387 
388  !
389  ! Cleanup the device (if present)
390  !
391 
392  if (c_associated(s%dr_inv_d)) then
393  call device_free(s%dr_inv_d)
394  end if
395 
396  if (c_associated(s%ds_inv_d)) then
397  call device_free(s%ds_inv_d)
398  end if
399 
400  if (c_associated(s%dt_inv_d)) then
401  call device_free(s%dt_inv_d)
402  end if
403 
404  if (c_associated(s%dxt_d)) then
405  call device_free(s%dxt_d)
406  end if
407 
408  if (c_associated(s%dyt_d)) then
409  call device_free(s%dyt_d)
410  end if
411 
412  if (c_associated(s%dzt_d)) then
413  call device_free(s%dzt_d)
414  end if
415 
416  if (c_associated(s%dx_d)) then
417  call device_free(s%dx_d)
418  end if
419 
420  if (c_associated(s%dy_d)) then
421  call device_free(s%dy_d)
422  end if
423 
424  if (c_associated(s%dz_d)) then
425  call device_free(s%dz_d)
426  end if
427 
428  if (c_associated(s%wx_d)) then
429  call device_free(s%wx_d)
430  end if
431 
432  if (c_associated(s%wy_d)) then
433  call device_free(s%wy_d)
434  end if
435 
436  if (c_associated(s%wz_d)) then
437  call device_free(s%wz_d)
438  end if
439 
440  if (c_associated(s%w3_d)) then
441  call device_free(s%w3_d)
442  end if
443 
444  if (c_associated(s%zg_d)) then
445  call device_free(s%zg_d)
446  end if
447 
448  if (c_associated(s%v_d)) then
449  call device_free(s%v_d)
450  end if
451 
452  if (c_associated(s%vt_d)) then
453  call device_free(s%vt_d)
454  end if
455 
456  if (c_associated(s%vinv_d)) then
457  call device_free(s%vinv_d)
458  end if
459 
460  if (c_associated(s%vinvt_d)) then
461  call device_free(s%vinvt_d)
462  end if
463 
464  if (c_associated(s%w_d)) then
465  call device_free(s%w_d)
466  end if
467 
468  end subroutine space_free
469 
472  pure function space_eq(Xh, Yh) result(res)
473  type(space_t), intent(in) :: xh
474  type(space_t), intent(in) :: yh
475  logical :: res
476 
477  if ( (xh%lx .eq. yh%lx) .and. &
478  (xh%ly .eq. yh%ly) .and. &
479  (xh%lz .eq. yh%lz) ) then
480  res = .true.
481  else
482  res = .false.
483  end if
484 
485  end function space_eq
486 
489  pure function space_ne(Xh, Yh) result(res)
490  type(space_t), intent(in) :: xh
491  type(space_t), intent(in) :: yh
492  logical :: res
493 
494  if ( (xh%lx .eq. yh%lx) .and. &
495  (xh%ly .eq. yh%ly) .and. &
496  (xh%lz .eq. yh%lz) ) then
497  res = .false.
498  else
499  res = .true.
500  end if
501 
502  end function space_ne
503 
504  subroutine space_compute_dist(dx, x, lx)
505  integer, intent(in) :: lx
506  real(kind=rp), intent(inout) :: dx(lx), x(lx)
507  integer :: i
508  dx(1) = x(2) - x(1)
509  do i = 2, lx - 1
510  dx(i) = 0.5*(x(i+1) - x(i-1))
511  enddo
512  dx(lx) = x(lx) - x(lx-1)
513  do i = 1, lx
514  dx(i) = 1.0_rp / dx(i)
515  end do
516  end subroutine space_compute_dist
517 
518 
522  type(space_t), intent(inout) :: Xh
523 
524  real(kind=rp) :: l(0:xh%lx-1)
525  real(kind=rp) :: delta(xh%lx)
526  integer :: i, kj, j, j2, kk
527 
528  associate(v=> xh%v, vt => xh%vt, &
529  vinv => xh%vinv, vinvt => xh%vinvt, w => xh%w)
530  ! Get the Legendre polynomials for each point
531  ! Then proceed to compose the transform matrix
532  kj = 0
533  do j = 1, xh%lx
534  l(0) = 1.
535  l(1) = xh%zg(j,1)
536  do j2 = 2, xh%lx-1
537  l(j2) = ( (2*j2-1) * xh%zg(j,1) * l(j2-1) &
538  - (j2-1) * l(j2-2) ) / j2
539  end do
540  do kk = 1, xh%lx
541  kj = kj+1
542  v(kj,1) = l(kk-1)
543  end do
544  end do
545 
546  ! transpose the matrix
547  call trsp1(v, xh%lx)
548 
549  ! Calculate the nominal scaling factors
550  do i = 1, xh%lx
551  delta(i) = 2.0_rp / (2*(i-1)+1)
552  end do
553  ! modify last entry
554  delta(xh%lx) = 2.0_rp / (xh%lx-1)
555 
556  ! calculate the inverse to multiply the matrix
557  do i = 1, xh%lx
558  delta(i) = sqrt(1.0_rp / delta(i))
559  end do
560  ! scale the matrix
561  do i = 1, xh%lx
562  do j = 1, xh%lx
563  v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights
564  end do
565  end do
566 
567  ! get the trasposed
568  call copy(vt, v, xh%lx * xh%lx)
569  call trsp1(vt, xh%lx)
570 
571  !populate the mass matrix
572  kk = 1
573  do i = 1, xh%lx
574  do j = 1, xh%lx
575  if (i .eq. j) then
576  w(i,j) = xh%wx(kk)
577  kk = kk+1
578  else
579  w(i,j) = 0
580  end if
581  end do
582  end do
583 
584  !Get the inverse of the transform matrix
585  call mxm(vt, xh%lx, w, xh%lx, vinv, xh%lx)
586 
587  !get the transposed of the inverse
588  call copy(vinvt, vinv, xh%lx * xh%lx)
589  call trsp1(vinvt, xh%lx)
590  end associate
591 
592  ! Copy the data to the GPU
593  ! Move all this to space.f90 to for next version
594  if ((neko_bcknd_hip .eq. 1) .or. (neko_bcknd_cuda .eq. 1) .or. &
595  (neko_bcknd_opencl .eq. 1)) then
596 
597  call device_memcpy(xh%v, xh%v_d, xh%lxy, &
598  host_to_device, sync=.false.)
599  call device_memcpy(xh%vt, xh%vt_d, xh%lxy, &
600  host_to_device, sync=.false.)
601  call device_memcpy(xh%vinv, xh%vinv_d, xh%lxy, &
602  host_to_device, sync=.false.)
603  call device_memcpy(xh%vinvt, xh%vinvt_d, xh%lxy, &
604  host_to_device, sync=.false.)
605  call device_memcpy(xh%w, xh%w_d, xh%lxy, &
606  host_to_device, sync=.false.)
607 
608  end if
609 
611 
612 end module space
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Copy data between host and device (or device and device)
Definition: device.F90:51
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
integer, parameter, public host_to_device
Definition: device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:185
Fast diagonalization methods from NEKTON.
Definition: fast3d.f90:61
subroutine, public setup_intp(jh, jht, z_to, z_from, n_to, n_from, derivative)
Compute interpolation weights for points z_to using values at points z_from.
Definition: fast3d.f90:243
Definition: math.f90:60
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:239
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
Build configurations.
Definition: neko_config.f90:34
integer, parameter neko_bcknd_hip
Definition: neko_config.f90:42
integer, parameter neko_bcknd_device
Definition: neko_config.f90:44
integer, parameter neko_bcknd_opencl
Definition: neko_config.f90:43
integer, parameter neko_bcknd_cuda
Definition: neko_config.f90:41
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Defines a function space.
Definition: space.f90:34
pure logical function space_ne(Xh, Yh)
Check if .
Definition: space.f90:490
subroutine space_generate_transformation_matrices(Xh)
Generate spectral tranform matrices.
Definition: space.f90:522
pure logical function space_eq(Xh, Yh)
Check if .
Definition: space.f90:473
integer, parameter, public gll
Definition: space.f90:48
subroutine space_compute_dist(dx, x, lx)
Definition: space.f90:505
integer, parameter, public gj
Definition: space.f90:48
subroutine space_free(s)
Deallocate a space s.
Definition: space.f90:310
subroutine space_init(s, t, lx, ly, lz)
Initialize a function space s with given polynomial dimensions.
Definition: space.f90:148
integer, parameter, public gl
Definition: space.f90:48
LIBRARY ROUTINES FOR SPECTRAL METHODS.
Definition: speclib.f90:148
subroutine zwgll(Z, W, NP)
Definition: speclib.f90:169
subroutine dgll(D, DT, Z, NZ, NZD)
Definition: speclib.f90:865
subroutine zwgl(Z, W, NP)
Generate NP Gauss Legendre points Z and weights W associated with Jacobi polynomial ....
Definition: speclib.f90:161
Tensor operations.
Definition: tensor.f90:61
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition: tensor.f90:137
Utilities.
Definition: utils.f90:35
The function space for the SEM solution fields.
Definition: space.f90:62