Neko  0.9.0
A portable framework for high-order spectral element flow simulations
ax_helm_full_device.F90
Go to the documentation of this file.
1 ! Copyright (c) 2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34  use ax_helm_full, only : ax_helm_full_t
35  use num_types, only : rp
36  use coefs, only : coef_t
37  use space, only : space_t
38  use mesh, only : mesh_t
39  use device_math, only : device_addcol4
40  use device, only : device_get_ptr
41  use num_types, only : rp
42  use utils, only : neko_error
43  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
44  implicit none
45  private
46 
47  type, public, extends(ax_helm_full_t) :: ax_helm_full_device_t
48  contains
49  procedure, pass(this) :: compute_vector => ax_helm_full_device_compute_vector
50  end type ax_helm_full_device_t
51 
52 #ifdef HAVE_HIP
53  interface
54  subroutine hip_ax_helm_stress_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
55  dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
56  h1_d, drdx_d, drdy_d, drdz_d, &
57  dsdx_d, dsdy_d, dsdz_d, &
58  dtdx_d, dtdy_d, dtdz_d, jacinv_d, weight3_d, nelv, lx) &
59  bind(c, name='hip_ax_helm_stress_vector')
60  use, intrinsic :: iso_c_binding
61  type(c_ptr), value :: au_d, av_d, aw_d
62  type(c_ptr), value :: u_d, v_d, w_d
63  type(c_ptr), value :: dx_d, dy_d, dz_d
64  type(c_ptr), value :: dxt_d, dyt_d, dzt_d
65  type(c_ptr), value :: h1_d
66  type(c_ptr), value :: drdx_d, drdy_d, drdz_d
67  type(c_ptr), value :: dsdx_d, dsdy_d, dsdz_d
68  type(c_ptr), value :: dtdx_d, dtdy_d, dtdz_d
69  type(c_ptr), value :: jacinv_d, weight3_d
70  integer(c_int) :: nel, lx
71  end subroutine hip_ax_helm_stress_vector
72  end interface
73 
74  interface
75  subroutine hip_ax_helm_stress_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
76  h2_d, B_d, n) bind(c, name='hip_ax_helm_stress_vector_part2')
77  use, intrinsic :: iso_c_binding
78  type(c_ptr), value :: au_d, av_d, aw_d
79  type(c_ptr), value :: u_d, v_d, w_d
80  type(c_ptr), value :: h2_d, B_d
81  integer(c_int) :: n
82  end subroutine hip_ax_helm_stress_vector_part2
83  end interface
84 
85 #elif HAVE_CUDA
86  interface
87  subroutine cuda_ax_helm_stress_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
88  dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
89  h1_d, drdx_d, drdy_d, drdz_d, &
90  dsdx_d, dsdy_d, dsdz_d, &
91  dtdx_d, dtdy_d, dtdz_d, jacinv_d, weight3_d, nelv, lx) &
92  bind(c, name='cuda_ax_helm_stress_vector')
93  use, intrinsic :: iso_c_binding
94  type(c_ptr), value :: au_d, av_d, aw_d
95  type(c_ptr), value :: u_d, v_d, w_d
96  type(c_ptr), value :: dx_d, dy_d, dz_d
97  type(c_ptr), value :: dxt_d, dyt_d, dzt_d
98  type(c_ptr), value :: h1_d
99  type(c_ptr), value :: drdx_d, drdy_d, drdz_d
100  type(c_ptr), value :: dsdx_d, dsdy_d, dsdz_d
101  type(c_ptr), value :: dtdx_d, dtdy_d, dtdz_d
102  type(c_ptr), value :: jacinv_d, weight3_d
103  integer(c_int) :: nel, lx
104  end subroutine cuda_ax_helm_stress_vector
105  end interface
106 
107  interface
108  subroutine cuda_ax_helm_stress_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
109  h2_d, B_d, n) bind(c, name='cuda_ax_helm_stress_vector_part2')
110  use, intrinsic :: iso_c_binding
111  type(c_ptr), value :: au_d, av_d, aw_d
112  type(c_ptr), value :: u_d, v_d, w_d
113  type(c_ptr), value :: h2_d, B_d
114  integer(c_int) :: n
115  end subroutine cuda_ax_helm_stress_vector_part2
116  end interface
117 #endif
118 
119 contains
120 
121  subroutine ax_helm_full_device_compute_vector(this, au, av, aw, &
122  u, v, w, coef, msh, Xh)
123  class(ax_helm_full_device_t), intent(in) :: this
124  type(space_t), intent(inout) :: Xh
125  type(mesh_t), intent(inout) :: msh
126  type(coef_t), intent(inout) :: coef
127  real(kind=rp), intent(inout) :: au(xh%lx, xh%ly, xh%lz, msh%nelv)
128  real(kind=rp), intent(inout) :: av(xh%lx, xh%ly, xh%lz, msh%nelv)
129  real(kind=rp), intent(inout) :: aw(xh%lx, xh%ly, xh%lz, msh%nelv)
130  real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
131  real(kind=rp), intent(inout) :: v(xh%lx, xh%ly, xh%lz, msh%nelv)
132  real(kind=rp), intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
133  type(c_ptr) :: u_d, v_d, w_d
134  type(c_ptr) :: au_d, av_d, aw_d
135 
136  u_d = device_get_ptr(u)
137  v_d = device_get_ptr(v)
138  w_d = device_get_ptr(w)
139 
140  au_d = device_get_ptr(au)
141  av_d = device_get_ptr(av)
142  aw_d = device_get_ptr(aw)
143 
144 #ifdef HAVE_HIP
145  call hip_ax_helm_stress_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
146  xh%dx_d, xh%dy_d, xh%dz_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
147  coef%drdx_d, coef%drdy_d, coef%drdz_d, &
148  coef%dsdx_d, coef%dsdy_d, coef%dsdz_d, &
149  coef%dtdx_d, coef%dtdy_d, coef%dtdz_d, &
150  coef%jacinv_d, xh%w3_d, msh%nelv, xh%lx)
151 #elif HAVE_CUDA
152  call cuda_ax_helm_stress_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
153  xh%dx_d, xh%dy_d, xh%dz_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
154  coef%drdx_d, coef%drdy_d, coef%drdz_d, &
155  coef%dsdx_d, coef%dsdy_d, coef%dsdz_d, &
156  coef%dtdx_d, coef%dtdy_d, coef%dtdz_d, &
157  coef%jacinv_d, xh%w3_d, msh%nelv, xh%lx)
158 #elif HAVE_OPENCL
159  call neko_error('OPENCL is not implemented for full stress formulation')
160 #endif
161 
162  if (coef%ifh2) then
163 #ifdef HAVE_HIP
164  call hip_ax_helm_stress_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
165  coef%h2_d, coef%B_d, coef%dof%size())
166 #elif HAVE_CUDA
167  call cuda_ax_helm_stress_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
168  coef%h2_d, coef%B_d, coef%dof%size())
169 #else
170  call device_addcol4(au_d ,coef%h2_d, coef%B_d, u_d, coef%dof%size())
171  call device_addcol4(av_d ,coef%h2_d, coef%B_d, v_d, coef%dof%size())
172  call device_addcol4(aw_d ,coef%h2_d, coef%B_d, w_d, coef%dof%size())
173 #endif
174  end if
175 
177 
178 end module ax_helm_full_device
void cuda_ax_helm_stress_vector(void *au, void *av, void *aw, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *drdx, void *drdy, void *drdz, void *dsdx, void *dsdy, void *dsdz, void *dtdx, void *dtdy, void *dtdz, void *jacinv, void *w3, int *nelv, int *lx)
Definition: ax_helm_full.cu:51
void cuda_ax_helm_stress_vector_part2(void *au, void *av, void *aw, void *u, void *v, void *w, void *h2, void *B, int *n)
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
subroutine ax_helm_full_device_compute_vector(this, au, av, aw, u, v, w, coef, msh, Xh)
Coefficients.
Definition: coef.f90:34
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns $a = a + b \cdot c \cdot d$.
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
Defines a mesh.
Definition: mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Defines a function space.
Definition: space.f90:34
Utilities.
Definition: utils.f90:35
Matrix-vector product for a Helmholtz problem.
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
The function space for the SEM solution fields.
Definition: space.f90:62