Neko  0.9.0
A portable framework for high-order spectral element flow simulations
ax_helm_device.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2022, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34  use ax_helm, only : ax_helm_t
35  use num_types, only : rp
36  use coefs, only : coef_t
37  use space, only : space_t
38  use mesh, only : mesh_t
39  use device_math, only : device_addcol4
40  use device, only : device_get_ptr
41  use num_types, only : rp
42  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
43  implicit none
44  private
45 
46  type, public, extends(ax_helm_t) :: ax_helm_device_t
47  contains
48  procedure, nopass :: compute => ax_helm_device_compute
49  procedure, pass(this) :: compute_vector => ax_helm_device_compute_vector
50  end type ax_helm_device_t
51 
52 #ifdef HAVE_HIP
53  interface
54  subroutine hip_ax_helm(w_d, u_d, &
55  dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d, &
56  h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
57  bind(c, name='hip_ax_helm')
58  use, intrinsic :: iso_c_binding
59  type(c_ptr), value :: w_d, u_d
60  type(c_ptr), value :: dx_d, dy_d, dz_d
61  type(c_ptr), value :: dxt_d, dyt_d, dzt_d
62  type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
63  integer(c_int) :: nel, lx
64  end subroutine hip_ax_helm
65  end interface
66 
67  interface
68  subroutine hip_ax_helm_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
69  dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
70  h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
71  bind(c, name='hip_ax_helm_vector')
72  use, intrinsic :: iso_c_binding
73  type(c_ptr), value :: au_d, av_d, aw_d
74  type(c_ptr), value :: u_d, v_d, w_d
75  type(c_ptr), value :: dx_d, dy_d, dz_d
76  type(c_ptr), value :: dxt_d, dyt_d, dzt_d
77  type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
78  integer(c_int) :: nel, lx
79  end subroutine hip_ax_helm_vector
80  end interface
81 
82  interface
83  subroutine hip_ax_helm_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
84  h2_d, B_d, n) bind(c, name='hip_ax_helm_vector_part2')
85  use, intrinsic :: iso_c_binding
86  type(c_ptr), value :: au_d, av_d, aw_d
87  type(c_ptr), value :: u_d, v_d, w_d
88  type(c_ptr), value :: h2_d, B_d
89  integer(c_int) :: n
90  end subroutine hip_ax_helm_vector_part2
91  end interface
92 #elif HAVE_CUDA
93  interface
94  subroutine cuda_ax_helm(w_d, u_d, &
95  dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
96  h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
97  bind(c, name='cuda_ax_helm')
98  use, intrinsic :: iso_c_binding
99  type(c_ptr), value :: w_d, u_d
100  type(c_ptr), value :: dx_d, dy_d, dz_d
101  type(c_ptr), value :: dxt_d, dyt_d, dzt_d
102  type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
103  integer(c_int) :: nel, lx
104  end subroutine cuda_ax_helm
105  end interface
106 
107  interface
108  subroutine cuda_ax_helm_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
109  dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
110  h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
111  bind(c, name='cuda_ax_helm_vector')
112  use, intrinsic :: iso_c_binding
113  type(c_ptr), value :: au_d, av_d, aw_d
114  type(c_ptr), value :: u_d, v_d, w_d
115  type(c_ptr), value :: dx_d, dy_d, dz_d
116  type(c_ptr), value :: dxt_d, dyt_d, dzt_d
117  type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
118  integer(c_int) :: nel, lx
119  end subroutine cuda_ax_helm_vector
120  end interface
121 
122  interface
123  subroutine cuda_ax_helm_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
124  h2_d, B_d, n) bind(c, name='cuda_ax_helm_vector_part2')
125  use, intrinsic :: iso_c_binding
126  type(c_ptr), value :: au_d, av_d, aw_d
127  type(c_ptr), value :: u_d, v_d, w_d
128  type(c_ptr), value :: h2_d, B_d
129  integer(c_int) :: n
130  end subroutine cuda_ax_helm_vector_part2
131  end interface
132 #elif HAVE_OPENCL
133  interface
134  subroutine opencl_ax_helm(w_d, u_d, &
135  dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d, &
136  h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
137  bind(c, name='opencl_ax_helm')
138  use, intrinsic :: iso_c_binding
139  type(c_ptr), value :: w_d, u_d
140  type(c_ptr), value :: dx_d, dy_d, dz_d
141  type(c_ptr), value :: dxt_d, dyt_d, dzt_d
142  type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
143  integer(c_int) :: nel, lx
144  end subroutine opencl_ax_helm
145  end interface
146 #endif
147 
148 contains
149 
150  subroutine ax_helm_device_compute(w, u, coef, msh, Xh)
151  type(mesh_t), intent(inout) :: msh
152  type(space_t), intent(inout) :: Xh
153  type(coef_t), intent(inout) :: coef
154  real(kind=rp), intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
155  real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
156  type(c_ptr) :: u_d, w_d
157 
158  u_d = device_get_ptr(u)
159  w_d = device_get_ptr(w)
160 
161 #ifdef HAVE_HIP
162  call hip_ax_helm(w_d, u_d, xh%dx_d, xh%dy_d, xh%dz_d, &
163  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
164  coef%G11_d, coef%G22_d, coef%G33_d, &
165  coef%G12_d, coef%G13_d, coef%G23_d, &
166  msh%nelv, xh%lx)
167 #elif HAVE_CUDA
168  call cuda_ax_helm(w_d, u_d, xh%dx_d, xh%dy_d, xh%dz_d, &
169  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
170  coef%G11_d, coef%G22_d, coef%G33_d, &
171  coef%G12_d, coef%G13_d, coef%G23_d, &
172  msh%nelv, xh%lx)
173 #elif HAVE_OPENCL
174  call opencl_ax_helm(w_d, u_d, xh%dx_d, xh%dy_d, xh%dz_d, &
175  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
176  coef%G11_d, coef%G22_d, coef%G33_d, &
177  coef%G12_d, coef%G13_d, coef%G23_d, &
178  msh%nelv, xh%lx)
179 #endif
180 
181  if (coef%ifh2) then
182  call device_addcol4(w_d ,coef%h2_d, coef%B_d, u_d, coef%dof%size())
183  end if
184 
185  end subroutine ax_helm_device_compute
186 
187  subroutine ax_helm_device_compute_vector(this, au, av, aw, &
188  u, v, w, coef, msh, Xh)
189  class(ax_helm_device_t), intent(in) :: this
190  type(space_t), intent(inout) :: Xh
191  type(mesh_t), intent(inout) :: msh
192  type(coef_t), intent(inout) :: coef
193  real(kind=rp), intent(inout) :: au(xh%lx, xh%ly, xh%lz, msh%nelv)
194  real(kind=rp), intent(inout) :: av(xh%lx, xh%ly, xh%lz, msh%nelv)
195  real(kind=rp), intent(inout) :: aw(xh%lx, xh%ly, xh%lz, msh%nelv)
196  real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
197  real(kind=rp), intent(inout) :: v(xh%lx, xh%ly, xh%lz, msh%nelv)
198  real(kind=rp), intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
199  type(c_ptr) :: u_d, v_d, w_d
200  type(c_ptr) :: au_d, av_d, aw_d
201 
202  u_d = device_get_ptr(u)
203  v_d = device_get_ptr(v)
204  w_d = device_get_ptr(w)
205 
206  au_d = device_get_ptr(au)
207  av_d = device_get_ptr(av)
208  aw_d = device_get_ptr(aw)
209 
210 #ifdef HAVE_HIP
211  call hip_ax_helm_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
212  xh%dx_d, xh%dy_d, xh%dz_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
213  coef%G11_d, coef%G22_d, coef%G33_d, &
214  coef%G12_d, coef%G13_d, coef%G23_d, &
215  msh%nelv, xh%lx)
216 #elif HAVE_CUDA
217  call cuda_ax_helm_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
218  xh%dx_d, xh%dy_d, xh%dz_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
219  coef%G11_d, coef%G22_d, coef%G33_d, &
220  coef%G12_d, coef%G13_d, coef%G23_d, &
221  msh%nelv, xh%lx)
222 #elif HAVE_OPENCL
223  call opencl_ax_helm(au_d, u_d, xh%dx_d, xh%dy_d, xh%dz_d, &
224  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
225  coef%G11_d, coef%G22_d, coef%G33_d, &
226  coef%G12_d, coef%G13_d, coef%G23_d, &
227  msh%nelv, xh%lx)
228  call opencl_ax_helm(av_d, v_d, xh%dx_d, xh%dy_d, xh%dz_d, &
229  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
230  coef%G11_d, coef%G22_d, coef%G33_d, &
231  coef%G12_d, coef%G13_d, coef%G23_d, &
232  msh%nelv, xh%lx)
233  call opencl_ax_helm(aw_d, w_d, xh%dx_d, xh%dy_d, xh%dz_d, &
234  xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
235  coef%G11_d, coef%G22_d, coef%G33_d, &
236  coef%G12_d, coef%G13_d, coef%G23_d, &
237  msh%nelv, xh%lx)
238 #endif
239 
240  if (coef%ifh2) then
241 #ifdef HAVE_HIP
242  call hip_ax_helm_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
243  coef%h2_d, coef%B_d, coef%dof%size())
244 #elif HAVE_CUDA
245  call cuda_ax_helm_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
246  coef%h2_d, coef%B_d, coef%dof%size())
247 #else
248  call device_addcol4(au_d ,coef%h2_d, coef%B_d, u_d, coef%dof%size())
249  call device_addcol4(av_d ,coef%h2_d, coef%B_d, v_d, coef%dof%size())
250  call device_addcol4(aw_d ,coef%h2_d, coef%B_d, w_d, coef%dof%size())
251 #endif
252  end if
253 
254  end subroutine ax_helm_device_compute_vector
255 
256 end module ax_helm_device
void opencl_ax_helm(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
Definition: ax_helm.c:52
void cuda_ax_helm_vector_part2(void *au, void *av, void *aw, void *u, void *v, void *w, void *h2, void *B, int *n)
Definition: ax_helm.cu:255
void cuda_ax_helm(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
Definition: ax_helm.cu:63
void cuda_ax_helm_vector(void *au, void *av, void *aw, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
Definition: ax_helm.cu:185
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
subroutine ax_helm_device_compute_vector(this, au, av, aw, u, v, w, coef, msh, Xh)
subroutine ax_helm_device_compute(w, u, coef, msh, Xh)
Coefficients.
Definition: coef.f90:34
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns .
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
Defines a mesh.
Definition: mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Defines a function space.
Definition: space.f90:34
Matrix-vector product for a Helmholtz problem.
Definition: ax_helm.f90:44
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
The function space for the SEM solution fields.
Definition: space.f90:62