Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
ax_helm_full_device.F90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use num_types, only : rp
36 use coefs, only : coef_t
37 use space, only : space_t
38 use mesh, only : mesh_t
39 use device_math, only : device_addcol4
40 use device, only : device_get_ptr
41 use num_types, only : rp
42 use utils, only : neko_error
43 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
44 implicit none
45 private
46
!> Device variant of the full-stress Helmholtz matrix-vector product.
!! Extends `ax_helm_full_t` and binds `compute_vector` to the
!! implementation in this module, which works on device pointers.
type, public, extends(ax_helm_full_t) :: ax_helm_full_device_t
contains
  !> Apply the operator to the three components of a vector field.
  procedure, pass(this) :: compute_vector => ax_helm_full_device_compute_vector
end type ax_helm_full_device_t
51
52#ifdef HAVE_HIP
interface
   !> C binding for the HIP launcher of the first stage of the
   !! full-stress Helmholtz product.
   !!
   !! Every `*_d` argument is an opaque C pointer passed by value.
   !! `nelv` and `lx` carry no `value` attribute and are therefore passed
   !! by reference (presumably mirroring the `int *` parameters of the
   !! CUDA prototype -- confirm against the HIP source).
   subroutine hip_ax_helm_stress_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
        dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d, &
        h1_d, drdx_d, drdy_d, drdz_d, &
        dsdx_d, dsdy_d, dsdz_d, &
        dtdx_d, dtdy_d, dtdz_d, jacinv_d, weight3_d, nelv, lx) &
        bind(c, name='hip_ax_helm_stress_vector')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int
     ! An interface body does not inherit the host's `implicit none`;
     ! stating it here makes the compiler reject any undeclared dummy.
     implicit none
     type(c_ptr), value :: au_d, av_d, aw_d
     type(c_ptr), value :: u_d, v_d, w_d
     type(c_ptr), value :: dx_d, dy_d, dz_d
     type(c_ptr), value :: dxt_d, dyt_d, dzt_d
     type(c_ptr), value :: h1_d
     type(c_ptr), value :: drdx_d, drdy_d, drdz_d
     type(c_ptr), value :: dsdx_d, dsdy_d, dsdz_d
     type(c_ptr), value :: dtdx_d, dtdy_d, dtdz_d
     type(c_ptr), value :: jacinv_d, weight3_d
     ! Was declared as `nel`, which left the actual dummy `nelv`
     ! implicitly typed instead of explicitly `integer(c_int)`.
     integer(c_int) :: nelv, lx
   end subroutine hip_ax_helm_stress_vector
end interface
73
interface
   !> C binding for the HIP launcher of the second stage of the
   !! full-stress Helmholtz product, invoked only when `coef%ifh2` is set.
   !!
   !! The backend-agnostic fallback in this module performs three
   !! `device_addcol4` calls (one per component, with `h2_d` and `B_d`)
   !! in place of this kernel.
   !! Pointer arguments are passed by value; `n` is passed by reference.
   subroutine hip_ax_helm_stress_vector_part2(au_d, av_d, aw_d, &
        u_d, v_d, w_d, h2_d, B_d, n) &
        bind(c, name='hip_ax_helm_stress_vector_part2')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int
     implicit none
     type(c_ptr), value :: au_d, av_d, aw_d
     type(c_ptr), value :: u_d, v_d, w_d
     type(c_ptr), value :: h2_d, B_d
     integer(c_int) :: n
   end subroutine hip_ax_helm_stress_vector_part2
end interface
84
85#elif HAVE_CUDA
interface
   !> C binding for the CUDA launcher of the first stage of the
   !! full-stress Helmholtz product.
   !!
   !! Every `*_d` argument is an opaque C pointer passed by value
   !! (`void *` on the C side). `nelv` and `lx` carry no `value`
   !! attribute, so they are passed by reference, matching the
   !! `int *nelv, int *lx` parameters of the C prototype.
   subroutine cuda_ax_helm_stress_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
        dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d, &
        h1_d, drdx_d, drdy_d, drdz_d, &
        dsdx_d, dsdy_d, dsdz_d, &
        dtdx_d, dtdy_d, dtdz_d, jacinv_d, weight3_d, nelv, lx) &
        bind(c, name='cuda_ax_helm_stress_vector')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int
     ! An interface body does not inherit the host's `implicit none`;
     ! stating it here makes the compiler reject any undeclared dummy.
     implicit none
     type(c_ptr), value :: au_d, av_d, aw_d
     type(c_ptr), value :: u_d, v_d, w_d
     type(c_ptr), value :: dx_d, dy_d, dz_d
     type(c_ptr), value :: dxt_d, dyt_d, dzt_d
     type(c_ptr), value :: h1_d
     type(c_ptr), value :: drdx_d, drdy_d, drdz_d
     type(c_ptr), value :: dsdx_d, dsdy_d, dsdz_d
     type(c_ptr), value :: dtdx_d, dtdy_d, dtdz_d
     type(c_ptr), value :: jacinv_d, weight3_d
     ! Was declared as `nel`, which left the actual dummy `nelv`
     ! implicitly typed instead of explicitly `integer(c_int)`.
     integer(c_int) :: nelv, lx
   end subroutine cuda_ax_helm_stress_vector
end interface
106
interface
   !> C binding for the CUDA launcher of the second stage of the
   !! full-stress Helmholtz product, invoked only when `coef%ifh2` is set.
   !!
   !! The backend-agnostic fallback in this module performs three
   !! `device_addcol4` calls (one per component, with `h2_d` and `B_d`)
   !! in place of this kernel.
   !! Pointer arguments are passed by value (`void *` on the C side);
   !! `n` is passed by reference, matching `int *n` in the C prototype.
   subroutine cuda_ax_helm_stress_vector_part2(au_d, av_d, aw_d, &
        u_d, v_d, w_d, h2_d, B_d, n) &
        bind(c, name='cuda_ax_helm_stress_vector_part2')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int
     implicit none
     type(c_ptr), value :: au_d, av_d, aw_d
     type(c_ptr), value :: u_d, v_d, w_d
     type(c_ptr), value :: h2_d, B_d
     integer(c_int) :: n
   end subroutine cuda_ax_helm_stress_vector_part2
end interface
117#endif
118
119contains
120
!> Apply the full-stress Helmholtz operator to a vector field on the device.
!!
!! The host arrays are used only to look up their associated device
!! pointers; all arithmetic is delegated to the backend kernels.
!! Stage one calls the HIP or CUDA `*_ax_helm_stress_vector` launcher.
!! Stage two runs only when `coef%ifh2` is set and calls the matching
!! `*_part2` launcher, or three `device_addcol4` calls when neither HIP
!! nor CUDA is enabled.
!!
!! @param this Operator instance (not referenced in the body).
!! @param au, av, aw Result components, dimensioned (lx, ly, lz, nelv).
!! @param u, v, w Input components, dimensioned (lx, ly, lz, nelv).
!! @param coef Coefficients supplying `h1_d`, `h2_d`, `B_d`, the
!! `d{r,s,t}d{x,y,z}_d` pointers, `jacinv_d`, `ifh2` and `dof`.
!! @param msh Mesh; only `nelv` is read.
!! @param Xh Function space supplying the derivative-matrix pointers,
!! `w3_d` and `lx`.
subroutine ax_helm_full_device_compute_vector(this, au, av, aw, &
     u, v, w, coef, msh, Xh)
  class(ax_helm_full_device_t), intent(in) :: this
  type(space_t), intent(inout) :: Xh
  type(mesh_t), intent(inout) :: msh
  type(coef_t), intent(inout) :: coef
  real(kind=rp), intent(inout) :: au(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
  real(kind=rp), intent(inout) :: av(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
  real(kind=rp), intent(inout) :: aw(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
  real(kind=rp), intent(inout) :: u(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
  real(kind=rp), intent(inout) :: v(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
  real(kind=rp), intent(inout) :: w(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
  type(c_ptr) :: u_d, v_d, w_d
  type(c_ptr) :: au_d, av_d, aw_d

  ! Device pointers associated with the input field components.
  u_d = device_get_ptr(u)
  v_d = device_get_ptr(v)
  w_d = device_get_ptr(w)

  ! Device pointers associated with the result components.
  au_d = device_get_ptr(au)
  av_d = device_get_ptr(av)
  aw_d = device_get_ptr(aw)

  ! Stage one: backend kernel taking h1 and the geometric factors.
  ! NOTE(review): there is no `#else` here, so a build with none of
  ! HAVE_HIP / HAVE_CUDA / HAVE_OPENCL compiles this stage to nothing.
#ifdef HAVE_HIP
  call hip_ax_helm_stress_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
       Xh%dx_d, Xh%dy_d, Xh%dz_d, Xh%dxt_d, Xh%dyt_d, Xh%dzt_d, coef%h1_d, &
       coef%drdx_d, coef%drdy_d, coef%drdz_d, &
       coef%dsdx_d, coef%dsdy_d, coef%dsdz_d, &
       coef%dtdx_d, coef%dtdy_d, coef%dtdz_d, &
       coef%jacinv_d, Xh%w3_d, msh%nelv, Xh%lx)
#elif HAVE_CUDA
  call cuda_ax_helm_stress_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
       Xh%dx_d, Xh%dy_d, Xh%dz_d, Xh%dxt_d, Xh%dyt_d, Xh%dzt_d, coef%h1_d, &
       coef%drdx_d, coef%drdy_d, coef%drdz_d, &
       coef%dsdx_d, coef%dsdy_d, coef%dsdz_d, &
       coef%dtdx_d, coef%dtdy_d, coef%dtdz_d, &
       coef%jacinv_d, Xh%w3_d, msh%nelv, Xh%lx)
#elif HAVE_OPENCL
  call neko_error('OPENCL is not implemented for full stress formulation')
#endif

  ! Stage two: contribution involving h2 and B, only when requested.
  if (coef%ifh2) then
#ifdef HAVE_HIP
     call hip_ax_helm_stress_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
          coef%h2_d, coef%B_d, coef%dof%size())
#elif HAVE_CUDA
     call cuda_ax_helm_stress_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
          coef%h2_d, coef%B_d, coef%dof%size())
#else
     ! Generic path: one device_addcol4 call per component.
     call device_addcol4(au_d, coef%h2_d, coef%B_d, u_d, coef%dof%size())
     call device_addcol4(av_d, coef%h2_d, coef%B_d, v_d, coef%dof%size())
     call device_addcol4(aw_d, coef%h2_d, coef%B_d, w_d, coef%dof%size())
#endif
  end if

end subroutine ax_helm_full_device_compute_vector
177
178end module ax_helm_full_device
void cuda_ax_helm_stress_vector(void *au, void *av, void *aw, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *drdx, void *drdy, void *drdz, void *dsdx, void *dsdy, void *dsdz, void *dtdx, void *dtdy, void *dtdz, void *jacinv, void *w3, int *nelv, int *lx)
void cuda_ax_helm_stress_vector_part2(void *au, void *av, void *aw, void *u, void *v, void *w, void *h2, void *B, int *n)
Return the device pointer for an associated Fortran array.
Definition device.F90:81
subroutine ax_helm_full_device_compute_vector(this, au, av, aw, u, v, w, coef, msh, xh)
Coefficients.
Definition coef.f90:34
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns $a = a + b \cdot c \cdot d$.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
Matrix-vector product for a Helmholtz problem.
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
The function space for the SEM solution fields.
Definition space.f90:62