Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef_kernel.h
Go to the documentation of this file.
1#ifndef __SEM_COEF_KERNEL_H__
2#define __SEM_COEF_KERNEL_H__
3/*
4 Copyright (c) 2022, The Neko Authors
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following
16 disclaimer in the documentation and/or other materials provided
17 with the distribution.
18
19 * Neither the name of the authors nor the names of its
20 contributors may be used to endorse or promote products derived
21 from this software without specific prior written permission.
22
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35*/
36
40template< typename T, const int LX, const int CHUNKS >
47 const T * __restrict__ drdx,
48 const T * __restrict__ drdy,
49 const T * __restrict__ drdz,
50 const T * __restrict__ dsdx,
51 const T * __restrict__ dsdy,
52 const T * __restrict__ dsdz,
53 const T * __restrict__ dtdx,
54 const T * __restrict__ dtdy,
55 const T * __restrict__ dtdz,
56 const T * __restrict__ jacinv,
57 const T * __restrict__ w3,
58 const int gdim) {
59
60 int i,j,k;
61
62 const int e = blockIdx.x;
63 const int iii = threadIdx.x;
64 const int nchunks = (LX * LX * LX - 1) / CHUNKS + 1;
65
66 __shared__ T shw3[LX * LX * LX];
67
68 j = iii;
69 while( j < (LX * LX * LX)) {
70 const int i = j + e * LX * LX * LX;
71 G11[i] = (drdx[i]*drdx[i] + drdy[i]*drdy[i] + drdz[i]*drdz[i]) * jacinv[i];
72 G22[i] = (dsdx[i]*dsdx[i] + dsdy[i]*dsdy[i] + dsdz[i]*dsdz[i]) * jacinv[i];
73 G33[i] = (dtdx[i]*dtdx[i] + dtdy[i]*dtdy[i] + dtdz[i]*dtdz[i]) * jacinv[i];
74
75 G12[i] = (drdx[i]*dsdx[i] + drdy[i]*dsdy[i] + drdz[i]*dsdz[i]) * jacinv[i];
76 G13[i] = (drdx[i]*dtdx[i] + drdy[i]*dtdy[i] + drdz[i]*dtdz[i]) * jacinv[i];
77 G23[i] = (dsdx[i]*dtdx[i] + dsdy[i]*dtdy[i] + dsdz[i]*dtdz[i]) * jacinv[i];
78
79 shw3[j] = w3[j];
80
81 j = j + CHUNKS;
82 }
83
85
86 for (int n = 0; n < nchunks; n++) {
87 const int ijk = iii + n * CHUNKS;
88 const int jk = ijk / LX;
89 i = ijk - jk * LX;
90 k = jk / LX;
91 j = jk - k * LX;
92 if ( i < LX && j < LX && k < LX) {
93 G11[ijk + e * LX * LX * LX] *= shw3[ijk];
94 G12[ijk + e * LX * LX * LX] *= shw3[ijk];
95 G13[ijk + e * LX * LX * LX] *= shw3[ijk];
96 G22[ijk + e * LX * LX * LX] *= shw3[ijk];
97 G23[ijk + e * LX * LX * LX] *= shw3[ijk];
98 G33[ijk + e * LX * LX * LX] *= shw3[ijk];
99 }
100 }
101}
102
106template< typename T, const int LX, const int CHUNKS >
108 T * __restrict__ dydr,
109 T * __restrict__ dzdr,
110 T * __restrict__ dxds,
111 T * __restrict__ dyds,
112 T * __restrict__ dzds,
113 T * __restrict__ dxdt,
114 T * __restrict__ dydt,
115 T * __restrict__ dzdt,
116 const T * __restrict__ dx,
117 const T * __restrict__ dy,
118 const T * __restrict__ dz,
119 const T * __restrict__ x,
120 const T * __restrict__ y,
121 const T * __restrict__ z) {
122
123 int i,j,k;
124
125 const int e = blockIdx.x;
126 const int iii = threadIdx.x;
127 const int nchunks = (LX * LX * LX - 1) / CHUNKS + 1;
128
129 __shared__ T shdx[LX * LX];
130 __shared__ T shdy[LX * LX];
131 __shared__ T shdz[LX * LX];
132
133 __shared__ T shu[LX * LX * LX];
134
135 if (iii < (LX * LX)) {
136 shdx[iii] = dx[iii];
137 shdy[iii] = dy[iii];
138 shdz[iii] = dz[iii];
139 }
140
141 j = iii;
142 while(j < (LX * LX * LX)) {
143 shu[j] = x[j + e * LX * LX * LX];
144 j = j + CHUNKS;
145 }
146
148
149 for (int n = 0; n < nchunks; n++) {
150 const int ijk = iii + n * CHUNKS;
151 const int jk = ijk / LX;
152 i = ijk - jk * LX;
153 k = jk / LX;
154 j = jk - k * LX;
155 if ( i < LX && j < LX && k < LX) {
156 T rtmp = 0.0;
157 T stmp = 0.0;
158 T ttmp = 0.0;
159 for (int l = 0; l < LX; l++) {
160 rtmp += shdx[i + l * LX] * shu[l + j * LX + k * LX * LX];
161 stmp += shdy[j + l * LX] * shu[i + l * LX + k * LX * LX];
162 ttmp += shdz[k + l * LX] * shu[i + j * LX + l * LX * LX];
163 }
164 dxdr[ijk + e * LX * LX * LX] = rtmp;
165 dxds[ijk + e * LX * LX * LX] = stmp;
166 dxdt[ijk + e * LX * LX * LX] = ttmp;
167 }
168 }
169
171
172 j = iii;
173 while(j < (LX * LX * LX)) {
174 shu[j] = y[j + e * LX * LX * LX];
175 j = j + CHUNKS;
176 }
177
179
180 for (int n = 0; n < nchunks; n++) {
181 const int ijk = iii + n * CHUNKS;
182 const int jk = ijk / LX;
183 i = ijk - jk * LX;
184 k = jk / LX;
185 j = jk - k * LX;
186 if ( i < LX && j < LX && k < LX) {
187 T rtmp = 0.0;
188 T stmp = 0.0;
189 T ttmp = 0.0;
190 for (int l = 0; l < LX; l++) {
191 rtmp += shdx[i + l * LX] * shu[l + j * LX + k * LX * LX];
192 stmp += shdy[j + l * LX] * shu[i + l * LX + k * LX * LX];
193 ttmp += shdz[k + l * LX] * shu[i + j * LX + l * LX * LX];
194 }
195 dydr[ijk + e * LX * LX * LX] = rtmp;
196 dyds[ijk + e * LX * LX * LX] = stmp;
197 dydt[ijk + e * LX * LX * LX] = ttmp;
198 }
199 }
200
202
203 j = iii;
204 while(j < (LX * LX * LX)) {
205 shu[j] = z[j + e * LX * LX * LX];
206 j = j + CHUNKS;
207 }
208
210
211 for (int n = 0; n < nchunks; n++) {
212 const int ijk = iii + n * CHUNKS;
213 const int jk = ijk / LX;
214 i = ijk - jk * LX;
215 k = jk / LX;
216 j = jk - k * LX;
217 if ( i < LX && j < LX && k < LX) {
218 T rtmp = 0.0;
219 T stmp = 0.0;
220 T ttmp = 0.0;
221 for (int l = 0; l < LX; l++) {
222 rtmp += shdx[i + l * LX] * shu[l + j * LX + k * LX * LX];
223 stmp += shdy[j + l * LX] * shu[i + l * LX + k * LX * LX];
224 ttmp += shdz[k + l * LX] * shu[i + j * LX + l * LX * LX];
225 }
226 dzdr[ijk + e * LX * LX * LX] = rtmp;
227 dzds[ijk + e * LX * LX * LX] = stmp;
228 dzdt[ijk + e * LX * LX * LX] = ttmp;
229 }
230 }
231}
232
236template< typename T >
248 const T * __restrict__ dxdr,
249 const T * __restrict__ dydr,
250 const T * __restrict__ dzdr,
251 const T * __restrict__ dxds,
252 const T * __restrict__ dyds,
253 const T * __restrict__ dzds,
254 const T * __restrict__ dxdt,
255 const T * __restrict__ dydt,
256 const T * __restrict__ dzdt,
257 const int n) {
258
259 const int idx = blockIdx.x * blockDim.x + threadIdx.x;
260 const int str = blockDim.x * gridDim.x;
261 const T one = 1.0;
262
263 for (int i = idx; i < n; i += str) {
264 jac[i] = (dxdr[i] * dyds[i] * dzdt[i])
265 + (dxdt[i] * dydr[i] * dzds[i])
266 + (dxds[i] * dydt[i] * dzdr[i])
267 - (dxdr[i] * dydt[i] * dzds[i])
268 - (dxds[i] * dydr[i] * dzdt[i])
269 - (dxdt[i] * dyds[i] * dzdr[i]);
270 jacinv[i] = one / jac[i];
271
272 drdx[i] = dyds[i]*dzdt[i] - dydt[i]*dzds[i];
273 drdy[i] = dxdt[i]*dzds[i] - dxds[i]*dzdt[i];
274 drdz[i] = dxds[i]*dydt[i] - dxdt[i]*dyds[i];
275 dsdx[i] = dydt[i]*dzdr[i] - dydr[i]*dzdt[i];
276 dsdy[i] = dxdr[i]*dzdt[i] - dxdt[i]*dzdr[i];
277 dsdz[i] = dxdt[i]*dydr[i] - dxdr[i]*dydt[i];
278 dtdx[i] = dydr[i]*dzds[i] - dyds[i]*dzdr[i];
279 dtdy[i] = dxds[i]*dzdr[i] - dxdr[i]*dzds[i];
280 dtdz[i] = dxdr[i]*dyds[i] - dxds[i]*dydr[i];
281
282 }
283
284}
285
286#endif // __SEM_COEF_KERNEL_H__
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__shared__ T shu[LX *LX]
__shared__ T shdy[LX *LX]
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__shared__ T shdz[LX *LX]
const int i
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
shdx[ij]
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
const int e
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
const int j
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__syncthreads()
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void coef_generate_dxyz_kernel(T *__restrict__ dxdr, T *__restrict__ dydr, T *__restrict__ dzdr, T *__restrict__ dxds, T *__restrict__ dyds, T *__restrict__ dzds, T *__restrict__ dxdt, T *__restrict__ dydt, T *__restrict__ dzdt, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz, const T *__restrict__ x, const T *__restrict__ y, const T *__restrict__ z)
__global__ void coef_generate_drst_kernel(T *__restrict__ jac, T *__restrict__ jacinv, T *__restrict__ drdx, T *__restrict__ drdy, T *__restrict__ drdz, T *__restrict__ dsdx, T *__restrict__ dsdy, T *__restrict__ dsdz, T *__restrict__ dtdx, T *__restrict__ dtdy, T *__restrict__ dtdz, const T *__restrict__ dxdr, const T *__restrict__ dydr, const T *__restrict__ dzdr, const T *__restrict__ dxds, const T *__restrict__ dyds, const T *__restrict__ dzds, const T *__restrict__ dxdt, const T *__restrict__ dydt, const T *__restrict__ dzdt, const int n)
__global__ void coef_generate_geo_kernel(T *__restrict__ G11, T *__restrict__ G12, T *__restrict__ G13, T *__restrict__ G22, T *__restrict__ G23, T *__restrict__ G33, const T *__restrict__ drdx, const T *__restrict__ drdy, const T *__restrict__ drdz, const T *__restrict__ dsdx, const T *__restrict__ dsdy, const T *__restrict__ dsdz, const T *__restrict__ dtdx, const T *__restrict__ dtdy, const T *__restrict__ dtdz, const T *__restrict__ jacinv, const T *__restrict__ w3, const int gdim)
Definition coef_kernel.h:41
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)