Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
dudxyz_kernel.h
Go to the documentation of this file.
1#ifndef __MATH_DUDXYZ_KERNEL_H__
2#define __MATH_DUDXYZ_KERNEL_H__
3/*
4 Copyright (c) 2021-2023, The Neko Authors
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following
16 disclaimer in the documentation and/or other materials provided
17 with the distribution.
18
19 * Neither the name of the authors nor the names of its
20 contributors may be used to endorse or promote products derived
21 from this software without specific prior written permission.
22
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35*/
36
41template< typename T, const int LX, const int CHUNKS >
43 const T * __restrict__ u,
44 const T * __restrict__ dr,
45 const T * __restrict__ ds,
46 const T * __restrict__ dt,
47 const T * __restrict__ dx,
48 const T * __restrict__ dy,
49 const T * __restrict__ dz,
50 const T * __restrict__ jacinv) {
51
52 __shared__ T shu[LX * LX * LX];
53 __shared__ T shdr[LX * LX * LX];
54 __shared__ T shds[LX * LX * LX];
55 __shared__ T shdt[LX * LX * LX];
56
60
62
63 const int e = blockIdx.x;
64 const int iii = threadIdx.x;
65 const int nchunks = (LX * LX * LX - 1) / CHUNKS + 1;
66
67 if (iii < (LX * LX)) {
68 shdx[iii] = dx[iii];
69 shdy[iii] = dy[iii];
70 shdz[iii] = dz[iii];
71 }
72
73 int l = iii;
74 while(l < (LX * LX * LX)) {
75 shu[l] = u[l + e * LX * LX * LX];
76 shdr[l] = dr[l + e * LX * LX * LX];
77 shds[l] = ds[l + e * LX * LX * LX];
78 shdt[l] = dt[l + e * LX * LX * LX];
79 shjacinv[l] = jacinv[l + e * LX * LX * LX];
80 l = l + CHUNKS;
81 }
82
84
85 for (int n = 0; n < nchunks; n++) {
86 const int ijk = iii + n * CHUNKS;
87 const int jk = ijk / LX;
88 const int i = ijk - jk * LX;
89 const int k = jk / LX;
90 const int j = jk - k * LX;
91 if ( i < LX && j < LX && k < LX) {
92 T rtmp = 0.0;
93 T stmp = 0.0;
94 T ttmp = 0.0;
95 for (int l = 0; l < LX; l++) {
96 rtmp += shdx[i + l * LX] * shu[l + j * LX + k * LX * LX];
97 stmp += shdy[j + l * LX] * shu[i + l * LX + k * LX * LX];
98 ttmp += shdz[k + l * LX] * shu[i + j * LX + l * LX * LX];
99 }
100 du[ijk + e * LX * LX * LX] = ((rtmp * shdr[ijk])
101 + (stmp * shds[ijk])
102 + (ttmp * shdt[ijk]))
103 * shjacinv[ijk];
104
105 }
106 }
107}
108
109template< typename T, const int LX >
120
121 __shared__ T shu[LX * LX];
122
126
127 const int e = blockIdx.x;
128 const int j = threadIdx.y;
129 const int i = threadIdx.x;
130 const int ij = i + j * LX;
131 const int ele = e*LX*LX*LX;
132
133 shdx[ij] = dx[ij];
134 shdy[ij] = dy[ij];
135 shdz[ij] = dz[ij];
136
142
143 #pragma unroll LX
144 for (int k = 0; k < LX; ++k) {
145 ru[k] = u[ij + k*LX*LX + ele];
146 rdr[k] = dr[ij + k*LX*LX + ele];
147 rds[k] = ds[ij + k*LX*LX + ele];
148 rdt[k] = dt[ij + k*LX*LX + ele];
149 rjacinv[k] = jacinv[ij + k*LX*LX + ele];
150 }
151
153
154 #pragma unroll
155 for (int k = 0; k < LX; ++k) {
156 const int ijk = ij + k*LX*LX;
157 T ttmp = 0.0;
158 shu[ij] = ru[k];
159 for (int l = 0; l < LX; l++) {
160 ttmp += shdz[k+l*LX] * ru[l];
161 }
163
164 T rtmp = 0.0;
165 T stmp = 0.0;
166#pragma unroll
167 for (int l = 0; l < LX; l++) {
168 rtmp += shdx[i+l*LX] * shu[l+j*LX];
169 stmp += shdy[j+l*LX] * shu[i+l*LX];
170 }
171
172 du[ijk + ele] = rjacinv[k] * ((rtmp * rdr[k])
173 + (stmp * rds[k])
174 + (ttmp * rdt[k]));
176 }
177}
178
179
180#endif // __MATH_DUDXYZ_KERNEL_H__
__shared__ T shu[LX *LX]
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void const T *__restrict__ const T *__restrict__ dr
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
T ru[LX]
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
__shared__ T shdz[LX *LX]
T rdt[LX]
const int i
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
const int ij
__shared__ T shdy[LX *LX]
T rjacinv[LX]
const int e
T rds[LX]
__global__ void __launch_bounds__(LX *LX, 3) dudxyz_kernel_kstep(T *__restrict__ du
__global__ void dudxyz_kernel_1d(T *__restrict__ du, const T *__restrict__ u, const T *__restrict__ dr, const T *__restrict__ ds, const T *__restrict__ dt, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz, const T *__restrict__ jacinv)
__shared__ T shdx[LX *LX]
const int ele
__global__ void const T *__restrict__ u
const int j
T rdr[LX]
__syncthreads()
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv