Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
convect_scalar_kernel.h
Go to the documentation of this file.
1#ifndef __MATH_CONVECT_SCALAR_KERNEL_H__
2#define __MATH_CONVECT_SCALAR_KERNEL_H__
3/*
4 Copyright (c) 2021-2025, The Neko Authors
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following
16 disclaimer in the documentation and/or other materials provided
17 with the distribution.
18
19 * Neither the name of the authors nor the names of its
20 contributors may be used to endorse or promote products derived
21 from this software without specific prior written permission.
22
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35*/
36
/**
 * Scalar convection kernel, 1D thread-block variant:
 *   du = cr * (Dr u) + cs * (Ds u) + ct * (Dt u)   per spectral element,
 * where Dr/Ds/Dt apply the 1D derivative matrices dx/dy/dz along the
 * r-, s- and t-directions of the LX*LX*LX element.
 *
 * Launch layout: one block per element (blockIdx.x = element index),
 * blockDim.x = CHUNKS threads which stride over the LX^3 points.
 * Shared memory: 4*LX^3 + 3*LX^2 elements of T (static).
 * Precondition: CHUNKS >= LX*LX so the derivative matrices are fully
 * staged by the guarded load below — TODO(review): confirm against callers.
 */
template< typename T, const int LX, const int CHUNKS >
__global__ void convect_scalar_kernel_1d(T * __restrict__ du,
                                         const T * __restrict__ u,
                                         const T * __restrict__ cr,
                                         const T * __restrict__ cs,
                                         const T * __restrict__ ct,
                                         const T * __restrict__ dx,
                                         const T * __restrict__ dy,
                                         const T * __restrict__ dz) {

  /* Element-local field staged in shared memory */
  __shared__ T shu[LX * LX * LX];

  /* Convecting-velocity components for this element */
  __shared__ T shcr[LX * LX * LX];
  __shared__ T shcs[LX * LX * LX];
  __shared__ T shct[LX * LX * LX];

  /* 1D derivative operators, LX x LX each */
  __shared__ T shdx[LX * LX];
  __shared__ T shdy[LX * LX];
  __shared__ T shdz[LX * LX];

  const int e = blockIdx.x;                        /* one block per element */
  const int iii = threadIdx.x;
  const int nchunks = (LX * LX * LX - 1) / CHUNKS + 1; /* ceil(LX^3/CHUNKS) */
  const int ele = e*LX*LX*LX;                      /* element base offset */

  /* Stage the derivative matrices (first LX*LX threads only) */
  if (iii < (LX * LX)) {
    shdx[iii] = dx[iii];
    shdy[iii] = dy[iii];
    shdz[iii] = dz[iii];
  }

  /* Stage field and convecting components, CHUNKS-strided */
  int l = iii;
  while(l < (LX * LX * LX)) {
    shu[l] = u[l + ele];

    shcr[l] = cr[l + ele];
    shcs[l] = cs[l + ele];
    shct[l] = ct[l + ele];

    l = l + CHUNKS;
  }

  /* All staging must complete before any thread reads shared memory */
  __syncthreads();

  for (int n = 0; n < nchunks; n++) {
    const int ijk = iii + n * CHUNKS;
    /* Decompose flat index: ijk = i + j*LX + k*LX*LX */
    const int jk = ijk / LX;
    const int i = ijk - jk * LX;
    const int k = jk / LX;
    const int j = jk - k * LX;
    if ( i < LX && j < LX && k < LX) {
      T rtmp = 0.0;
      T stmp = 0.0;
      T ttmp = 0.0;
      /* Contract each derivative matrix along its own direction */
      for (int l = 0; l < LX; l++) {
        rtmp += shdx[i + l * LX] * shu[l + j * LX + k * LX * LX];
        stmp += shdy[j + l * LX] * shu[i + l * LX + k * LX * LX];
        ttmp += shdz[k + l * LX] * shu[i + j * LX + l * LX * LX];
      }

      /* Weight the directional derivatives by the convecting field */
      du[ijk + e * LX * LX * LX] = shcr[ijk] * rtmp
                                 + shcs[ijk] * stmp
                                 + shct[ijk] * ttmp;
    }
  }
}
107
/**
 * Scalar convection kernel, k-step (2D slab) variant: same contract as
 * convect_scalar_kernel_1d —
 *   du = cr * (Dr u) + cs * (Ds u) + ct * (Dt u)
 * but each thread owns one (i,j) column and marches over k, keeping its
 * column of u, cr, cs, ct in registers and sharing one LX*LX slab of u
 * per k-level for the r/s derivative contractions.
 *
 * Launch layout: one block per element (blockIdx.x = element index),
 * blockDim = (LX, LX, 1); __launch_bounds__(LX*LX, 3) caps registers for
 * at least 3 resident blocks per SM.
 * Shared memory: LX*LX slab + 3*LX*LX derivative matrices (static).
 */
template< typename T, const int LX >
__global__ void __launch_bounds__(LX * LX, 3)
  convect_scalar_kernel_kstep(T * __restrict__ du,
                              const T * __restrict__ u,
                              const T * __restrict__ cr,
                              const T * __restrict__ cs,
                              const T * __restrict__ ct,
                              const T * __restrict__ dx,
                              const T * __restrict__ dy,
                              const T * __restrict__ dz) {

  /* One k-level slab of u, rewritten every iteration of the k loop */
  __shared__ T shu[LX * LX];

  /* 1D derivative operators, LX x LX each */
  __shared__ T shdx[LX * LX];
  __shared__ T shdy[LX * LX];
  __shared__ T shdz[LX * LX];

  const int e = blockIdx.x;      /* one block per element */
  const int j = threadIdx.y;
  const int i = threadIdx.x;
  const int ij = i + j * LX;     /* this thread's (i,j) column */
  const int ele = e*LX*LX*LX;    /* element base offset */

  /* Stage derivative matrices: exactly LX*LX threads, one entry each */
  shdx[ij] = dx[ij];
  shdy[ij] = dy[ij];
  shdz[ij] = dz[ij];

  /* Per-thread register columns along k */
  T ru[LX];
  T rcr[LX];
  T rcs[LX];
  T rct[LX];

#pragma unroll LX
  for (int k = 0; k < LX; ++k) {
    ru[k] = u[ij + k*LX*LX + ele];
    rcr[k] = cr[ij + k*LX*LX + ele];
    rcs[k] = cs[ij + k*LX*LX + ele];
    rct[k] = ct[ij + k*LX*LX + ele];
  }

  /* Derivative matrices must be staged before the k loop reads them */
  __syncthreads();

#pragma unroll
  for (int k = 0; k < LX; ++k) {
    const int ijk = ij + k*LX*LX;
    T ttmp = 0.0;
    /* Publish this thread's u value for the current k-level slab */
    shu[ij] = ru[k];
    /* t-derivative needs only this thread's own register column */
    for (int l = 0; l < LX; l++) {
      ttmp += shdz[k+l*LX] * ru[l];
    }
    /* Fence: slab fully written before cross-thread reads below */
    __syncthreads();

    T rtmp = 0.0;
    T stmp = 0.0;
#pragma unroll
    for (int l = 0; l < LX; l++) {
      rtmp += shdx[i+l*LX] * shu[l+j*LX];
      stmp += shdy[j+l*LX] * shu[i+l*LX];
    }

    du[ijk + ele] = rcr[k] * rtmp
                  + rcs[k] * stmp
                  + rct[k] * ttmp;
    /* Fence: all reads of this slab done before next k overwrites it */
    __syncthreads();
  }
}
174
175
176#endif // __MATH_CONVECT_SCALAR_KERNEL_H__
__shared__ T shu[LX *LX]
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
T rcr[LX]
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void __launch_bounds__(LX *LX, 3) convect_scalar_kernel_kstep(T *__restrict__ du
__shared__ T shdz[LX *LX]
const int i
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
const int ij
__shared__ T shdy[LX *LX]
__global__ void const T *__restrict__ const T *__restrict__ cr
const int e
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ cs
T rct[LX]
__shared__ T shdx[LX *LX]
__global__ void convect_scalar_kernel_1d(T *__restrict__ du, const T *__restrict__ u, const T *__restrict__ cr, const T *__restrict__ cs, const T *__restrict__ ct, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz)
const int ele
T rcs[LX]
__global__ void const T *__restrict__ u
const int j
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ ct
__syncthreads()
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)