Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
richardson_kernel.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2026, The Neko Authors
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
16
17 * Neither the name of the authors nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#ifndef RICHARDSON_KERNEL_H
36#define RICHARDSON_KERNEL_H
37
38#include <cmath>
39#include <algorithm>
40
41/*
42* Similarity laws and corrections for the STABLE regime:
43* Based on Mauritsen et al. 2007
44*/
45
/**
 * Momentum stability correction for the stable regime
 * (Mauritsen et al. 2007). Decreases monotonically with the bulk
 * Richardson number; f_tau_stable(0) = 0.17 (neutral limit).
 * Literals are cast to T so single-precision builds avoid implicit
 * double-precision promotion.
 */
template<typename T>
__device__ T f_tau_stable(T Ri_b)
{
  return T(0.17) * (T(0.25) + T(0.75) / (T(1.0) + T(4.0) * Ri_b));
}
51
/**
 * Heat-flux stability correction for the stable regime
 * (Mauritsen et al. 2007). Always negative; callers normalize by
 * fabs(f_theta_stable(0)) = 0.145. Literals are cast to T to avoid
 * double promotion in single-precision builds.
 */
template<typename T>
__device__ T f_theta_stable(T Ri_b)
{
  return T(-0.145) / (T(1.0) + T(4.0) * Ri_b);
}
57
/**
 * Wall shear stress magnitude in the stable regime: neutral log-law
 * stress scaled by the ratio of the stability function at Ri_b to its
 * neutral value.
 *
 * @param magu  tangential velocity magnitude at the sampling height
 * @param Ri_b  bulk Richardson number
 * @param h     sampling height above the wall
 * @param z0    aerodynamic roughness length
 * @param kappa von Karman constant
 */
template<typename T>
__device__ T tau_stable(T magu, T Ri_b, T h, T z0, T kappa)
{
  T log_hz0 = log(h / z0);
  // T(0.0) keeps the neutral-value evaluation in the working precision
  return (magu * magu) / (log_hz0 * log_hz0)
    * (f_tau_stable<T>(Ri_b) / f_tau_stable<T>(T(0.0)))
    * (kappa * kappa);
}
64
/**
 * Wall heat flux in the stable regime: neutral log-law flux scaled by
 * the stability function ratio (normalized by the absolute neutral
 * value since f_theta_stable is negative).
 *
 * @param ti   temperature at the sampling height
 * @param ts   wall (surface) temperature
 * @param Ri_b bulk Richardson number
 * @param h    sampling height above the wall
 * @param z0h  thermal roughness length
 * @param utau friction velocity
 * @param kappa von Karman constant
 * @param Pr   Prandtl number
 */
template<typename T>
__device__ T heat_flux_stable(T ti, T ts, T Ri_b, T h, T z0h, T utau,
                              T kappa, T Pr)
{
  // T(0.0) keeps the neutral-value evaluation in the working precision
  return (ti - ts) / log(h / z0h)
    * (f_theta_stable<T>(Ri_b) / fabs(f_theta_stable<T>(T(0.0))))
    * kappa * (utau / Pr);
}
70
71/*
72* Similarity laws and corrections for the UNSTABLE (convective) regime:
73* Based on Louis 1979
74*/
75
/**
 * Momentum stability correction for the unstable (convective) regime
 * (Louis 1979). c is the empirical coefficient precomputed by the
 * caller from h/z0. Literals are cast to T to avoid double promotion
 * in single-precision builds.
 */
template<typename T>
__device__ T f_tau_convective(T Ri_b, T c)
{
  return T(1.0) - (T(2.0) * Ri_b) / (T(1.0) + c * sqrt(fabs(Ri_b)));
}
81
/**
 * Heat-flux stability correction for the unstable (convective) regime.
 * Functionally identical to f_tau_convective in Louis 1979; kept as a
 * separate function because the caller supplies a different empirical
 * coefficient c (based on z0h instead of z0).
 */
template<typename T>
__device__ T f_theta_convective(T Ri_b, T c)
{
  return T(1.0) - (T(2.0) * Ri_b) / (T(1.0) + c * sqrt(fabs(Ri_b)));
}
88
/**
 * Wall shear stress magnitude in the unstable (convective) regime
 * (Louis 1979): neutral log-law stress times the convective stability
 * correction with the empirical coefficient c = 7.4 * a^2 * b * sqrt(h/z0).
 *
 * @param magu  tangential velocity magnitude at the sampling height
 * @param Ri_b  bulk Richardson number (negative in this regime)
 * @param h     sampling height above the wall
 * @param z0    aerodynamic roughness length
 * @param kappa von Karman constant
 */
template<typename T>
__device__ T tau_convective(T magu, T Ri_b, T h, T z0, T kappa)
{
  T a = kappa / log(h / z0);
  // Literals cast to T so single-precision builds stay in float
  T b = T(2.0);
  T c = T(7.4) * (a * a) * b * sqrt(h / z0);

  return (a * a) * (magu * magu) * f_tau_convective<T>(Ri_b, c);
}
98
/**
 * Wall heat flux in the unstable (convective) regime (Louis 1979):
 * bulk formula with the convective stability correction, the empirical
 * coefficient c = 5.3 * a^2 * b * sqrt(h/z0h), and the 0.74 turbulent
 * Prandtl-number factor from Louis 1979.
 *
 * @param ti   temperature at the sampling height
 * @param ts   wall (surface) temperature
 * @param Ri_b bulk Richardson number (negative in this regime)
 * @param h    sampling height above the wall
 * @param magu tangential velocity magnitude at the sampling height
 * @param z0h  thermal roughness length
 * @param kappa von Karman constant
 */
template<typename T>
__device__ T heat_flux_convective(T ti, T ts, T Ri_b, T h, T magu, T z0h,
                                  T kappa)
{
  T a = kappa / log(h / z0h);
  // Literals cast to T so single-precision builds stay in float
  T b = T(2.0);
  T c = T(5.3) * (a * a) * b * sqrt(h / z0h);

  return -(a * a) / T(0.74) * magu * (ti - ts) * f_theta_convective<T>(Ri_b, c);
}
108
109/*
110* Similarity laws and corrections for the NEUTRAL regime:
111*/
112
/**
 * Wall shear stress magnitude in the neutral regime from the log law:
 * tau = (kappa * |u| / ln(h/z0))^2.
 *
 * @param magu  tangential velocity magnitude at the sampling height
 * @param h     sampling height above the wall
 * @param z0    aerodynamic roughness length
 * @param kappa von Karman constant
 */
template<typename T>
__device__ T tau_neutral(T magu, T h, T z0, T kappa)
{
  const T utau_neutral = (kappa * magu) / log(h / z0);
  return utau_neutral * utau_neutral;
}
119
/**
 * Wall heat flux in the neutral regime from the log law:
 * q = kappa * utau * (ti - ts) / ln(h/z0h).
 *
 * @param ti   temperature at the sampling height
 * @param ts   wall (surface) temperature
 * @param h    sampling height above the wall
 * @param z0h  thermal roughness length
 * @param utau friction velocity
 * @param kappa von Karman constant
 */
template<typename T>
__device__ T heat_flux_neutral(T ti, T ts, T h, T z0h, T utau, T kappa)
{
  const T dtheta = ti - ts;
  return (kappa * utau * dtheta) / log(h / z0h);
}
125
126/*
127 * CUDA kernel for the Richardson wall model.
128 */
/*
 * CUDA kernel for the Richardson wall model.
 *
 * One thread per wall node, grid-stride loop over n_nodes so any launch
 * configuration is valid. BC_TYPE selects the thermal boundary condition
 * at compile time: 0 = Neumann (prescribed flux bc_value),
 * 1 = Dirichlet (prescribed wall temperature bc_value).
 *
 * For each wall node the kernel samples the velocity/temperature at the
 * off-wall point addressed by the (r,s,t,e) index arrays, computes the
 * bulk Richardson number, branches into the stable / convective /
 * neutral similarity laws, and writes the wall stress components and a
 * set of diagnostic fields.
 */
template<typename T, int BC_TYPE>
__global__ void richardson_compute(const T* __restrict__ u_d,
                                   const T* __restrict__ v_d,
                                   const T* __restrict__ w_d,
                                   const T* __restrict__ temp_d,
                                   const T* __restrict__ h_d,
                                   const T* __restrict__ n_x_d,
                                   const T* __restrict__ n_y_d,
                                   const T* __restrict__ n_z_d,
                                   const int* __restrict__ ind_r_d,
                                   const int* __restrict__ ind_s_d,
                                   const int* __restrict__ ind_t_d,
                                   const int* __restrict__ ind_e_d,
                                   T* __restrict__ tau_x_d,
                                   T* __restrict__ tau_y_d,
                                   T* __restrict__ tau_z_d,
                                   int n_nodes,
                                   int lx,
                                   T kappa,
                                   const T* __restrict__ mu_w_d,
                                   const T* __restrict__ rho_w_d,
                                   T g1,
                                   T g2,
                                   T g3,
                                   T Pr,
                                   T z0,
                                   T z0h_in,
                                   T bc_value,
                                   T* __restrict__ Ri_b_diagn,
                                   T* __restrict__ L_ob_diagn,
                                   T* __restrict__ utau_diagn,
                                   T* __restrict__ magu_diagn,
                                   T* __restrict__ ti_diagn,
                                   T* __restrict__ magu_diagn_unused,
                                   T* __restrict__ q_diagn,
                                   const int* __restrict__ h_x_idx,
                                   const int* __restrict__ h_y_idx,
                                   const int* __restrict__ h_z_idx
) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;
  if (idx >= n_nodes) return;

  // Below |Ri_b| <= Ri_threshold the flow is treated as neutral
  const T Ri_threshold = T(1e-4);

  for (int i = idx; i < n_nodes; i += str) {
    // 64-bit flat index: (e-1)*lx^3 + ... can overflow 32-bit 'int'
    // for large polynomial order / element counts
    const long long lx_ll = (long long) lx;
    const long long index = ((long long)(ind_e_d[i] - 1)) * lx_ll * lx_ll * lx_ll +
                            ((long long)(ind_t_d[i] - 1)) * lx_ll * lx_ll +
                            ((long long)(ind_s_d[i] - 1)) * lx_ll +
                            ((long long)(ind_r_d[i] - 1));

    T ui = u_d[index];
    T vi = v_d[index];
    T wi = w_d[index];
    T ti = temp_d[index];
    T hi = h_d[i];
    T mu = mu_w_d[i];
    T rho = rho_w_d[i];

    // Extract the local normal vector
    T nx = n_x_d[i];
    T ny = n_y_d[i];
    T nz = n_z_d[i];

    // Get the tangential component
    T normu = ui * nx + vi * ny + wi * nz;
    ui -= normu * nx;
    vi -= normu * ny;
    wi -= normu * nz;

    T magu = sqrt(ui*ui + vi*vi + wi*wi);
    // Floor magu to keep the later divisions by magu well defined
    magu = fmax(magu, T(1e-6));

    // Neutral log-law friction velocity as a first guess
    T utau = kappa * magu / log(hi/z0);

    // Zilitinkevich 1995 correlation for thermal roughness
    T z0h;
    if (z0h_in < T(0.0)) {
      // Note that this uses previous timestep's utau, hence
      // lags behind by one dt. usually very negligible
      z0h = z0 * exp(z0h_in * sqrt((utau*z0)/(mu/rho)));
    } else {
      z0h = z0h_in;
    }

    T ts = T(0);
    T q = T(0);

    // Initialize variables based on Boundary Condition
    if constexpr (BC_TYPE == 0) { // Neumann
      q = bc_value;
    } else { // Dirichlet
      ts = bc_value;
      q = kappa * utau * (ts - ti) / log(hi/z0h);
    }

    T Ri_b;
    T g_dot_n = fabs(g1*nx + g2*ny + g3*nz);

    // Compute Bulk Richardson Number (flux form for Neumann,
    // temperature-difference form for Dirichlet)
    if constexpr (BC_TYPE == 0) {
      Ri_b = -g_dot_n * hi / ti * q / (magu * magu * magu * kappa * kappa);
    } else {
      Ri_b = g_dot_n * hi / ti * (ti - ts) / (magu * magu);
    }

    T tau_mag = T(0);

    // Stability regime branching
    if (Ri_b > Ri_threshold) { // Stable
      tau_mag = tau_stable<T>(magu, Ri_b, hi, z0, kappa);
      utau = sqrt(tau_mag);
      if constexpr (BC_TYPE == 1) {
        q = heat_flux_stable<T>(ti, ts, Ri_b, hi, z0h, utau, kappa, Pr);
      }
    }
    else if (Ri_b < -Ri_threshold) { // Convective
      tau_mag = tau_convective<T>(magu, Ri_b, hi, z0, kappa);
      utau = sqrt(tau_mag);
      if constexpr (BC_TYPE == 1) {
        q = heat_flux_convective<T>(ti, ts, Ri_b, hi, magu, z0h, kappa);
      }
    }
    else { // Neutral
      tau_mag = tau_neutral<T>(magu, hi, z0, kappa);
      utau = sqrt(tau_mag);
      if constexpr (BC_TYPE == 1) {
        q = heat_flux_neutral<T>(ti, ts, hi, z0h, utau, kappa);
      }
    }

    // Project the stress magnitude onto the (unit) tangential direction
    tau_x_d[i] = -rho*tau_mag * ui / magu;
    tau_y_d[i] = -rho*tau_mag * vi / magu;
    tau_z_d[i] = -rho*tau_mag * wi / magu;

    // Note: L_ob calculation is omitted from the kernel as it is a pure
    // diagnostic variable; a sentinel value is written instead.

    // Index of the wall point itself, offset from the sampling point by
    // the (h_x, h_y, h_z) index shifts — 64-bit for the same reason as above
    const long long index_ts = ((long long)(ind_e_d[i] - 1)) * lx_ll * lx_ll * lx_ll +
                               ((long long)(ind_t_d[i] - 1 - h_z_idx[i])) * lx_ll * lx_ll +
                               ((long long)(ind_s_d[i] - 1 - h_y_idx[i])) * lx_ll +
                               ((long long)(ind_r_d[i] - 1 - h_x_idx[i]));
    Ri_b_diagn[i] = Ri_b;
    L_ob_diagn[i] = T(9999); // sentinel: Obukhov length not computed on GPU
    utau_diagn[i] = utau;
    magu_diagn[i] = magu;
    ti_diagn[i] = ti;
    magu_diagn_unused[i] = temp_d[index_ts];
    q_diagn[i] = q;
  }
}
285
286#endif // RICHARDSON_KERNEL_H
const int i
const int e
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
__device__ T f_tau_stable(T Ri_b)
__device__ T tau_convective(T magu, T Ri_b, T h, T z0, T kappa)
__device__ T heat_flux_neutral(T ti, T ts, T h, T z0h, T utau, T kappa)
__device__ T tau_neutral(T magu, T h, T z0, T kappa)
__device__ T heat_flux_convective(T ti, T ts, T Ri_b, T h, T magu, T z0h, T kappa)
__device__ T tau_stable(T magu, T Ri_b, T h, T z0, T kappa)
__device__ T f_theta_convective(T Ri_b, T c)
__global__ void richardson_compute(const T *__restrict__ u_d, const T *__restrict__ v_d, const T *__restrict__ w_d, const T *__restrict__ temp_d, const T *__restrict__ h_d, const T *__restrict__ n_x_d, const T *__restrict__ n_y_d, const T *__restrict__ n_z_d, const int *__restrict__ ind_r_d, const int *__restrict__ ind_s_d, const int *__restrict__ ind_t_d, const int *__restrict__ ind_e_d, T *__restrict__ tau_x_d, T *__restrict__ tau_y_d, T *__restrict__ tau_z_d, int n_nodes, int lx, T kappa, const T *__restrict__ mu_w_d, const T *__restrict__ rho_w_d, T g1, T g2, T g3, T Pr, T z0, T z0h_in, T bc_value, T *__restrict__ Ri_b_diagn, T *__restrict__ L_ob_diagn, T *__restrict__ utau_diagn, T *__restrict__ magu_diagn, T *__restrict__ ti_diagn, T *__restrict__ ts_diagn, T *__restrict__ q_diagn, const int *__restrict__ h_x_idx, const int *__restrict__ h_y_idx, const int *__restrict__ h_z_idx)
__device__ T heat_flux_stable(T ti, T ts, T Ri_b, T h, T z0h, T utau, T kappa, T Pr)
__device__ T f_tau_convective(T Ri_b, T c)
__device__ T f_theta_stable(T Ri_b)