Neko 1.99.4
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
ale_kinematics_kernel.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2026, The Neko Authors
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
16
17 * Neither the name of the authors nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#ifndef __COMMON_ALE_KINEMATICS_KERNEL_H__
36#define __COMMON_ALE_KINEMATICS_KERNEL_H__
37
38#ifndef KINEMATICS_PARAMS_T_DEFINED
39#define KINEMATICS_PARAMS_T_DEFINED
40typedef struct {
42 real vtx, vty, vtz;
43 real vax, vay, vaz;
44 real px, py, pz;
45 real r11, r12, r13;
46 real r21, r22, r23;
47 real r31, r32, r33;
49#endif
50
51template< typename T >
53 T * __restrict__ wx,
54 T * __restrict__ wy,
55 T * __restrict__ wz,
56 const T * __restrict__ x_ref,
57 const T * __restrict__ y_ref,
58 const T * __restrict__ z_ref,
59 const T * __restrict__ phi,
60 const T * __restrict__ x,
61 const T * __restrict__ y,
62 const T * __restrict__ z,
64
65 const int idx = blockIdx.x * blockDim.x + threadIdx.x;
66 const int str = blockDim.x * gridDim.x;
67 const T one = 1.0;
68 const T tol = 1e-6;
69
70 for (int i = idx; i < n; i += str) {
71 const T p_val = phi[i];
73
74 if (fabs(p_val - one) < tol) {
75 const T rx = x[i] - kin_params.cx;
76 const T ry = y[i] - kin_params.cy;
77 const T rz = z[i] - kin_params.cz;
78
79 v_tan_x = kin_params.vay * rz - kin_params.vaz * ry;
80 v_tan_y = kin_params.vaz * rx - kin_params.vax * rz;
81 v_tan_z = kin_params.vax * ry - kin_params.vay * rx;
82 }
83 else {
84 const T dx_ref = x_ref[i] - kin_params.px;
85 const T dy_ref = y_ref[i] - kin_params.py;
86 const T dz_ref = z_ref[i] - kin_params.pz;
87
88 const T rx_target = kin_params.r11*dx_ref +
89 kin_params.r12*dy_ref +
91
92 const T ry_target = kin_params.r21*dx_ref +
93 kin_params.r22*dy_ref +
95
96 const T rz_target = kin_params.r31*dx_ref +
97 kin_params.r32*dy_ref +
99
103 }
104
105 wx[i] += (kin_params.vtx + v_tan_x) * p_val;
106 wy[i] += (kin_params.vty + v_tan_y) * p_val;
107 wz[i] += (kin_params.vtz + v_tan_z) * p_val;
108 }
109}
110
111template <typename T>
113 T * __restrict__ d,
114 const T * __restrict__ x,
115 const T * __restrict__ y,
116 const T * __restrict__ z,
117 const int lx, const int ly, const int lz,
118 const int nel, const int local_iters,
120{
121 int e = blockIdx.x * blockDim.x + threadIdx.x;
122 if (e >= nel) return;
123
124 int iter = 1;
125 bool element_changed_ever = false;
126 bool changed_local = true;
127
128 const int lxy = lx * ly;
129 const int lxyz = lx * ly * lz;
130 const int e_offset = e * lxyz;
131
132 while (changed_local && iter <= local_iters) {
133 changed_local = false;
134
135 // Loop over GLL nodes in this element
136 for (int k = 0; k < lz; ++k) {
137 for (int j = 0; j < ly; ++j) {
138 for (int i = 0; i < lx; ++i) {
139 int idx1 = i + j * lx + k * lxy + e_offset;
140 T x1 = x[idx1];
141 T y1 = y[idx1];
142 T z1 = z[idx1];
143 T d1 = d[idx1];
144
145 int i0 = max(0, i - 1);
146 int i1 = min(lx - 1, i + 1);
147 int j0 = max(0, j - 1);
148 int j1 = min(ly - 1, j + 1);
149 int k0 = max(0, k - 1);
150 int k1 = min(lz - 1, k + 1);
151
152 // Neighbor check
153 for (int kk = k0; kk <= k1; ++kk) {
154 for (int jj = j0; jj <= j1; ++jj) {
155 for (int ii = i0; ii <= i1; ++ii) {
156 if (ii == i && jj == j && kk == k) continue;
157
158 int idx2 = ii + jj * lx + kk * lxy + e_offset;
159 T x2 = x[idx2];
160 T y2 = y[idx2];
161 T z2 = z[idx2];
162 T d2 = d[idx2];
163
164 T dist = sqrt((x1 - x2)*(x1 - x2) +
165 (y1 - y2)*(y1 - y2) +
166 (z1 - z2)*(z1 - z2));
167 T dtmp = d2 + dist;
168
169 if (dtmp < d1) {
170 d1 = dtmp;
171 d[idx1] = d1; // Update locally
172 changed_local = true;
173 }
174 }
175 }
176 }
177 }
178 }
179 }
181 iter++;
182 }
183
185 atomicAdd(nchange, 1);
186 }
187}
188
189#endif // __COMMON_ALE_KINEMATICS_KERNEL_H__
__global__ void compute_cheap_dist_kernel(T *__restrict__ d, const T *__restrict__ x, const T *__restrict__ y, const T *__restrict__ z, const int lx, const int ly, const int lz, const int nel, const int local_iters, int *__restrict__ nchange)
__global__ void ale_add_kinematics_kernel(const int n, T *__restrict__ wx, T *__restrict__ wy, T *__restrict__ wz, const T *__restrict__ x_ref, const T *__restrict__ y_ref, const T *__restrict__ z_ref, const T *__restrict__ phi, const T *__restrict__ x, const T *__restrict__ y, const T *__restrict__ z, const kinematics_params_t kin_params)
const int i
const int e
const int j
__global__ void const T *__restrict__ x
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ cz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ cy
double real
#define max(a, b)
Definition tensor.cu:40