1 #ifndef __MATH_LAMBDA2_KERNEL_H__
2 #define __MATH_LAMBDA2_KERNEL_H__
/**
 * Lambda2 value of a 3x3 velocity-gradient tensor.
 *
 * Builds A = S^2 + Omega^2, where S and Omega are the symmetric and
 * antisymmetric parts of the gradient (grad_ij, row i / column j), and
 * returns the median of the three real eigenvalues of A. The eigenvalues
 * are the roots of  x^3 + B x^2 + C x + D = 0  and are obtained with the
 * trigonometric closed form for a cubic with three real roots.
 *
 * Degenerate spectra are handled explicitly: a triple root (q == 0) would
 * otherwise evaluate acos(0/0), and a double root puts the acos argument
 * at exactly +-1, where rounding can push it outside the valid domain.
 * In both cases the unguarded formula yields NaN for all three roots.
 *
 * @param grad11..grad33  Components of the gradient tensor.
 * @return The middle eigenvalue of S^2 + Omega^2.
 */
template<typename T>
__inline__ __device__ T eigen_val_calc(T grad11, T grad12, T grad13,
                                       T grad21, T grad22, T grad23,
                                       T grad31, T grad32, T grad33) {
  // Local constant so the routine does not rely on an external `pi` symbol.
  const T pi_val = 3.14159265358979323846;

  // Symmetric part S = (G + G^T) / 2; its diagonal is the diagonal of G.
  T s11 = grad11;
  T s22 = grad22;
  T s33 = grad33;
  T s12 = 0.5 * (grad12 + grad21);
  T s13 = 0.5 * (grad13 + grad31);
  T s23 = 0.5 * (grad23 + grad32);

  // Antisymmetric part Omega = (G - G^T) / 2 (upper triangle).
  T o12 = 0.5 * (grad12 - grad21);
  T o13 = 0.5 * (grad13 - grad31);
  T o23 = 0.5 * (grad23 - grad32);

  // A = S^2 + Omega^2 (symmetric, so only the upper triangle is needed).
  T a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13;
  T a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23;
  T a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23;

  T a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23;
  T a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13;
  T a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23;

  // Characteristic polynomial coefficients: -trace, sum of principal
  // 2x2 minors, -determinant.
  T B = -(a11 + a22 + a33);
  T C = -(a12*a12 + a13*a13 + a23*a23 - a11 * a22 - a11 * a33 - a22 * a33);
  T D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 - a22 * a13*a13
          - a33 * a12*a12 + a11 * a22 * a33);

  T q = (3.0 * C - B*B) / 9.0;
  T r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0;

  // For a real symmetric matrix q <= 0. If q is not strictly negative the
  // three roots coincide (or rounding made q marginally positive): the
  // root is then -B/3 and the trigonometric formula must not be used.
  if (!(q < 0.0)) {
    return -B / 3.0;
  }

  // -q^3 can underflow to zero for tiny q; treat that as a triple root too.
  T denom = sqrt(-q*q*q);
  if (!(denom > 0.0)) {
    return -B / 3.0;
  }

  // Clamp to the domain of acos; analytically |r| <= sqrt(-q^3).
  T ratio = r / denom;
  if (ratio > 1.0) {
    ratio = 1.0;
  } else if (ratio < -1.0) {
    ratio = -1.0;
  }
  T theta = acos(ratio);

  T eigen1 = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0;
  T eigen2 = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi_val) / 3.0) - B / 3.0;
  T eigen3 = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi_val) / 3.0) - B / 3.0;

  // Median of three: clamp eigen3 into [min(e1,e2), max(e1,e2)].
  T lo = (eigen1 < eigen2) ? eigen1 : eigen2;
  T hi = (eigen1 < eigen2) ? eigen2 : eigen1;
  if (eigen3 < lo) {
    return lo;
  }
  if (eigen3 > hi) {
    return hi;
  }
  return eigen3;
}
// lambda2_kernel_1d: writes one lambda2 value per grid point into `lambda2`.
//
// Template parameters: T (scalar type), LX (points per direction, used to
// size every shared array), CHUNKS (stride between the points one thread
// handles; see `ijk = iii + n * CHUNKS` below).
// Inputs are read-only `const T * __restrict__` arrays: the velocity
// components u, v, w; the matrices dx, dy, dz; the nine metric terms
// drdx..dtdz; and jacinv.
// NOTE(review): this listing is missing several lines of the kernel (the
// global->shared loads, the i/j/k decomposition, barriers and the gradient
// assembly), so the comments below cover only the visible statements.
98 template<
typename T, const
int LX, const
int CHUNKS >
100 const T * __restrict__
u,
101 const T * __restrict__
v,
102 const T * __restrict__
w,
103 const T * __restrict__
dx,
104 const T * __restrict__
dy,
105 const T * __restrict__
dz,
106 const T * __restrict__
drdx,
107 const T * __restrict__
dsdx,
108 const T * __restrict__
dtdx,
109 const T * __restrict__
drdy,
110 const T * __restrict__
dsdy,
111 const T * __restrict__
dtdy,
112 const T * __restrict__
drdz,
113 const T * __restrict__
dsdz,
114 const T * __restrict__
dtdz,
115 const T * __restrict__
jacinv) {
// Shared copies of the three velocity components: LX^3 values each.
117 __shared__ T
shu[LX * LX * LX];
118 __shared__ T
shv[LX * LX * LX];
119 __shared__ T
shw[LX * LX * LX];
// Shared LX x LX matrices, indexed below as sh*[row + l * LX].
121 __shared__ T
shdx[LX * LX];
122 __shared__ T
shdy[LX * LX];
123 __shared__ T
shdz[LX * LX];
// The block index selects the element; `ele` is that element's offset
// (LX^3 values per element). `nchunks` is ceil(LX^3 / CHUNKS).
130 const int e = blockIdx.x;
131 const int ele = blockIdx.x*LX*LX*LX;
132 const int iii = threadIdx.x;
133 const int nchunks = (LX * LX * LX - 1) / CHUNKS + 1;
// Only the first LX*LX threads take this branch.
// NOTE(review): presumably fills shdx/shdy/shdz; body not visible here.
136 if (iii < (LX * LX)) {
// Loop bounded by the LX^3 points of the element.
// NOTE(review): presumably stages u/v/w into shu/shv/shw; body not visible.
143 while(
j < (LX * LX * LX)) {
// Each thread visits points iii, iii + CHUNKS, iii + 2*CHUNKS, ...
152 for (
int n = 0; n < nchunks; n++) {
153 const int ijk = iii + n * CHUNKS;
154 const int jk = ijk / LX;
// Guard against indices past the element when LX^3 is not a multiple of
// CHUNKS (the definitions of i, j, k are not visible in this listing).
158 if (
i < LX &&
j < LX && k < LX ) {
// Contract each matrix with the shared field along one index:
// r*: shdx along the first index, s*: shdy along the second,
// t*: shdz along the third, for each of u, v, w.
170 for (
int l = 0; l < LX; l++) {
171 rtmpu +=
shdx[
i + l * LX] *
shu[l +
j * LX + k * LX * LX];
172 stmpu +=
shdy[
j + l * LX] *
shu[
i + l * LX + k * LX * LX];
173 ttmpu +=
shdz[k + l * LX] *
shu[
i +
j * LX + l * LX * LX];
175 rtmpv +=
shdx[
i + l * LX] *
shv[l +
j * LX + k * LX * LX];
176 stmpv +=
shdy[
j + l * LX] *
shv[
i + l * LX + k * LX * LX];
177 ttmpv +=
shdz[k + l * LX] *
shv[
i +
j * LX + l * LX * LX];
179 rtmpw +=
shdx[
i + l * LX] *
shw[l +
j * LX + k * LX * LX];
180 stmpw +=
shdy[
j + l * LX] *
shw[
i + l * LX + k * LX * LX];
181 ttmpw +=
shdz[k + l * LX] *
shw[
i +
j * LX + l * LX * LX];
// Store the result of eigen_val_calc on the nine gradient components at
// this point's global index (element offset e*LX^3 plus local ijk).
231 lambda2[ijk +
e*LX*LX*LX] = eigen_val_calc<T>( grad11, grad12, grad13,
232 grad21, grad22, grad23,
233 grad31, grad32, grad33);
// lambda2_kernel_kstep: variant indexed by a 2D thread block. threadIdx.x
// and threadIdx.y supply the i and j indices, and each thread loops over k
// itself.
//
// Template parameters: T (scalar type) and LX (points per direction).
// `lambda2` is the output array; u, v, w, dx, dy, dz are read-only inputs.
// NOTE(review): this listing is missing the rest of the parameter list, the
// shared-memory loads, any barriers, and the continuation lines of each
// gradient expression, so the comments below cover only what is visible.
239 template<
typename T, const
int LX >
241 lambda2_kernel_kstep(T * __restrict__
lambda2,
242 const T * __restrict__
u,
243 const T * __restrict__
v,
244 const T * __restrict__
w,
245 const T * __restrict__
dx,
246 const T * __restrict__
dy,
247 const T * __restrict__
dz,
// Shared arrays of LX*LX values for u, v, w (the 1D kernel uses LX^3).
259 __shared__ T
shu[LX * LX];
260 __shared__ T
shv[LX * LX];
261 __shared__ T
shw[LX * LX];
// The block index selects the element; the thread indices select (i, j).
267 const int e = blockIdx.x;
268 const int j = threadIdx.y;
269 const int i = threadIdx.x;
// First pass over k.
// NOTE(review): presumably fills ru/rv/rw, which are read below; body not
// visible here.
282 for (
int k = 0; k < LX; ++k) {
// Second pass over k: one grid point (i, j, k) per iteration.
291 for (
int k = 0; k < LX; ++k) {
292 const int ijk =
ij + k*LX*LX;
// Contract along the third index, reading the fields from ru/rv/rw.
300 for (
int l = 0; l < LX; l++) {
301 ttmpu +=
shdz[k+l*LX] *
ru[l];
302 ttmpv +=
shdz[k+l*LX] *
rv[l];
303 ttmpw +=
shdz[k+l*LX] *
rw[l];
// Loop over l whose body is not visible in this listing.
314 for (
int l = 0; l < LX; l++) {
// Gradient components grad_ab: each is jinv times a sum whose first term
// is the metric dr/dx_b times the r-sum of component a (u, v, w -> 1, 2, 3).
// The remaining terms of each sum are not visible in this listing.
323 T grad11 = jinv * (
drdx[ijk +
ele] * rtmpu
327 T grad12 = jinv * (
drdy[ijk +
ele] * rtmpu
331 T grad13 = jinv * (
drdz[ijk +
ele] * rtmpu
334 T grad21 = jinv * (
drdx[ijk +
ele] * rtmpv
338 T grad22 = jinv * (
drdy[ijk +
ele] * rtmpv
342 T grad23 = jinv * (
drdz[ijk +
ele] * rtmpv
345 T grad31 = jinv * (
drdx[ijk +
ele] * rtmpw
349 T grad32 = jinv * (
drdy[ijk +
ele] * rtmpw
353 T grad33 = jinv * (
drdz[ijk +
ele] * rtmpw
// Store the result of eigen_val_calc at this point's global index.
356 lambda2[ijk +
ele] = eigen_val_calc<T>( grad11, grad12, grad13,
357 grad21, grad22, grad23,
358 grad31, grad32, grad33);
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__global__ void lambda2_kernel_1d(T *__restrict__ lambda2, const T *__restrict__ u, const T *__restrict__ v, const T *__restrict__ w, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz, const T *__restrict__ drdx, const T *__restrict__ dsdx, const T *__restrict__ dtdx, const T *__restrict__ drdy, const T *__restrict__ dsdy, const T *__restrict__ dtdy, const T *__restrict__ drdz, const T *__restrict__ dsdz, const T *__restrict__ dtdz, const T *__restrict__ jacinv)
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__shared__ T shdz[LX *LX]
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void __launch_bounds__(LX *LX, 3) lambda2_kernel_kstep(T *__restrict__ lambda2
__shared__ T shdy[LX *LX]
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__inline__ __device__ T eigen_val_calc(T grad11, T grad12, T grad13, T grad21, T grad22, T grad23, T grad31, T grad32, T grad33)
__shared__ T shdx[LX *LX]
__global__ void const T *__restrict__ u
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void const T *__restrict__ const T *__restrict__ v
A simulation component that computes lambda2. The values are stored in the field registry under the na...
real(kind=rp), parameter, public pi