Neko 0.9.0
A portable framework for high-order spectral element flow simulations
sigma_nut_kernel.h
#ifndef __COMMON_SIGMA_NUT_KERNEL_H__
#define __COMMON_SIGMA_NUT_KERNEL_H__
/*
 Copyright (c) 2024, The Neko Authors
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:

   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

   * Redistributions in binary form must reproduce the above
     copyright notice, this list of conditions and the following
     disclaimer in the documentation and/or other materials provided
     with the distribution.

   * Neither the name of the authors nor the names of its
     contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
*/

#include <cmath>
#include <algorithm>

/**
 * Device kernel for the Sigma subgrid-scale model: computes the eddy
 * viscosity nut = (c * delta)^2 * D_sigma at each of the n points, where
 * D_sigma is built from the singular values of the velocity gradient
 * tensor g_ij. The gradient components are weighted in place by the
 * multiplicity array mult, and eps is a tolerance below which values are
 * treated as zero.
 */
template< typename T>
__global__ void sigma_nut_compute(T * __restrict__ g11,
                                  T * __restrict__ g12,
                                  T * __restrict__ g13,
                                  T * __restrict__ g21,
                                  T * __restrict__ g22,
                                  T * __restrict__ g23,
                                  T * __restrict__ g31,
                                  T * __restrict__ g32,
                                  T * __restrict__ g33,
                                  T * __restrict__ delta,
                                  T * __restrict__ nut,
                                  T * __restrict__ mult,
                                  const T c,
                                  const T eps,
                                  const int n) {

  /* Grid-stride loop: each thread handles points idx, idx+str, ... */
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < n; i += str) {
    T sigG11, sigG12, sigG13, sigG22, sigG23, sigG33;
    T sigma1, sigma2, sigma3;
    T Invariant1, Invariant2, Invariant3;
    T alpha1, alpha2, alpha3;
    T Dsigma;
    T pi_3;
    T tmp1;

    /* pi / 3 */
    pi_3 = 4.0/3.0*atan(1.0);

    /* Weight the velocity gradient by the multiplicity */
    g11[i] = g11[i] * mult[i];
    g12[i] = g12[i] * mult[i];
    g13[i] = g13[i] * mult[i];
    g21[i] = g21[i] * mult[i];
    g22[i] = g22[i] * mult[i];
    g23[i] = g23[i] * mult[i];
    g31[i] = g31[i] * mult[i];
    g32[i] = g32[i] * mult[i];
    g33[i] = g33[i] * mult[i];

    /* Entries of the symmetric tensor G = g^T g */
    sigG11 = g11[i]*g11[i] + g21[i]*g21[i] + g31[i]*g31[i];
    sigG22 = g12[i]*g12[i] + g22[i]*g22[i] + g32[i]*g32[i];
    sigG33 = g13[i]*g13[i] + g23[i]*g23[i] + g33[i]*g33[i];
    sigG12 = g11[i]*g12[i] + \
             g21[i]*g22[i] + \
             g31[i]*g32[i];
    sigG13 = g11[i]*g13[i] + \
             g21[i]*g23[i] + \
             g31[i]*g33[i];
    sigG23 = g12[i]*g13[i] + \
             g22[i]*g23[i] + \
             g32[i]*g33[i];

    /* Clip entries below the tolerance eps to zero */
    if (abs(sigG11) < eps) {
      sigG11 = 0.0;
    }
    if (abs(sigG12) < eps) {
      sigG12 = 0.0;
    }
    if (abs(sigG13) < eps) {
      sigG13 = 0.0;
    }
    if (abs(sigG22) < eps) {
      sigG22 = 0.0;
    }
    if (abs(sigG23) < eps) {
      sigG23 = 0.0;
    }
    if (abs(sigG33) < eps) {
      sigG33 = 0.0;
    }

    if (abs(sigG12*sigG12 + \
            sigG13*sigG13 + sigG23*sigG23) < eps) {
      /* G is numerically diagonal: the singular values are the square
         roots of the diagonal entries */
      sigma1 = sqrt(max(max(max(sigG11, sigG22), sigG33), 0.0));
      sigma3 = sqrt(max(min(min(sigG11, sigG22), sigG33), 0.0));
      Invariant1 = sigG11 + sigG22 + sigG33;
      sigma2 = sqrt(abs(Invariant1 - sigma1*sigma1 - sigma3*sigma3));
    } else {
      /* Invariants of G */
      Invariant1 = sigG11 + sigG22 + sigG33;
      Invariant2 = sigG11*sigG22 + sigG11*sigG33 + sigG22*sigG33 - \
                   (sigG12*sigG12 + sigG13*sigG13 + sigG23*sigG23);
      Invariant3 = sigG11*sigG22*sigG33 + \
                   2.0*sigG12*sigG13*sigG23 - \
                   (sigG11*sigG23*sigG23 + sigG22*sigG13*sigG13 + \
                    sigG33*sigG12*sigG12);

      Invariant1 = max(Invariant1, 0.0);
      Invariant2 = max(Invariant2, 0.0);
      Invariant3 = max(Invariant3, 0.0);

      /* Closed-form (trigonometric) solution for the eigenvalues of G;
         the singular values are their square roots */
      alpha1 = Invariant1*Invariant1/9.0 - Invariant2/3.0;

      alpha1 = max(alpha1, 0.0);

      alpha2 = Invariant1*Invariant1*Invariant1/27.0 - \
               Invariant1*Invariant2/6.0 + Invariant3/2.0;

      tmp1 = alpha2/sqrt(alpha1 * alpha1 * alpha1);

      if (tmp1 <= -1.0) {
        sigma1 = sqrt(max(Invariant1/3.0 + sqrt(alpha1), 0.0));
        sigma2 = sigma1;
        sigma3 = sqrt(Invariant1/3.0 - 2.0*sqrt(alpha1));

      } else if (tmp1 >= 1.0) {
        sigma1 = sqrt(max(Invariant1/3.0 + 2.0*sqrt(alpha1), 0.0));
        sigma2 = sqrt(Invariant1/3.0 - sqrt(alpha1));
        sigma3 = sigma2;

      } else {
        alpha3 = acos(tmp1)/3.0;

        if (abs(Invariant3) < eps) {
          sigma1 = sqrt(max(Invariant1/3.0 + \
                            2.0*sqrt(alpha1)*cos(alpha3), 0.0));
          sigma2 = sqrt(abs(Invariant1 - sigma1*sigma1));
          sigma3 = 0.0;
        } else {
          sigma1 = sqrt(max(Invariant1/3.0 + \
                            2.0*sqrt(alpha1)*cos(alpha3), 0.0));
          sigma2 = sqrt(Invariant1/3.0 - \
                        2.0*sqrt(alpha1)*cos(pi_3 + alpha3));
          sigma3 = sqrt(abs(Invariant1 - \
                            sigma1*sigma1 - sigma2*sigma2));
        }
      }
    }

    /* Differential operator of the Sigma model */
    if (sigma1 > 0.0) {
      Dsigma = sigma3*(sigma1 - sigma2)*(sigma2 - sigma3)/(sigma1*sigma1);
    } else {
      Dsigma = 0.0;
    }

    Dsigma = max(Dsigma, 0.0);

    /* Eddy viscosity */
    nut[i] = (c*delta[i])*(c*delta[i]) * Dsigma;
  }
}
#endif // __COMMON_SIGMA_NUT_KERNEL_H__
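
For reference, the sketch below shows one way this kernel could be launched from host code. It is a minimal, illustrative example only: the wrapper name launch_sigma_nut, the block size of 1024 threads, and the explicit stream argument are assumptions made here and are not part of this header.

/* Hypothetical host-side launcher; all names and the launch
   configuration are illustrative assumptions, not Neko's API. */
#include <cuda_runtime.h>
#include "sigma_nut_kernel.h"

template< typename T >
void launch_sigma_nut(T *g11, T *g12, T *g13,
                      T *g21, T *g22, T *g23,
                      T *g31, T *g32, T *g33,
                      T *delta, T *nut, T *mult,
                      T c, T eps, int n, cudaStream_t stream) {
  /* One thread per point, rounded up to whole blocks */
  const dim3 nthrds(1024, 1, 1);
  const dim3 nblcks((n + 1024 - 1) / 1024, 1, 1);
  sigma_nut_compute<T><<<nblcks, nthrds, 0, stream>>>
    (g11, g12, g13, g21, g22, g23, g31, g32, g33,
     delta, nut, mult, c, eps, n);
}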