1 #ifndef __COMMON_VREMAN_NUT_KERNEL_H__
2 #define __COMMON_VREMAN_NUT_KERNEL_H__
52 T * __restrict__ delta,
54 T * __restrict__ mult,
59 const int idx = blockIdx.x * blockDim.x + threadIdx.x;
60 const int str = blockDim.x * gridDim.x;
62 for (
int i = idx;
i < n;
i += str) {
63 T beta11, beta22, beta33;
64 T beta12, beta13, beta23;
67 a11[
i] = a11[
i] * mult[
i];
68 a12[
i] = a12[
i] * mult[
i];
69 a13[
i] = a13[
i] * mult[
i];
70 a21[
i] = a21[
i] * mult[
i];
71 a22[
i] = a22[
i] * mult[
i];
72 a23[
i] = a23[
i] * mult[
i];
73 a31[
i] = a31[
i] * mult[
i];
74 a32[
i] = a32[
i] * mult[
i];
75 a33[
i] = a33[
i] * mult[
i];
77 beta11 = a11[
i]*a11[
i] + a21[
i]*a21[
i] + a31[
i]*a31[
i];
78 beta22 = a12[
i]*a12[
i] + a22[
i]*a22[
i] + a32[
i]*a32[
i];
79 beta33 = a13[
i]*a13[
i] + a23[
i]*a23[
i] + a33[
i]*a33[
i];
80 beta12 = a11[
i]*a12[
i] + a21[
i]*a22[
i] + a31[
i]*a32[
i];
81 beta13 = a11[
i]*a13[
i] + a21[
i]*a23[
i] + a31[
i]*a33[
i];
82 beta23 = a12[
i]*a13[
i] + a22[
i]*a23[
i] + a32[
i]*a33[
i];
84 b_beta = beta11*beta22 - beta12*beta12
85 + beta11*beta33 - beta13*beta13
86 + beta22*beta33 - beta23*beta23;
88 b_beta =
max(0.0, b_beta);
90 aijaij = beta11 + beta22 + beta33;
92 nut[
i] = c * delta[
i] * delta[
i]
93 * sqrt(b_beta / (aijaij + eps));
__global__ void vreman_nut_compute(T *__restrict__ a11, T *__restrict__ a12, T *__restrict__ a13, T *__restrict__ a21, T *__restrict__ a22, T *__restrict__ a23, T *__restrict__ a31, T *__restrict__ a32, T *__restrict__ a33, T *__restrict__ delta, T *__restrict__ nut, T *__restrict__ mult, const T c, const T eps, const int n)