1 #ifndef __COMMON_GRADIENT_JUMP_PENALTY_KERNEL_H__
2 #define __COMMON_GRADIENT_JUMP_PENALTY_KERNEL_H__
45 const int idx = threadIdx.x;
47 const int el2 = blockIdx.x*nx2*nx2*nx2;
48 const int el = blockIdx.x*nx*nx*nx;
49 for(
int ijk = idx; ijk < nx*nx*nx; ijk += blockDim.x){
50 const int jk = ijk / nx;
51 const int i = ijk - jk * nx;
52 const int k = jk / nx;
53 const int j = jk - k * nx;
55 b[0+(
j+1)*nx2+(k+1)*nx2*nx2+el2] = a[ijk+el];
58 b[nx2-1+(
j+1)*nx2+(k+1)*nx2*nx2+el2] = a[ijk+el];
61 b[(
i+1)+0*nx2+(k+1)*nx2*nx2+el2] = a[ijk+el];
64 b[(
i+1)+(nx2-1)*nx2+(k+1)*nx2*nx2+el2] = a[ijk+el];
67 b[(
i+1)+(
j+1)*nx2+0*nx2*nx2+el2] = a[ijk+el];
70 b[(
i+1)+(
j+1)*nx2+(nx2-1)*nx2*nx2+el2] = a[ijk+el];
80 T *__restrict__ penalty_facet_d,
81 T *__restrict__ dphidxi_d,
84 const int idx = threadIdx.x;
86 const int el2 = blockIdx.x*nx2*nx2*nx2;
87 const int el = blockIdx.x*nx*nx*nx;
88 for(
int ijk = idx; ijk < nx*nx*nx; ijk += blockDim.x){
89 const int jk = ijk / nx;
90 const int i = ijk - jk * nx;
91 const int k = jk / nx;
92 const int j = jk - k * nx;
93 penalty_d[ijk+el] = penalty_facet_d[0+(
j+1)*nx2+(k+1)*nx2*nx2+el2] \
94 * dphidxi_d[0+
i*nx] + \
95 penalty_facet_d[(nx2-1)+(
j+1)*nx2+(k+1)*nx2*nx2+el2] \
96 * dphidxi_d[nx-1+
i*nx] + \
97 penalty_facet_d[(
i+1)+0*nx2+(k+1)*nx2*nx2+el2] \
98 * dphidxi_d[0+
j*nx] + \
99 penalty_facet_d[(
i+1)+(nx2-1)*nx2+(k+1)*nx2*nx2+el2] \
100 * dphidxi_d[nx-1+
j*nx] + \
101 penalty_facet_d[(
i+1)+(
j+1)*nx2+0*nx2*nx2+el2] \
102 * dphidxi_d[0+k*nx] + \
103 penalty_facet_d[(
i+1)+(
j+1)*nx2+(nx2-1)*nx2*nx2+el2] \
104 * dphidxi_d[nx-1+k*nx];
__global__ void gradient_jump_penalty_finalize_kernel(T *__restrict__ penalty_d, T *__restrict__ penalty_facet_d, T *__restrict__ dphidxi_d, const int nx)
__global__ void pick_facet_value_hex_kernel(T *__restrict__ b, T *__restrict__ a, const int nx)