1 #ifndef __MATH_SCHWARZ_KERNEL_H__
2 #define __MATH_SCHWARZ_KERNEL_H__
47 template<
typename T, const
int NX>
55 __shared__ T x1[NX*NX],x2[NX*NX];
56 __shared__ T y1[NX*NX],y2[NX*NX];
57 __shared__ T z1[NX*NX],z2[NX*NX];
58 const int idx = threadIdx.x;
59 const int el = blockIdx.x*NX*NX*NX;
64 int idx_x1 = l2 +
i*NX + k*NX*NX + el;
66 int idx_x2 = NX-1-l2 +
i*NX + k*NX*NX + el;
69 int idx_y1 =
i + l2*NX + k*NX*NX + el;
71 int idx_y2 =
i + (NX-1-l2)*NX + k*NX*NX + el;
74 int idx_z1 =
i + k*NX + l2*NX*NX + el;
76 int idx_z2 =
i + k*NX + (NX-l2-1)*NX*NX + el;
81 for(
int ijk = idx; ijk<NX*NX*NX; ijk+=blockDim.x){
86 if(
j>0 && j< NX-1 && k > 0 && k < NX -1){
87 int idx1 =
i +
j*NX + k*NX*NX + el;
90 a1[idx1] = f1*a1[idx1] + f2*x1[idx2];
94 a1[idx1] = f1*a1[idx1] + f2*x2[idx2];
97 if(
i > 0 && i < NX-1 && k > 0 && k < NX -1){
98 int idx1 =
i +
j*NX + k*NX*NX + el;
101 a1[idx1] = f1*a1[idx1] + f2*y1[idx2];
105 a1[idx1] = f1*a1[idx1] + f2*y2[idx2];
108 if(
i > 0 && i < NX-1 && j>0 &&
j< NX-1 ){
109 int idx1 =
i +
j*NX + k*NX*NX + el;
112 a1[idx1] = f1*a1[idx1] + f2*z1[idx2];
116 a1[idx1] = f1*a1[idx1] + f2*z2[idx2];
125 template<
typename T>
130 const int idx = threadIdx.x;
131 const int nx2 = nx+2;
132 const int el2 = blockIdx.x*nx2*nx2*nx2;
133 const int el = blockIdx.x*nx*nx*nx;
134 for(
int i = idx;
i<nx2*nx2*nx2;
i+=blockDim.x){
138 for(
int ijk = idx; ijk<nx*nx*nx; ijk+=blockDim.x){
139 const int jk = ijk / nx;
140 const int i = ijk - jk * nx;
141 const int k = jk / nx;
142 const int j = jk - k * nx;
143 a[(
i+1)+(
j+1)*nx2+(k+1)*nx2*nx2+el2] = b[ijk+el];
147 template<
typename T>
152 const int idx = threadIdx.x;
153 const int nx2 = nx+2;
154 const int el2 = blockIdx.x*nx2*nx2*nx2;
155 const int el = blockIdx.x*nx*nx*nx;
156 for(
int ijk = idx; ijk<nx*nx*nx; ijk+=blockDim.x){
157 const int jk = ijk / nx;
158 const int i = ijk - jk * nx;
159 const int k = jk / nx;
160 const int j = jk - k * nx;
161 b[ijk+el] = a[(
i+1)+(
j+1)*nx2+(k+1)*nx2*nx2+el2];
__global__ void schwarz_extrude_kernel(T *a1, const int l1, const T f1, T *a2, const int l2, const T f2)
__global__ void schwarz_toreg3d_kernel(T *__restrict__ b, T *__restrict__ a, const int nx)
__global__ void schwarz_toext3d_kernel(T *__restrict__ a, T *__restrict__ b, const int nx)