1 #ifndef __MATH_SCHWARZ_KERNEL_H__
2 #define __MATH_SCHWARZ_KERNEL_H__
// NOTE(review): this span looks like a damaged extraction of a templated
// device kernel. The integers that start many lines (45, 53, 54, ...) appear
// to be leftover original line numbers, and the gaps in that numbering
// suggest statements are missing: the __global__ signature, the braces and
// the declarations of idx1/idx2 are not visible here. The identifiers used
// below (a1, l1, f1, a2, l2, f2) match the parameter list of the detached
// `schwarz_extrude_kernel` signature elsewhere in this file, so this is
// presumably that kernel's body -- TODO confirm against the upstream file.
//
// Template parameters:
//   T  - element type of the arrays being combined.
//   NX - compile-time extent; each block addresses a slab of NX*NX*NX entries.
//
// What the visible statements do: six times in a row they perform
//   a1[idx1] = f1*a1[idx1] + f2*a2[idx2]
// where idx1 is built from l1 and idx2 from l2. The l-dependent term is put
// in turn at unit stride, at stride NX and at stride NX*NX, and each position
// is used once with l and once with the mirrored value NX-1-l.
45 template<
typename T, const
int NX>
// One thread handles one (x, y) pair; the slab is selected by the block index.
53 const int idx = threadIdx.x;
// Offset of this block's NX*NX*NX slab.
// NOTE(review): the offset is held in a 32-bit int; confirm that
// gridDim.x*NX^3 cannot exceed INT_MAX for the sizes used by callers.
54 const int el = blockIdx.x*NX*NX*NX;
// x falls in 1..NX-2, because idx%(NX-2) is in 0..NX-3.
55 const int x = idx%(NX-2) + 1;
// y starts at 1; it stays <= NX-2 only if blockDim.x <= (NX-2)*(NX-2).
// The launch configuration is not visible here -- TODO confirm.
56 const int y = idx/(NX-2) + 1;
// Pair 1: l1/l2 at unit stride, x at stride NX, y at stride NX*NX.
59 idx1 = l1 +
x*NX + y*NX*NX + el;
60 idx2 = l2 +
x*NX + y*NX*NX + el;
61 a1[idx1] = f1*a1[idx1] + f2*a2[idx2];
// Same position, mirrored: NX-1-l1 / NX-1-l2 at unit stride.
63 idx1 = (NX-1-l1) +
x*NX + y*NX*NX + el;
64 idx2 = (NX-1-l2) +
x*NX + y*NX*NX + el;
65 a1[idx1] = f1*a1[idx1] + f2*a2[idx2];
// Pair 2: x at unit stride, l1/l2 at stride NX, y at stride NX*NX.
69 idx1 =
x + l1*NX + y*NX*NX + el;
70 idx2 =
x + l2*NX + y*NX*NX + el;
71 a1[idx1] = f1*a1[idx1] + f2*a2[idx2];
// Same position, mirrored: NX-1-l1 / NX-1-l2 at stride NX.
73 idx1 =
x + (NX-1-l1)*NX + y*NX*NX + el;
74 idx2 =
x + (NX-1-l2)*NX + y*NX*NX + el;
75 a1[idx1] = f1*a1[idx1] + f2*a2[idx2];
// Pair 3: x at unit stride, y at stride NX, l1/l2 at stride NX*NX.
79 idx1 =
x + y*NX + l1*NX*NX + el;
80 idx2 =
x + y*NX + l2*NX*NX + el;
81 a1[idx1] = f1*a1[idx1] + f2*a2[idx2];
// Same position, mirrored: NX-1-l1 / NX-1-l2 at stride NX*NX.
83 idx1 =
x + y*NX + (NX-1-l1)*NX*NX + el;
84 idx2 =
x + y*NX + (NX-1-l2)*NX*NX + el;
85 a1[idx1] = f1*a1[idx1] + f2*a2[idx2];
// NOTE(review): damaged fragment of a kernel body. The enclosing signature,
// the braces, the definition of nx2 and the body of the first loop are not
// visible in this span, and the leading integers look like leftover original
// line numbers. The visible assignment writes into `a` from `b`; presumably
// this is the body of the `schwarz_toext3d_kernel` signature found elsewhere
// in this file -- TODO confirm against the upstream file.
//
// What the visible statements do: for each block, every entry ijk of the
// nx*nx*nx slab of `b` is copied into `a`, at the position shifted by +1
// along each of the three indices of an nx2*nx2*nx2 slab.
97 const int idx = threadIdx.x;
// Per-block offsets into the nx2^3-sized array and the nx^3-sized array.
// nx2 is not defined in the visible lines; presumably nx+2 -- TODO confirm.
99 const int el2 = blockIdx.x*nx2*nx2*nx2;
100 const int el = blockIdx.x*nx*nx*nx;
// Threads of the block stride over all nx2^3 entries by blockDim.x.
// NOTE(review): the loop body is missing from this view; it cannot be
// determined from here what it does to each entry.
101 for(
int i = idx;
i<nx2*nx2*nx2;
i+=blockDim.x){
// Threads of the block stride over the nx^3 entries by blockDim.x.
105 for(
int ijk = idx; ijk<nx*nx*nx; ijk+=blockDim.x){
// Split the linear index ijk into (i, j, k), with i varying fastest.
106 const int jk = ijk / nx;
107 const int i = ijk - jk * nx;
108 const int k = jk / nx;
109 const int j = jk - k * nx;
// Write to the +1-shifted position in the nx2-strided slab of `a`.
110 a[(
i+1)+(
j+1)*nx2+(k+1)*nx2*nx2+el2] = b[ijk+el];
// NOTE(review): damaged fragment of a templated kernel. The __global__
// signature and the braces are not visible in this span, and the leading
// integers look like leftover original line numbers. The visible assignment
// writes into `b` from `a`; presumably this is the body of the
// `schwarz_toreg3d_kernel` signature found elsewhere in this file -- TODO
// confirm against the upstream file.
//
// Template parameter:
//   T - element type of the arrays.
//
// What the visible statements do: for each block, every entry ijk of the
// nx*nx*nx slab of `b` is filled from `a`, reading the position shifted by +1
// along each of the three indices of an (nx+2)^3 slab. The outermost layer of
// the (nx+2)^3 slab is never read by these lines.
114 template<
typename T>
119 const int idx = threadIdx.x;
// Extent of the larger slab: one extra point on each side of nx.
120 const int nx2 = nx+2;
// Per-block offsets into the nx2^3-sized array and the nx^3-sized array.
121 const int el2 = blockIdx.x*nx2*nx2*nx2;
122 const int el = blockIdx.x*nx*nx*nx;
// Threads of the block stride over the nx^3 entries by blockDim.x.
123 for(
int ijk = idx; ijk<nx*nx*nx; ijk+=blockDim.x){
// Split the linear index ijk into (i, j, k), with i varying fastest.
124 const int jk = ijk / nx;
125 const int i = ijk - jk * nx;
126 const int k = jk / nx;
127 const int j = jk - k * nx;
// Read from the +1-shifted position in the nx2-strided slab of `a`.
128 b[ijk+el] = a[(
i+1)+(
j+1)*nx2+(k+1)*nx2*nx2+el2];
// NOTE(review): these four lines are kernel signatures with no bodies
// attached, and they do not form valid declarations as written. The first
// line has no function name and looks like a damaged remnant. The other three
// name the kernels whose parameters (a1/l1/f1/a2/l2/f2 and a/b/nx) are used
// by body fragments elsewhere in this file; presumably each belongs directly
// after its `template<...>` header -- TODO restore from the upstream file.
__global__ void const T *__restrict__ x
// Parameters: two arrays (a1, a2), two integer positions (l1, l2) and two
// scalar factors (f1, f2) of the element type T.
__global__ void schwarz_extrude_kernel(T *a1, const int l1, const T f1, T *a2, const int l2, const T f2)
// Parameters: arrays b and a, declared non-aliasing via __restrict__, and the
// extent nx.
__global__ void schwarz_toreg3d_kernel(T *__restrict__ b, T *__restrict__ a, const int nx)
// Parameters: arrays a and b, declared non-aliasing via __restrict__, and the
// extent nx.
__global__ void schwarz_toext3d_kernel(T *__restrict__ a, T *__restrict__ b, const int nx)