48 void *
cx,
void *
cy,
void *
cz,
52 void *
w3,
int *nel,
int *lx);
60 void *
cx,
void *
cy,
void *
cz,
64 void *
w3,
int *nel,
int *lx) {
74 set_convect_rst_kernel_1d<real, LX, 1024> \
75 <<<nblcks, nthrds_1d, 0, stream>>> \
76 ((real *) cr, (real *) cs, (real *) ct, \
77 (real *) cx, (real *) cy, (real *) cz, \
78 (real *) drdx, (real *) dsdx, (real *) dtdx, \
79 (real *) drdy, (real *) dsdy, (real *) dtdy, \
80 (real *) drdz, (real *) dsdz, (real *) dtdz, \
82 CUDA_CHECK(cudaGetLastError());
85#define CASE_KSTEP(LX) \
86 set_convect_rst_kernel_kstep<real, LX> <<<nblcks, nthrds_kstep, 0, stream>>> \
87 ((real *) cr, (real *) cs, (real *) ct, \
88 (real *) cx, (real *) cy, (real *) cz, \
89 (real *) drdx, (real *) dsdx, (real *) dtdx, \
90 (real *) drdy, (real *) dsdy, (real *) dtdy, \
91 (real *) drdz, (real *) dsdz, (real *) dtdz, \
93 CUDA_CHECK(cudaGetLastError());
97 if(autotune[LX] == 0 ) { \
98 autotune[LX]=tune_set_convect_rst<LX>(cr, cs, ct, \
104 } else if (autotune[LX] == 1 ) { \
106 } else if (autotune[LX] == 2 ) { \
136template < const
int LX >
138 void *
cx,
void *
cy,
void *
cz,
142 void *
w3,
int *nel,
int *lx) {
184 for(
int i = 0;
i < 100;
i++) {
194 for(
int i = 0;
i < 100;
i++) {
209 (
retval > 1 ?
"KSTEP" :
"1D"));
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void const T *__restrict__ const T *__restrict__ cr
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ cs
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ ct
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ cz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ cx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ cy
void log_error(char *msg)
void log_message(char *msg)
void log_section(char *msg)
int tune_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)