48 void *
dr,
void *
ds,
void *
dt,
50 void *
w3,
int *nel,
int *lx);
58 void *
dr,
void *
ds,
void *
dt,
60 void *
w3,
int *nel,
int *lx) {
70 cdtp_kernel_1d<real, LX, 1024> \
71 <<<nblcks, nthrds_1d, 0, stream>>>((real *) dtx, (real *) x, \
72 (real *) dr, (real *) ds, (real *) dt, \
73 (real *) dxt, (real *) dyt, (real *) dzt, \
75 CUDA_CHECK(cudaGetLastError());
77#define CASE_KSTEP(LX) \
78 cdtp_kernel_kstep<real, LX> \
79 <<<nblcks, nthrds_kstep, 0, stream>>>((real *) dtx, (real *) x, \
80 (real *) dr, (real *) ds, (real *) dt, \
81 (real *) dxt, (real *) dyt, (real *) dzt, \
83 CUDA_CHECK(cudaGetLastError());
87 if(autotune[LX] == 0 ) { \
88 autotune[LX]=tune_cdtp<LX>(dtx, x, \
92 } else if (autotune[LX] == 1 ) { \
94 } else if (autotune[LX] == 2 ) { \
99#define CASE_LARGE(LX) \
141template < const
int LX >
143 void *
dr,
void *
ds,
void *
dt,
145 void *
w3,
int *nel,
int *lx) {
187 for(
int i = 0;
i < 100;
i++) {
197 for(
int i = 0;
i < 100;
i++) {
212 (
retval > 1 ?
"KSTEP" :
"1D"));
__global__ void const T *__restrict__ const T *__restrict__ dr
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
void log_error(char *msg)
void log_message(char *msg)
void log_section(char *msg)
int tune_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)