48               void *
vx, 
void *
vy, 
void *
vz,
 
   49               void *
dx, 
void *
dy, 
void *
dz,
 
   53               void *
jacinv, 
int *nel, 
int *gdim, 
int *lx);
 
   61                  void *
vx, 
void *
vy, 
void *
vz,
 
   62                  void *
dx, 
void *
dy, 
void *
dz,
 
   66                  void *
jacinv, 
int *nel, 
int *gdim, 
int *lx) {
 
   76    conv1_kernel_1d<real, LX, 1024>                                             \ 
   77      <<<nblcks, nthrds_1d, 0, stream>>>                                        \ 
   78      ((real *) du, (real *) u,                                                 \ 
   79       (real *) vx, (real *) vy, (real *) vz,                                   \ 
   80       (real *) dx, (real *) dy, (real *) dz,                                   \ 
   81       (real *) drdx, (real *) dsdx, (real *) dtdx,                             \ 
   82       (real *) drdy, (real *) dsdy, (real *) dtdy,                             \ 
   83       (real *) drdz, (real *) dsdz, (real *) dtdz,                             \ 
   85    CUDA_CHECK(cudaGetLastError()); 
   87#define CASE_KSTEP(LX)                                                          \ 
   88    conv1_kernel_kstep<real, LX>                                                \ 
   89      <<<nblcks, nthrds_kstep, 0, stream>>>                                     \ 
   90      ((real *) du, (real *) u,                                                 \ 
   91       (real *) vx, (real *) vy, (real *) vz,                                   \ 
   92       (real *) dx, (real *) dy, (real *) dz,                                   \ 
   93       (real *) drdx, (real *) dsdx, (real *) dtdx,                             \ 
   94       (real *) drdy, (real *) dsdy, (real *) dtdy,                             \ 
   95       (real *) drdz, (real *) dsdz, (real *) dtdz,                             \ 
   97    CUDA_CHECK(cudaGetLastError());                                            
  101      if(autotune[LX] == 0 ) {                                                  \ 
  102        autotune[LX]=tune_conv1<LX>(du, u,                                      \ 
  108                                    jacinv, nel, gdim, lx);                     \ 
  109      } else if (autotune[LX] == 1 ) {                                          \ 
  111      } else if (autotune[LX] == 2 ) {                                          \ 
  116#define CASE_LARGE(LX)                                                          \ 
 
  158template < const 
int LX >
 
  160               void *
vx, 
void *
vy, 
void *
vz,
 
  161               void *
dx, 
void *
dy, 
void *
dz,
 
  165               void *
jacinv, 
int *nel, 
int *gdim, 
int *lx) {
 
  207  for(
int i = 0; 
i < 100; 
i++) {
 
  217  for(
int i = 0; 
i < 100; 
i++) {
 
  232          (
retval > 1 ? 
"KSTEP" : 
"1D"));
 
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ vz
 
__global__ void const T *__restrict__ const T *__restrict__ vx
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ vy
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
void log_error(char *msg)
 
void log_message(char *msg)
 
void log_section(char *msg)
 
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
 
int tune_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)