48               void *
cr, 
void *
cs, 
void *
ct,
 
   49               void *
dx, 
void *
dy, 
void *
dz,
 
   58                  void *
cr, 
void *
cs, 
void *
ct,
 
   59                  void *
dx, 
void *
dy, 
void *
dz,
 
   70    convect_scalar_kernel_1d<real, LX, 1024>                                    \ 
   71      <<<nblcks, nthrds_1d, 0, stream>>>                                        \ 
   72      ((real *) du, (real *) u,                                                 \ 
   73       (real *) cr, (real *) cs, (real *) ct,                                   \ 
   74       (real *) dx, (real *) dy, (real *) dz);                                  \ 
   75    CUDA_CHECK(cudaGetLastError()); 
   77#define CASE_KSTEP(LX)                                                          \ 
   78    convect_scalar_kernel_kstep<real, LX>                                       \ 
   79      <<<nblcks, nthrds_kstep, 0, stream>>>                                     \ 
   80      ((real *) du, (real *) u,                                                 \ 
   81       (real *) cr, (real *) cs, (real *) ct,                                   \ 
   82       (real *) dx, (real *) dy, (real *) dz);                                  \ 
   83    CUDA_CHECK(cudaGetLastError()); 
   87      if(autotune[LX] == 0 ) {                                                  \ 
   88        autotune[LX]=tune_convect_scalar<LX>(du, u,                             \ 
   92      } else if (autotune[LX] == 1 ) {                                          \ 
   94      } else if (autotune[LX] == 2 ) {                                          \ 
   99#define CASE_LARGE(LX)                                                          \ 
 
  141template < const 
int LX >
 
  143               void *
cr, 
void *
cs, 
void *
ct,
 
  144               void *
dx, 
void *
dy, 
void *
dz,
 
  187  for(
int i = 0; 
i < 100; 
i++) {
 
  197  for(
int i = 0; 
i < 100; 
i++) {
 
  212          (
retval > 1 ? 
"KSTEP" : 
"1D"));
 
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
 
__global__ void const T *__restrict__ const T *__restrict__ cr
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ cs
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ ct
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
void log_error(char *msg)
 
void log_message(char *msg)
 
void log_section(char *msg)
 
int tune_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
 
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)