50         void *
g13, 
void *
g23, 
int *nelv, 
int *lx);
 
   56                void *
g13, 
void *
g23, 
int *nelv, 
int *lx);
 
   66                    void *
g13, 
void *
g23, 
int *nelv, 
int *lx) {
 
   80    ax_helm_kernel_1d<real, LX, 1024>                                           \ 
   81      <<<nblcks_1d, nthrds_1d, 0, stream>>>((real *) w, (real *) u,             \ 
   82                          (real *) dx, (real *) dy, (real *) dz,                \ 
   83                          (real *) dxt, (real *) dyt, (real *) dzt, (real *) h1,\ 
   84                          (real *) g11, (real *) g22, (real *) g33,             \ 
   85                          (real *) g12, (real *) g13, (real *) g23);            \ 
   86      CUDA_CHECK(cudaGetLastError()); 
   88#define CASE_KSTEP(LX)                                                          \ 
   89    ax_helm_kernel_kstep<real, LX>                                              \ 
   90      <<<nblcks_kstep, nthrds_kstep, 0, stream>>>((real *) w, (real *) u,       \ 
   91                           (real *) dx, (real *) dy, (real *) dz, (real *) h1,  \ 
   92                           (real *) g11, (real *) g22, (real *) g33,            \ 
   93                           (real *) g12, (real *) g13, (real *) g23);           \ 
   94      CUDA_CHECK(cudaGetLastError()); 
   96#define CASE_KSTEP_PADDED(LX)                                                   \ 
   97    ax_helm_kernel_kstep_padded<real, LX>                                       \ 
   98    <<<nblcks_kstep, nthrds_kstep, 0, stream>>>((real *) w, (real *) u,         \ 
   99                           (real *) dx, (real *) dy, (real *) dz, (real *) h1,  \ 
  100                           (real *) g11, (real *) g22, (real *) g33,            \ 
  101                           (real *) g12, (real *) g13, (real *) g23);           \ 
  102      CUDA_CHECK(cudaGetLastError()); 
  106      if(autotune[LX] == 0 ) {                                                  \ 
  107        autotune[LX]=tune<LX>( w,  u,                                           \ 
  111                               g12, g13, g23, nelv, lx);                        \ 
  112      } else if (autotune[LX] == 1 ) {                                          \ 
  114      } else if (autotune[LX] == 2 ) {                                          \ 
  120#define CASE_PADDED(LX)                                                         \ 
  122      if(autotune[LX] == 0 ) {                                                  \ 
  123        autotune[LX]=tune_padded<LX>(w,  u,                                     \ 
  127                                     g12, g13, g23,nelv,lx);                    \ 
  128      } else if (autotune[LX] == 1 ) {                                          \ 
  130      } else if (autotune[LX] == 2 ) {                                          \ 
  131        CASE_KSTEP_PADDED(LX);                                                  \ 
  135#define CASE_LARGE(LX)                                                          \ 
  140#define CASE_LARGE_PADDED(LX)                                                   \ 
  142      CASE_KSTEP_PADDED(LX);                                                    \ 
 
  186                           void *
u, 
void *
v, 
void *
w,
 
  187                           void *
dx, 
void *
dy, 
void *
dz,
 
  191                           void *
g23, 
int *nelv, 
int *lx) {
 
  197#define CASE_VECTOR_KSTEP(LX)                                                  \ 
  198    ax_helm_kernel_vector_kstep<real, LX>                                      \ 
  199    <<<nblcks, nthrds, 0, stream>>> ((real *) au, (real *) av, (real *) aw,    \ 
  200                                     (real *) u, (real *) v, (real *) w,       \ 
  201                                     (real *) dx, (real *) dy, (real *) dz,    \ 
  202                                     (real *) h1, (real *) g11, (real *) g22,  \ 
  203                                     (real *) g33, (real *) g12, (real *) g13, \ 
  205    CUDA_CHECK(cudaGetLastError()); 
  207#define CASE_VECTOR_KSTEP_PADDED(LX)                                           \ 
  208    ax_helm_kernel_vector_kstep_padded<real, LX>                               \ 
  209    <<<nblcks, nthrds, 0, stream>>> ((real *) au, (real *) av, (real *) aw,    \ 
  210                                     (real *) u, (real *) v, (real *) w,       \ 
  211                                     (real *) dx, (real *) dy, (real *) dz,    \ 
  212                                     (real *) h1, (real *) g11, (real *) g22,  \ 
  213                                     (real *) g33, (real *) g12, (real *) g13, \ 
  215    CUDA_CHECK(cudaGetLastError()); 
  218#define CASE_VECTOR(LX)                                                        \ 
  220      CASE_VECTOR_KSTEP(LX);                                                   \ 
  223#define CASE_VECTOR_PADDED(LX)                                                 \ 
  225      CASE_VECTOR_KSTEP_PADDED(LX);                                            \ 
 
  256                                 void *
u, 
void *
v, 
void *
w,
 
  257                                 void *h2, 
void *
B, 
int *n) {
 
  260    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 
 
  270template < const 
int LX >
 
  274         void *
g13, 
void *
g23, 
int *nelv, 
int *lx) {
 
  317  for(
int i = 0; 
i < 100; 
i++) {
 
  327  for(
int i = 0; 
i < 100; 
i++) {
 
  342          (
retval > 1 ? 
"KSTEP" : 
"1D"));
 
 
  348template < const 
int LX >
 
  352                void *
g13, 
void *
g23, 
int *nelv, 
int *lx) {
 
  395  for(
int i = 0; 
i < 100; 
i++) {
 
  405  for(
int i = 0; 
i < 100; 
i++) {
 
  420          (
retval > 1 ? 
"KSTEP" : 
"1D"));
 
 
void cuda_ax_helm_vector_part2(void *au, void *av, void *aw, void *u, void *v, void *w, void *h2, void *B, int *n)
 
int tune(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
void cuda_ax_helm(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
void cuda_ax_helm_vector(void *au, void *av, void *aw, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
#define CASE_KSTEP_PADDED(LX)
 
#define CASE_LARGE_PADDED(LX)
 
int tune_padded(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
#define CASE_VECTOR_PADDED(LX)
 
__global__ void T *__restrict__ T *__restrict__ aw
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
 
__global__ void T *__restrict__ av
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ h1
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g23
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g22
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g13
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g12
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g33
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g11
 
void log_error(char *msg)
 
void log_message(char *msg)
 
void log_section(char *msg)