38#include <hip/hip_runtime.h> 
   51         void *
g13, 
void *
g23, 
int *nelv, 
int *lx);
 
   57                void *
g13, 
void *
g23, 
int *nelv, 
int *lx);
 
   67                   void *
g13, 
void *
g23, 
int *nelv, 
int *lx) {
 
   77    hipLaunchKernelGGL(HIP_KERNEL_NAME( ax_helm_kernel_1d<real, LX, 1024> ),    \ 
   78                       nblcks_1d, nthrds_1d, 0,                                 \ 
   79                       (hipStream_t) glb_cmd_queue,                             \ 
   80                       (real *) w, (real *) u,                                  \ 
   81                       (real *) dx, (real *) dy, (real *) dz,                   \ 
   82                       (real *) dxt, (real *) dyt, (real *) dzt, (real *) h1,   \ 
   83                       (real *) g11, (real *) g22, (real *) g33,                \ 
   84                       (real *) g12, (real *) g13, (real *) g23);               \ 
   85    HIP_CHECK(hipGetLastError()); 
   87#define CASE_KSTEP(LX)                                                          \ 
   88    hipLaunchKernelGGL( HIP_KERNEL_NAME( ax_helm_kernel_kstep<real, LX> ),      \ 
   89                        nblcks_kstep, nthrds_kstep, 0,                          \ 
   90                        (hipStream_t) glb_cmd_queue,                            \ 
   91                        (real *) w, (real *) u,                                 \ 
   92                        (real *) dx, (real *) dy, (real *) dz,  (real *) h1,    \ 
   93                        (real *) g11, (real *) g22, (real *) g33,               \ 
   94                        (real *) g12, (real *) g13, (real *) g23);              \ 
   95    HIP_CHECK(hipGetLastError()); 
   99#define CASE_KSTEP_PADDED(LX)                                                   \ 
  100    hipLaunchKernelGGL( HIP_KERNEL_NAME(ax_helm_kernel_kstep_padded<real, LX> ),\ 
  101                        nblcks_kstep, nthrds_kstep, 0,                          \ 
  102                        (hipStream_t) glb_cmd_queue,                            \ 
  103                        (real *) w, (real *) u,                                 \ 
  104                        (real *) dx, (real *) dy, (real *) dz,  (real *) h1,    \ 
  105                        (real *) g11, (real *) g22, (real *) g33,               \ 
  106                        (real *) g12, (real *) g13, (real *) g23);              \ 
  107    HIP_CHECK(hipGetLastError()); 
  111      if(autotune[LX] == 0 ) {                                                  \ 
  112        autotune[LX]=tune<LX>( w,  u,                                           \ 
  116                               g12, g13, g23, nelv, lx);                        \ 
  117      } else if (autotune[LX] == 1 ) {                                          \ 
  119      } else if (autotune[LX] == 2 ) {                                          \ 
  125#define CASE_PADDED(LX)                                                         \ 
  127      if(autotune[LX] == 0 ) {                                                  \ 
  128        autotune[LX]=tune_padded<LX>(w,  u,                                     \ 
  132                                     g12, g13, g23,nelv,lx);                    \ 
  133      } else if (autotune[LX] == 1 ) {                                          \ 
  135      } else if (autotune[LX] == 2 ) {                                          \ 
  136        CASE_KSTEP_PADDED(LX);                                                  \ 
  140#define CASE_LARGE(LX)                                                          \ 
  145#define CASE_LARGE_PADDED(LX)                                                   \ 
  147      CASE_KSTEP_PADDED(LX);                                                    \ 
 
  189                           void *
u, 
void *
v, 
void *
w,
 
  190                           void *
dx, 
void *
dy, 
void *
dz,
 
  194                           void *
g23, 
int *nelv, 
int *lx) {
 
  199#define CASE_VECTOR_KSTEP(LX)                                                   \ 
  200    hipLaunchKernelGGL( HIP_KERNEL_NAME( ax_helm_kernel_vector_kstep<real, LX> ), \ 
  202                        (hipStream_t) glb_cmd_queue,                            \ 
  203                        (real *) au, (real *) av, (real *) aw,                  \ 
  204                        (real *) u, (real *) v, (real *) w,                     \ 
  205                        (real *) dx, (real *) dy, (real *) dz,  (real *) h1,    \ 
  206                        (real *) g11, (real *) g22, (real *) g33,               \ 
  207                        (real *) g12, (real *) g13, (real *) g23);              \ 
  208    HIP_CHECK(hipGetLastError()); 
  210#define CASE_VECTOR_KSTEP_PADDED(LX)                                            \ 
  211    hipLaunchKernelGGL( HIP_KERNEL_NAME( ax_helm_kernel_vector_kstep_padded<real, LX> ), \ 
  213                        (hipStream_t) glb_cmd_queue,                            \ 
  214                        (real *) au, (real *) av, (real *) aw,                  \ 
  215                        (real *) u, (real *) v, (real *) w,                     \ 
  216                        (real *) dx, (real *) dy, (real *) dz,  (real *) h1,    \ 
  217                        (real *) g11, (real *) g22, (real *) g33,               \ 
  218                        (real *) g12, (real *) g13, (real *) g23);              \ 
  219    HIP_CHECK(hipGetLastError()); 
  221#define CASE_VECTOR(LX)                                                         \ 
  223      CASE_VECTOR_KSTEP(LX);                                                    \ 
  226#define CASE_VECTOR_PADDED(LX)                                                  \ 
  228      CASE_VECTOR_KSTEP_PADDED(LX);                                             \ 
 
  259                                 void *
u, 
void *
v, 
void *
w,
 
  260                                 void *h2, 
void *B, 
int *n) {
 
  263    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 
 
  275template < const 
int LX >
 
  279         void *
g13, 
void *
g23, 
int *nelv, 
int *lx) {
 
  321  for(
int i = 0; 
i < 100; 
i++) {
 
  331  for(
int i = 0; 
i < 100; 
i++) {
 
  346          (
retval > 1 ? 
"KSTEP" : 
"1D"));
 
 
  352template < const 
int LX >
 
  356                void *
g13, 
void *
g23, 
int *nelv, 
int *lx) {
 
  398  for(
int i = 0; 
i < 100; 
i++) {
 
  408  for(
int i = 0; 
i < 100; 
i++) {
 
  423          (
retval > 1 ? 
"KSTEP" : 
"1D"));
 
 
void hip_ax_helm_vector_part2(void *au, void *av, void *aw, void *u, void *v, void *w, void *h2, void *B, int *n)
 
int tune(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
#define CASE_KSTEP_PADDED(LX)
 
void hip_ax_helm_vector(void *au, void *av, void *aw, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
void hip_ax_helm(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
#define CASE_LARGE_PADDED(LX)
 
int tune_padded(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
#define CASE_VECTOR_PADDED(LX)
 
__global__ void T *__restrict__ T *__restrict__ aw
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
 
__global__ void T *__restrict__ av
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ h1
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g23
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g22
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g13
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g12
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g33
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g11
 
void log_error(char *msg)
 
void log_message(char *msg)
 
void log_section(char *msg)