38#include <hip/hip_runtime.h> 
   49                void *
dr, 
void *
ds, 
void *
dt,
 
   50                void *
dx, 
void *
dy, 
void *
dz,
 
   51                void *
jacinv, 
int *nel, 
int *lx);
 
   59                  void *
dr, 
void *
ds, 
void *
dt,
 
   60                  void *
dx, 
void *
dy, 
void *
dz,
 
   61                  void *
jacinv, 
int *nel, 
int *lx) {
 
   70    hipLaunchKernelGGL( HIP_KERNEL_NAME(dudxyz_kernel_1d<real, LX, 1024> ),     \ 
   71                        nblcks, nthrds_1d, 0, (hipStream_t) glb_cmd_queue,      \ 
   72                        (real *) du, (real *) u,                                \ 
   73                        (real *) dr, (real *) ds, (real *) dt,                  \ 
   74                        (real *) dx, (real *) dy, (real *) dz,                  \ 
   76    HIP_CHECK(hipGetLastError());                                            
   78#define CASE_KSTEP(LX)                                                          \ 
   79    hipLaunchKernelGGL( HIP_KERNEL_NAME(dudxyz_kernel_kstep<real, LX> ),        \ 
   80                        nblcks, nthrds_kstep, 0, (hipStream_t) glb_cmd_queue,   \ 
   81                        (real *) du, (real *) u,                                \ 
   82                        (real *) dr, (real *) ds, (real *) dt,                  \ 
   83                        (real *) dx, (real *) dy, (real *) dz,                  \ 
   85    HIP_CHECK(hipGetLastError());                                            
   89      if(autotune[LX] == 0 ) {                                                  \ 
   90        autotune[LX]=tune_dudxyz<LX>(du, u,                                     \ 
   94      } else if (autotune[LX] == 1 ) {                                          \ 
   96      } else if (autotune[LX] == 2 ) {                                          \ 
  101#define CASE_LARGE(LX)                                                          \ 
 
  143template < const 
int LX >
 
  145                void *
dr, 
void *
ds, 
void *
dt,
 
  146                void *
dx, 
void *
dy, 
void *
dz,
 
  147                void *
jacinv, 
int *nel, 
int *lx) {
 
  188  for(
int i = 0; 
i < 100; 
i++) {
 
  198  for(
int i = 0; 
i < 100; 
i++) {
 
  213          (
retval > 1 ? 
"KSTEP" : 
"1D"));
 
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
 
__global__ void const T *__restrict__ const T *__restrict__ dr
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
void log_error(char *msg)
 
void log_message(char *msg)
 
void log_section(char *msg)
 
int tune_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
 
void hip_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)