38#include <hip/hip_runtime.h> 
   49                void *
dx, 
void *
dy, 
void *
dz,
 
   53                void *
w3, 
int *nel, 
int *lx);
 
   61                   void *
dx, 
void *
dy, 
void *
dz,
 
   65                   void *
w3, 
int *nel, 
int *lx) {
 
   74    hipLaunchKernelGGL( HIP_KERNEL_NAME(opgrad_kernel_1d<real, LX, 1024> ),     \ 
   75                        nblcks, nthrds_1d, 0, (hipStream_t) glb_cmd_queue,      \ 
   76                        (real *) ux, (real *) uy, (real *) uz, (real *) u,      \ 
   77                        (real *) dx, (real *) dy, (real *) dz,                  \ 
   78                        (real *) drdx, (real *) dsdx, (real *) dtdx,            \ 
   79                        (real *) drdy, (real *) dsdy, (real *) dtdy,            \ 
   80                        (real *) drdz, (real *) dsdz, (real *) dtdz,            \ 
   82    HIP_CHECK(hipGetLastError());                                              
   85#define CASE_KSTEP(LX)                                                          \ 
   86    hipLaunchKernelGGL( HIP_KERNEL_NAME(opgrad_kernel_kstep<real, LX> ),        \ 
   87                        nblcks, nthrds_kstep, 0, (hipStream_t) glb_cmd_queue,   \ 
   88                        (real *) ux, (real *) uy, (real *) uz, (real *) u,      \ 
   89                        (real *) dx, (real *) dy, (real *) dz,                  \ 
   90                        (real *) drdx, (real *) dsdx, (real *) dtdx,            \ 
   91                        (real *) drdy, (real *) dsdy, (real *) dtdy,            \ 
   92                        (real *) drdz, (real *) dsdz, (real *) dtdz,            \ 
   94    HIP_CHECK(hipGetLastError());                                              
   98      if(autotune[LX] == 0 ) {                                                  \ 
   99        autotune[LX]=tune_opgrad<LX>(ux, uy, uz, u,                             \ 
  105      } else if (autotune[LX] == 1 ) {                                          \ 
  107      } else if (autotune[LX] == 2 ) {                                          \ 
 
  139template < const 
int LX >
 
  141                void *
dx, 
void *
dy, 
void *
dz,
 
  145                void *
w3, 
int *nel, 
int *lx) {
 
  186  for(
int i = 0; 
i < 100; 
i++) {
 
  196  for(
int i = 0; 
i < 100; 
i++) {
 
  211          (
retval > 1 ? 
"KSTEP" : 
"1D"));
 
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
__global__ void T *__restrict__ uy
 
__global__ void T *__restrict__ T *__restrict__ uz
 
void log_error(char *msg)
 
void log_message(char *msg)
 
void log_section(char *msg)
 
int tune_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
 
void hip_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)