38 #include <hip/hip_runtime.h>
47 template < const
int >
49 void *
vx,
void *
vy,
void *
vz,
50 void *
dx,
void *
dy,
void *
dz,
54 void *
jacinv,
int *nel,
int *gdim,
int *lx);
62 void *
vx,
void *
vy,
void *
vz,
63 void *
dx,
void *
dy,
void *
dz,
67 void *
jacinv,
int *nel,
int *gdim,
int *lx) {
69 static int autotune[17] = { 0 };
71 const dim3 nthrds_1d(1024, 1, 1);
72 const dim3 nthrds_kstep((*lx), (*lx), 1);
73 const dim3 nblcks((*nel), 1, 1);
76 hipLaunchKernelGGL( HIP_KERNEL_NAME(conv1_kernel_1d<real, LX, 1024> ), \
77 nblcks, nthrds_1d, 0, (hipStream_t) glb_cmd_queue, \
78 (real *) du, (real *) u, \
79 (real *) vx, (real *) vy, (real *) vz, \
80 (real *) dx, (real *) dy, (real *) dz, \
81 (real *) drdx, (real *) dsdx, (real *) dtdx, \
82 (real *) drdy, (real *) dsdy, (real *) dtdy, \
83 (real *) drdz, (real *) dsdz, (real *) dtdz, \
85 HIP_CHECK(hipGetLastError());
87 #define CASE_KSTEP(LX) \
88 hipLaunchKernelGGL( HIP_KERNEL_NAME(conv1_kernel_kstep<real, LX> ), \
89 nblcks, nthrds_kstep, 0, (hipStream_t) glb_cmd_queue, \
90 (real *) du, (real *) u, \
91 (real *) vx, (real *) vy, (real *) vz, \
92 (real *) dx, (real *) dy, (real *) dz, \
93 (real *) drdx, (real *) dsdx, (real *) dtdx, \
94 (real *) drdy, (real *) dsdy, (real *) dtdy, \
95 (real *) drdz, (real *) dsdz, (real *) dtdz, \
97 HIP_CHECK(hipGetLastError());
101 if(autotune[LX] == 0 ) { \
102 autotune[LX]=tune_conv1<LX>(du, u, \
108 jacinv, nel, gdim, lx); \
109 } else if (autotune[LX] == 1 ) { \
111 } else if (autotune[LX] == 2 ) { \
116 #define CASE_LARGE(LX) \
135 fprintf(stderr, __FILE__
": size not supported: %d\n", *lx);
150 fprintf(stderr, __FILE__
": size not supported: %d\n", *lx);
158 template < const
int LX >
160 void *
vx,
void *
vy,
void *
vz,
161 void *
dx,
void *
dy,
void *
dz,
165 void *
jacinv,
int *nel,
int *gdim,
int *lx) {
166 hipEvent_t start,stop;
170 const dim3 nthrds_1d(1024, 1, 1);
171 const dim3 nthrds_kstep((*lx), (*lx), 1);
172 const dim3 nblcks((*nel), 1, 1);
174 char *env_value = NULL;
175 char neko_log_buf[80];
177 env_value=getenv(
"NEKO_AUTOTUNE");
179 sprintf(neko_log_buf,
"Autotune conv1 (lx: %d)", *lx);
183 if( !strcmp(env_value,
"1D") ) {
185 sprintf(neko_log_buf,
"Set by env : 1 (1D)");
189 }
else if( !strcmp(env_value,
"KSTEP") ) {
191 sprintf(neko_log_buf,
"Set by env : 2 (KSTEP)");
196 sprintf(neko_log_buf,
"Invalid value set for NEKO_AUTOTUNE");
206 for(
int i = 0;
i < 100;
i++) {
212 HIP_CHECK(hipEventElapsedTime(&time1, start, stop));
216 for(
int i = 0;
i < 100;
i++) {
222 HIP_CHECK(hipEventElapsedTime(&time2, start, stop));
230 sprintf(neko_log_buf,
"Chose : %d (%s)", retval,
231 (retval > 1 ?
"KSTEP" :
"1D"));
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ vz
__global__ void const T *__restrict__ const T *__restrict__ vx
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ vy
void log_error(char *msg)
void log_message(char *msg)
void log_section(char *msg)
void hip_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
int tune_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)