38 #include <hip/hip_runtime.h>
47 template < const
int >
49 void *
dr,
void *
ds,
void *
dt,
51 void *
w3,
int *nel,
int *lx);
59 void *
dr,
void *
ds,
void *
dt,
61 void *
w3,
int *nel,
int *lx) {
63 static int autotune[17] = { 0 };
65 const dim3 nthrds_1d(1024, 1, 1);
66 const dim3 nthrds_kstep((*lx), (*lx), 1);
67 const dim3 nblcks((*nel), 1, 1);
70 hipLaunchKernelGGL( HIP_KERNEL_NAME(cdtp_kernel_1d<real, LX, 1024> ), \
71 nblcks, nthrds_1d, 0, (hipStream_t) glb_cmd_queue, \
72 (real *) dtx, (real *) x, \
73 (real *) dr, (real *) ds, (real *) dt, \
74 (real *) dxt, (real *) dyt, (real *) dzt, \
76 HIP_CHECK(hipGetLastError());
78 #define CASE_KSTEP(LX) \
79 hipLaunchKernelGGL( HIP_KERNEL_NAME(cdtp_kernel_kstep<real, LX> ), \
80 nblcks, nthrds_kstep, 0, (hipStream_t) glb_cmd_queue, \
81 (real *) dtx, (real *) x, \
82 (real *) dr, (real *) ds, (real *) dt, \
83 (real *) dxt, (real *) dyt, (real *) dzt, \
85 HIP_CHECK(hipGetLastError());
89 if(autotune[LX] == 0 ) { \
90 autotune[LX]=tune_cdtp<LX>(dtx, x, \
94 } else if (autotune[LX] == 1 ) { \
96 } else if (autotune[LX] == 2 ) { \
101 #define CASE_LARGE(LX) \
122 fprintf(stderr, __FILE__
": size not supported: %d\n", *lx);
135 fprintf(stderr, __FILE__
": size not supported: %d\n", *lx);
143 template < const
int LX >
145 void *
dr,
void *
ds,
void *
dt,
147 void *
w3,
int *nel,
int *lx) {
148 hipEvent_t start,stop;
152 const dim3 nthrds_1d(1024, 1, 1);
153 const dim3 nthrds_kstep((*lx), (*lx), 1);
154 const dim3 nblcks((*nel), 1, 1);
156 char *env_value = NULL;
157 char neko_log_buf[80];
159 env_value=getenv(
"NEKO_AUTOTUNE");
161 sprintf(neko_log_buf,
"Autotune cdtp (lx: %d)", *lx);
165 if( !strcmp(env_value,
"1D") ) {
167 sprintf(neko_log_buf,
"Set by env : 1 (1D)");
171 }
else if( !strcmp(env_value,
"KSTEP") ) {
173 sprintf(neko_log_buf,
"Set by env : 2 (KSTEP)");
178 sprintf(neko_log_buf,
"Invalid value set for NEKO_AUTOTUNE");
188 for(
int i = 0;
i < 100;
i++) {
194 HIP_CHECK(hipEventElapsedTime(&time1, start, stop));
198 for(
int i = 0;
i < 100;
i++) {
204 HIP_CHECK(hipEventElapsedTime(&time2, start, stop));
212 sprintf(neko_log_buf,
"Chose : %d (%s)", retval,
213 (retval > 1 ?
"KSTEP" :
"1D"));
__global__ void const T *__restrict__ const T *__restrict__ dr
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
void log_error(char *msg)
void log_message(char *msg)
void log_section(char *msg)
int tune_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
void hip_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)