38 #include <hip/hip_runtime.h>
51 void *
g13,
void *
g23,
int *nelv,
int *lx);
57 void *
g13,
void *
g23,
int *nelv,
int *lx);
67 void *
g13,
void *
g23,
int *nelv,
int *lx) {
69 static int autotune[13] = { 0 };
71 const dim3 nthrds_1d(1024, 1, 1);
72 const dim3 nblcks_1d((*nelv), 1, 1);
73 const dim3 nthrds_kstep((*lx), (*lx), 1);
74 const dim3 nblcks_kstep((*nelv), 1, 1);
77 hipLaunchKernelGGL(HIP_KERNEL_NAME( ax_helm_kernel_1d<real, LX, 1024> ), \
78 nblcks_1d, nthrds_1d, 0, \
79 (hipStream_t) glb_cmd_queue, \
80 (real *) w, (real *) u, \
81 (real *) dx, (real *) dy, (real *) dz, \
82 (real *) dxt, (real *) dyt, (real *) dzt, (real *) h1, \
83 (real *) g11, (real *) g22, (real *) g33, \
84 (real *) g12, (real *) g13, (real *) g23); \
85 HIP_CHECK(hipGetLastError());
87 #define CASE_KSTEP(LX) \
88 hipLaunchKernelGGL( HIP_KERNEL_NAME( ax_helm_kernel_kstep<real, LX> ), \
89 nblcks_kstep, nthrds_kstep, 0, \
90 (hipStream_t) glb_cmd_queue, \
91 (real *) w, (real *) u, \
92 (real *) dx, (real *) dy, (real *) dz, (real *) h1, \
93 (real *) g11, (real *) g22, (real *) g33, \
94 (real *) g12, (real *) g13, (real *) g23); \
95 HIP_CHECK(hipGetLastError());
99 #define CASE_KSTEP_PADDED(LX) \
100 hipLaunchKernelGGL( HIP_KERNEL_NAME(ax_helm_kernel_kstep_padded<real, LX> ),\
101 nblcks_kstep, nthrds_kstep, 0, \
102 (hipStream_t) glb_cmd_queue, \
103 (real *) w, (real *) u, \
104 (real *) dx, (real *) dy, (real *) dz, (real *) h1, \
105 (real *) g11, (real *) g22, (real *) g33, \
106 (real *) g12, (real *) g13, (real *) g23); \
107 HIP_CHECK(hipGetLastError());
111 if(autotune[LX] == 0 ) { \
112 autotune[LX]=tune<LX>( w, u, \
116 g12, g13, g23, nelv, lx); \
117 } else if (autotune[LX] == 1 ) { \
119 } else if (autotune[LX] == 2 ) { \
125 #define CASE_PADDED(LX) \
127 if(autotune[LX] == 0 ) { \
128 autotune[LX]=tune_padded<LX>(w, u, \
132 g12, g13, g23,nelv,lx); \
133 } else if (autotune[LX] == 1 ) { \
135 } else if (autotune[LX] == 2 ) { \
136 CASE_KSTEP_PADDED(LX); \
155 fprintf(stderr, __FILE__
": size not supported: %d\n", *lx);
164 void *
u,
void *
v,
void *
w,
165 void *
dx,
void *
dy,
void *
dz,
169 void *
g23,
int *nelv,
int *lx) {
171 const dim3 nthrds((*lx), (*lx), 1);
172 const dim3 nblcks((*nelv), 1, 1);
174 #define CASE_VECTOR_KSTEP(LX) \
175 hipLaunchKernelGGL( HIP_KERNEL_NAME( ax_helm_kernel_vector_kstep<real, LX> ), \
177 (hipStream_t) glb_cmd_queue, \
178 (real *) au, (real *) av, (real *) aw, \
179 (real *) u, (real *) v, (real *) w, \
180 (real *) dx, (real *) dy, (real *) dz, (real *) h1, \
181 (real *) g11, (real *) g22, (real *) g33, \
182 (real *) g12, (real *) g13, (real *) g23); \
183 HIP_CHECK(hipGetLastError());
185 #define CASE_VECTOR_KSTEP_PADDED(LX) \
186 hipLaunchKernelGGL( HIP_KERNEL_NAME( ax_helm_kernel_vector_kstep_padded<real, LX> ), \
188 (hipStream_t) glb_cmd_queue, \
189 (real *) au, (real *) av, (real *) aw, \
190 (real *) u, (real *) v, (real *) w, \
191 (real *) dx, (real *) dy, (real *) dz, (real *) h1, \
192 (real *) g11, (real *) g22, (real *) g33, \
193 (real *) g12, (real *) g13, (real *) g23); \
194 HIP_CHECK(hipGetLastError());
196 #define CASE_VECTOR(LX) \
198 CASE_VECTOR_KSTEP(LX); \
201 #define CASE_VECTOR_PADDED(LX) \
203 CASE_VECTOR_KSTEP_PADDED(LX); \
224 fprintf(stderr, __FILE__
": size not supported: %d\n", *lx);
234 void *
u,
void *
v,
void *
w,
235 void *h2,
void *B,
int *n) {
237 const dim3 nthrds(1024, 1, 1);
238 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
241 hipLaunchKernelGGL( HIP_KERNEL_NAME( ax_helm_kernel_vector_part2<real> ),
242 nblcks, nthrds, 0, stream,
250 template < const
int LX >
254 void *
g13,
void *
g23,
int *nelv,
int *lx) {
255 hipEvent_t start,stop;
259 const dim3 nthrds_1d(1024, 1, 1);
260 const dim3 nblcks_1d((*nelv), 1, 1);
261 const dim3 nthrds_kstep((*lx), (*lx), 1);
262 const dim3 nblcks_kstep((*nelv), 1, 1);
264 char *env_value = NULL;
265 char neko_log_buf[80];
267 env_value=getenv(
"NEKO_AUTOTUNE");
269 sprintf(neko_log_buf,
"Autotune Ax helm (lx: %d)", *lx);
273 if( !strcmp(env_value,
"1D") ) {
275 sprintf(neko_log_buf,
"Set by env : 1 (1D)");
279 }
else if( !strcmp(env_value,
"KSTEP") ) {
281 sprintf(neko_log_buf,
"Set by env : 2 (KSTEP)");
286 sprintf(neko_log_buf,
"Invalid value set for NEKO_AUTOTUNE");
296 for(
int i = 0;
i < 100;
i++) {
302 HIP_CHECK(hipEventElapsedTime(&time1, start, stop));
306 for(
int i = 0;
i < 100;
i++) {
312 HIP_CHECK(hipEventElapsedTime(&time2, start, stop));
320 sprintf(neko_log_buf,
"Chose : %d (%s)", retval,
321 (retval > 1 ?
"KSTEP" :
"1D"));
327 template < const
int LX >
331 void *
g13,
void *
g23,
int *nelv,
int *lx) {
332 hipEvent_t start, stop;
336 const dim3 nthrds_1d(1024, 1, 1);
337 const dim3 nblcks_1d((*nelv), 1, 1);
338 const dim3 nthrds_kstep((*lx), (*lx), 1);
339 const dim3 nblcks_kstep((*nelv), 1, 1);
341 char *env_value = NULL;
342 char neko_log_buf[80];
344 env_value=getenv(
"NEKO_AUTOTUNE");
346 sprintf(neko_log_buf,
"Autotune Ax helm (lx: %d)", *lx);
350 if( !strcmp(env_value,
"1D") ) {
352 sprintf(neko_log_buf,
"Set by env : 1 (1D)");
356 }
else if( !strcmp(env_value,
"KSTEP") ) {
358 sprintf(neko_log_buf,
"Set by env : 2 (KSTEP)");
363 sprintf(neko_log_buf,
"Invalid value set for NEKO_AUTOTUNE");
373 for(
int i = 0;
i < 100;
i++) {
379 HIP_CHECK(hipEventElapsedTime(&time1, start, stop));
383 for(
int i = 0;
i < 100;
i++) {
389 HIP_CHECK(hipEventElapsedTime(&time2, start, stop));
397 sprintf(neko_log_buf,
"Chose : %d (%s)", retval,
398 (retval > 1 ?
"KSTEP" :
"1D"));
void hip_ax_helm_vector_part2(void *au, void *av, void *aw, void *u, void *v, void *w, void *h2, void *B, int *n)
int tune(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
#define CASE_KSTEP_PADDED(LX)
void hip_ax_helm_vector(void *au, void *av, void *aw, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
void hip_ax_helm(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
int tune_padded(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
#define CASE_VECTOR_PADDED(LX)
__global__ void T *__restrict__ T *__restrict__ aw
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ av
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ h1
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g23
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g22
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g13
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g12
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g33
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g11
void log_error(char *msg)
void log_message(char *msg)
void log_section(char *msg)