46 template < const
int >
48 void *
dx,
void *
dy,
void *
dz,
52 void *
jacinv,
int *nel,
int *lx);
60 void *
dx,
void *
dy,
void *
dz,
64 void *
jacinv,
int *nel,
int *lx) {
66 static int autotune[17] = { 0 };
68 const dim3 nthrds_1d(1024, 1, 1);
69 const dim3 nthrds_kstep((*lx), (*lx), 1);
70 const dim3 nblcks((*nel), 1, 1);
74 lambda2_kernel_1d<real, LX, 1024> \
75 <<<nblcks, nthrds_1d, 0, stream>>> \
76 ((real *) lambda2, (real *) u, (real *) v, (real *) w, \
77 (real *) dx, (real *) dy, (real *) dz, \
78 (real *) drdx, (real *) dsdx, (real *) dtdx, \
79 (real *) drdy, (real *) dsdy, (real *) dtdy, \
80 (real *) drdz, (real *) dsdz, (real *) dtdz, \
82 CUDA_CHECK(cudaGetLastError());
85 #define CASE_KSTEP(LX) \
86 lambda2_kernel_kstep<real, LX> <<<nblcks, nthrds_kstep, 0, stream>>> \
87 ((real *) lambda2, (real *) u, (real *) v, (real *) w, \
88 (real *) dx, (real *) dy, (real *) dz, \
89 (real *) drdx, (real *) dsdx, (real *) dtdx, \
90 (real *) drdy, (real *) dsdy, (real *) dtdy, \
91 (real *) drdz, (real *) dsdz, (real *) dtdz, \
93 CUDA_CHECK(cudaGetLastError());
97 if(autotune[LX] == 0 ) { \
98 autotune[LX]=tune_lambda2<LX>(lambda2, u, v, w, \
104 } else if (autotune[LX] == 1 ) { \
106 } else if (autotune[LX] == 2 ) { \
125 fprintf(stderr, __FILE__
": size not supported: %d\n", *lx);
132 template < const
int LX >
134 void *
dx,
void *
dy,
void *
dz,
138 void *
jacinv,
int *nel,
int *lx) {
139 cudaEvent_t start,stop;
143 const dim3 nthrds_1d(1024, 1, 1);
144 const dim3 nthrds_kstep((*lx), (*lx), 1);
145 const dim3 nblcks((*nel), 1, 1);
148 char *env_value = NULL;
149 char neko_log_buf[80];
151 env_value=getenv(
"NEKO_AUTOTUNE");
153 sprintf(neko_log_buf,
"Autotune lambda2 (lx: %d)", *lx);
157 if( !strcmp(env_value,
"1D") ) {
159 sprintf(neko_log_buf,
"Set by env : 1 (1D)");
163 }
else if( !strcmp(env_value,
"KSTEP") ) {
165 sprintf(neko_log_buf,
"Set by env : 2 (KSTEP)");
170 sprintf(neko_log_buf,
"Invalid value set for NEKO_AUTOTUNE");
175 cudaEventCreate(&start);
176 cudaEventCreate(&stop);
178 cudaEventRecord(start,0);
180 for(
int i = 0;
i < 100;
i++) {
184 cudaEventRecord(stop,0);
185 cudaEventSynchronize(stop);
186 cudaEventElapsedTime(&time1, start, stop);
188 cudaEventRecord(start,0);
190 for(
int i = 0;
i < 100;
i++) {
194 cudaEventRecord(stop,0);
195 cudaEventSynchronize(stop);
196 cudaEventElapsedTime(&time2, start, stop);
204 sprintf(neko_log_buf,
"Chose : %d (%s)", retval,
205 (retval > 1 ?
"KSTEP" :
"1D"));
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void T *__restrict__ uy
__global__ void T *__restrict__ T *__restrict__ uz
A simulation component that computes lambda2 The values are stored in the field registry under the na...
void log_error(char *msg)
void log_message(char *msg)
void log_section(char *msg)
int tune_lambda2(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)