38#include <hip/hip_runtime.h>
49 void *
dr,
void *
ds,
void *
dt,
50 void *
dx,
void *
dy,
void *
dz,
51 void *
jacinv,
int *nel,
int *lx);
59 void *
dr,
void *
ds,
void *
dt,
60 void *
dx,
void *
dy,
void *
dz,
61 void *
jacinv,
int *nel,
int *lx) {
70 hipLaunchKernelGGL( HIP_KERNEL_NAME(dudxyz_kernel_1d<real, LX, 1024> ), \
71 nblcks, nthrds_1d, 0, (hipStream_t) glb_cmd_queue, \
72 (real *) du, (real *) u, \
73 (real *) dr, (real *) ds, (real *) dt, \
74 (real *) dx, (real *) dy, (real *) dz, \
76 HIP_CHECK(hipGetLastError());
78#define CASE_KSTEP(LX) \
79 hipLaunchKernelGGL( HIP_KERNEL_NAME(dudxyz_kernel_kstep<real, LX> ), \
80 nblcks, nthrds_kstep, 0, (hipStream_t) glb_cmd_queue, \
81 (real *) du, (real *) u, \
82 (real *) dr, (real *) ds, (real *) dt, \
83 (real *) dx, (real *) dy, (real *) dz, \
85 HIP_CHECK(hipGetLastError());
89 if(autotune[LX] == 0 ) { \
90 autotune[LX]=tune_dudxyz<LX>(du, u, \
94 } else if (autotune[LX] == 1 ) { \
96 } else if (autotune[LX] == 2 ) { \
101#define CASE_LARGE(LX) \
143template < const
int LX >
145 void *
dr,
void *
ds,
void *
dt,
146 void *
dx,
void *
dy,
void *
dz,
147 void *
jacinv,
int *nel,
int *lx) {
188 for(
int i = 0;
i < 100;
i++) {
198 for(
int i = 0;
i < 100;
i++) {
213 (
retval > 1 ?
"KSTEP" :
"1D"));
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void const T *__restrict__ const T *__restrict__ dr
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
void log_error(char *msg)
void log_message(char *msg)
void log_section(char *msg)
int tune_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void hip_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)