35 #include <hip/hip_runtime.h>
42 ({ __typeof__ (a) _a = (a); \
43 __typeof__ (b) _b = (b); \
51 void *A,
void *Bt,
void *Ct,
int *nel) {
53 const dim3 nthrds(1024, 1, 1);
54 const dim3 nblcks(*nel, 1, 1);
59 hipLaunchKernelGGL(HIP_KERNEL_NAME(tnsr3d_kernel<real, N>), \
61 (hipStream_t) glb_cmd_queue, \
64 (real *) A, (real *) Bt, (real *) Ct); \
65 HIP_CHECK(hipGetLastError()); \
86 fprintf(stderr, __FILE__
": size not supported: %d\n", n);
94 void *A,
void *Bt,
void *Ct,
int * elements,
int* n_points) {
95 const dim3 nthrds(1024, 1, 1);
96 const dim3 nblcks(*n_points, 1, 1);
101 hipLaunchKernelGGL(HIP_KERNEL_NAME(tnsr3d_el_kernel<real, N>), \
103 (hipStream_t) glb_cmd_queue, \
106 (real *) A, (real *) Bt, (real *) Ct, \
107 (int *) elements, *n_points); \
108 HIP_CHECK(hipGetLastError()); \
127 fprintf(stderr, __FILE__
": size not supported: %d\n", n);
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
void hip_tnsr3d(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *nel)
void hip_tnsr3d_el_list(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *elements, int *n_points)