41 ({ __typeof__ (a) _a = (a); \
42 __typeof__ (b) _b = (b); \
50 void *A,
void *Bt,
void *Ct,
int *nel) {
51 const dim3 nthrds(1024, 1, 1);
52 const dim3 nblcks(*nel, 1, 1);
58 tnsr3d_kernel<real, N> \
59 <<<nblcks, nthrds, 0, stream>>>((real *) v, *nv, \
61 (real *) A, (real *) Bt, (real *) Ct); \
62 CUDA_CHECK(cudaGetLastError()); \
65 #define CASE_LARGE(N) \
67 tnsr3d_kernel_large<real, N> \
68 <<<nblcks, nthrds, 0, stream>>>((real *) v, *nv, \
70 (real *) A, (real *) Bt, (real *) Ct); \
71 CUDA_CHECK(cudaGetLastError()); \
92 fprintf(stderr, __FILE__
": size not supported: %d\n", n);
100 void *A,
void *Bt,
void *Ct,
int * elements,
int* n_points) {
101 const dim3 nthrds(1024, 1, 1);
102 const dim3 nblcks(*n_points, 1, 1);
105 int n =
max(*nu,*nv);
108 tnsr3d_el_kernel<real, N> \
109 <<<nblcks, nthrds, 0, stream>>>((real *) v, *nv, \
111 (real *) A, (real *) Bt, (real *) Ct, \
112 (int *) elements, *n_points); \
113 CUDA_CHECK(cudaGetLastError()); \
132 fprintf(stderr, __FILE__
": size not supported: %d\n", n);
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
void cuda_tnsr3d(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *nel)
void cuda_tnsr3d_el_list(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *elements, int *n_points)