41  ({ __typeof__ (a) _a = (a);  \ 
   42     __typeof__ (b) _b = (b);   \ 
 
   50           void *
A, 
void *
Bt, 
void *
Ct, 
int *nel) {
 
   58    tnsr3d_kernel<real, N>                                                   \ 
   59      <<<nblcks, nthrds, 0, stream>>>((real *) v, *nv,                       \ 
   61                                      (real *) A, (real *) Bt, (real *) Ct); \ 
   62    CUDA_CHECK(cudaGetLastError());                                          \ 
   65#define CASE_LARGE(N)                                                        \ 
   67    tnsr3d_kernel_large<real, N>                                             \ 
   68      <<<nblcks, nthrds, 0, stream>>>((real *) v, *nv,                       \ 
   70                                      (real *) A, (real *) Bt, (real *) Ct); \ 
   71    CUDA_CHECK(cudaGetLastError());                                          \ 
 
  100           void *
A, 
void *
Bt, 
void *
Ct, 
int * elements, 
int* n_points) {
 
  105    int n = 
max(*nu,*
nv);
 
  108    tnsr3d_el_kernel<real, N>                                                   \ 
  109      <<<nblcks, nthrds, 0, stream>>>((real *) v, *nv,                       \ 
  111                                      (real *) A, (real *) Bt, (real *) Ct,  \ 
  112                                      (int *) elements, *n_points); \ 
  113    CUDA_CHECK(cudaGetLastError());                                          \ 
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
void cuda_tnsr3d(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *nel)
 
void cuda_tnsr3d_el_list(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *elements, int *n_points)