54                       void *xbar, 
int *
j, 
int *n){ 
 
   61    const int nt = 1024/
pow2;   
 
   64    const int glsc3_nb = ((*n) + nt - 1)/nt;
 
  123                                              (
const real *) alpha,
 
 
  136    const int nt = 1024/
pow2;   
 
  139    const int glsc3_nb = ((*n) + nt - 1)/nt;
 
  176                                              (
const real *) alpha, *
j, *n);
 
  198                                              (
const real *) alpha, *
j, *n);
 
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
void device_mpi_allreduce_inplace(void *buf_d, int count, int nbytes, int op)
 
void cuda_project_ortho(void *alpha, void *b, void *xx, void *bb, void *w, void *xm, int *j, int *n, real *nrm)
 
void cuda_project_on(void *alpha, void *b, void *xx, void *bb, void *mult, void *xbar, int *j, int *n)