52 void *
u,
int *n,
void *
gd,
int *
nb,
53 void *b,
void *
bo,
int *op,
56 if ((*m) == 0)
return;
59 const dim3 nblcks(((*m)+ 1024 - 1)/ 1024, 1, 1);
66 *
nb, (
int *) b, (
int *)
bo);
73 *
nb, (
int *) b, (
int *)
bo);
80 *
nb, (
int *) b, (
int *)
bo);
87 *
nb, (
int *) b, (
int *)
bo);
97 void *
u,
int *n,
void *
gd,
98 int *
nb,
void *b,
void *
bo,
101 if ((*m) == 0)
return;
104 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
109 *
nb, (
int *) b, (
int *)
bo);
122 if (stream ==
NULL) {
125 (
int *) dof_d + offset, n);
130 (
int *) dof_d + offset, n);
147 if (stream ==
NULL) {
149 <<<
nblcks,
nthrds>>>(u_d, buf_d + offset, dof_d + offset, n);
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
void cuda_gs_unpack(real *u_d, int op, real *buf_d, int *dof_d, int offset, int n, cudaStream_t stream)
void cuda_gs_pack(void *u_d, void *buf_d, void *dof_d, int offset, int n, cudaStream_t stream)
void cuda_gather_kernel(void *v, int *m, int *o, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, int *op, cudaStream_t stream)
void cuda_scatter_kernel(void *v, int *m, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, cudaStream_t stream)