50 for (
int i = idx;
i < n;
i +=
str) {
71 for (
int i = idx;
i < n;
i +=
str) {
89 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
105 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
void cuda_amg_cheby_solve_part2(void *r, void *w, void *d, void *x, real *tmp1, real *tmp2, int *n, cudaStream_t strm)
__global__ void amg_cheby_solve_part2(T *__restrict__ r, T *__restrict__ w, T *__restrict__ d, T *__restrict__ x, const T tmp1, const T tmp2, const int n)
void cuda_amg_cheby_solve_part1(void *r, void *f, void *w, void *x, void *d, real *inv_thet, int *n, bool *zero_initial, cudaStream_t strm)
__global__ void amg_cheby_solve_part1(T *__restrict__ r, T *__restrict__ f, T *__restrict__ w, T *__restrict__ x, T *__restrict__ d, const T inv_thet, const bool zero_initial, const int n)
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)