1#ifndef __KRYLOV_FUSEDCG_KERNEL_H__
2#define __KRYLOV_FUSEDCG_KERNEL_H__
52 for (
int i = idx;
i < n;
i+=
str) {
53 p[
i] = beta*
po[
i] + z[
i];
71 for (
int i = idx;
i < n;
i+=
str) {
74 tmp += p[
j][
i] * alpha[
j];
101 for (
int i = idx;
i < n;
i+=
str) {
102 T rt =
a[
i] - alpha * c[
i];
103 tmp = tmp +
rt * b[
i] *
rt;
__global__ void const T *__restrict__ x
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
__global__ void fusedcg_update_p_kernel(T *__restrict__ p, const T *__restrict__ z, const T *__restrict__ po, const T beta, const int n)
__global__ void fusedcg_part2_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T alpha, T *buf_h, const int n)
__global__ void fusedcg_update_x_kernel(T *__restrict__ x, const T **p, const T *__restrict__ alpha, const int p_cur, const int n)