    1  #ifndef __KRYLOV_FUSEDCG_KERNEL_H__
    2  #define __KRYLOV_FUSEDCG_KERNEL_H__
   52  for (int i = idx; i < n; i+= str) {
   53    p[i] = beta*po[i] + z[i];
 
 
   71  for (int i = idx; i < n; i+= str) {
   74      tmp += p[j][i] * alpha[j];
 
 
  101  for (int i = idx; i < n; i+= str) {
  102    T rt = a[i] - alpha * c[i];
  103    tmp = tmp + rt * b[i] * rt;
 
 
__global__ void const T *__restrict__ x
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
__global__ void fusedcg_update_p_kernel(T *__restrict__ p, const T *__restrict__ z, const T *__restrict__ po, const T beta, const int n)
 
__global__ void fusedcg_part2_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T alpha, T *buf_h, const int n)
 
__global__ void fusedcg_update_x_kernel(T *__restrict__ x, const T **p, const T *__restrict__ alpha, const int p_cur, const int n)