// pipecg_vecops_kernel -- device kernel entry point (`__global__`, returns void).
// Only the signature is visible in this listing; the notes below describe the
// parameter list and nothing more.
//
// NOTE(review): `T` is not declared on this line -- presumably a template
// parameter introduced just above the definition; confirm.
// NOTE(review): the name suggests the fused vector-update step of a pipelined
// conjugate-gradient solver; verify against the kernel body.
//
// Parameters:
//   p, q, r, s, u1, u2, w, z, ni, mi
//       Ten non-const `T *__restrict__` pointers. `__restrict__` is a no-alias
//       promise to the compiler, so callers must not pass overlapping buffers
//       for any of these. Which ones are read and which are written cannot be
//       determined from the signature.
//   alpha, beta
//       Scalars of type T passed by value.
//   mult
//       Pointer to const T (not written through this pointer); it is not
//       marked `__restrict__`.
//   buf_h1, buf_h2, buf_h3
//       Non-const T pointers without `__restrict__`.
//       TODO confirm their role and which memory space they live in.
//   n
//       `const int`; presumably the element count of the vectors -- confirm.
__global__ void pipecg_vecops_kernel(T *__restrict__ p, T *__restrict__ q, T *__restrict__ r, T *__restrict__ s, T *__restrict__ u1, T *__restrict__ u2, T *__restrict__ w, T *__restrict__ z, T *__restrict__ ni, T *__restrict__ mi, const T alpha, const T beta, const T *mult, T *buf_h1, T *buf_h2, T *buf_h3, const int n)
// cg_update_xp_kernel -- device kernel entry point (`__global__`, returns void).
// Only the signature is visible in this listing; the notes below describe the
// parameter list and nothing more.
//
// NOTE(review): `T` is not declared on this line -- presumably a template
// parameter introduced just above the definition; confirm.
// NOTE(review): the name suggests the x / p vector update of a
// conjugate-gradient iteration; verify against the kernel body.
//
// Parameters:
//   x, p
//       Non-const `T *__restrict__` pointers. `__restrict__` is a no-alias
//       promise to the compiler, so callers must not pass overlapping buffers.
//   u
//       `T **__restrict__`: a pointer to an array of T pointers, not a flat
//       buffer. The pointer table itself must be dereferenceable from device
//       code because this is a kernel argument.
//       TODO confirm how many entries it holds and how they are selected.
//   alpha, beta
//       Pointers to const T (not by-value scalars). Presumably the
//       coefficients are read from memory at run time; confirm how they are
//       indexed.
//   p_cur, p_space
//       `const int` values. Presumably an index into, and the size of, the
//       set of vectors behind `u` -- verify against the caller.
//   n
//       `const int`; presumably the element count of the vectors -- confirm.
__global__ void cg_update_xp_kernel(T *__restrict__ x, T *__restrict__ p, T **__restrict__ u, const T *alpha, const T *beta, const int p_cur, const int p_space, const int n)