35 #ifndef __COMMON_PROJECTION_KERNEL__
36 #define __COMMON_PROJECTION_KERNEL__
41 template<
typename T >
46 const T * __restrict__ alpha,
50 const int idx = blockIdx.x * blockDim.x + threadIdx.x;
51 const int str = blockDim.x * gridDim.x;
53 for (
int i = idx;
i < n;
i+= str) {
56 for (
int j = 0;
j < p_cur;
j ++) {
57 tmp1 += xx[
j][
i] * alpha[
j];
58 tmp2 += yy[
j][
i] * -alpha[
j];
70 template<
typename T >
75 const T * __restrict__ alpha,
79 const int idx = blockIdx.x * blockDim.x + threadIdx.x;
80 const int str = blockDim.x * gridDim.x;
82 for (
int i = idx;
i < n;
i+= str) {
85 for (
int j = 0;
j < (p_cur - 1);
j ++) {
86 tmp1 += xx[
j][
i] * -alpha[
j];
87 tmp2 += yy[
j][
i] * -alpha[
j];
__global__ void const T *__restrict__ x
__global__ void project_ortho_vec_kernel(T *__restrict__ x, const T **xx, T *__restrict__ y, const T **yy, const T *__restrict__ alpha, const int p_cur, const int n)
__global__ void project_on_vec_kernel(T *__restrict__ x, const T **xx, T *__restrict__ y, const T **yy, const T *__restrict__ alpha, const int p_cur, const int n)