1 #ifndef __KRYLOV_GMRES_KERNEL_H__
2 #define __KRYLOV_GMRES_KERNEL_H__
42 template<
typename T >
44 T *
const * __restrict__
v,
45 const T * __restrict__ mult,
46 const T * __restrict__ h,
47 T * __restrict__ buf_h1,
51 const int idx = blockIdx.x * blockDim.x + threadIdx.x;
52 const int str = blockDim.x * gridDim.x;
54 const unsigned int lane = threadIdx.x % warpSize;
55 const unsigned int wid = threadIdx.x / warpSize;
57 __shared__ T shared[32];
60 for (
int i = idx;
i < n;
i+= str) {
62 for (
int k = 0; k <
j; k ++) {
66 tmp1 +=
w[
i]*
w[
i]*mult[
i];
69 tmp1 = reduce_warp<T>(tmp1);
74 tmp1 = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
76 tmp1 = reduce_warp<T>(tmp1);
79 buf_h1[blockIdx.x] = tmp1;
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void gmres_part2_kernel(T *__restrict__ w, T *const *__restrict__ v, const T *__restrict__ mult, const T *__restrict__ h, T *__restrict__ buf_h1, const int j, const int n)