__global__ void rotate_cyc_kernel(T *__restrict__ vx, T *__restrict__ vy, T *__restrict__ vz, const T *__restrict__ x, const T *__restrict__ y, const T *__restrict__ z, const int *__restrict__ cyc_msk, const T *__restrict__ R11, const T *__restrict__ R12, const int ncyc, const int idir)