44 const dim3 nthrds(1024, 1, 1);
45 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
49 <<<nblcks, nthrds, 0, stream>>>((
real *) a1, (
real *) a2, (
real *) a3,
/**
 * Host-side wrapper that configures and issues a 1-D kernel launch for the
 * "opcolv" operation.
 *
 * a1, a2, a3, c  Opaque pointers; each is reinterpreted as `real *` and
 *                forwarded to the kernel unchanged.
 *                (presumably device buffers - TODO confirm against callers)
 * gdim           Pointer to an int; dereferenced here and passed to the
 *                kernel by value.
 * n              Pointer to an int; dereferenced here both to size the grid
 *                and as the last kernel argument.
 *
 * NOTE(review): the kernel name and the closing brace of this function are
 * not visible in this excerpt; confirm any post-launch error check against
 * the full file.
 */
55 void cuda_opcolv(
void *a1,
void *a2,
void *a3,
void *c,
int *gdim,
int *n) {
/* Fixed block size: 1024 threads along x only. */
57 const dim3 nthrds(1024, 1, 1);
/* Ceiling division of *n by 1024: the grid covers *n with whole blocks. */
58 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
/* Launch with no dynamic shared memory (third config argument is 0) on
 * `stream`. NOTE(review): `stream` is not declared in the visible lines -
 * presumably set up in an elided line or at file scope; confirm. */
62 <<<nblcks, nthrds, 0, stream>>>((
real *) a1, (
real *) a2, (
real *) a3,
63 (
real *) c, *gdim, *n);
/**
 * Host-side wrapper that configures and issues a 1-D kernel launch for the
 * "opcolv3c" operation.
 *
 * a1, a2, a3, c  Opaque pointers; each is reinterpreted as `real *` and
 *                forwarded to the kernel.
 *                (presumably device buffers - TODO confirm against callers)
 * b1, b2, b3     Opaque pointers. They do not appear in the launch arguments
 *                visible here. NOTE(review): an argument line appears to be
 *                elided from this excerpt - presumably they are forwarded
 *                there; confirm against the full file.
 * d              Pointer to a `real`; dereferenced in this function and
 *                passed to the kernel by value, so it must be readable from
 *                the calling (host) code.
 * gdim           Pointer to an int; dereferenced here and passed by value.
 * n              Pointer to an int; dereferenced here both to size the grid
 *                and as the last kernel argument.
 *
 * NOTE(review): the kernel name and the closing brace of this function are
 * not visible in this excerpt.
 */
69 void cuda_opcolv3c(
void *a1,
void *a2,
void *a3,
void *b1,
void *b2,
void *b3,
70 void *c,
real *d,
int *gdim,
int *n) {
/* Fixed block size: 1024 threads along x only. */
72 const dim3 nthrds(1024, 1, 1);
/* Ceiling division of *n by 1024: the grid covers *n with whole blocks. */
73 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
/* Launch with no dynamic shared memory (third config argument is 0) on
 * `stream`. NOTE(review): `stream` is not declared in the visible lines -
 * presumably set up in an elided line or at file scope; confirm. */
77 <<<nblcks, nthrds, 0, stream>>>((
real *) a1, (
real *) a2, (
real *) a3,
79 (
real *) c, *d, *gdim, *n);
86 void *b1,
void *b2,
void *b3,
real *c,
int *gdim,
int *n) {
88 const dim3 nthrds(1024, 1, 1);
89 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
93 <<<nblcks, nthrds, 0, stream>>>((
real *) a1, (
real *) a2, (
real *) a3,
102 void *b1,
void *b2,
void *b3,
void *c,
int *gdim,
int *n) {
104 const dim3 nthrds(1024, 1, 1);
105 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
108 opadd2col_kernel<real>
109 <<<nblcks, nthrds, 0, stream>>>((
real *) a1, (
real *) a2, (
real *) a3,
111 (
real *) c, *gdim, *n);
void cuda_opchsign(void *a1, void *a2, void *a3, int *gdim, int *n)
void cuda_opcolv3c(void *a1, void *a2, void *a3, void *b1, void *b2, void *b3, void *c, real *d, int *gdim, int *n)
void cuda_opadd2cm(void *a1, void *a2, void *a3, void *b1, void *b2, void *b3, real *c, int *gdim, int *n)
void cuda_opcolv(void *a1, void *a2, void *a3, void *c, int *gdim, int *n)
void cuda_opadd2col(void *a1, void *a2, void *a3, void *b1, void *b2, void *b3, void *c, int *gdim, int *n)