36 #include <OpenCL/cl.h>
47 #include "mathops_kernel.cl.h"
/*
 * Launch setup for the "opchsign_kernel" kernel from mathops_program.
 * The enclosing function header is not visible in this excerpt.
 * NOTE(review): err is written by clCreateKernel, but no check of it
 * appears in the visible lines - confirm against the full source.
 */
56 cl_kernel kernel = clCreateKernel(
mathops_program,
"opchsign_kernel", &err);
/*
 * Kernel args 0-2: the addresses of a1, a2 and a3 are passed with size
 * sizeof(cl_mem), so the pointer values themselves are handed over as
 * cl_mem handles.
 */
59 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &a1));
60 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &a2));
61 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &a3));
/*
 * Kernel args 3-4: gdim and n are already pointers to int, so they are
 * passed directly as the argument-value pointer; the kernel receives the
 * int values they point to.
 */
62 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(
int), gdim));
63 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(
int), n));
/*
 * nb is (*n) divided by 256, rounded up. The global size is therefore (*n)
 * rounded up to a multiple of the local size, 256. The product 256 * nb is
 * evaluated in int before being stored in a size_t.
 */
65 const int nb = ((*n) + 256 - 1) / 256;
66 const size_t global_item_size = 256 * nb;
67 const size_t local_item_size = 256;
/*
 * Argument-list fragment of a call whose start is not visible here;
 * presumably clEnqueueNDRangeKernel with a NULL global offset - confirm.
 */
70 NULL, &global_item_size, &local_item_size,
/**
 * Sets the arguments of the "opcolv_kernel" kernel and computes its launch
 * sizes. The remainder of the body (including the closing brace) is not
 * visible in this excerpt.
 *
 * @param a1,a2,a3  Opaque pointers whose values are passed to the kernel as
 *                  cl_mem arguments 0-2 (presumably device buffers -
 *                  confirm with callers).
 * @param c         Opaque pointer passed the same way as cl_mem argument 3.
 * @param gdim      Pointer to an int passed by value as kernel argument 4.
 * @param n         Pointer to an int passed by value as kernel argument 5;
 *                  also used to size the launch.
 */
75 void opencl_opcolv(
void *a1,
void *a2,
void *a3,
void *c,
int *gdim,
int *n) {
/*
 * NOTE(review): err is written by clCreateKernel, but neither its
 * declaration nor a check of it appears in the visible lines - confirm.
 */
81 cl_kernel kernel = clCreateKernel(
mathops_program,
"opcolv_kernel", &err);
/*
 * Args 0-3: the address of each void* parameter is passed with size
 * sizeof(cl_mem), so the pointer value itself is used as the cl_mem handle.
 */
84 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &a1));
85 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &a2));
86 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &a3));
87 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &c));
/* Args 4-5: the ints that gdim and n point to are passed by value. */
88 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(
int), gdim));
89 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(
int), n));
/*
 * nb is (*n) divided by 256, rounded up, so the global size is (*n) rounded
 * up to a multiple of the local size, 256. Work items past (*n) may exist;
 * presumably the kernel guards against them using n - confirm in the .cl
 * source.
 */
91 const int nb = ((*n) + 256 - 1) / 256;
92 const size_t global_item_size = 256 * nb;
93 const size_t local_item_size = 256;
/*
 * Argument-list fragment of a call whose start is not visible here;
 * presumably clEnqueueNDRangeKernel with a NULL global offset - confirm.
 */
96 NULL, &global_item_size, &local_item_size,
102 void *b1,
void *b2,
void *b3,
103 void *c,
real *d,
int *gdim,
int *n) {
/*
 * Launch setup for the "opcolv3c_kernel" kernel from mathops_program. The
 * function name and its leading parameters are not visible in this excerpt;
 * the body below also reads a1, a2 and a3.
 * NOTE(review): err is written by clCreateKernel, but no check of it
 * appears in the visible lines - confirm against the full source.
 */
109 cl_kernel kernel = clCreateKernel(
mathops_program,
"opcolv3c_kernel", &err);
/*
 * Args 0-6: the address of each pointer (a1-a3, b1-b3, c) is passed with
 * size sizeof(cl_mem), so the pointer value itself is used as the cl_mem
 * handle.
 */
112 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &a1));
113 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &a2));
114 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &a3));
115 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &b1));
116 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &b2));
117 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &b3));
118 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(cl_mem), (
void *) &c));
/*
 * NOTE(review): no clSetKernelArg call for index 7 is visible in this
 * excerpt, and the parameter d is never referenced in the visible lines.
 * Presumably d is bound to index 7 on a line that is not shown - confirm.
 */
/* Args 8-9: the ints that gdim and n point to are passed by value. */
120 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(
int), gdim));
121 CL_CHECK(clSetKernelArg(kernel, 9,
sizeof(
int), n));
/*
 * nb is (*n) divided by 256, rounded up, so the global size is (*n) rounded
 * up to a multiple of the local size, 256.
 */
123 const int nb = ((*n) + 256 - 1) / 256;
124 const size_t global_item_size = 256 * nb;
125 const size_t local_item_size = 256;
/*
 * Argument-list fragment of a call whose start is not visible here;
 * presumably clEnqueueNDRangeKernel with a NULL global offset - confirm.
 */
128 NULL, &global_item_size, &local_item_size,
134 void *b1,
void *b2,
void *b3,
135 real *c,
int *gdim,
int *n) {
/*
 * Launch setup for the "opadd2cm_kernel" kernel from mathops_program. The
 * function name and its leading parameters are not visible in this excerpt;
 * the body below also reads a1, a2 and a3.
 * NOTE(review): err is written by clCreateKernel, but no check of it
 * appears in the visible lines - confirm against the full source.
 */
141 cl_kernel kernel = clCreateKernel(
mathops_program,
"opadd2cm_kernel", &err);
/*
 * Args 0-5: the address of each pointer (a1-a3, b1-b3) is passed with size
 * sizeof(cl_mem), so the pointer value itself is used as the cl_mem handle.
 */
144 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &a1));
145 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &a2));
146 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &a3));
147 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &b1));
148 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &b2));
149 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &b3));
/*
 * NOTE(review): no clSetKernelArg call for index 6 is visible in this
 * excerpt, and the parameter c is never referenced in the visible lines.
 * Presumably c is bound to index 6 on a line that is not shown - confirm.
 */
/* Args 7-8: the ints that gdim and n point to are passed by value. */
151 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(
int), gdim));
152 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(
int), n));
/*
 * nb is (*n) divided by 256, rounded up, so the global size is (*n) rounded
 * up to a multiple of the local size, 256.
 */
154 const int nb = ((*n) + 256 - 1) / 256;
155 const size_t global_item_size = 256 * nb;
156 const size_t local_item_size = 256;
/*
 * Argument-list fragment of a call whose start is not visible here;
 * presumably clEnqueueNDRangeKernel with a NULL global offset - confirm.
 */
159 NULL, &global_item_size, &local_item_size,
165 void *b1,
void *b2,
void *b3,
166 void *c,
int *gdim,
int *n) {
/*
 * Launch setup for the "opadd2col_kernel" kernel from mathops_program. The
 * function name and its leading parameters are not visible in this excerpt;
 * the body below also reads a1, a2 and a3.
 * NOTE(review): err is written by clCreateKernel, but no check of it
 * appears in the visible lines - confirm against the full source.
 */
172 cl_kernel kernel = clCreateKernel(
mathops_program,
"opadd2col_kernel", &err);
/*
 * Args 0-6: the address of each pointer (a1-a3, b1-b3, c) is passed with
 * size sizeof(cl_mem), so the pointer value itself is used as the cl_mem
 * handle.
 */
175 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &a1));
176 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &a2));
177 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &a3));
178 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &b1));
179 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &b2));
180 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &b3));
181 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(cl_mem), (
void *) &c));
/* Args 7-8: the ints that gdim and n point to are passed by value. */
182 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(
int), gdim));
183 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(
int), n));
/*
 * nb is (*n) divided by 256, rounded up, so the global size is (*n) rounded
 * up to a multiple of the local size, 256.
 */
185 const int nb = ((*n) + 256 - 1) / 256;
186 const size_t global_item_size = 256 * nb;
187 const size_t local_item_size = 256;
/*
 * Argument-list fragment of a call whose start is not visible here;
 * presumably clEnqueueNDRangeKernel with a NULL global offset - confirm.
 */
190 NULL, &global_item_size, &local_item_size,
void opencl_kernel_jit(const char *kernel, cl_program *program)
void opencl_opcolv3c(void *a1, void *a2, void *a3, void *b1, void *b2, void *b3, void *c, real *d, int *gdim, int *n)
void opencl_opadd2cm(void *a1, void *a2, void *a3, void *b1, void *b2, void *b3, real *c, int *gdim, int *n)
void opencl_opcolv(void *a1, void *a2, void *a3, void *c, int *gdim, int *n)
void opencl_opchsign(void *a1, void *a2, void *a3, int *gdim, int *n)
void opencl_opadd2col(void *a1, void *a2, void *a3, void *b1, void *b2, void *b3, void *c, int *gdim, int *n)