36 #include <OpenCL/cl.h>
47 #include "conv1_kernel.cl.h"
53 void *
vx,
void *
vy,
void *
vz,
54 void *
dx,
void *
dy,
void *
dz,
58 void *
jacinv,
int *nel,
int *gdim,
int *lx) {
64 const size_t global_item_size = 256 * (*nel);
65 const size_t local_item_size = 256;
71 cl_kernel kernel = clCreateKernel(conv1_program, \
72 STR(conv1_kernel_lx##LX), &err); \
75 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &du)); \
76 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &u)); \
77 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &vx)); \
78 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &vy)); \
79 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &vz)); \
80 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &dx)); \
81 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &dy)); \
82 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &dz)); \
83 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &drdx)); \
84 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dsdx)); \
85 CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dtdx)); \
86 CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &drdy)); \
87 CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &dsdy)); \
88 CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &dtdy)); \
89 CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &drdz)); \
90 CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_mem), (void *) &dsdz)); \
91 CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_mem), (void *) &dtdz)); \
92 CL_CHECK(clSetKernelArg(kernel, 17, sizeof(cl_mem), (void *) &jacinv)); \
94 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
95 kernel, 1, NULL, &global_item_size, \
96 &local_item_size, 0, NULL, NULL)); \
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ vz
__global__ void const T *__restrict__ const T *__restrict__ vx
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ vy
/**
 * Declaration only; the definition is not visible in this file.
 *
 * @param kernel   Read-only C string.
 *                 NOTE(review): presumably the OpenCL kernel source text to
 *                 build -- confirm against the definition.
 * @param program  Pointer to a cl_program.
 *                 NOTE(review): looks like an output slot that receives the
 *                 built program -- confirm against the definition.
 */
void opencl_kernel_jit(const char *kernel, cl_program *program)
/**
 * Host-side launcher for the conv1 OpenCL kernel.
 *
 * The body fragment visible in this file (which appears to belong to this
 * function) binds du, u, vx, vy, vz, dx, dy, dz, drdx, dsdx, dtdx, drdy,
 * dsdy, dtdy, drdz, dsdz, dtdz and jacinv as kernel arguments 0..17, in
 * exactly this parameter order, and then enqueues a one-dimensional range
 * on glb_cmd_queue with a local size of 256 and a global size of
 * 256 * (*nel).
 *
 * Each void * argument is handed to clSetKernelArg with sizeof(cl_mem), so
 * the pointer value itself is treated as a cl_mem handle rather than as a
 * host address to dereference.
 *
 * @param nel   Pointer to the count used to scale the global work size
 *              (256 work-items per unit of *nel).
 * @param gdim  Not referenced in the visible portion of the body.
 * @param lx    NOTE(review): the kernel name is formed as
 *              conv1_kernel_lx##LX, so *lx presumably selects the kernel
 *              variant -- the dispatch itself is not visible; confirm.
 */
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)