36 #include <OpenCL/cl.h>
47 #include "cdtp_kernel.cl.h"
53 void *
dr,
void *
ds,
void *
dt,
55 void *
w3,
int *nel,
int *lx) {
61 const size_t global_item_size = 256 * (*nel);
62 const size_t local_item_size = 256;
68 cl_kernel kernel = clCreateKernel(cdtp_program, \
69 STR(cdtp_kernel_lx##LX), &err); \
72 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &dtx)); \
73 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &x)); \
74 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &dr)); \
75 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &ds)); \
76 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &dt)); \
77 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &dxt)); \
78 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &dyt)); \
79 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &dzt)); \
80 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &w3)); \
82 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
83 kernel, 1, NULL, &global_item_size, \
84 &local_item_size, 0, NULL, NULL)); \
__global__ void const T *__restrict__ const T *__restrict__ dr
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
void opencl_kernel_jit(const char *kernel, cl_program *program)
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)