36 #include <OpenCL/cl.h>
47 #include "jacobi_kernel.cl.h"
54 void *G11,
void *G22,
void *G33,
55 void *G12,
void *G13,
void *G23,
62 const int nb = (((*nel) * (*lx) * (*lx) * (*lx)) + 256 - 1) / 256;
63 const size_t global_item_size = 256 * nb;
64 const size_t local_item_size = 256;
70 cl_kernel kernel = clCreateKernel(jacobi_program, \
71 STR(jacobi_kernel_lx##LX), &err); \
74 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &d)); \
75 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &dxt)); \
76 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &dyt)); \
77 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &dzt)); \
78 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &G11)); \
79 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &G22)); \
80 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &G33)); \
81 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &G12)); \
82 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &G13)); \
83 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &G23)); \
84 CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), nel)); \
87 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
88 kernel, 1, NULL, &global_item_size, \
89 &local_item_size, 0, NULL, NULL)); \
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
void opencl_kernel_jit(const char *kernel, cl_program *program)
void opencl_jacobi_update(void *d, void *dxt, void *dyt, void *dzt, void *G11, void *G22, void *G33, void *G12, void *G13, void *G23, int *nel, int *lx)
__global__ void jacobi_kernel(T *__restrict__ du, const T *__restrict__ dxt, const T *__restrict__ dyt, const T *__restrict__ dzt, const T *__restrict__ G11, const T *__restrict__ G22, const T *__restrict__ G33, const T *__restrict__ G12, const T *__restrict__ G13, const T *__restrict__ G23, const int nel)