37 #include <OpenCL/cl.h>
49 #include "coef_kernel.cl.h"
55 void *G22,
void *G23,
void *G33,
/*
 * Work sizes handed to clEnqueueNDRangeKernel by the GEO_CASE launch below:
 * work-groups of 256 work-items and a global size of 256 * (*nel), i.e.
 * (*nel) work-groups in total.
 * NOTE(review): 256 * (*nel) is evaluated in int before it is widened to
 * size_t (nel is an int * in the signature listed further down) -- confirm
 * that (*nel) always stays below INT_MAX / 256.
 */
67 const size_t global_item_size = 256 * (*nel);
68 const size_t local_item_size = 256;
/*
 * GEO_CASE(LX): launch sequence for one value of LX, as far as it is visible
 * in this excerpt (some lines of the macro are not shown here).
 *
 *  - Creates a kernel from coef_program; the kernel name is built by pasting
 *    LX onto coef_generate_geo_kernel_lx and passing the result through STR
 *    (presumably a stringification macro -- defined outside this excerpt).
 *  - Binds arguments 0-16 with sizeof(cl_mem), passing the address of each
 *    void * parameter, so every pointer value is handed over as a cl_mem
 *    handle: G11, G12, G13, G22, G23, G33 (0-5), drdx .. dtdz (6-14),
 *    jacinv (15) and w3 (16).
 *  - Binds argument 17 as an int read through the gdim pointer.
 *  - Enqueues a 1-D NDRange on glb_cmd_queue using global_item_size and
 *    local_item_size, with no wait list and no completion event.
 *
 * Every OpenCL call shown is wrapped in CL_CHECK.
 * NOTE(review): no check of err after clCreateKernel and no clReleaseKernel
 * appear in the visible lines -- confirm they exist in the elided ones.
 */
71 #define GEO_CASE(LX) \
74 cl_kernel kernel = clCreateKernel(coef_program, \
75 STR(coef_generate_geo_kernel_lx##LX), &err); \
78 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &G11)); \
79 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &G12)); \
80 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &G13)); \
81 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &G22)); \
82 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &G23)); \
83 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &G33)); \
84 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &drdx)); \
85 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &drdy)); \
86 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &drdz)); \
87 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dsdx)); \
88 CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dsdy)); \
89 CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &dsdz)); \
90 CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &dtdx)); \
91 CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &dtdy)); \
92 CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &dtdz)); \
93 CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_mem), (void *) &jacinv)); \
94 CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_mem), (void *) &w3)); \
95 CL_CHECK(clSetKernelArg(kernel, 17, sizeof(int), gdim)); \
97 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
98 kernel, 1, NULL, &global_item_size, \
99 &local_item_size, 0, NULL, NULL)); \
128 void *dxdr,
void *dydr,
void *dzdr,
129 void *dxds,
void *dyds,
void *dzds,
130 void *dxdt,
void *dydt,
void *dzdt,
131 void *
dx,
void *
dy,
void *
dz,
132 void *
x,
void *y,
void *z,
/*
 * n is the product (*nel) * (*lx)^3, later passed by address as the final
 * int argument of the drst kernel.
 *
 * Two global sizes share the same work-group size of 256:
 *  - global_item_size_dxyz = 256 * (*nel), used by the DXYZDRST_CASE launch;
 *  - global_item_size_drst = 256 * n, used by the drst kernel launch.
 *
 * NOTE(review): n and both products are computed in int and only then
 * converted to size_t. 256 * n overflows (undefined behaviour for signed
 * int) as soon as n exceeds INT_MAX / 256, roughly 8.4 million; widening
 * before multiplying, e.g. (size_t) 256 * n, would avoid that.
 * NOTE(review): the drst global size is 256 times n rather than n rounded
 * up to a multiple of 256 -- confirm this is what the kernel expects.
 */
141 const int n = (*nel) * (*lx) * (*lx) * (*lx);
142 const size_t global_item_size_dxyz = 256 * (*nel);
143 const size_t global_item_size_drst = 256 * n;
144 const size_t local_item_size = 256;
/*
 * DXYZDRST_CASE(LX): launch sequence for one value of LX, as far as it is
 * visible in this excerpt (some lines of the macro are not shown here).
 *
 *  - Creates a kernel from coef_program; the kernel name is built by pasting
 *    LX onto coef_generate_dxyz_kernel_lx and passing the result through STR
 *    (presumably a stringification macro -- defined outside this excerpt).
 *  - Binds arguments 0-14 with sizeof(cl_mem), passing the address of each
 *    void * parameter, so every pointer value is handed over as a cl_mem
 *    handle: dxdr, dydr, dzdr, dxds, dyds, dzds, dxdt, dydt, dzdt (0-8),
 *    dx, dy, dz (9-11) and x, y, z (12-14).
 *  - Enqueues a 1-D NDRange on glb_cmd_queue using global_item_size_dxyz and
 *    local_item_size, with no wait list and no completion event.
 *
 * Every OpenCL call shown is wrapped in CL_CHECK.
 * NOTE(review): no check of err after clCreateKernel and no clReleaseKernel
 * appear in the visible lines -- confirm they exist in the elided ones.
 */
147 #define DXYZDRST_CASE(LX) \
150 cl_kernel kernel = clCreateKernel(coef_program, \
151 STR(coef_generate_dxyz_kernel_lx##LX), &err); \
154 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &dxdr)); \
155 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &dydr)); \
156 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &dzdr)); \
157 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &dxds)); \
158 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &dyds)); \
159 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &dzds)); \
160 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &dxdt)); \
161 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &dydt)); \
162 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &dzdt)); \
163 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dx)); \
164 CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dy)); \
165 CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &dz)); \
166 CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &x)); \
167 CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &y)); \
168 CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &z)); \
170 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
171 kernel, 1, NULL, &global_item_size_dxyz, \
172 &local_item_size, 0, NULL, NULL)); \
195 "coef_generate_drst_kernel", &err);
/*
 * Argument binding for `kernel` (presumably the kernel created under the
 * name "coef_generate_drst_kernel" just above -- the start of that call is
 * not visible in this excerpt).
 *
 * Arguments 0-19 are bound with sizeof(cl_mem), passing the address of each
 * void * parameter so its value is handed over as a cl_mem handle:
 *   0-1    jac, jacinv
 *   2-10   drdx, drdy, drdz, dsdx, dsdy, dsdz, dtdx, dtdy, dtdz
 *   11-19  dxdr, dydr, dzdr, dxds, dyds, dzds, dxdt, dydt, dzdt
 * Argument 20 is the int n, passed by address with sizeof(int).
 *
 * Each clSetKernelArg call is wrapped in CL_CHECK.
 */
198 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &jac));
199 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &
jacinv));
200 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &
drdx));
201 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &
drdy));
202 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &
drdz));
203 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &
dsdx));
204 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(cl_mem), (
void *) &
dsdy));
205 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(cl_mem), (
void *) &
dsdz));
206 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(cl_mem), (
void *) &
dtdx));
207 CL_CHECK(clSetKernelArg(kernel, 9,
sizeof(cl_mem), (
void *) &
dtdy));
208 CL_CHECK(clSetKernelArg(kernel, 10,
sizeof(cl_mem), (
void *) &
dtdz));
209 CL_CHECK(clSetKernelArg(kernel, 11,
sizeof(cl_mem), (
void *) &dxdr));
210 CL_CHECK(clSetKernelArg(kernel, 12,
sizeof(cl_mem), (
void *) &dydr));
211 CL_CHECK(clSetKernelArg(kernel, 13,
sizeof(cl_mem), (
void *) &dzdr));
212 CL_CHECK(clSetKernelArg(kernel, 14,
sizeof(cl_mem), (
void *) &dxds));
213 CL_CHECK(clSetKernelArg(kernel, 15,
sizeof(cl_mem), (
void *) &dyds));
214 CL_CHECK(clSetKernelArg(kernel, 16,
sizeof(cl_mem), (
void *) &dzds));
215 CL_CHECK(clSetKernelArg(kernel, 17,
sizeof(cl_mem), (
void *) &dxdt));
216 CL_CHECK(clSetKernelArg(kernel, 18,
sizeof(cl_mem), (
void *) &dydt));
217 CL_CHECK(clSetKernelArg(kernel, 19,
sizeof(cl_mem), (
void *) &dzdt));
218 CL_CHECK(clSetKernelArg(kernel, 20,
sizeof(
int), &n));
221 kernel, 1, NULL, &global_item_size_drst,
222 &local_item_size, 0, NULL, NULL));
void opencl_coef_generate_geo(void *G11, void *G12, void *G13, void *G22, void *G23, void *G33, void *drdx, void *drdy, void *drdz, void *dsdx, void *dsdy, void *dsdz, void *dtdx, void *dtdy, void *dtdz, void *jacinv, void *w3, int *nel, int *lx, int *gdim)
void opencl_coef_generate_dxyzdrst(void *drdx, void *drdy, void *drdz, void *dsdx, void *dsdy, void *dsdz, void *dtdx, void *dtdy, void *dtdz, void *dxdr, void *dydr, void *dzdr, void *dxds, void *dyds, void *dzds, void *dxdt, void *dydt, void *dzdt, void *dx, void *dy, void *dz, void *x, void *y, void *z, void *jacinv, void *jac, int *lx, int *nel)
#define DXYZDRST_CASE(LX)
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
void opencl_kernel_jit(const char *kernel, cl_program *program)