49#include "coef_kernel.cl.h"
55 void *G22,
void *G23,
void *G33,
74 cl_kernel kernel = clCreateKernel(coef_program, \
75 STR(coef_generate_geo_kernel_lx##LX), &err); \
78 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &G11)); \
79 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &G12)); \
80 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &G13)); \
81 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &G22)); \
82 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &G23)); \
83 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &G33)); \
84 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &drdx)); \
85 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &drdy)); \
86 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &drdz)); \
87 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dsdx)); \
88 CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dsdy)); \
89 CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &dsdz)); \
90 CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &dtdx)); \
91 CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &dtdy)); \
92 CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &dtdz)); \
93 CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_mem), (void *) &jacinv)); \
94 CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_mem), (void *) &w3)); \
95 CL_CHECK(clSetKernelArg(kernel, 17, sizeof(int), gdim)); \
97 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
98 kernel, 1, NULL, &global_item_size, \
99 &local_item_size, 0, NULL, NULL)); \
100 CL_CHECK(clReleaseKernel(kernel)); \
129 void *dxdr,
void *dydr,
void *dzdr,
130 void *dxds,
void *dyds,
void *dzds,
131 void *dxdt,
void *dydt,
void *dzdt,
132 void *
dx,
void *
dy,
void *
dz,
133 void *
x,
void *y,
void *z,
142 const int n = (*nel) * (*lx) * (*lx) * (*lx);
148#define DXYZDRST_CASE(LX) \
151 cl_kernel kernel = clCreateKernel(coef_program, \
152 STR(coef_generate_dxyz_kernel_lx##LX), &err); \
155 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &dxdr)); \
156 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &dydr)); \
157 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &dzdr)); \
158 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &dxds)); \
159 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &dyds)); \
160 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &dzds)); \
161 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &dxdt)); \
162 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &dydt)); \
163 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &dzdt)); \
164 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dx)); \
165 CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dy)); \
166 CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &dz)); \
167 CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &x)); \
168 CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &y)); \
169 CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &z)); \
171 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
172 kernel, 1, NULL, &global_item_size_dxyz, \
173 &local_item_size, 0, NULL, NULL)); \
174 CL_CHECK(clReleaseKernel(kernel)); \
197 "coef_generate_drst_kernel", &
err);
232 void *
w3,
int *lxyz,
int *nel) {
242 "coef_generate_mass_kernel", &
err);
262 void *nx,
void *ny,
void *nz,
263 void *dxdr,
void *dydr,
void *dzdr,
264 void *dxds,
void *dyds,
void *dzds,
265 void *dxdt,
void *dydt,
void *dzdt,
266 void *wx,
void *wy,
void *wz,
277#define AREA_CASE(LX) \
280 printf("%s\n", STR(coef_generate_area_and_normal_kernel_lx##LX)); \
282 clCreateKernel(coef_program, \
283 STR(coef_generate_area_and_normal_kernel_lx##LX), &err); \
287 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &area)); \
288 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &nx)); \
289 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &ny)); \
290 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &nz)); \
291 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &dxdr)); \
292 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &dydr)); \
293 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &dzdr)); \
294 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &dxds)); \
295 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &dyds)); \
296 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dzds)); \
297 CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dxdt)); \
298 CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &dydt)); \
299 CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &dzdt)); \
300 CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &wx)); \
301 CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &wy)); \
302 CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_mem), (void *) &wz)); \
303 CL_CHECK(clSetKernelArg(kernel, 16, sizeof(real), &eps)); \
305 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
306 kernel, 1, NULL, &global_item_size, \
307 &local_item_size, 0, NULL, NULL)); \
308 CL_CHECK(clReleaseKernel(kernel)); \
/*
 * Signature of the mass-coefficient entry point: four untyped buffer
 * handles (B, Binv, jac, w3) and two integers passed by pointer
 * (lxyz, nel).
 *
 * NOTE(review): the body is not visible on this line. Presumably B and
 * Binv are outputs and jac/w3 inputs, and lxyz/nel are passed by pointer
 * for Fortran interoperability -- confirm against the full source.
 */
void opencl_coef_generate_mass(void *B, void *Binv, void *jac, void *w3, int *lxyz, int *nel)
/*
 * Signature of the geometric-factor entry point: seventeen untyped buffer
 * handles (G11..G33, drdx..dtdz, jacinv, w3) followed by three integers
 * passed by pointer (nel, lx, gdim).
 *
 * NOTE(review): the body is not visible on this line. Presumably the
 * seventeen handles are cl_mem objects forwarded, in declaration order,
 * as kernel arguments 0-16, with gdim forwarded as an int argument and
 * lx selecting a per-size kernel variant -- confirm against the full
 * source.
 */
void opencl_coef_generate_geo(void *G11, void *G12, void *G13, void *G22, void *G23, void *G33, void *drdx, void *drdy, void *drdz, void *dsdx, void *dsdy, void *dsdz, void *dtdx, void *dtdy, void *dtdz, void *jacinv, void *w3, int *nel, int *lx, int *gdim)
/*
 * Signature of the dxyz/drst entry point: twenty-six untyped buffer
 * handles (drdx..dtdz, dxdr..dzdt, dx, dy, dz, x, y, z, jacinv, jac)
 * followed by two integers passed by pointer (lx, nel).
 *
 * NOTE(review): the body is not visible on this line. Presumably
 * dxdr..dzdt are produced from dx/dy/dz and x/y/z first, and
 * drdx..dtdz, jacinv and jac are derived from them in a second step --
 * confirm against the full source.
 */
void opencl_coef_generate_dxyzdrst(void *drdx, void *drdy, void *drdz, void *dsdx, void *dsdy, void *dsdz, void *dtdx, void *dtdy, void *dtdz, void *dxdr, void *dydr, void *dzdr, void *dxds, void *dyds, void *dzds, void *dxdt, void *dydt, void *dzdt, void *dx, void *dy, void *dz, void *x, void *y, void *z, void *jacinv, void *jac, int *lx, int *nel)
/*
 * Signature of the area-and-normal entry point: sixteen untyped buffer
 * handles (area, nx, ny, nz, dxdr..dzdt, wx, wy, wz), two integers passed
 * by pointer (lx, nel) and a tolerance `eps` passed by value as `real`.
 *
 * NOTE(review): the body is not visible on this line. Presumably area and
 * nx/ny/nz are outputs and the remaining buffers inputs; the meaning of
 * `eps` cannot be determined from here -- confirm against the full
 * source.
 */
void opencl_coef_generate_area_and_normal(void *area, void *nx, void *ny, void *nz, void *dxdr, void *dydr, void *dzdr, void *dxds, void *dyds, void *dzds, void *dxdt, void *dydt, void *dzdt, void *wx, void *wy, void *wz, int *lx, int *nel, real eps)
/*
 * Function-like macro taking one argument, LX. As written on this line it
 * has an empty replacement list.
 *
 * NOTE(review): this looks like an index/stub entry rather than the real
 * definition; presumably the actual macro expands to a per-LX kernel
 * launch case -- confirm against the full source.
 */
#define DXYZDRST_CASE(LX)
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
/*
 * Signature of a `__global__` kernel taking a read-only int array `msk`,
 * a writable array `x` of element type T, a scalar `g` of type T and an
 * int `m`.
 *
 * NOTE(review): the body and the declaration of T are not visible.
 * Presumably T is a template parameter, `msk` holds m indices into `x`,
 * and each selected entry of `x` is set to `g` -- confirm against the
 * full source (including the index base used by `msk`).
 */
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
/*
 * Signature of a helper taking a read-only C string `kernel` and a pointer
 * to a `cl_program` handle.
 *
 * NOTE(review): the body is not visible. Presumably `kernel` is OpenCL
 * source text that is compiled at run time and the resulting program is
 * stored through `program` -- confirm against the full source, including
 * whether an already-built program is reused.
 */
void opencl_kernel_jit(const char *kernel, cl_program *program)