36 #include <OpenCL/cl.h>
47 #include "rhs_maker_kernel.cl.h"
/*
 * Excerpt: tail of a parameter list, kernel-argument binding and work-size
 * setup. Judging by the parameter names this appears to belong to
 * rhs_maker_sumab_opencl -- confirm against the full file. The function
 * header, the kernel creation and the start of the enqueue call are not
 * visible in this excerpt.
 */
50 void *uu,
void *vv,
void *ww,
51 void *ulag1,
void *ulag2,
void *vlag1,
52 void *vlag2,
void *wlag1,
void *wlag2,
/*
 * Arguments 0-11: each one passes sizeof(cl_mem) bytes read from the address
 * of the corresponding void* parameter.
 * NOTE(review): this assumes every void* parameter carries a cl_mem handle --
 * confirm with the callers.
 */
62 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &
u));
63 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &
v));
64 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &
w));
65 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &uu));
66 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &vv));
67 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &ww));
68 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(cl_mem), (
void *) &ulag1));
69 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(cl_mem), (
void *) &ulag2));
70 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(cl_mem), (
void *) &vlag1));
71 CL_CHECK(clSetKernelArg(kernel, 9,
sizeof(cl_mem), (
void *) &vlag2));
72 CL_CHECK(clSetKernelArg(kernel, 10,
sizeof(cl_mem), (
void *) &wlag1));
73 CL_CHECK(clSetKernelArg(kernel, 11,
sizeof(cl_mem), (
void *) &wlag2));
/*
 * Arguments 12-16: by-value arguments. ext1..ext3, nab and n are handed over
 * as they are (no address-of), with sizes sizeof(real) and sizeof(int).
 */
74 CL_CHECK(clSetKernelArg(kernel, 12,
sizeof(
real), ext1));
75 CL_CHECK(clSetKernelArg(kernel, 13,
sizeof(
real), ext2));
76 CL_CHECK(clSetKernelArg(kernel, 14,
sizeof(
real), ext3));
77 CL_CHECK(clSetKernelArg(kernel, 15,
sizeof(
int), nab));
78 CL_CHECK(clSetKernelArg(kernel, 16,
sizeof(
int), n));
/*
 * Work sizes: nb is *n divided by 256, rounded up; the global size is nb
 * groups of 256 items and the local size is 256.
 * NOTE(review): 256 * nb is multiplied as int before the conversion to
 * size_t.
 */
80 const int nb = ((*n) + 256 - 1) / 256;
81 const size_t global_item_size = 256 * nb;
82 const size_t local_item_size = 256;
85 NULL, &global_item_size, &local_item_size,
/*
 * Excerpt: tail of a parameter list, kernel-argument binding and work-size
 * setup. Judging by the parameter names this appears to belong to
 * rhs_maker_ext_opencl -- confirm against the full file. The function header,
 * the kernel creation and the start of the enqueue call are not visible in
 * this excerpt.
 */
90 void *abx2,
void *aby2,
void *abz2,
91 void *bfx,
void *bfy,
void *bfz,
/*
 * Arguments 0-8: each one passes sizeof(cl_mem) bytes read from the address
 * of the corresponding void* parameter.
 * NOTE(review): this assumes every void* parameter carries a cl_mem handle --
 * confirm with the callers.
 */
101 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &abx1));
102 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &aby1));
103 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &abz1));
104 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &abx2));
105 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &aby2));
106 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &abz2));
107 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(cl_mem), (
void *) &bfx));
108 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(cl_mem), (
void *) &bfy));
109 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(cl_mem), (
void *) &bfz));
/*
 * Arguments 10-13: by-value arguments, handed over without address-of.
 * Argument index 9 is not visible in this excerpt (presumably rho, set on the
 * omitted line) -- TODO confirm.
 */
111 CL_CHECK(clSetKernelArg(kernel, 10,
sizeof(
real), ext1));
112 CL_CHECK(clSetKernelArg(kernel, 11,
sizeof(
real), ext2));
113 CL_CHECK(clSetKernelArg(kernel, 12,
sizeof(
real), ext3));
114 CL_CHECK(clSetKernelArg(kernel, 13,
sizeof(
int), n));
/*
 * Work sizes: nb is *n divided by 256, rounded up; the global size is nb
 * groups of 256 items and the local size is 256.
 * NOTE(review): 256 * nb is multiplied as int before the conversion to
 * size_t.
 */
116 const int nb = ((*n) + 256 - 1) / 256;
117 const size_t global_item_size = 256 * nb;
118 const size_t local_item_size = 256;
121 NULL, &global_item_size, &local_item_size,
/*
 * Excerpt: end of a parameter list followed by kernel creation, argument
 * binding and work-size setup. Judging by the parameter names this appears to
 * belong to scalar_rhs_maker_ext_opencl -- confirm against the full file. The
 * start of the signature and the start of the enqueue call are not visible
 * in this excerpt.
 */
128 real *ext3,
int *n) {
/* Look up "scalar_makeext_kernel" in rhs_maker_program; status goes to err. */
134 cl_kernel kernel = clCreateKernel(
rhs_maker_program,
"scalar_makeext_kernel", &err);
/*
 * Arguments 0-2: each one passes sizeof(cl_mem) bytes read from the address
 * of the named variable.
 * NOTE(review): this assumes fs_lag, fs_laglag and fs carry cl_mem handles --
 * confirm with the callers.
 */
137 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &fs_lag));
138 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &fs_laglag));
139 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &fs));
/*
 * Arguments 4-7: by-value arguments, handed over without address-of.
 * Argument index 3 is not visible in this excerpt (presumably rho, set on the
 * omitted line) -- TODO confirm.
 */
141 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(
real), ext1));
142 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(
real), ext2));
143 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(
real), ext3));
144 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(
int), n));
/*
 * Work sizes: nb is *n divided by 256, rounded up; the global size is nb
 * groups of 256 items and the local size is 256.
 * NOTE(review): 256 * nb is multiplied as int before the conversion to
 * size_t.
 */
146 const int nb = ((*n) + 256 - 1) / 256;
147 const size_t global_item_size = 256 * nb;
148 const size_t local_item_size = 256;
151 NULL, &global_item_size, &local_item_size,
/*
 * Excerpt: part of a parameter list, kernel-argument binding and work-size
 * setup. Judging by the parameter names this appears to belong to
 * rhs_maker_bdf_opencl -- confirm against the full file. The start of the
 * signature, the kernel creation and the start of the enqueue call are not
 * visible in this excerpt.
 */
157 void *vlag2,
void *wlag1,
void *wlag2,
158 void *bfx,
void *bfy,
void *bfz,
159 void *
u,
void *
v,
void *
w,
void *B,
161 real *bd3,
real *bd4,
int *nbd,
int *n) {
/*
 * Arguments 0-12: each one passes sizeof(cl_mem) bytes read from the address
 * of the corresponding void* parameter.
 * NOTE(review): this assumes every void* parameter carries a cl_mem handle --
 * confirm with the callers.
 */
170 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &ulag1));
171 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &ulag2));
172 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &vlag1));
173 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &vlag2));
174 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &wlag1));
175 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &wlag2));
176 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(cl_mem), (
void *) &bfx));
177 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(cl_mem), (
void *) &bfy));
178 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(cl_mem), (
void *) &bfz));
179 CL_CHECK(clSetKernelArg(kernel, 9,
sizeof(cl_mem), (
void *) &
u));
180 CL_CHECK(clSetKernelArg(kernel, 10,
sizeof(cl_mem), (
void *) &
v));
181 CL_CHECK(clSetKernelArg(kernel, 11,
sizeof(cl_mem), (
void *) &
w));
182 CL_CHECK(clSetKernelArg(kernel, 12,
sizeof(cl_mem), (
void *) &B));
/*
 * Arguments 13 and 15-19: by-value arguments, handed over without
 * address-of. Argument index 14 is not visible in this excerpt (presumably
 * dt, set on the omitted line) -- TODO confirm.
 */
183 CL_CHECK(clSetKernelArg(kernel, 13,
sizeof(
real), rho));
185 CL_CHECK(clSetKernelArg(kernel, 15,
sizeof(
real), bd2));
186 CL_CHECK(clSetKernelArg(kernel, 16,
sizeof(
real), bd3));
187 CL_CHECK(clSetKernelArg(kernel, 17,
sizeof(
real), bd4));
188 CL_CHECK(clSetKernelArg(kernel, 18,
sizeof(
int), nbd));
189 CL_CHECK(clSetKernelArg(kernel, 19,
sizeof(
int), n));
/*
 * Work sizes: nb is *n divided by 256, rounded up; the global size is nb
 * groups of 256 items and the local size is 256.
 * NOTE(review): 256 * nb is multiplied as int before the conversion to
 * size_t.
 */
191 const int nb = ((*n) + 256 - 1) / 256;
192 const size_t global_item_size = 256 * nb;
193 const size_t local_item_size = 256;
196 NULL, &global_item_size, &local_item_size,
/*
 * Excerpt: kernel creation, argument binding and work-size setup. Judging by
 * the kernel name and the argument names this appears to belong to
 * scalar_rhs_maker_bdf_opencl -- confirm against the full file. The
 * signature and the start of the enqueue call are not visible in this
 * excerpt.
 */
/* Look up "scalar_makebdf_kernel" in rhs_maker_program; status goes to err. */
210 cl_kernel kernel = clCreateKernel(
rhs_maker_program,
"scalar_makebdf_kernel", &err);
/*
 * Arguments 0-4: each one passes sizeof(cl_mem) bytes read from the address
 * of the named variable.
 * NOTE(review): this assumes s_lag, s_laglag, fs, s and B carry cl_mem
 * handles -- confirm with the callers.
 */
213 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &s_lag));
214 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &s_laglag));
215 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &fs));
216 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &s));
217 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &B));
/*
 * Arguments 10-11: by-value int arguments, handed over without address-of.
 * Argument indices 5-9 are not visible in this excerpt (presumably the real
 * scalars rho, dt, bd2, bd3, bd4) -- TODO confirm.
 */
223 CL_CHECK(clSetKernelArg(kernel, 10,
sizeof(
int), nbd));
224 CL_CHECK(clSetKernelArg(kernel, 11,
sizeof(
int), n));
/*
 * Work sizes: nb is *n divided by 256, rounded up; the global size is nb
 * groups of 256 items and the local size is 256.
 * NOTE(review): 256 * nb is multiplied as int before the conversion to
 * size_t.
 */
226 const int nb = ((*n) + 256 - 1) / 256;
227 const size_t global_item_size = 256 * nb;
228 const size_t local_item_size = 256;
231 NULL, &global_item_size, &local_item_size,
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
void opencl_kernel_jit(const char *kernel, cl_program *program)
void scalar_rhs_maker_bdf_opencl(void *s_lag, void *s_laglag, void *fs, void *s, void *B, real *rho, real *dt, real *bd2, real *bd3, real *bd4, int *nbd, int *n)
void rhs_maker_sumab_opencl(void *u, void *v, void *w, void *uu, void *vv, void *ww, void *ulag1, void *ulag2, void *vlag1, void *vlag2, void *wlag1, void *wlag2, real *ext1, real *ext2, real *ext3, int *nab, int *n)
void rhs_maker_ext_opencl(void *abx1, void *aby1, void *abz1, void *abx2, void *aby2, void *abz2, void *bfx, void *bfy, void *bfz, real *rho, real *ext1, real *ext2, real *ext3, int *n)
void scalar_rhs_maker_ext_opencl(void *fs_lag, void *fs_laglag, void *fs, real *rho, real *ext1, real *ext2, real *ext3, int *n)
void rhs_maker_bdf_opencl(void *ulag1, void *ulag2, void *vlag1, void *vlag2, void *wlag1, void *wlag2, void *bfx, void *bfy, void *bfz, void *u, void *v, void *w, void *B, real *rho, real *dt, real *bd2, real *bd3, real *bd4, int *nbd, int *n)