31 #include <OpenCL/cl.h>
42 #include "pnpn_res_kernel.cl.h"
45 void *wa1,
void *wa2,
void *wa3,
46 void *f_u,
void *f_v,
void *f_w,
47 void *B,
void *
h1,
real *mu,
55 "prs_res_part1_kernel", &err);
58 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &ta1));
59 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &ta2));
60 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &ta3));
61 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &wa1));
62 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &wa2));
63 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &wa3));
64 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(cl_mem), (
void *) &f_u));
65 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(cl_mem), (
void *) &f_v));
66 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(cl_mem), (
void *) &f_w));
67 CL_CHECK(clSetKernelArg(kernel, 9,
sizeof(cl_mem), (
void *) &B));
68 CL_CHECK(clSetKernelArg(kernel, 10,
sizeof(cl_mem), (
void *) &
h1));
71 CL_CHECK(clSetKernelArg(kernel, 13,
sizeof(
int), n));
73 const int nb = ((*n) + 256 - 1) / 256;
74 const size_t global_item_size = 256 * nb;
75 const size_t local_item_size = 256;
78 NULL, &global_item_size, &local_item_size,
90 "prs_res_part2_kernel", &err);
93 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &p_res));
94 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &wa1));
95 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &wa2));
96 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &wa3));
97 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(
int), n));
99 const int nb = ((*n) + 256 - 1) / 256;
100 const size_t global_item_size = 256 * nb;
101 const size_t local_item_size = 256;
104 NULL, &global_item_size, &local_item_size,
109 void *ta3,
real *dtbd,
int *n) {
116 "prs_res_part3_kernel", &err);
119 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &p_res));
120 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &ta1));
121 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &ta2));
122 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &ta3));
123 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(
real), dtbd));
124 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(
int), n));
126 const int nb = ((*n) + 256 - 1) / 256;
127 const size_t global_item_size = 256 * nb;
128 const size_t local_item_size = 256;
131 NULL, &global_item_size, &local_item_size,
136 void *ta1,
void *ta2,
void *ta3,
137 void *f_u,
void *f_v,
void *f_w,
int *n) {
144 "vel_res_update_kernel", &err);
147 CL_CHECK(clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void *) &u_res));
148 CL_CHECK(clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void *) &v_res));
149 CL_CHECK(clSetKernelArg(kernel, 2,
sizeof(cl_mem), (
void *) &w_res));
150 CL_CHECK(clSetKernelArg(kernel, 3,
sizeof(cl_mem), (
void *) &ta1));
151 CL_CHECK(clSetKernelArg(kernel, 4,
sizeof(cl_mem), (
void *) &ta2));
152 CL_CHECK(clSetKernelArg(kernel, 5,
sizeof(cl_mem), (
void *) &ta3));
153 CL_CHECK(clSetKernelArg(kernel, 6,
sizeof(cl_mem), (
void *) &f_u));
154 CL_CHECK(clSetKernelArg(kernel, 7,
sizeof(cl_mem), (
void *) &f_v));
155 CL_CHECK(clSetKernelArg(kernel, 8,
sizeof(cl_mem), (
void *) &f_w));
156 CL_CHECK(clSetKernelArg(kernel, 9,
sizeof(
int), n));
158 const int nb = ((*n) + 256 - 1) / 256;
159 const size_t global_item_size = 256 * nb;
160 const size_t local_item_size = 256;
163 NULL, &global_item_size, &local_item_size,
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ h1
void opencl_kernel_jit(const char *kernel, cl_program *program)
void pnpn_prs_res_part3_opencl(void *p_res, void *ta1, void *ta2, void *ta3, real *dtbd, int *n)
void pnpn_prs_res_part1_opencl(void *ta1, void *ta2, void *ta3, void *wa1, void *wa2, void *wa3, void *f_u, void *f_v, void *f_w, void *B, void *h1, real *mu, real *rho, int *n)
void pnpn_prs_res_part2_opencl(void *p_res, void *wa1, void *wa2, void *wa3, int *n)
void pnpn_vel_res_update_opencl(void *u_res, void *v_res, void *w_res, void *ta1, void *ta2, void *ta3, void *f_u, void *f_v, void *f_w, int *n)