47#include "tensor_kernel.cl.h"
/*
 * MAX(a, b): expands to the larger of a and b using the > operator
 * (yields b when the two compare equal).
 * Each argument appears twice in the expansion, so an argument with side
 * effects (e.g. i++) may be evaluated more than once.
 */
49#define MAX(a,b) (((a)>(b))?(a):(b))
52 void *
A,
void *
Bt,
void *
Ct,
int *nel) {
54 const int n =
MAX(*nu, *
nv);
66 cl_kernel kernel = clCreateKernel(tensor_program, \
67 STR(tnsr3d_kernel_n##N), &err); \
70 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &v)); \
71 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(int), nv)); \
72 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &u)); \
73 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), nu)); \
74 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &A)); \
75 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &Bt)); \
76 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &Ct)); \
78 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, kernel,\
79 1, NULL, &global_item_size, \
80 &local_item_size, 0, NULL, NULL)); \
102 void *
A,
void *
Bt,
void *
Ct,
int *elements,
105 const int n =
MAX(*nu, *
nv);
117 cl_kernel kernel = clCreateKernel(tensor_program, \
118 STR(tnsr3d_el_kernel_n##N), &err); \
121 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &v)); \
122 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(int), nv)); \
123 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &u)); \
124 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), nu)); \
125 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &A)); \
126 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &Bt)); \
127 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &Ct)); \
128 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &elements)); \
129 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), n_points)); \
131 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, kernel,\
132 1, NULL, &global_item_size, \
133 &local_item_size, 0, NULL, NULL)); \
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
void opencl_kernel_jit(const char *kernel, cl_program *program)
void opencl_tnsr3d(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *nel)
void opencl_tnsr3d_el_list(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *elements, int *n_points)