47#include "tensor_kernel.cl.h"
/*
 * MAX(a, b): expands to the larger of its two operands.
 *
 * Both operands are fully parenthesized, so arbitrary expressions may be
 * passed. Note that the selected operand is evaluated twice; arguments must
 * therefore be free of side effects (e.g. no MAX(i++, j)).
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
52 void *
A,
void *
Bt,
void *
Ct,
int *nel) {
54 const int n =
MAX(*nu, *
nv);
66 cl_kernel kernel = clCreateKernel(tensor_program, \
67 STR(tnsr3d_kernel_n##N), &err); \
70 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &v)); \
71 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(int), nv)); \
72 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &u)); \
73 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), nu)); \
74 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &A)); \
75 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &Bt)); \
76 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &Ct)); \
78 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, kernel,\
79 1, NULL, &global_item_size, \
80 &local_item_size, 0, NULL, NULL)); \
81 CL_CHECK(clReleaseKernel(kernel)); \
103 void *
A,
void *
Bt,
void *
Ct,
int *elements,
106 const int n =
MAX(*nu, *
nv);
118 cl_kernel kernel = clCreateKernel(tensor_program, \
119 STR(tnsr3d_el_kernel_n##N), &err); \
122 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &v)); \
123 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(int), nv)); \
124 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &u)); \
125 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), nu)); \
126 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &A)); \
127 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &Bt)); \
128 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &Ct)); \
129 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &elements)); \
130 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), n_points)); \
132 CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, kernel,\
133 1, NULL, &global_item_size, \
134 &local_item_size, 0, NULL, NULL)); \
135 CL_CHECK(clReleaseKernel(kernel)); \
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
void opencl_kernel_jit(const char *kernel, cl_program *program)
void opencl_tnsr3d(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *nel)
void opencl_tnsr3d_el_list(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *elements, int *n_points)