47#include "tensor_kernel.cl.h" 
   /*
    * MAX(a, b): expands to the larger of a and b, compared with `>`;
    * when the two compare equal, b is the result.
    * Both arguments and the whole expansion are parenthesized, so the macro
    * can be embedded in larger expressions. Each argument appears twice in
    * the expansion, so the selected operand is evaluated twice: do not pass
    * expressions with side effects (e.g. MAX(i++, j)).
    */
   49#define MAX(a,b) (((a)>(b))?(a):(b)) 
   52                   void *
A, 
void *
Bt, 
void *
Ct, 
int *nel) {
 
   54  const int n = 
MAX(*nu, *
nv);
 
   66      cl_kernel kernel = clCreateKernel(tensor_program,                        \ 
   67                                        STR(tnsr3d_kernel_n##N), &err);        \ 
   70      CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &v));        \ 
   71      CL_CHECK(clSetKernelArg(kernel, 1, sizeof(int), nv));                    \ 
   72      CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &u));        \ 
   73      CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), nu));                    \ 
   74      CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &A));        \ 
   75      CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &Bt));       \ 
   76      CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &Ct));       \ 
   78      CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, kernel,\ 
   79                                      1, NULL, &global_item_size,              \ 
   80                                      &local_item_size, 0, NULL, NULL));       \ 
 
  102                           void *
A, 
void *
Bt, 
void *
Ct, 
int *elements,
 
  105  const int n = 
MAX(*nu, *
nv);
 
  117      cl_kernel kernel = clCreateKernel(tensor_program,                        \ 
  118                                        STR(tnsr3d_el_kernel_n##N), &err);     \ 
  121      CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &v));        \ 
  122      CL_CHECK(clSetKernelArg(kernel, 1, sizeof(int), nv));                    \ 
  123      CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &u));        \ 
  124      CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), nu));                    \ 
  125      CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &A));        \ 
  126      CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &Bt));       \ 
  127      CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &Ct));       \ 
  128      CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &elements)); \ 
  129      CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), n_points));              \ 
  131      CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, kernel,\ 
  132                                      1, NULL, &global_item_size,              \ 
  133                                      &local_item_size, 0, NULL, NULL));       \ 
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
void opencl_kernel_jit(const char *kernel, cl_program *program)
 
void opencl_tnsr3d(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *nel)
 
void opencl_tnsr3d_el_list(void *v, int *nv, void *u, int *nu, void *A, void *Bt, void *Ct, int *elements, int *n_points)