48#include "math_kernel.cl.h" 
   55                               b, 
a, 0, 0, (*n) * 
sizeof(
real),
 
 
   78  const int nb = ((*n) + 256 - 1) / 256;
 
 
  108  const int nb = ((*n) + 256 - 1) / 256;
 
 
  138  const int nb = ((*n) + 256 - 1) / 256;
 
 
  167  const int nb = ((*mask_size) + 256 - 1) / 256;
 
 
  216  const int nb = ((*n) + 256 - 1) / 256;
 
 
  243  const int nb = ((*n) + 256 - 1) / 256;
 
 
  268  const int nb = ((*n) + 256 - 1) / 256;
 
 
  295  const int nb = ((*n) + 256 - 1) / 256;
 
 
  320  const int nb = ((*n) + 256 - 1) / 256;
 
 
  347  const int nb = ((*n) + 256 - 1) / 256;
 
 
  372  const int nb = ((*n) + 256 - 1) / 256;
 
 
  398  const int nb = ((*n) + 256 - 1) / 256;
 
 
  426  const int nb = ((*n) + 256 - 1) / 256;
 
 
  455  const int nb = ((*n) + 256 - 1) / 256;
 
 
  484  const int nb = ((*n) + 256 - 1) / 256;
 
 
  513  const int nb = ((*n) + 256 - 1) / 256;
 
 
  544  const int nb = ((*n) + 256 - 1) / 256;
 
 
  574  const int nb = ((*n) + 256 - 1) / 256;
 
 
  604  const int nb = ((*n) + 256 - 1) / 256;
 
 
  636  const int nb = ((*n) + 256 - 1) / 256;
 
 
  671  const int nb = ((*n) + 256 - 1) / 256;
 
 
  696  const int nb = ((*n) + 256 - 1) / 256;
 
 
  722  const int nb = ((*n) + 256 - 1) / 256;
 
 
  748  const int nb = ((*n) + 256 - 1) / 256;
 
 
  776  const int nb = ((*n) + 256 - 1) / 256;
 
 
  804  const int nb = ((*n) + 256 - 1) / 256;
 
 
  830  const int nb = ((*n) + 256 - 1) / 256;
 
 
  858  const int nb = ((*n) + 256 - 1) / 256;
 
 
  886  const int nb = ((*n) + 256 - 1) / 256;
 
 
  915  const int nb = ((*n) + 256 - 1) / 256;
 
 
  944  const int nb = ((*n) + 256 - 1) / 256;
 
 
  959                  void *v1, 
void *v2, 
void *v3, 
int *n,
 
  978  const int nb = ((*n) + 256 - 1) / 256;
 
 
  993                   void *v1, 
void *v2, 
void *v3,
 
  994                   void *w1, 
void *w2, 
void *
w3,
 
 1016  const int nb = ((*n) + 256 - 1) / 256;
 
 
 1044  const int nb = ((*n) + 256 - 1) / 256;
 
 1079  for (
i = 0; 
i < 
nb; 
i++) {
 
 
 1104  const int nt = 256 / 
pow2;
 
 1105  const int nb = ((*n) + nt - 1) / nt;
 
 1140  for (
k = 0; 
k < (*j); 
k++) {
 
 1144  for (
i = 0; 
i < 
nb; 
i++) {
 
 1145    for (
k = 0; 
k < (*j); 
k++) {
 
 
 1163  const int nb = ((*n) + 256 - 1) / 256;
 
 1190  for (
i = 0; 
i < 
nb; 
i++) {
 
 
 1212  const int nb = ((*n) + 256 - 1) / 256;
 
 1239  for (
i = 0; 
i < 
nb; 
i++) {
 
 
 1261  const int nb = ((*n) + 256 - 1) / 256;
 
 1287  for (
i = 0; 
i < 
nb; 
i++) {
 
 
 1314  const int nb = ((*n) + 256 - 1) / 256;
 
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
 
__global__ void const T *__restrict__ x
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
 
void opencl_kernel_jit(const char *kernel, cl_program *program)
 
void opencl_iadd(void *a, int *c, int *n, cl_command_queue cmd_queue)
 
void opencl_col3(void *a, void *b, void *c, int *n, cl_command_queue cmd_queue)
 
void opencl_cdiv(void *a, real *c, int *n, cl_command_queue cmd_queue)
 
void opencl_masked_scatter_copy(void *a, void *b, void *mask, int *n, int *m, cl_command_queue cmd_queue)
 
void opencl_vcross(void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, void *w1, void *w2, void *w3, int *n, cl_command_queue cmd_queue)
 
void opencl_masked_gather_copy(void *a, void *b, void *mask, int *n, int *m, cl_command_queue cmd_queue)
 
void opencl_sub2(void *a, void *b, int *n, cl_command_queue cmd_queue)
 
void opencl_col2(void *a, void *b, int *n, cl_command_queue cmd_queue)
 
void opencl_add2s2_many(void *x, void *p, void *alpha, int *j, int *n, cl_command_queue cmd_queue)
 
void opencl_sub3(void *a, void *b, void *c, int *n, cl_command_queue cmd_queue)
 
void opencl_add2s1(void *a, void *b, real *c1, int *n, cl_command_queue cmd_queue)
 
void opencl_addcol3s2(void *a, void *b, void *c, real *s, int *n, cl_command_queue cmd_queue)
 
void opencl_invcol1(void *a, int *n, cl_command_queue cmd_queue)
 
void opencl_add3(void *a, void *b, void *c, int *n, cl_command_queue cmd_queue)
 
real opencl_glsc3(void *a, void *b, void *c, int *n, cl_command_queue cmd_queue)
 
void opencl_rone(void *a, int *n, cl_command_queue cmd_queue)
 
void opencl_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1, real *c2, real *c3, real *c4, int *n, cl_command_queue cmd_queue)
 
void opencl_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2, real *c3, int *n, cl_command_queue cmd_queue)
 
void opencl_cmult(void *a, real *c, int *n, cl_command_queue cmd_queue)
 
void opencl_cfill_mask(void *a, void *c, int *size, void *mask, int *mask_size, cl_command_queue cmd_queue)
 
void opencl_cadd2(void *a, void *b, real *c, int *n, cl_command_queue cmd_queue)
 
real opencl_glsum(void *a, int *n, cl_command_queue cmd_queue)
 
void opencl_masked_copy(void *a, void *b, void *mask, int *n, int *m, cl_command_queue cmd_queue)
 
void opencl_add4(void *a, void *b, void *c, void *d, int *n, cl_command_queue cmd_queue)
 
void opencl_radd(void *a, real *c, int *n, cl_command_queue cmd_queue)
 
void opencl_add2s2(void *a, void *b, real *c1, int *n, cl_command_queue cmd_queue)
 
void opencl_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n, cl_command_queue cmd_queue)
 
void opencl_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n, cl_command_queue cmd_queue)
 
void opencl_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n, cl_command_queue cmd_queue)
 
real opencl_glsubnorm2(void *a, void *b, int *n, cl_command_queue cmd_queue)
 
void opencl_addsqr2s2(void *a, void *b, real *c1, int *n, cl_command_queue cmd_queue)
 
void opencl_addcol3(void *a, void *b, void *c, int *n, cl_command_queue cmd_queue)
 
void opencl_rzero(void *a, int *n, cl_command_queue cmd_queue)
 
void opencl_copy(void *a, void *b, int *n, cl_command_queue cmd_queue)
 
void opencl_subcol3(void *a, void *b, void *c, int *n, cl_command_queue cmd_queue)
 
void opencl_add2(void *a, void *b, int *n, cl_command_queue cmd_queue)
 
void opencl_addcol4(void *a, void *b, void *c, void *d, int *n, cl_command_queue cmd_queue)
 
void opencl_invcol2(void *a, void *b, int *n, cl_command_queue cmd_queue)
 
real opencl_glsc2(void *a, void *b, int *n, cl_command_queue cmd_queue)
 
void opencl_cdiv2(void *a, void *b, real *c, int *n, cl_command_queue cmd_queue)
 
void opencl_cfill(void *a, real *c, int *n, cl_command_queue cmd_queue)
 
void opencl_cmult2(void *a, void *b, real *c, int *n, cl_command_queue cmd_queue)
 
Object for handling masks in Neko.