48#include "math_kernel.cl.h"
55 b,
a, 0, 0, (*n) *
sizeof(
real),
77 const int nb = ((*n) + 256 - 1) / 256;
105 const int nb = ((*mask_size) + 256 - 1) / 256;
157 const int nb = ((*n) + 256 - 1) / 256;
183 const int nb = ((*n) + 256 - 1) / 256;
208 const int nb = ((*n) + 256 - 1) / 256;
234 const int nb = ((*n) + 256 - 1) / 256;
259 const int nb = ((*n) + 256 - 1) / 256;
285 const int nb = ((*n) + 256 - 1) / 256;
312 const int nb = ((*n) + 256 - 1) / 256;
340 const int nb = ((*n) + 256 - 1) / 256;
368 const int nb = ((*n) + 256 - 1) / 256;
396 const int nb = ((*n) + 256 - 1) / 256;
426 const int nb = ((*n) + 256 - 1) / 256;
455 const int nb = ((*n) + 256 - 1) / 256;
484 const int nb = ((*n) + 256 - 1) / 256;
508 const int nb = ((*n) + 256 - 1) / 256;
534 const int nb = ((*n) + 256 - 1) / 256;
560 const int nb = ((*n) + 256 - 1) / 256;
587 const int nb = ((*n) + 256 - 1) / 256;
614 const int nb = ((*n) + 256 - 1) / 256;
640 const int nb = ((*n) + 256 - 1) / 256;
667 const int nb = ((*n) + 256 - 1) / 256;
694 const int nb = ((*n) + 256 - 1) / 256;
722 const int nb = ((*n) + 256 - 1) / 256;
737 void *v1,
void *v2,
void *v3,
int *n) {
755 const int nb = ((*n) + 256 - 1) / 256;
781 const int nb = ((*n) + 256 - 1) / 256;
816 for (
i = 0;
i <
nb;
i++) {
840 const int nt = 256 /
pow2;
841 const int nb = ((*n) + nt - 1) / nt;
876 for (
k = 0;
k < (*j);
k++) {
880 for (
i = 0;
i <
nb;
i++) {
881 for (
k = 0;
k < (*j);
k++) {
899 const int nb = ((*n) + 256 - 1) / 256;
926 for (
i = 0;
i <
nb;
i++) {
948 const int nb = ((*n) + 256 - 1) / 256;
974 for (
i = 0;
i <
nb;
i++) {
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void const T *__restrict__ x
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
void opencl_kernel_jit(const char *kernel, cl_program *program)
void opencl_add3(void *a, void *b, void *c, int *n)
void opencl_addcol3(void *a, void *b, void *c, int *n)
void opencl_invcol1(void *a, int *n)
void opencl_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
void opencl_addsqr2s2(void *a, void *b, real *c1, int *n)
void opencl_cmult(void *a, real *c, int *n)
void opencl_sub3(void *a, void *b, void *c, int *n)
void opencl_rone(void *a, int *n)
void opencl_cadd(void *a, real *c, int *n)
void opencl_cmult2(void *a, void *b, real *c, int *n)
void opencl_add4(void *a, void *b, void *c, void *d, int *n)
real opencl_glsc3(void *a, void *b, void *c, int *n)
void opencl_add2s2(void *a, void *b, real *c1, int *n)
void opencl_rzero(void *a, int *n)
void opencl_sub2(void *a, void *b, int *n)
void opencl_col2(void *a, void *b, int *n)
void opencl_addcol4(void *a, void *b, void *c, void *d, int *n)
void opencl_col3(void *a, void *b, void *c, int *n)
void opencl_subcol3(void *a, void *b, void *c, int *n)
void opencl_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
void opencl_add2s2_many(void *x, void *p, void *alpha, int *j, int *n)
void opencl_invcol2(void *a, void *b, int *n)
void opencl_cadd2(void *a, void *b, real *c, int *n)
void opencl_add2(void *a, void *b, int *n)
void opencl_masked_copy(void *a, void *b, void *mask, int *n, int *m)
void opencl_cfill_mask(void *a, void *c, int *size, void *mask, int *mask_size)
void opencl_cfill(void *a, real *c, int *n)
void opencl_add2s1(void *a, void *b, real *c1, int *n)
void opencl_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
real opencl_glsc2(void *a, void *b, int *n)
real opencl_glsum(void *a, int *n)
void opencl_copy(void *a, void *b, int *n)