Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Go to the source code of this file.
__global__ void absval_kernel(
    T *__restrict__ a,
    const int n)
Device kernel for abs_value
Definition at line 872 of file math_kernel.h.
__global__ void add2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const int n)
Device kernel for add2
Definition at line 248 of file math_kernel.h.
__global__ void add2s1_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T c1,
    const int n)
Device kernel for add2s1
Definition at line 299 of file math_kernel.h.
__global__ void add2s2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T c1,
    const int n)
Device kernel for add2s2
Definition at line 339 of file math_kernel.h.
__global__ void add2s2_many_kernel(
    T *__restrict__ x,
    const T **p,
    const T *alpha,
    const int p_cur,
    const int n)
Device kernel for add2s2 many
Definition at line 316 of file math_kernel.h.
__global__ void add3_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const int n)
Device kernel for add3
Definition at line 264 of file math_kernel.h.
__global__ void add3s2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const T c1,
    const T c2,
    const int n)
Device kernel for add3s2
Definition at line 373 of file math_kernel.h.
__global__ void add4_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const T *__restrict__ d,
    const int n)
Device kernel for add4
Definition at line 281 of file math_kernel.h.
__global__ void addcol3_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const int n)
Device kernel for addcol3
Definition at line 524 of file math_kernel.h.
__global__ void addcol4_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const T *__restrict__ d,
    const int n)
Device kernel for addcol4
Definition at line 542 of file math_kernel.h.
__global__ void addsqr2s2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T c1,
    const int n)
Device kernel for addsqr2s2
Definition at line 356 of file math_kernel.h.
__global__ void cadd2_kernel(
    T *__restrict__ a,
    T *__restrict__ b,
    const T c,
    const int n)
Device kernel for cadd2
Definition at line 215 of file math_kernel.h.
__global__ void cadd_kernel(
    T *__restrict__ a,
    const T c,
    const int n)
Device kernel for cadd
Definition at line 199 of file math_kernel.h.
__global__ void cdiv2_kernel(
    T *__restrict__ a,
    T *__restrict__ b,
    const T c,
    const int n)
Device kernel for cdiv2
Definition at line 182 of file math_kernel.h.
__global__ void cdiv_kernel(
    T *__restrict__ a,
    const T c,
    const int n)
Device kernel for cdiv
Definition at line 166 of file math_kernel.h.
__global__ void cfill_kernel(
    T *__restrict__ a,
    const T c,
    const int n)
Device kernel for cfill
Definition at line 232 of file math_kernel.h.
__global__ void cfill_mask_kernel(
    T *__restrict__ a,
    const T c,
    const int size,
    int *__restrict__ mask,
    const int mask_size)
Device kernel for cfill_mask
Definition at line 133 of file math_kernel.h.
__global__ void cmult2_kernel(
    T *__restrict__ a,
    T *__restrict__ b,
    const T c,
    const int n)
Device kernel for cmult2
Definition at line 149 of file math_kernel.h.
__global__ void cmult_kernel(
    T *__restrict__ a,
    const T c,
    const int n)
Device kernel for cmult
Definition at line 41 of file math_kernel.h.
__global__ void col2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const int n)
Device kernel for col2
Definition at line 441 of file math_kernel.h.
__global__ void col3_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const int n)
Device kernel for col3
Definition at line 457 of file math_kernel.h.
Device kernel for glsc2
Definition at line 767 of file math_kernel.h.
Device kernel for glsc3
Definition at line 691 of file math_kernel.h.
__global__ void glsc3_many_kernel(
    const T *a,
    const T **b,
    const T *c,
    T *buf_h,
    const int j,
    const int n)
Device kernel for glsc3 many
Definition at line 726 of file math_kernel.h.
Reduction kernel for glsc3
Definition at line 656 of file math_kernel.h.
Device kernel for glsubnorm2
Definition at line 802 of file math_kernel.h.
Device kernel for glsum
Definition at line 837 of file math_kernel.h.
__global__ void invcol1_kernel(
    T *__restrict__ a,
    const int n)
Device kernel for invcol1
Definition at line 392 of file math_kernel.h.
__global__ void invcol2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const int n)
Device kernel for invcol2
Definition at line 408 of file math_kernel.h.
__global__ void invcol3_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const int n)
Device kernel for invcol3
Definition at line 424 of file math_kernel.h.
__global__ void masked_atomic_reduction_kernel(
    T *__restrict__ a,
    T *__restrict__ b,
    int *__restrict__ mask,
    const int n,
    const int m)
Device kernel for masked atomic update
Definition at line 95 of file math_kernel.h.
__global__ void masked_copy_kernel(
    T *__restrict__ a,
    T *__restrict__ b,
    int *__restrict__ mask,
    const int n,
    const int n_mask)
Device kernel for masked copy
Definition at line 115 of file math_kernel.h.
__global__ void masked_gather_copy_kernel(
    T *__restrict__ a,
    T *__restrict__ b,
    int *__restrict__ mask,
    const int n,
    const int n_mask)
Device kernel for masked gather copy
Definition at line 57 of file math_kernel.h.
__global__ void masked_scatter_copy_kernel(
    T *__restrict__ a,
    T *__restrict__ b,
    int *__restrict__ mask,
    const int n,
    const int n_mask)
Device kernel for masked scatter copy
Definition at line 76 of file math_kernel.h.
__global__ void pwmax_sca2_kernel(
    T *__restrict__ a,
    const T c,
    const int n)
Device kernel for point-wise max of vector and scalar a = max(a, c)
Definition at line 920 of file math_kernel.h.
__global__ void pwmax_sca3_kernel(
    T *__restrict__ a,
    const T *__restrict b,
    const T c,
    const int n)
Device kernel for point-wise max of vector and scalar a = max(b, c)
Definition at line 933 of file math_kernel.h.
__global__ void pwmax_vec2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const int n)
Device kernel for point-wise max of two vectors a = max(a, b)
Definition at line 892 of file math_kernel.h.
__global__ void pwmax_vec3_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const int n)
Device kernel for point-wise max of two vectors a = max(b, c)
Definition at line 905 of file math_kernel.h.
__global__ void pwmin_sca2_kernel(
    T *__restrict__ a,
    const T c,
    const int n)
Device kernel for point-wise min of vector and scalar a = min(a, c)
Definition at line 976 of file math_kernel.h.
__global__ void pwmin_sca3_kernel(
    T *__restrict__ a,
    const T *__restrict b,
    const T c,
    const int n)
Device kernel for point-wise min of vector and scalar a = min(b, c)
Definition at line 989 of file math_kernel.h.
__global__ void pwmin_vec2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const int n)
Device kernel for point-wise min of two vectors a = min(a, b)
Definition at line 948 of file math_kernel.h.
__global__ void pwmin_vec3_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const int n)
Device kernel for point-wise min of two vectors a = min(b, c)
Definition at line 961 of file math_kernel.h.
Vector reduction kernel
Definition at line 623 of file math_kernel.h.
__inline__ __device__ T reduce_warp(T val)
Warp shuffle reduction
Definition at line 610 of file math_kernel.h.
__global__ void sub2_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const int n)
Device kernel for sub2
Definition at line 491 of file math_kernel.h.
__global__ void sub3_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const int n)
Device kernel for sub3
Definition at line 507 of file math_kernel.h.
__global__ void subcol3_kernel(
    T *__restrict__ a,
    const T *__restrict__ b,
    const T *__restrict__ c,
    const int n)
Device kernel for subcol3
Definition at line 474 of file math_kernel.h.
__global__ void vcross_kernel(
    T *__restrict__ u1,
    T *__restrict__ u2,
    T *__restrict__ u3,
    const T *__restrict__ v1,
    const T *__restrict__ v2,
    const T *__restrict__ v3,
    const T *__restrict__ w1,
    const T *__restrict__ w2,
    const T *__restrict__ w3,
    const int n)
Device kernel for vcross
Definition at line 583 of file math_kernel.h.
__global__ void vdot3_kernel(
    T *__restrict__ dot,
    const T *__restrict__ u1,
    const T *__restrict__ u2,
    const T *__restrict__ u3,
    const T *__restrict__ v1,
    const T *__restrict__ v2,
    const T *__restrict__ v3,
    const int n)
Device kernel for vdot3
Definition at line 561 of file math_kernel.h.