Neko
0.8.1
A portable framework for high-order spectral element flow simulations
Go to the source code of this file.
Functions
template<typename T> __global__ void cmult_kernel(T *__restrict__ a, const T c, const int n)
template<typename T> __global__ void masked_copy_kernel(T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int m)
template<typename T> __global__ void cmult2_kernel(T *__restrict__ a, T *__restrict__ b, const T c, const int n)
template<typename T> __global__ void cadd_kernel(T *__restrict__ a, const T c, const int n)
template<typename T> __global__ void cfill_kernel(T *__restrict__ a, const T c, const int n)
template<typename T> __global__ void add2_kernel(T *__restrict__ a, const T *__restrict__ b, const int n)
template<typename T> __global__ void add2s1_kernel(T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
template<typename T> __global__ void add2s2_many_kernel(T *__restrict__ x, const T **p, const T *alpha, const int p_cur, const int n)
template<typename T> __global__ void add2s2_kernel(T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
template<typename T> __global__ void addsqr2s2_kernel(T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
template<typename T> __global__ void add3s2_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T c1, const T c2, const int n)
template<typename T> __global__ void invcol1_kernel(T *__restrict__ a, const int n)
template<typename T> __global__ void invcol2_kernel(T *__restrict__ a, const T *__restrict__ b, const int n)
template<typename T> __global__ void col2_kernel(T *__restrict__ a, const T *__restrict__ b, const int n)
template<typename T> __global__ void col3_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
template<typename T> __global__ void subcol3_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
template<typename T> __global__ void sub2_kernel(T *__restrict__ a, const T *__restrict__ b, const int n)
template<typename T> __global__ void sub3_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
template<typename T> __global__ void addcol3_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
template<typename T> __global__ void addcol4_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T *__restrict__ d, const int n)
template<typename T> __global__ void vdot3_kernel(T *__restrict__ dot, const T *__restrict__ u1, const T *__restrict__ u2, const T *__restrict__ u3, const T *__restrict__ v1, const T *__restrict__ v2, const T *__restrict__ v3, const int n)
template<typename T> __inline__ __device__ T reduce_warp(T val)
template<typename T> __global__ void reduce_kernel(T *bufred, const int n)
template<typename T> __global__ void glsc3_reduce_kernel(T *bufred, const int n, const int j)
template<typename T> __global__ void glsc3_kernel(const T *a, const T *b, const T *c, T *buf_h, const int n)
template<typename T> __global__ void glsc3_many_kernel(const T *a, const T **b, const T *c, T *buf_h, const int j, const int n)
template<typename T> __global__ void glsc2_kernel(const T *a, const T *b, T *buf_h, const int n)
template<typename T> __global__ void glsum_kernel(const T *a, T *buf_h, const int n)
template<typename T>
__global__ void add2_kernel(T *__restrict__ a, const T *__restrict__ b, const int n)
Device kernel for add2
Definition at line 125 of file math_kernel.h.
template<typename T>
__global__ void add2s1_kernel(T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
Device kernel for add2s1
Definition at line 141 of file math_kernel.h.
template<typename T>
__global__ void add2s2_kernel(T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
Device kernel for add2s2
Definition at line 181 of file math_kernel.h.
template<typename T>
__global__ void add2s2_many_kernel(T *__restrict__ x, const T **p, const T *alpha, const int p_cur, const int n)
Device kernel for add2s2 many
Definition at line 158 of file math_kernel.h.
template<typename T>
__global__ void add3s2_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T c1, const T c2, const int n)
Device kernel for add3s2
Definition at line 215 of file math_kernel.h.
template<typename T>
__global__ void addcol3_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
Device kernel for addcol3
Definition at line 349 of file math_kernel.h.
template<typename T>
__global__ void addcol4_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T *__restrict__ d, const int n)
Device kernel for addcol4
Definition at line 367 of file math_kernel.h.
template<typename T>
__global__ void addsqr2s2_kernel(T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
Device kernel for addsqr2s2
Definition at line 198 of file math_kernel.h.
template<typename T>
__global__ void cadd_kernel(T *__restrict__ a, const T c, const int n)
Device kernel for cadd
Definition at line 93 of file math_kernel.h.
template<typename T>
__global__ void cfill_kernel(T *__restrict__ a, const T c, const int n)
Device kernel for cfill
Definition at line 109 of file math_kernel.h.
template<typename T>
__global__ void cmult2_kernel(T *__restrict__ a, T *__restrict__ b, const T c, const int n)
Device kernel for cmult2
Definition at line 76 of file math_kernel.h.
template<typename T>
__global__ void cmult_kernel(T *__restrict__ a, const T c, const int n)
Device kernel for cmult
Definition at line 41 of file math_kernel.h.
template<typename T>
__global__ void col2_kernel(T *__restrict__ a, const T *__restrict__ b, const int n)
Device kernel for col2
Definition at line 266 of file math_kernel.h.
template<typename T>
__global__ void col3_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
Device kernel for col3
Definition at line 282 of file math_kernel.h.
template<typename T>
__global__ void glsc2_kernel(const T *a, const T *b, T *buf_h, const int n)
Device kernel for glsc2
Definition at line 564 of file math_kernel.h.
template<typename T>
__global__ void glsc3_kernel(const T *a, const T *b, const T *c, T *buf_h, const int n)
Device kernel for glsc3
Definition at line 488 of file math_kernel.h.
template<typename T>
__global__ void glsc3_many_kernel(const T *a, const T **b, const T *c, T *buf_h, const int j, const int n)
Device kernel for glsc3 many
Definition at line 523 of file math_kernel.h.
template<typename T>
__global__ void glsc3_reduce_kernel(T *bufred, const int n, const int j)
Reduction kernel for glsc3
Definition at line 453 of file math_kernel.h.
template<typename T>
__global__ void glsum_kernel(const T *a, T *buf_h, const int n)
Device kernel for glsum
Definition at line 599 of file math_kernel.h.
template<typename T>
__global__ void invcol1_kernel(T *__restrict__ a, const int n)
Device kernel for invcol1
Definition at line 234 of file math_kernel.h.
template<typename T>
__global__ void invcol2_kernel(T *__restrict__ a, const T *__restrict__ b, const int n)
Device kernel for invcol2
Definition at line 250 of file math_kernel.h.
template<typename T>
__global__ void masked_copy_kernel(T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int m)
Device kernel for masked copy
Definition at line 57 of file math_kernel.h.
template<typename T>
__global__ void reduce_kernel(T *bufred, const int n)
Vector reduction kernel
Definition at line 421 of file math_kernel.h.
template<typename T>
__inline__ __device__ T reduce_warp(T val)
Warp shuffle reduction
Definition at line 408 of file math_kernel.h.
template<typename T>
__global__ void sub2_kernel(T *__restrict__ a, const T *__restrict__ b, const int n)
Device kernel for sub2
Definition at line 316 of file math_kernel.h.
template<typename T>
__global__ void sub3_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
Device kernel for sub3
Definition at line 332 of file math_kernel.h.
template<typename T>
__global__ void subcol3_kernel(T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
Device kernel for subcol3
Definition at line 299 of file math_kernel.h.
template<typename T>
__global__ void vdot3_kernel(T *__restrict__ dot,
                             const T *__restrict__ u1,
                             const T *__restrict__ u2,
                             const T *__restrict__ u3,
                             const T *__restrict__ v1,
                             const T *__restrict__ v2,
                             const T *__restrict__ v3,
                             const int n)
Device kernel for vdot3
Definition at line 386 of file math_kernel.h.