Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
math_kernel.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

template<typename T >
__global__ void cmult_kernel (T *__restrict__ a, const T c, const int n)
 
template<typename T >
__global__ void masked_red_copy_kernel (T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int m)
 
template<typename T >
__global__ void masked_atomic_reduction_kernel (T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int m)
 
template<typename T >
__global__ void masked_copy_kernel (T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int m)
 
template<typename T >
__global__ void cfill_mask_kernel (T *__restrict__ a, const T c, const int size, int *__restrict__ mask, const int mask_size)
 
template<typename T >
__global__ void cmult2_kernel (T *__restrict__ a, T *__restrict__ b, const T c, const int n)
 
template<typename T >
__global__ void cadd_kernel (T *__restrict__ a, const T c, const int n)
 
template<typename T >
__global__ void cadd2_kernel (T *__restrict__ a, T *__restrict__ b, const T c, const int n)
 
template<typename T >
__global__ void cfill_kernel (T *__restrict__ a, const T c, const int n)
 
template<typename T >
__global__ void add2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)
 
template<typename T >
__global__ void add3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
 
template<typename T >
__global__ void add4_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T *__restrict__ d, const int n)
 
template<typename T >
__global__ void add2s1_kernel (T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
 
template<typename T >
__global__ void add2s2_many_kernel (T *__restrict__ x, const T **p, const T *alpha, const int p_cur, const int n)
 
template<typename T >
__global__ void add2s2_kernel (T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
 
template<typename T >
__global__ void addsqr2s2_kernel (T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)
 
template<typename T >
__global__ void add3s2_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T c1, const T c2, const int n)
 
template<typename T >
__global__ void invcol1_kernel (T *__restrict__ a, const int n)
 
template<typename T >
__global__ void invcol2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)
 
template<typename T >
__global__ void col2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)
 
template<typename T >
__global__ void col3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
 
template<typename T >
__global__ void subcol3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
 
template<typename T >
__global__ void sub2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)
 
template<typename T >
__global__ void sub3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
 
template<typename T >
__global__ void addcol3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
 
template<typename T >
__global__ void addcol4_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T *__restrict__ d, const int n)
 
template<typename T >
__global__ void vdot3_kernel (T *__restrict__ dot, const T *__restrict__ u1, const T *__restrict__ u2, const T *__restrict__ u3, const T *__restrict__ v1, const T *__restrict__ v2, const T *__restrict__ v3, const int n)
 
template<typename T >
__global__ void vcross_kernel (T *__restrict__ u1, T *__restrict__ u2, T *__restrict__ u3, const T *__restrict__ v1, const T *__restrict__ v2, const T *__restrict__ v3, const T *__restrict__ w1, const T *__restrict__ w2, const T *__restrict__ w3, const int n)
 
template<typename T >
__inline__ __device__ T reduce_warp (T val)
 
template<typename T >
__global__ void reduce_kernel (T *bufred, const int n)
 
template<typename T >
__global__ void glsc3_reduce_kernel (T *bufred, const int n, const int j)
 
template<typename T >
__global__ void glsc3_kernel (const T *a, const T *b, const T *c, T *buf_h, const int n)
 
template<typename T >
__global__ void glsc3_many_kernel (const T *a, const T **b, const T *c, T *buf_h, const int j, const int n)
 
template<typename T >
__global__ void glsc2_kernel (const T *a, const T *b, T *buf_h, const int n)
 
template<typename T >
__global__ void glsum_kernel (const T *a, T *buf_h, const int n)
 
template<typename T >
__global__ void absval_kernel (T *__restrict__ a, const int n)
 
template<typename T >
__global__ void pwmax_vec2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)
 
template<typename T >
__global__ void pwmax_vec3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
 
template<typename T >
__global__ void pwmax_sca2_kernel (T *__restrict__ a, const T c, const int n)
 
template<typename T >
__global__ void pwmax_sca3_kernel (T *__restrict__ a, const T *__restrict b, const T c, const int n)
 
template<typename T >
__global__ void pwmin_vec2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)
 
template<typename T >
__global__ void pwmin_vec3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)
 
template<typename T >
__global__ void pwmin_sca2_kernel (T *__restrict__ a, const T c, const int n)
 
template<typename T >
__global__ void pwmin_sca3_kernel (T *__restrict__ a, const T *__restrict b, const T c, const int n)
 

Function Documentation

◆ absval_kernel()

template<typename T >
__global__ void absval_kernel ( T *__restrict__  a,
const int  n 
)

Device kernel for abs_value

Definition at line 768 of file math_kernel.h.

Here is the call graph for this function:

◆ add2_kernel()

template<typename T >
__global__ void add2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const int  n 
)

Device kernel for add2

Definition at line 196 of file math_kernel.h.

Here is the call graph for this function:

◆ add2s1_kernel()

template<typename T >
__global__ void add2s1_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T  c1,
const int  n 
)

Device kernel for add2s1

Definition at line 247 of file math_kernel.h.

Here is the call graph for this function:

◆ add2s2_kernel()

template<typename T >
__global__ void add2s2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T  c1,
const int  n 
)

Device kernel for add2s2

Definition at line 287 of file math_kernel.h.

Here is the call graph for this function:

◆ add2s2_many_kernel()

template<typename T >
__global__ void add2s2_many_kernel ( T *__restrict__  x,
const T **  p,
const T *  alpha,
const int  p_cur,
const int  n 
)

Device kernel for add2s2 many

Definition at line 264 of file math_kernel.h.

Here is the call graph for this function:

◆ add3_kernel()

template<typename T >
__global__ void add3_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const int  n 
)

Device kernel for add3

Definition at line 212 of file math_kernel.h.

Here is the call graph for this function:

◆ add3s2_kernel()

template<typename T >
__global__ void add3s2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const T  c1,
const T  c2,
const int  n 
)

Device kernel for add3s2

Definition at line 321 of file math_kernel.h.

Here is the call graph for this function:

◆ add4_kernel()

template<typename T >
__global__ void add4_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const T *__restrict__  d,
const int  n 
)

Device kernel for add4

Definition at line 229 of file math_kernel.h.

Here is the call graph for this function:

◆ addcol3_kernel()

template<typename T >
__global__ void addcol3_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const int  n 
)

Device kernel for addcol3

Definition at line 455 of file math_kernel.h.

Here is the call graph for this function:

◆ addcol4_kernel()

template<typename T >
__global__ void addcol4_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const T *__restrict__  d,
const int  n 
)

Device kernel for addcol4

Definition at line 473 of file math_kernel.h.

Here is the call graph for this function:

◆ addsqr2s2_kernel()

template<typename T >
__global__ void addsqr2s2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T  c1,
const int  n 
)

Device kernel for addsqr2s2

Definition at line 304 of file math_kernel.h.

Here is the call graph for this function:

◆ cadd2_kernel()

template<typename T >
__global__ void cadd2_kernel ( T *__restrict__  a,
T *__restrict__  b,
const T  c,
const int  n 
)

Device kernel for cadd2

Definition at line 163 of file math_kernel.h.

Here is the call graph for this function:

◆ cadd_kernel()

template<typename T >
__global__ void cadd_kernel ( T *__restrict__  a,
const T  c,
const int  n 
)

Device kernel for cadd

Definition at line 147 of file math_kernel.h.

Here is the call graph for this function:

◆ cfill_kernel()

template<typename T >
__global__ void cfill_kernel ( T *__restrict__  a,
const T  c,
const int  n 
)

Device kernel for cfill

Definition at line 180 of file math_kernel.h.

Here is the call graph for this function:

◆ cfill_mask_kernel()

template<typename T >
__global__ void cfill_mask_kernel ( T *__restrict__  a,
const T  c,
const int  size,
int *__restrict__  mask,
const int  mask_size 
)

Device kernel for cfill_mask

Definition at line 114 of file math_kernel.h.

Here is the call graph for this function:

◆ cmult2_kernel()

template<typename T >
__global__ void cmult2_kernel ( T *__restrict__  a,
T *__restrict__  b,
const T  c,
const int  n 
)

Device kernel for cmult2

Definition at line 130 of file math_kernel.h.

Here is the call graph for this function:

◆ cmult_kernel()

template<typename T >
__global__ void cmult_kernel ( T *__restrict__  a,
const T  c,
const int  n 
)

Device kernel for cmult

Definition at line 41 of file math_kernel.h.

Here is the call graph for this function:

◆ col2_kernel()

template<typename T >
__global__ void col2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const int  n 
)

Device kernel for col2

Definition at line 372 of file math_kernel.h.

Here is the call graph for this function:

◆ col3_kernel()

template<typename T >
__global__ void col3_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const int  n 
)

Device kernel for col3

Definition at line 388 of file math_kernel.h.

Here is the call graph for this function:

◆ glsc2_kernel()

template<typename T >
__global__ void glsc2_kernel ( const T *  a,
const T *  b,
T *  buf_h,
const int  n 
)

Device kernel for glsc2

Definition at line 698 of file math_kernel.h.

Here is the call graph for this function:

◆ glsc3_kernel()

template<typename T >
__global__ void glsc3_kernel ( const T *  a,
const T *  b,
const T *  c,
T *  buf_h,
const int  n 
)

Device kernel for glsc3

Definition at line 622 of file math_kernel.h.

Here is the call graph for this function:

◆ glsc3_many_kernel()

template<typename T >
__global__ void glsc3_many_kernel ( const T *  a,
const T **  b,
const T *  c,
T *  buf_h,
const int  j,
const int  n 
)

Device kernel for glsc3 many

Definition at line 657 of file math_kernel.h.

Here is the call graph for this function:

◆ glsc3_reduce_kernel()

template<typename T >
__global__ void glsc3_reduce_kernel ( T *  bufred,
const int  n,
const int  j 
)

Reduction kernel for glsc3

Definition at line 587 of file math_kernel.h.

Here is the call graph for this function:

◆ glsum_kernel()

template<typename T >
__global__ void glsum_kernel ( const T *  a,
T *  buf_h,
const int  n 
)

Device kernel for glsum

Definition at line 733 of file math_kernel.h.

Here is the call graph for this function:

◆ invcol1_kernel()

template<typename T >
__global__ void invcol1_kernel ( T *__restrict__  a,
const int  n 
)

Device kernel for invcol1

Definition at line 340 of file math_kernel.h.

Here is the call graph for this function:

◆ invcol2_kernel()

template<typename T >
__global__ void invcol2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const int  n 
)

Device kernel for invcol2

Definition at line 356 of file math_kernel.h.

Here is the call graph for this function:

◆ masked_atomic_reduction_kernel()

template<typename T >
__global__ void masked_atomic_reduction_kernel ( T *__restrict__  a,
T *__restrict__  b,
int *__restrict__  mask,
const int  n,
const int  m 
)

Device kernel for masked atomic update

Definition at line 76 of file math_kernel.h.

Here is the call graph for this function:

◆ masked_copy_kernel()

template<typename T >
__global__ void masked_copy_kernel ( T *__restrict__  a,
T *__restrict__  b,
int *__restrict__  mask,
const int  n,
const int  m 
)

Device kernel for masked copy

Definition at line 96 of file math_kernel.h.

Here is the call graph for this function:

◆ masked_red_copy_kernel()

template<typename T >
__global__ void masked_red_copy_kernel ( T *__restrict__  a,
T *__restrict__  b,
int *__restrict__  mask,
const int  n,
const int  m 
)

Device kernel for masked red copy

Definition at line 58 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmax_sca2_kernel()

template<typename T >
__global__ void pwmax_sca2_kernel ( T *__restrict__  a,
const T  c,
const int  n 
)

Device kernel for point-wise max of vector and scalar a = max(a, c)

Definition at line 816 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmax_sca3_kernel()

template<typename T >
__global__ void pwmax_sca3_kernel ( T *__restrict__  a,
const T *__restrict  b,
const T  c,
const int  n 
)

Device kernel for point-wise max of vector and scalar a = max(b, c)

Definition at line 829 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmax_vec2_kernel()

template<typename T >
__global__ void pwmax_vec2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const int  n 
)

Device kernel for point-wise max of two vectors a = max(a, b)

Definition at line 788 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmax_vec3_kernel()

template<typename T >
__global__ void pwmax_vec3_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const int  n 
)

Device kernel for point-wise max of two vectors a = max(b, c)

Definition at line 801 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmin_sca2_kernel()

template<typename T >
__global__ void pwmin_sca2_kernel ( T *__restrict__  a,
const T  c,
const int  n 
)

Device kernel for point-wise min of vector and scalar a = min(a, c)

Definition at line 872 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmin_sca3_kernel()

template<typename T >
__global__ void pwmin_sca3_kernel ( T *__restrict__  a,
const T *__restrict  b,
const T  c,
const int  n 
)

Device kernel for point-wise min of vector and scalar a = min(b, c)

Definition at line 885 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmin_vec2_kernel()

template<typename T >
__global__ void pwmin_vec2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const int  n 
)

Device kernel for point-wise min of two vectors a = min(a, b)

Definition at line 844 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmin_vec3_kernel()

template<typename T >
__global__ void pwmin_vec3_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const int  n 
)

Device kernel for point-wise min of two vectors a = min(b, c)

Definition at line 857 of file math_kernel.h.

Here is the call graph for this function:

◆ reduce_kernel()

template<typename T >
__global__ void reduce_kernel ( T *  bufred,
const int  n 
)

Vector reduction kernel

Definition at line 554 of file math_kernel.h.

Here is the call graph for this function:

◆ reduce_warp()

template<typename T >
__inline__ __device__ T reduce_warp ( T  val)

Warp shuffle reduction

Definition at line 541 of file math_kernel.h.

Here is the call graph for this function:

◆ sub2_kernel()

template<typename T >
__global__ void sub2_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const int  n 
)

Device kernel for sub2

Definition at line 422 of file math_kernel.h.

Here is the call graph for this function:

◆ sub3_kernel()

template<typename T >
__global__ void sub3_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const int  n 
)

Device kernel for sub3

Definition at line 438 of file math_kernel.h.

Here is the call graph for this function:

◆ subcol3_kernel()

template<typename T >
__global__ void subcol3_kernel ( T *__restrict__  a,
const T *__restrict__  b,
const T *__restrict__  c,
const int  n 
)

Device kernel for subcol3

Definition at line 405 of file math_kernel.h.

Here is the call graph for this function:

◆ vcross_kernel()

template<typename T >
__global__ void vcross_kernel ( T *__restrict__  u1,
T *__restrict__  u2,
T *__restrict__  u3,
const T *__restrict__  v1,
const T *__restrict__  v2,
const T *__restrict__  v3,
const T *__restrict__  w1,
const T *__restrict__  w2,
const T *__restrict__  w3,
const int  n 
)

Device kernel for vcross

Definition at line 514 of file math_kernel.h.

Here is the call graph for this function:

◆ vdot3_kernel()

template<typename T >
__global__ void vdot3_kernel ( T *__restrict__  dot,
const T *__restrict__  u1,
const T *__restrict__  u2,
const T *__restrict__  u3,
const T *__restrict__  v1,
const T *__restrict__  v2,
const T *__restrict__  v3,
const int  n 
)

Device kernel for vdot3

Definition at line 492 of file math_kernel.h.

Here is the call graph for this function: