This graph shows which files directly or indirectly include this file:

Functions
template<typename T >
__global__ void	cmult_kernel (T *__restrict__ a, const T c, const int n)

template<typename T >
__global__ void	masked_gather_copy_kernel (T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int n_mask)

template<typename T >
__global__ void	masked_scatter_copy_kernel (T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int n_mask)

template<typename T >
__global__ void	masked_atomic_reduction_kernel (T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int m)

template<typename T >
__global__ void	masked_copy_kernel (T *__restrict__ a, T *__restrict__ b, int *__restrict__ mask, const int n, const int n_mask)

template<typename T >
__global__ void	cfill_mask_kernel (T *__restrict__ a, const T c, const int size, int *__restrict__ mask, const int mask_size)

template<typename T >
__global__ void	cmult2_kernel (T *__restrict__ a, T *__restrict__ b, const T c, const int n)

template<typename T >
__global__ void	cdiv_kernel (T *__restrict__ a, const T c, const int n)

template<typename T >
__global__ void	cdiv2_kernel (T *__restrict__ a, T *__restrict__ b, const T c, const int n)

template<typename T >
__global__ void	cadd_kernel (T *__restrict__ a, const T c, const int n)

template<typename T >
__global__ void	cadd2_kernel (T *__restrict__ a, T *__restrict__ b, const T c, const int n)

template<typename T >
__global__ void	cfill_kernel (T *__restrict__ a, const T c, const int n)

template<typename T >
__global__ void	add2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)

template<typename T >
__global__ void	add3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)

template<typename T >
__global__ void	add4_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T *__restrict__ d, const int n)

template<typename T >
__global__ void	add2s1_kernel (T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)

template<typename T >
__global__ void	add2s2_many_kernel (T *__restrict__ x, const T **p, const T *alpha, const int p_cur, const int n)

template<typename T >
__global__ void	add2s2_kernel (T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)

template<typename T >
__global__ void	addsqr2s2_kernel (T *__restrict__ a, const T *__restrict__ b, const T c1, const int n)

template<typename T >
__global__ void	add3s2_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T c1, const T c2, const int n)

template<typename T >
__global__ void	add4s3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T *__restrict__ d, const T c1, const T c2, const T c3, const int n)

template<typename T >
__global__ void	add5s4_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T *__restrict__ d, const T *__restrict__ e, const T c1, const T c2, const T c3, const T c4, const int n)

template<typename T >
__global__ void	invcol1_kernel (T *__restrict__ a, const int n)

template<typename T >
__global__ void	invcol2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)

template<typename T >
__global__ void	invcol3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)

template<typename T >
__global__ void	col2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)

template<typename T >
__global__ void	col3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)

template<typename T >
__global__ void	subcol3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)

template<typename T >
__global__ void	sub2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)

template<typename T >
__global__ void	sub3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)

template<typename T >
__global__ void	addcol3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)

template<typename T >
__global__ void	addcol4_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T *__restrict__ d, const int n)

template<typename T >
__global__ void	addcol3s2_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const T s, const int n)

template<typename T >
__global__ void	vdot3_kernel (T *__restrict__ dot, const T *__restrict__ u1, const T *__restrict__ u2, const T *__restrict__ u3, const T *__restrict__ v1, const T *__restrict__ v2, const T *__restrict__ v3, const int n)

template<typename T >
__global__ void	vcross_kernel (T *__restrict__ u1, T *__restrict__ u2, T *__restrict__ u3, const T *__restrict__ v1, const T *__restrict__ v2, const T *__restrict__ v3, const T *__restrict__ w1, const T *__restrict__ w2, const T *__restrict__ w3, const int n)

template<typename T >
__inline__ __device__ T	reduce_warp (T val)

template<typename T >
__global__ void	reduce_kernel (T *bufred, const int n)

template<typename T >
__global__ void	glsc3_reduce_kernel (T *bufred, const int n, const int j)

template<typename T >
__global__ void	glsc3_kernel (const T *a, const T *b, const T *c, T *buf_h, const int n)

template<typename T >
__global__ void	glsc3_many_kernel (const T *a, const T **b, const T *c, T *buf_h, const int j, const int n)

template<typename T >
__global__ void	glsc2_kernel (const T *a, const T *b, T *buf_h, const int n)

template<typename T >
__global__ void	glsubnorm2_kernel (const T *a, const T *b, T *buf_h, const int n)

template<typename T >
__global__ void	glsum_kernel (const T *a, T *buf_h, const int n)

template<typename T >
__global__ void	absval_kernel (T *__restrict__ a, const int n)

template<typename T >
__global__ void	pwmax_vec2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)

template<typename T >
__global__ void	pwmax_vec3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)

template<typename T >
__global__ void	pwmax_sca2_kernel (T *__restrict__ a, const T c, const int n)

template<typename T >
__global__ void	pwmax_sca3_kernel (T *__restrict__ a, const T *__restrict b, const T c, const int n)

template<typename T >
__global__ void	pwmin_vec2_kernel (T *__restrict__ a, const T *__restrict__ b, const int n)

template<typename T >
__global__ void	pwmin_vec3_kernel (T *__restrict__ a, const T *__restrict__ b, const T *__restrict__ c, const int n)

template<typename T >
__global__ void	pwmin_sca2_kernel (T *__restrict__ a, const T c, const int n)

template<typename T >
__global__ void	pwmin_sca3_kernel (T *__restrict__ a, const T *__restrict b, const T c, const int n)

Function Documentation

◆ absval_kernel()

template<typename T >

__global__ void absval_kernel	(	T *__restrict__	a,
		const int	n
	)

Device kernel for abs_value

Definition at line 935 of file math_kernel.h.

Here is the call graph for this function:

◆ add2_kernel()

template<typename T >

__global__ void add2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const int	n
	)

Device kernel for add2

Definition at line 248 of file math_kernel.h.

Here is the call graph for this function:

◆ add2s1_kernel()

template<typename T >

__global__ void add2s1_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T	c1,
		const int	n
	)

Device kernel for add2s1

Definition at line 299 of file math_kernel.h.

Here is the call graph for this function:

◆ add2s2_kernel()

template<typename T >

__global__ void add2s2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T	c1,
		const int	n
	)

Device kernel for add2s2

Definition at line 339 of file math_kernel.h.

Here is the call graph for this function:

◆ add2s2_many_kernel()

template<typename T >

__global__ void add2s2_many_kernel	(	T *__restrict__	x,
		const T **	p,
		const T *	alpha,
		const int	p_cur,
		const int	n
	)

Device kernel for add2s2 many

Definition at line 316 of file math_kernel.h.

Here is the call graph for this function:

◆ add3_kernel()

template<typename T >

__global__ void add3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const int	n
	)

Device kernel for add3

Definition at line 264 of file math_kernel.h.

Here is the call graph for this function:

◆ add3s2_kernel()

template<typename T >

__global__ void add3s2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const T	c1,
		const T	c2,
		const int	n
	)

Device kernel for add3s2

Definition at line 373 of file math_kernel.h.

Here is the call graph for this function:

◆ add4_kernel()

template<typename T >

__global__ void add4_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const T *__restrict__	d,
		const int	n
	)

Device kernel for add4

Definition at line 281 of file math_kernel.h.

Here is the call graph for this function:

◆ add4s3_kernel()

template<typename T >

__global__ void add4s3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const T *__restrict__	d,
		const T	c1,
		const T	c2,
		const T	c3,
		const int	n
	)

Device kernel for add4s3

Definition at line 392 of file math_kernel.h.

Here is the call graph for this function:

◆ add5s4_kernel()

template<typename T >

__global__ void add5s4_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const T *__restrict__	d,
		const T *__restrict__	e,
		const T	c1,
		const T	c2,
		const T	c3,
		const T	c4,
		const int	n
	)

Device kernel for add5s4

Definition at line 413 of file math_kernel.h.

Here is the call graph for this function:

◆ addcol3_kernel()

template<typename T >

__global__ void addcol3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const int	n
	)

Device kernel for addcol3

Definition at line 568 of file math_kernel.h.

Here is the call graph for this function:

◆ addcol3s2_kernel()

template<typename T >

__global__ void addcol3s2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const T	s,
		const int	n
	)

Device kernel for addcol3s2

Definition at line 605 of file math_kernel.h.

Here is the call graph for this function:

◆ addcol4_kernel()

template<typename T >

__global__ void addcol4_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const T *__restrict__	d,
		const int	n
	)

Device kernel for addcol4

Definition at line 586 of file math_kernel.h.

Here is the call graph for this function:

◆ addsqr2s2_kernel()

template<typename T >

__global__ void addsqr2s2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T	c1,
		const int	n
	)

Device kernel for addsqr2s2

Definition at line 356 of file math_kernel.h.

Here is the call graph for this function:

◆ cadd2_kernel()

template<typename T >

__global__ void cadd2_kernel	(	T *__restrict__	a,
		T *__restrict__	b,
		const T	c,
		const int	n
	)

Device kernel for cadd2

Definition at line 215 of file math_kernel.h.

Here is the call graph for this function:

◆ cadd_kernel()

template<typename T >

__global__ void cadd_kernel	(	T *__restrict__	a,
		const T	c,
		const int	n
	)

Device kernel for cadd

Definition at line 199 of file math_kernel.h.

Here is the call graph for this function:

◆ cdiv2_kernel()

template<typename T >

__global__ void cdiv2_kernel	(	T *__restrict__	a,
		T *__restrict__	b,
		const T	c,
		const int	n
	)

Device kernel for cdiv2

Definition at line 182 of file math_kernel.h.

Here is the call graph for this function:

◆ cdiv_kernel()

template<typename T >

__global__ void cdiv_kernel	(	T *__restrict__	a,
		const T	c,
		const int	n
	)

Device kernel for cdiv

Definition at line 166 of file math_kernel.h.

Here is the call graph for this function:

◆ cfill_kernel()

template<typename T >

__global__ void cfill_kernel	(	T *__restrict__	a,
		const T	c,
		const int	n
	)

Device kernel for cfill

Definition at line 232 of file math_kernel.h.

Here is the call graph for this function:

◆ cfill_mask_kernel()

template<typename T >

__global__ void cfill_mask_kernel	(	T *__restrict__	a,
		const T	c,
		const int	size,
		int *__restrict__	mask,
		const int	mask_size
	)

Device kernel for cfill_mask

Definition at line 133 of file math_kernel.h.

Here is the call graph for this function:

◆ cmult2_kernel()

template<typename T >

__global__ void cmult2_kernel	(	T *__restrict__	a,
		T *__restrict__	b,
		const T	c,
		const int	n
	)

Device kernel for cmult2

Definition at line 149 of file math_kernel.h.

Here is the call graph for this function:

◆ cmult_kernel()

template<typename T >

__global__ void cmult_kernel	(	T *__restrict__	a,
		const T	c,
		const int	n
	)

Device kernel for cmult

Definition at line 41 of file math_kernel.h.

Here is the call graph for this function:

◆ col2_kernel()

template<typename T >

__global__ void col2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const int	n
	)

Device kernel for col2

Definition at line 485 of file math_kernel.h.

Here is the call graph for this function:

◆ col3_kernel()

template<typename T >

__global__ void col3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const int	n
	)

Device kernel for col3

Definition at line 501 of file math_kernel.h.

Here is the call graph for this function:

◆ glsc2_kernel()

template<typename T >

__global__ void glsc2_kernel	(	const T *	a,
		const T *	b,
		T *	buf_h,
		const int	n
	)

Device kernel for glsc2

Definition at line 830 of file math_kernel.h.

Here is the call graph for this function:

◆ glsc3_kernel()

template<typename T >

__global__ void glsc3_kernel	(	const T *	a,
		const T *	b,
		const T *	c,
		T *	buf_h,
		const int	n
	)

Device kernel for glsc3

Definition at line 754 of file math_kernel.h.

Here is the call graph for this function:

◆ glsc3_many_kernel()

template<typename T >

__global__ void glsc3_many_kernel	(	const T *	a,
		const T **	b,
		const T *	c,
		T *	buf_h,
		const int	j,
		const int	n
	)

Device kernel for glsc3 many

Definition at line 789 of file math_kernel.h.

Here is the call graph for this function:

◆ glsc3_reduce_kernel()

template<typename T >

__global__ void glsc3_reduce_kernel	(	T *	bufred,
		const int	n,
		const int	j
	)

Reduction kernel for glsc3

Definition at line 719 of file math_kernel.h.

Here is the call graph for this function:

◆ glsubnorm2_kernel()

template<typename T >

__global__ void glsubnorm2_kernel	(	const T *	a,
		const T *	b,
		T *	buf_h,
		const int	n
	)

Device kernel for glsubnorm2

Definition at line 865 of file math_kernel.h.

Here is the call graph for this function:

◆ glsum_kernel()

template<typename T >

__global__ void glsum_kernel	(	const T *	a,
		T *	buf_h,
		const int	n
	)

Device kernel for glsum

Definition at line 900 of file math_kernel.h.

Here is the call graph for this function:

◆ invcol1_kernel()

template<typename T >

__global__ void invcol1_kernel	(	T *__restrict__	a,
		const int	n
	)

Device kernel for invcol1

Definition at line 436 of file math_kernel.h.

Here is the call graph for this function:

◆ invcol2_kernel()

template<typename T >

__global__ void invcol2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const int	n
	)

Device kernel for invcol2

Definition at line 452 of file math_kernel.h.

Here is the call graph for this function:

◆ invcol3_kernel()

template<typename T >

__global__ void invcol3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const int	n
	)

Device kernel for invcol3

Definition at line 468 of file math_kernel.h.

Here is the call graph for this function:

◆ masked_atomic_reduction_kernel()

template<typename T >

__global__ void masked_atomic_reduction_kernel	(	T *__restrict__	a,
		T *__restrict__	b,
		int *__restrict__	mask,
		const int	n,
		const int	m
	)

Device kernel for masked atomic update

Definition at line 95 of file math_kernel.h.

Here is the call graph for this function:

◆ masked_copy_kernel()

template<typename T >

__global__ void masked_copy_kernel	(	T *__restrict__	a,
		T *__restrict__	b,
		int *__restrict__	mask,
		const int	n,
		const int	n_mask
	)

Device kernel for masked copy

Definition at line 115 of file math_kernel.h.

Here is the call graph for this function:

◆ masked_gather_copy_kernel()

template<typename T >

__global__ void masked_gather_copy_kernel	(	T *__restrict__	a,
		T *__restrict__	b,
		int *__restrict__	mask,
		const int	n,
		const int	n_mask
	)

Device kernel for masked gather copy

Definition at line 57 of file math_kernel.h.

Here is the call graph for this function:

◆ masked_scatter_copy_kernel()

template<typename T >

__global__ void masked_scatter_copy_kernel	(	T *__restrict__	a,
		T *__restrict__	b,
		int *__restrict__	mask,
		const int	n,
		const int	n_mask
	)

Device kernel for masked scatter copy

Definition at line 76 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmax_sca2_kernel()

template<typename T >

__global__ void pwmax_sca2_kernel	(	T *__restrict__	a,
		const T	c,
		const int	n
	)

Device kernel for point-wise max of vector and scalar a = max(a, c)

Definition at line 983 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmax_sca3_kernel()

template<typename T >

__global__ void pwmax_sca3_kernel	(	T *__restrict__	a,
		const T *__restrict	b,
		const T	c,
		const int	n
	)

Device kernel for point-wise max of vector and scalar a = max(b, c)

Definition at line 996 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmax_vec2_kernel()

template<typename T >

__global__ void pwmax_vec2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const int	n
	)

Device kernel for point-wise max of two vectors a = max(a, b)

Definition at line 955 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmax_vec3_kernel()

template<typename T >

__global__ void pwmax_vec3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const int	n
	)

Device kernel for point-wise max of two vectors a = max(b, c)

Definition at line 968 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmin_sca2_kernel()

template<typename T >

__global__ void pwmin_sca2_kernel	(	T *__restrict__	a,
		const T	c,
		const int	n
	)

Device kernel for point-wise min of vector and scalar a = min(a, c)

Definition at line 1039 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmin_sca3_kernel()

template<typename T >

__global__ void pwmin_sca3_kernel	(	T *__restrict__	a,
		const T *__restrict	b,
		const T	c,
		const int	n
	)

Device kernel for point-wise min of vector and scalar a = min(b, c)

Definition at line 1052 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmin_vec2_kernel()

template<typename T >

__global__ void pwmin_vec2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const int	n
	)

Device kernel for point-wise min of two vectors a = min(a, b)

Definition at line 1011 of file math_kernel.h.

Here is the call graph for this function:

◆ pwmin_vec3_kernel()

template<typename T >

__global__ void pwmin_vec3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const int	n
	)

Device kernel for point-wise min of two vectors a = min(b, c)

Definition at line 1024 of file math_kernel.h.

Here is the call graph for this function:

◆ reduce_kernel()

template<typename T >

__global__ void reduce_kernel	(	T *	bufred,
		const int	n
	)

Vector reduction kernel

Definition at line 686 of file math_kernel.h.

Here is the call graph for this function:

◆ reduce_warp()

template<typename T >

__inline__ __device__ T reduce_warp ( T val )

Warp shuffle reduction

Definition at line 673 of file math_kernel.h.

Here is the call graph for this function:

◆ sub2_kernel()

template<typename T >

__global__ void sub2_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const int	n
	)

Device kernel for sub2

Definition at line 535 of file math_kernel.h.

Here is the call graph for this function:

◆ sub3_kernel()

template<typename T >

__global__ void sub3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const int	n
	)

Device kernel for sub3

Definition at line 551 of file math_kernel.h.

Here is the call graph for this function:

◆ subcol3_kernel()

template<typename T >

__global__ void subcol3_kernel	(	T *__restrict__	a,
		const T *__restrict__	b,
		const T *__restrict__	c,
		const int	n
	)

Device kernel for subcol3

Definition at line 518 of file math_kernel.h.

Here is the call graph for this function:

◆ vcross_kernel()

template<typename T >

__global__ void vcross_kernel	(	T *__restrict__	u1,
		T *__restrict__	u2,
		T *__restrict__	u3,
		const T *__restrict__	v1,
		const T *__restrict__	v2,
		const T *__restrict__	v3,
		const T *__restrict__	w1,
		const T *__restrict__	w2,
		const T *__restrict__	w3,
		const int	n
	)

Device kernel for vcross

Definition at line 646 of file math_kernel.h.

Here is the call graph for this function:

◆ vdot3_kernel()

template<typename T >

__global__ void vdot3_kernel	(	T *__restrict__	dot,
		const T *__restrict__	u1,
		const T *__restrict__	u2,
		const T *__restrict__	u3,
		const T *__restrict__	v1,
		const T *__restrict__	v2,
		const T *__restrict__	v3,
		const int	n
	)

Device kernel for vdot3

Definition at line 624 of file math_kernel.h.

Here is the call graph for this function:

Functions

Function Documentation

◆ absval_kernel()

◆ add2_kernel()

◆ add2s1_kernel()

◆ add2s2_kernel()

◆ add2s2_many_kernel()

◆ add3_kernel()

◆ add3s2_kernel()

◆ add4_kernel()

◆ add4s3_kernel()

◆ add5s4_kernel()

◆ addcol3_kernel()

◆ addcol3s2_kernel()

◆ addcol4_kernel()

◆ addsqr2s2_kernel()

◆ cadd2_kernel()

◆ cadd_kernel()

◆ cdiv2_kernel()

◆ cdiv_kernel()

◆ cfill_kernel()

◆ cfill_mask_kernel()

◆ cmult2_kernel()

◆ cmult_kernel()

◆ col2_kernel()

◆ col3_kernel()

◆ glsc2_kernel()

◆ glsc3_kernel()

◆ glsc3_many_kernel()

◆ glsc3_reduce_kernel()

◆ glsubnorm2_kernel()

◆ glsum_kernel()

◆ invcol1_kernel()

◆ invcol2_kernel()

◆ invcol3_kernel()

◆ masked_atomic_reduction_kernel()

◆ masked_copy_kernel()

◆ masked_gather_copy_kernel()

◆ masked_scatter_copy_kernel()

◆ pwmax_sca2_kernel()

◆ pwmax_sca3_kernel()

◆ pwmax_vec2_kernel()

◆ pwmax_vec3_kernel()

◆ pwmin_sca2_kernel()

◆ pwmin_sca3_kernel()

◆ pwmin_vec2_kernel()

◆ pwmin_vec3_kernel()

◆ reduce_kernel()

◆ reduce_warp()

◆ sub2_kernel()

◆ sub3_kernel()

◆ subcol3_kernel()

◆ vcross_kernel()

◆ vdot3_kernel()