Neko 1.99.3
A portable framework for high-order spectral element flow simulations

Go to the source code of this file.
| __global__ void absval_kernel | ( | T *__restrict__ | a, |
| const int | n | ||
| ) |
Device kernel for abs_value
Definition at line 1228 of file math_kernel.h.

| __global__ void add2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const int | n | ||
| ) |
Device kernel for add2
Definition at line 382 of file math_kernel.h.

| __global__ void add2s1_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T | c1, | ||
| const int | n | ||
| ) |
Device kernel for add2s1
Definition at line 433 of file math_kernel.h.

| __global__ void add2s2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T | c1, | ||
| const int | n | ||
| ) |
Device kernel for add2s2
Definition at line 473 of file math_kernel.h.

| __global__ void add2s2_many_kernel | ( | T *__restrict__ | x, |
| const T ** | p, | ||
| const T * | alpha, | ||
| const int | p_cur, | ||
| const int | n | ||
| ) |
Device kernel for add2s2 many
Definition at line 450 of file math_kernel.h.

| __global__ void add3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const int | n | ||
| ) |
Device kernel for add3
Definition at line 398 of file math_kernel.h.

| __global__ void add3s2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const T | c1, | ||
| const T | c2, | ||
| const int | n | ||
| ) |
Device kernel for add3s2
Definition at line 507 of file math_kernel.h.

| __global__ void add4_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const T *__restrict__ | d, | ||
| const int | n | ||
| ) |
Device kernel for add4
Definition at line 415 of file math_kernel.h.

| __global__ void add4s3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const T *__restrict__ | d, | ||
| const T | c1, | ||
| const T | c2, | ||
| const T | c3, | ||
| const int | n | ||
| ) |
Device kernel for add4s3
Definition at line 526 of file math_kernel.h.

| __global__ void add5s4_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const T *__restrict__ | d, | ||
| const T *__restrict__ | e, | ||
| const T | c1, | ||
| const T | c2, | ||
| const T | c3, | ||
| const T | c4, | ||
| const int | n | ||
| ) |
Device kernel for add5s4
Definition at line 547 of file math_kernel.h.

| __global__ void addcol3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const int | n | ||
| ) |
Device kernel for addcol3
Definition at line 702 of file math_kernel.h.

| __global__ void addcol3s2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const T | s, | ||
| const int | n | ||
| ) |
Device kernel for addcol3s2
Definition at line 739 of file math_kernel.h.

| __global__ void addcol4_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const T *__restrict__ | d, | ||
| const int | n | ||
| ) |
Device kernel for addcol4
Definition at line 720 of file math_kernel.h.

| __global__ void addsqr2s2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T | c1, | ||
| const int | n | ||
| ) |
Device kernel for addsqr2s2
Definition at line 490 of file math_kernel.h.

| __global__ void cadd2_kernel | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for cadd2
Definition at line 331 of file math_kernel.h.

| __global__ void cadd_kernel | ( | T *__restrict__ | a, |
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for cadd
Definition at line 315 of file math_kernel.h.

| __global__ void cdiv2_kernel | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for cdiv2
Definition at line 298 of file math_kernel.h.

| __global__ void cdiv_kernel | ( | T *__restrict__ | a, |
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for cdiv
Definition at line 282 of file math_kernel.h.

| __global__ void cfill_kernel | ( | T *__restrict__ | a, |
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for cfill
Definition at line 366 of file math_kernel.h.

| __global__ void cfill_mask_kernel | ( | T *__restrict__ | a, |
| const T | c, | ||
| const int | size, | ||
| int *__restrict__ | mask, | ||
| const int | mask_size | ||
| ) |
Device kernel for cfill_mask
Definition at line 249 of file math_kernel.h.

| __global__ void cmult2_kernel | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for cmult2
Definition at line 265 of file math_kernel.h.

| __global__ void cmult_kernel | ( | T *__restrict__ | a, |
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for cmult
Definition at line 41 of file math_kernel.h.

| __global__ void col2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const int | n | ||
| ) |
Device kernel for col2
Definition at line 619 of file math_kernel.h.

| __global__ void col3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const int | n | ||
| ) |
Device kernel for col3
Definition at line 635 of file math_kernel.h.

| __global__ void cwrap_kernel | ( | T *__restrict__ | a, |
| const T | min_val, | ||
| const T | max_val, | ||
| const int | n | ||
| ) |
Device kernel for cwrap
Definition at line 348 of file math_kernel.h.

| __device__ __forceinline__ int face_gather_idx | ( | const int | i, |
| const int | j, | ||
| const int | k, | ||
| const int | l, | ||
| const int | n1, | ||
| const int | n2, | ||
| const int | nf | ||
| ) |
Definition at line 106 of file math_kernel.h.


| __device__ __forceinline__ void face_gather_nonlinear_index | ( | int * | index, |
| const int | idx, | ||
| const int | lx, | ||
| const int | ly, | ||
| const int | lz | ||
| ) |
Definition at line 91 of file math_kernel.h.


| __global__ void face_masked_gather_copy_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const int *__restrict__ | mask, | ||
| const int *__restrict__ | facet, | ||
| const int | n1, | ||
| const int | n2, | ||
| const int | lx, | ||
| const int | ly, | ||
| const int | lz, | ||
| const int | n_mask | ||
| ) |
Device kernel for masked gather copy from a face-local field
Definition at line 115 of file math_kernel.h.

Device kernel for glmax
Definition at line 1156 of file math_kernel.h.

Device kernel for glmin
Definition at line 1192 of file math_kernel.h.

Device kernel for glsc2
Definition at line 1051 of file math_kernel.h.

Device kernel for glsc3
Definition at line 975 of file math_kernel.h.

| __global__ void glsc3_many_kernel | ( | const T * | a, |
| const T ** | b, | ||
| const T * | c, | ||
| T * | buf_h, | ||
| const int | j, | ||
| const int | n | ||
| ) |
Device kernel for glsc3 many
Definition at line 1010 of file math_kernel.h.

Reduction kernel for glsc3
Definition at line 940 of file math_kernel.h.

Device kernel for glsubnorm2
Definition at line 1086 of file math_kernel.h.

Device kernel for glsum
Definition at line 1121 of file math_kernel.h.

| __global__ void invcol1_kernel | ( | T *__restrict__ | a, |
| const int | n | ||
| ) |
Device kernel for invcol1
Definition at line 570 of file math_kernel.h.

| __global__ void invcol2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const int | n | ||
| ) |
Device kernel for invcol2
Definition at line 586 of file math_kernel.h.

| __global__ void invcol3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const int | n | ||
| ) |
Device kernel for invcol3
Definition at line 602 of file math_kernel.h.

| __global__ void masked_atomic_reduction_kernel | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| int *__restrict__ | mask, | ||
| const int | n, | ||
| const int | m | ||
| ) |
Device kernel for masked atomic update
Definition at line 193 of file math_kernel.h.

| __global__ void masked_copy_kernel_0 | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| int *__restrict__ | mask, | ||
| const int | n, | ||
| const int | n_mask | ||
| ) |
Device kernel for masked copy with BC style mask
Definition at line 213 of file math_kernel.h.

| __global__ void masked_copy_kernel_aligned | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| int *__restrict__ | mask, | ||
| const int | n, | ||
| const int | n_mask | ||
| ) |
Device kernel for masked copy with point zone style mask
Definition at line 231 of file math_kernel.h.

| __global__ void masked_gather_copy_aligned_kernel | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| int *__restrict__ | mask, | ||
| const int | n, | ||
| const int | n_mask | ||
| ) |
Device kernel for masked gather copy with aligned mask
Definition at line 76 of file math_kernel.h.

| __global__ void masked_gather_copy_kernel | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| int *__restrict__ | mask, | ||
| const int | n, | ||
| const int | n_mask | ||
| ) |
Device kernel for masked gather copy
Definition at line 57 of file math_kernel.h.

| __global__ void masked_scatter_copy_aligned_kernel | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| int *__restrict__ | mask, | ||
| const int | n, | ||
| const int | n_mask | ||
| ) |
Device kernel for masked scatter copy with aligned mask
Definition at line 174 of file math_kernel.h.

| __global__ void masked_scatter_copy_kernel | ( | T *__restrict__ | a, |
| T *__restrict__ | b, | ||
| int *__restrict__ | mask, | ||
| const int | n, | ||
| const int | n_mask | ||
| ) |
Device kernel for masked scatter copy
Definition at line 156 of file math_kernel.h.

| __global__ void pwmax_sca2_kernel | ( | T *__restrict__ | a, |
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for the point-wise maximum of a vector and a scalar: a = max(a, c)
Definition at line 1276 of file math_kernel.h.

| __global__ void pwmax_sca3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict | b, | ||
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for the point-wise maximum of a vector and a scalar: a = max(b, c)
Definition at line 1289 of file math_kernel.h.

| __global__ void pwmax_vec2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const int | n | ||
| ) |
Device kernel for the point-wise maximum of two vectors: a = max(a, b)
Definition at line 1248 of file math_kernel.h.

| __global__ void pwmax_vec3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const int | n | ||
| ) |
Device kernel for the point-wise maximum of two vectors: a = max(b, c)
Definition at line 1261 of file math_kernel.h.

| __global__ void pwmin_sca2_kernel | ( | T *__restrict__ | a, |
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for the point-wise minimum of a vector and a scalar: a = min(a, c)
Definition at line 1332 of file math_kernel.h.

| __global__ void pwmin_sca3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict | b, | ||
| const T | c, | ||
| const int | n | ||
| ) |
Device kernel for the point-wise minimum of a vector and a scalar: a = min(b, c)
Definition at line 1345 of file math_kernel.h.

| __global__ void pwmin_vec2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const int | n | ||
| ) |
Device kernel for point-wise min of two vectors a = min(a, b)
Definition at line 1304 of file math_kernel.h.

| __global__ void pwmin_vec3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const int | n | ||
| ) |
Device kernel for point-wise min of two vectors a = min(b, c)
Definition at line 1317 of file math_kernel.h.

Vector reduction kernel
Definition at line 846 of file math_kernel.h.

Vector reduction maximisation kernel
Definition at line 877 of file math_kernel.h.

| __inline__ __device__ T reduce_max_warp | ( | T | val | ) |
Warp shuffle reduction for maximisation
Definition at line 820 of file math_kernel.h.

Vector reduction minimisation kernel
Definition at line 908 of file math_kernel.h.

| __inline__ __device__ T reduce_min_warp | ( | T | val | ) |
Warp shuffle reduction for minimisation
Definition at line 833 of file math_kernel.h.

| __inline__ __device__ T reduce_warp | ( | T | val | ) |
Warp shuffle reduction
Definition at line 807 of file math_kernel.h.

| __global__ void sub2_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const int | n | ||
| ) |
Device kernel for sub2
Definition at line 669 of file math_kernel.h.

| __global__ void sub3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const int | n | ||
| ) |
Device kernel for sub3
Definition at line 685 of file math_kernel.h.

| __global__ void subcol3_kernel | ( | T *__restrict__ | a, |
| const T *__restrict__ | b, | ||
| const T *__restrict__ | c, | ||
| const int | n | ||
| ) |
Device kernel for subcol3
Definition at line 652 of file math_kernel.h.

| __global__ void vcross_kernel | ( | T *__restrict__ | u1, |
| T *__restrict__ | u2, | ||
| T *__restrict__ | u3, | ||
| const T *__restrict__ | v1, | ||
| const T *__restrict__ | v2, | ||
| const T *__restrict__ | v3, | ||
| const T *__restrict__ | w1, | ||
| const T *__restrict__ | w2, | ||
| const T *__restrict__ | w3, | ||
| const int | n | ||
| ) |
Device kernel for vcross
Definition at line 780 of file math_kernel.h.

| __global__ void vdot3_kernel | ( | T *__restrict__ | dot, |
| const T *__restrict__ | u1, | ||
| const T *__restrict__ | u2, | ||
| const T *__restrict__ | u3, | ||
| const T *__restrict__ | v1, | ||
| const T *__restrict__ | v2, | ||
| const T *__restrict__ | v3, | ||
| const int | n | ||
| ) |
Device kernel for vdot3
Definition at line 758 of file math_kernel.h.
