Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.cu File Reference
#include "math_kernel.h"
#include <device/device_config.h>
#include <device/cuda/check.h>
#include <stdio.h>
#include <stdlib.h>
#include <math/bcknd/device/device_mpi_reduce.h>
#include <math/bcknd/device/device_mpi_op.h>

Go to the source code of this file.

Functions

void cuda_copy (void *a, void *b, int *n, cudaStream_t strm)
 
void cuda_masked_copy (void *a, void *b, void *mask, int *n, int *m, cudaStream_t strm)
 
void cuda_masked_gather_copy (void *a, void *b, void *mask, int *n, int *m, cudaStream_t strm)
 
void cuda_masked_atomic_reduction (void *a, void *b, void *mask, int *n, int *m, cudaStream_t strm)
 
void cuda_masked_scatter_copy (void *a, void *b, void *mask, int *n, int *m, cudaStream_t strm)
 
void cuda_cfill_mask (void *a, real *c, int *size, int *mask, int *mask_size, cudaStream_t strm)
 
void cuda_rzero (void *a, int *n, cudaStream_t strm)
 
void cuda_cmult (void *a, real *c, int *n, cudaStream_t strm)
 
void cuda_cmult2 (void *a, void *b, real *c, int *n, cudaStream_t strm)
 
void cuda_cdiv (void *a, real *c, int *n, cudaStream_t strm)
 
void cuda_cdiv2 (void *a, void *b, real *c, int *n, cudaStream_t strm)
 
void cuda_radd (void *a, real *c, int *n, cudaStream_t strm)
 
void cuda_cadd2 (void *a, void *b, real *c, int *n, cudaStream_t strm)
 
void cuda_cfill (void *a, real *c, int *n, cudaStream_t strm)
 
void cuda_add2 (void *a, void *b, int *n, cudaStream_t strm)
 
void cuda_add3 (void *a, void *b, void *c, int *n, cudaStream_t strm)
 
void cuda_add4 (void *a, void *b, void *c, void *d, int *n, cudaStream_t strm)
 
void cuda_add2s1 (void *a, void *b, real *c1, int *n, cudaStream_t strm)
 
void cuda_add2s2 (void *a, void *b, real *c1, int *n, cudaStream_t strm)
 
void cuda_add2s2_many (void *x, void **p, void *alpha, int *j, int *n, cudaStream_t strm)
 
void cuda_addsqr2s2 (void *a, void *b, real *c1, int *n, cudaStream_t strm)
 
void cuda_add3s2 (void *a, void *b, void *c, real *c1, real *c2, int *n, cudaStream_t strm)
 
void cuda_invcol1 (void *a, int *n, cudaStream_t strm)
 
void cuda_invcol2 (void *a, void *b, int *n, cudaStream_t strm)
 
void cuda_invcol3 (void *a, void *b, void *c, int *n, cudaStream_t strm)
 
void cuda_col2 (void *a, void *b, int *n, cudaStream_t strm)
 
void cuda_col3 (void *a, void *b, void *c, int *n, cudaStream_t strm)
 
void cuda_subcol3 (void *a, void *b, void *c, int *n, cudaStream_t strm)
 
void cuda_sub2 (void *a, void *b, int *n, cudaStream_t strm)
 
void cuda_sub3 (void *a, void *b, void *c, int *n, cudaStream_t strm)
 
void cuda_addcol3 (void *a, void *b, void *c, int *n, cudaStream_t strm)
 
void cuda_addcol4 (void *a, void *b, void *c, void *d, int *n, cudaStream_t strm)
 
void cuda_vdot3 (void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n, cudaStream_t strm)
 
void cuda_vcross (void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, void *w1, void *w2, void *w3, int *n, cudaStream_t strm)
 
void cuda_redbuf_check_alloc (int nb)
 
void cuda_global_reduce_add (real *bufred, void *bufred_d, int n, const cudaStream_t stream)
 
real cuda_vlsc3 (void *u, void *v, void *w, int *n, cudaStream_t stream)
 
real cuda_glsc3 (void *a, void *b, void *c, int *n, cudaStream_t stream)
 
void cuda_glsc3_many (real *h, void *w, void *v, void *mult, int *j, int *n, cudaStream_t stream)
 
real cuda_glsc2 (void *a, void *b, int *n, cudaStream_t stream)
 
real cuda_glsubnorm2 (void *a, void *b, int *n, cudaStream_t stream)
 
real cuda_glsum (void *a, int *n, cudaStream_t stream)
 
void cuda_absval (void *a, int *n, cudaStream_t stream)
 
void cuda_pwmax_vec2 (void *a, void *b, int *n, cudaStream_t stream)
 
void cuda_pwmax_vec3 (void *a, void *b, void *c, int *n, cudaStream_t stream)
 
void cuda_pwmax_sca2 (void *a, real *c, int *n, cudaStream_t stream)
 
void cuda_pwmax_sca3 (void *a, void *b, real *c, int *n, cudaStream_t stream)
 
void cuda_pwmin_vec2 (void *a, void *b, int *n, cudaStream_t stream)
 
void cuda_pwmin_vec3 (void *a, void *b, void *c, int *n, cudaStream_t stream)
 
void cuda_pwmin_sca2 (void *a, real *c, int *n, cudaStream_t stream)
 
void cuda_pwmin_sca3 (void *a, void *b, real *c, int *n, cudaStream_t stream)
 
void cuda_iadd (void *a, int *c, int *n, cudaStream_t stream)
 

Variables

int red_s = 0
 
real * bufred = NULL
 
void * bufred_d = NULL
 

Function Documentation

◆ cuda_absval()

void cuda_absval ( void a,
int n,
cudaStream_t  stream 
)

Fortran wrapper for absval. Take the absolute value of each element of a vector of length n.

Definition at line 790 of file math.cu.

Here is the call graph for this function:

◆ cuda_add2()

void cuda_add2 ( void a,
void b,
int n,
cudaStream_t  strm 
)

Fortran wrapper for add2 Vector addition \( a = a + b \)

Definition at line 245 of file math.cu.

Here is the call graph for this function:

◆ cuda_add2s1()

void cuda_add2s1 ( void a,
void b,
real c1,
int n,
cudaStream_t  strm 
)

Fortran wrapper for add2s1 Vector addition with scalar multiplication \( a = c_1 a + b \) (multiplication on first argument)

Definition at line 290 of file math.cu.

Here is the call graph for this function:

◆ cuda_add2s2()

void cuda_add2s2 ( void a,
void b,
real c1,
int n,
cudaStream_t  strm 
)

Fortran wrapper for add2s2 Vector addition with scalar multiplication \( a = a + c_1 b \) (multiplication on second argument)

Definition at line 306 of file math.cu.

Here is the call graph for this function:

◆ cuda_add2s2_many()

void cuda_add2s2_many ( void x,
void **  p,
void alpha,
int j,
int n,
cudaStream_t  strm 
)

Fortran wrapper for add2s2_many. Vector addition with scalar multiplication \( x = x + c_1 p_1 + c_2 p_2 + \dots + c_j p_j \) (multiplication on second argument)

Definition at line 323 of file math.cu.

Here is the call graph for this function:

◆ cuda_add3()

void cuda_add3 ( void a,
void b,
void c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for add3 Vector addition \( a = b + c \)

Definition at line 260 of file math.cu.

Here is the call graph for this function:

◆ cuda_add3s2()

void cuda_add3s2 ( void a,
void b,
void c,
real c1,
real c2,
int n,
cudaStream_t  strm 
)

Fortran wrapper for add3s2. Vector addition with scalar multiplication \( a = c_1 b + c_2 c \) (multiplication on both arguments)

Definition at line 356 of file math.cu.

Here is the call graph for this function:

◆ cuda_add4()

void cuda_add4 ( void a,
void b,
void c,
void d,
int n,
cudaStream_t  strm 
)

Fortran wrapper for add4 Vector addition \( a = b + c + d \)

Definition at line 274 of file math.cu.

Here is the call graph for this function:

◆ cuda_addcol3()

void cuda_addcol3 ( void a,
void b,
void c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for addcol3 \( a = a + b * c \)

Definition at line 483 of file math.cu.

Here is the call graph for this function:

◆ cuda_addcol4()

void cuda_addcol4 ( void a,
void b,
void c,
void d,
int n,
cudaStream_t  strm 
)

Fortran wrapper for addcol4 \( a = a + b * c * d\)

Definition at line 497 of file math.cu.

Here is the call graph for this function:

◆ cuda_addsqr2s2()

void cuda_addsqr2s2 ( void a,
void b,
real c1,
int n,
cudaStream_t  strm 
)

Fortran wrapper for addsqr2s2 Vector addition with scalar multiplication \( a = a + c_1 (b * b) \) (multiplication on second argument)

Definition at line 340 of file math.cu.

Here is the call graph for this function:

◆ cuda_cadd2()

void cuda_cadd2 ( void a,
void b,
real c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for cadd2 Add a scalar to vector \( a_i = b_i + c \)

Definition at line 215 of file math.cu.

Here is the call graph for this function:

◆ cuda_cdiv()

void cuda_cdiv ( void a,
real c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for cdiv Division of constant c by array \( a = c / a \)

Definition at line 174 of file math.cu.

Here is the call graph for this function:

◆ cuda_cdiv2()

void cuda_cdiv2 ( void a,
void b,
real c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for cdiv2 Division of constant c by array \( a = c / b \)

Definition at line 187 of file math.cu.

Here is the call graph for this function:

◆ cuda_cfill()

void cuda_cfill ( void a,
real c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for cfill Set all elements to a constant c \( a = c \)

Definition at line 229 of file math.cu.

Here is the call graph for this function:

◆ cuda_cfill_mask()

void cuda_cfill_mask ( void a,
real c,
int size,
int mask,
int mask_size,
cudaStream_t  strm 
)

Fortran wrapper for cfill_mask. Fill the masked entries of a vector with a scalar \( a_i = c \text{ for } i \in mask \)

Definition at line 126 of file math.cu.

Here is the call graph for this function:

◆ cuda_cmult()

void cuda_cmult ( void a,
real c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for cmult Multiplication by constant c \( a = c \cdot a \)

Definition at line 147 of file math.cu.

Here is the call graph for this function:

◆ cuda_cmult2()

void cuda_cmult2 ( void a,
void b,
real c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for cmult2 Multiplication by constant c \( a = c \cdot b \)

Definition at line 160 of file math.cu.

Here is the call graph for this function:

◆ cuda_col2()

void cuda_col2 ( void a,
void b,
int n,
cudaStream_t  strm 
)

Fortran wrapper for col2 Vector multiplication with 2 vectors \( a = a \cdot b \)

Definition at line 413 of file math.cu.

Here is the call graph for this function:

◆ cuda_col3()

void cuda_col3 ( void a,
void b,
void c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for col3 Vector multiplication with 3 vectors \( a = b \cdot c \)

Definition at line 426 of file math.cu.

Here is the call graph for this function:

◆ cuda_copy()

void cuda_copy ( void a,
void b,
int n,
cudaStream_t  strm 
)

Fortran wrapper for copy Copy a vector \( a = b \)

Definition at line 59 of file math.cu.

Here is the call graph for this function:

◆ cuda_global_reduce_add()

void cuda_global_reduce_add ( real bufred,
void bufred_d,
int  n,
const cudaStream_t  stream 
)

Global additive reduction

Definition at line 578 of file math.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_glsc2()

real cuda_glsc2 ( void a,
void b,
int n,
cudaStream_t  stream 
)

Fortran wrapper for glsc2. Inner product \( a^T b \)

Definition at line 705 of file math.cu.

Here is the call graph for this function:

◆ cuda_glsc3()

real cuda_glsc3 ( void a,
void b,
void c,
int n,
cudaStream_t  stream 
)

Fortran wrapper glsc3 Weighted inner product \( a^T b c \)

Definition at line 646 of file math.cu.

Here is the call graph for this function:

◆ cuda_glsc3_many()

void cuda_glsc3_many ( real h,
void w,
void v,
void mult,
int j,
int n,
cudaStream_t  stream 
)

Fortran wrapper for doing a reduction to an array. Weighted inner product \( w^T v(n,1:j) c \)

Definition at line 673 of file math.cu.

Here is the call graph for this function:

◆ cuda_glsubnorm2()

real cuda_glsubnorm2 ( void a,
void b,
int n,
cudaStream_t  stream 
)

Fortran wrapper glsubnorm Squared Norm of difference \( \| a - b \|_2^2 \)

Definition at line 734 of file math.cu.

Here is the call graph for this function:

◆ cuda_glsum()

real cuda_glsum ( void a,
int n,
cudaStream_t  stream 
)

Fortran wrapper glsum Sum a vector of length n

Definition at line 763 of file math.cu.

Here is the call graph for this function:

◆ cuda_iadd()

void cuda_iadd ( void a,
int c,
int n,
cudaStream_t  stream 
)

Fortran wrapper for iadd Add a scalar to vector \( a_i = a_i + c \)

Definition at line 920 of file math.cu.

Here is the call graph for this function:

◆ cuda_invcol1()

void cuda_invcol1 ( void a,
int n,
cudaStream_t  strm 
)

Fortran wrapper for invcol1 Invert a vector \( a = 1 / a \)

Definition at line 372 of file math.cu.

Here is the call graph for this function:

◆ cuda_invcol2()

void cuda_invcol2 ( void a,
void b,
int n,
cudaStream_t  strm 
)

Fortran wrapper for invcol2 Vector division \( a = a / b \)

Definition at line 385 of file math.cu.

Here is the call graph for this function:

◆ cuda_invcol3()

void cuda_invcol3 ( void a,
void b,
void c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for invcol3 Vector division \( a = b / c \)

Definition at line 399 of file math.cu.

Here is the call graph for this function:

◆ cuda_masked_atomic_reduction()

void cuda_masked_atomic_reduction ( void a,
void b,
void mask,
int n,
int m,
cudaStream_t  strm 
)

Fortran wrapper for masked atomic reduction. Update a vector \( a += b(mask) \), where the entries of mask are not unique.

Definition at line 97 of file math.cu.

Here is the call graph for this function:

◆ cuda_masked_copy()

void cuda_masked_copy ( void a,
void b,
void mask,
int n,
int m,
cudaStream_t  strm 
)

Fortran wrapper for masked copy Copy a vector \( a(mask) = b(mask) \)

Definition at line 67 of file math.cu.

Here is the call graph for this function:

◆ cuda_masked_gather_copy()

void cuda_masked_gather_copy ( void a,
void b,
void mask,
int n,
int m,
cudaStream_t  strm 
)

Fortran wrapper for masked gather copy Copy a vector \( a(i) = b(mask(i)) \)

Definition at line 82 of file math.cu.

Here is the call graph for this function:

◆ cuda_masked_scatter_copy()

void cuda_masked_scatter_copy ( void a,
void b,
void mask,
int n,
int m,
cudaStream_t  strm 
)

Fortran wrapper for masked scatter copy Copy a vector \( a(mask(i)) = b(i) \)

Definition at line 111 of file math.cu.

Here is the call graph for this function:

◆ cuda_pwmax_sca2()

void cuda_pwmax_sca2 ( void a,
real c,
int n,
cudaStream_t  stream 
)

Fortran wrapper for pwmax_sca2

Compute the maximum of vector and scalar \( a = \max(a, c) \)

Definition at line 836 of file math.cu.

Here is the call graph for this function:

◆ cuda_pwmax_sca3()

void cuda_pwmax_sca3 ( void a,
void b,
real c,
int n,
cudaStream_t  stream 
)

Fortran wrapper for pwmax_sca3

Compute the maximum of vector and scalar \( a = \max(b, c) \)

Definition at line 850 of file math.cu.

Here is the call graph for this function:

◆ cuda_pwmax_vec2()

void cuda_pwmax_vec2 ( void a,
void b,
int n,
cudaStream_t  stream 
)

Fortran wrapper for pwmax_vec2

Compute the maximum of two vectors \( a = \max(a, b) \)

Definition at line 808 of file math.cu.

Here is the call graph for this function:

◆ cuda_pwmax_vec3()

void cuda_pwmax_vec3 ( void a,
void b,
void c,
int n,
cudaStream_t  stream 
)

Fortran wrapper for pwmax_vec3

Compute the maximum of two vectors \( a = \max(b, c) \)

Definition at line 822 of file math.cu.

Here is the call graph for this function:

◆ cuda_pwmin_sca2()

void cuda_pwmin_sca2 ( void a,
real c,
int n,
cudaStream_t  stream 
)

Fortran wrapper for pwmin_sca2

Compute the minimum of vector and scalar \( a = \min(a, c) \)

Definition at line 892 of file math.cu.

Here is the call graph for this function:

◆ cuda_pwmin_sca3()

void cuda_pwmin_sca3 ( void a,
void b,
real c,
int n,
cudaStream_t  stream 
)

Fortran wrapper for pwmin_sca3

Compute the minimum of vector and scalar \( a = \min(b, c) \)

Definition at line 905 of file math.cu.

Here is the call graph for this function:

◆ cuda_pwmin_vec2()

void cuda_pwmin_vec2 ( void a,
void b,
int n,
cudaStream_t  stream 
)

Fortran wrapper for pwmin_vec2

Compute the minimum of two vectors \( a = \min(a, b) \)

Definition at line 864 of file math.cu.

Here is the call graph for this function:

◆ cuda_pwmin_vec3()

void cuda_pwmin_vec3 ( void a,
void b,
void c,
int n,
cudaStream_t  stream 
)

Fortran wrapper for pwmin_vec3

Compute the minimum of two vectors \( a = \min(b, c) \)

Definition at line 878 of file math.cu.

Here is the call graph for this function:

◆ cuda_radd()

void cuda_radd ( void a,
real c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for radd. Add a scalar to vector \( a_i = a_i + c \)

Definition at line 201 of file math.cu.

Here is the call graph for this function:

◆ cuda_redbuf_check_alloc()

void cuda_redbuf_check_alloc ( int  nb)

Checks and allocates a buffer of size nb*sizeof(real) for reductions

Definition at line 555 of file math.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_rzero()

void cuda_rzero ( void a,
int n,
cudaStream_t  strm 
)

Fortran wrapper for rzero Zero a real vector

Definition at line 140 of file math.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_sub2()

void cuda_sub2 ( void a,
void b,
int n,
cudaStream_t  strm 
)

Fortran wrapper for sub2 Vector subtraction \( a = a - b \)

Definition at line 455 of file math.cu.

Here is the call graph for this function:

◆ cuda_sub3()

void cuda_sub3 ( void a,
void b,
void c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for sub3 Vector subtraction \( a = b - c \)

Definition at line 469 of file math.cu.

Here is the call graph for this function:

◆ cuda_subcol3()

void cuda_subcol3 ( void a,
void b,
void c,
int n,
cudaStream_t  strm 
)

Fortran wrapper for subcol3 Vector multiplication with 3 vectors \( a = a - b \cdot c \)

Definition at line 440 of file math.cu.

Here is the call graph for this function:

◆ cuda_vcross()

void cuda_vcross ( void u1,
void u2,
void u3,
void v1,
void v2,
void v3,
void w1,
void w2,
void w3,
int n,
cudaStream_t  strm 
)

Fortran wrapper for vcross \( u = v \times w \)

Definition at line 529 of file math.cu.

Here is the call graph for this function:

◆ cuda_vdot3()

void cuda_vdot3 ( void dot,
void u1,
void u2,
void u3,
void v1,
void v2,
void v3,
int n,
cudaStream_t  strm 
)

Fortran wrapper for vdot3 \( dot = u \cdot v \)

Definition at line 512 of file math.cu.

Here is the call graph for this function:

◆ cuda_vlsc3()

real cuda_vlsc3 ( void u,
void v,
void w,
int n,
cudaStream_t  stream 
)

Fortran wrapper vlsc3 Compute multiplication sum \( dot = u \cdot v \cdot w \)

Definition at line 618 of file math.cu.

Here is the call graph for this function:

Variable Documentation

◆ bufred

real* bufred = NULL

Definition at line 549 of file math.cu.

◆ bufred_d

void* bufred_d = NULL

Definition at line 550 of file math.cu.

◆ red_s

int red_s = 0

Definition at line 548 of file math.cu.