Neko 0.9.99
A portable framework for high-order spectral element flow simulations
ax_helm.cu File Reference
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "ax_helm_kernel.h"
#include <device/device_config.h>
#include <device/cuda/check.h>
#include <common/neko_log.h>

Macros

#define CASE_1D(LX)
 
#define CASE_KSTEP(LX)
 
#define CASE_KSTEP_PADDED(LX)
 
#define CASE(LX)
 
#define CASE_PADDED(LX)
 
#define CASE_LARGE(LX)
 
#define CASE_LARGE_PADDED(LX)
 
#define CASE_VECTOR_KSTEP(LX)
 
#define CASE_VECTOR_KSTEP_PADDED(LX)
 
#define CASE_VECTOR(LX)
 
#define CASE_VECTOR_PADDED(LX)
 

Functions

template<const int>
int tune (void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
template<const int>
int tune_padded (void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
void cuda_ax_helm (void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
void cuda_ax_helm_vector (void *au, void *av, void *aw, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
 
void cuda_ax_helm_vector_part2 (void *au, void *av, void *aw, void *u, void *v, void *w, void *h2, void *B, int *n)
 

Macro Definition Documentation

◆ CASE

#define CASE(LX)
Value:
case LX: \
  if (autotune[LX] == 0) { \
    autotune[LX] = tune<LX>(w, u, \
                            dx, dy, dz, \
                            dxt, dyt, dzt, h1, \
                            g11, g22, g33, \
                            g12, g13, g23, nelv, lx); \
  } else if (autotune[LX] == 1) { \
    CASE_1D(LX); \
  } else if (autotune[LX] == 2) { \
    CASE_KSTEP(LX); \
  } \
  break
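The CASE family of macros is designed to be expanded inside a switch over *lx: CASE and CASE_PADDED autotune between the two kernel variants, while CASE_LARGE and CASE_LARGE_PADDED always use the k-step kernel. The fragment below is a minimal sketch of that dispatch pattern; the particular orders, the autotune bookkeeping and the error handling are illustrative assumptions, not the exact contents of ax_helm.cu.

/* Sketch of the dispatch over *lx (illustrative only). Each CASE*(LX)
 * macro expands to "case LX: ...; break", so the macros are placed
 * directly inside the switch body. */
static int autotune[17] = { 0 };  /* 0 = untuned, 1 = 1d kernel, 2 = kstep kernel */

switch (*lx) {
  CASE(2);
  CASE(3);
  CASE(4);
  CASE(5);
  CASE_LARGE(6);
  CASE_LARGE(7);
  default:
    fprintf(stderr, "Unsupported element size lx = %d\n", *lx);
    exit(1);
}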

◆ CASE_1D

#define CASE_1D(LX)
Value:
ax_helm_kernel_1d<real, LX, 1024> \
<<<nblcks_1d, nthrds_1d, 0, stream>>>((real *) w, (real *) u, \
(real *) dx, (real *) dy, (real *) dz, \
(real *) dxt, (real *) dyt, (real *) dzt, (real *) h1,\
(real *) g11, (real *) g22, (real *) g33, \
(real *) g12, (real *) g13, (real *) g23); \
CUDA_CHECK(cudaGetLastError());
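CASE_1D (and the other launch macros) reference the launch parameters nblcks_1d, nthrds_1d and stream, which must be defined at the expansion site. A hypothetical setup with illustrative sizes could look like this; the real file defines its own block/thread sizes and uses Neko's device stream.

/* Hypothetical launch configuration for the 1d kernel (illustrative values only). */
const dim3 nthrds_1d(1024, 1, 1);    /* matches the 1024-thread template argument above */
const dim3 nblcks_1d((*nelv), 1, 1); /* one thread block per element */
const cudaStream_t stream = 0;       /* default stream, for this sketch only */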

◆ CASE_KSTEP

#define CASE_KSTEP(LX)
Value:
ax_helm_kernel_kstep<real, LX> \
<<<nblcks_kstep, nthrds_kstep, 0, stream>>>((real *) w, (real *) u, \
(real *) dx, (real *) dy, (real *) dz, (real *) h1, \
(real *) g11, (real *) g22, (real *) g33, \
(real *) g12, (real *) g13, (real *) g23); \
CUDA_CHECK(cudaGetLastError());

◆ CASE_KSTEP_PADDED

#define CASE_KSTEP_PADDED(LX)
Value:
ax_helm_kernel_kstep_padded<real, LX> \
<<<nblcks_kstep, nthrds_kstep, 0, stream>>>((real *) w, (real *) u, \
(real *) dx, (real *) dy, (real *) dz, (real *) h1, \
(real *) g11, (real *) g22, (real *) g33, \
(real *) g12, (real *) g13, (real *) g23); \
CUDA_CHECK(cudaGetLastError());

◆ CASE_LARGE

#define CASE_LARGE(LX)
Value:
case LX: \
CASE_KSTEP(LX); \
break

◆ CASE_LARGE_PADDED

#define CASE_LARGE_PADDED(LX)
Value:
case LX: \
CASE_KSTEP_PADDED(LX); \
break

◆ CASE_PADDED

#define CASE_PADDED(LX)
Value:
case LX: \
  if (autotune[LX] == 0) { \
    autotune[LX] = tune_padded<LX>(w, u, \
                                   dx, dy, dz, \
                                   dxt, dyt, dzt, h1, \
                                   g11, g22, g33, \
                                   g12, g13, g23, nelv, lx); \
  } else if (autotune[LX] == 1) { \
    CASE_1D(LX); \
  } else if (autotune[LX] == 2) { \
    CASE_KSTEP_PADDED(LX); \
  } \
  break

◆ CASE_VECTOR

#define CASE_VECTOR(LX)
Value:
case LX: \
CASE_VECTOR_KSTEP(LX); \
break

◆ CASE_VECTOR_KSTEP

#define CASE_VECTOR_KSTEP(LX)
Value:
ax_helm_kernel_vector_kstep<real, LX> \
<<<nblcks, nthrds, 0, stream>>> ((real *) au, (real *) av, (real *) aw, \
(real *) u, (real *) v, (real *) w, \
(real *) dx, (real *) dy, (real *) dz, \
(real *) h1, (real *) g11, (real *) g22, \
(real *) g33, (real *) g12, (real *) g13, \
(real *) g23); \
CUDA_CHECK(cudaGetLastError());

◆ CASE_VECTOR_KSTEP_PADDED

#define CASE_VECTOR_KSTEP_PADDED(LX)
Value:
ax_helm_kernel_vector_kstep_padded<real, LX> \
<<<nblcks, nthrds, 0, stream>>> ((real *) au, (real *) av, (real *) aw, \
(real *) u, (real *) v, (real *) w, \
(real *) dx, (real *) dy, (real *) dz, \
(real *) h1, (real *) g11, (real *) g22, \
(real *) g33, (real *) g12, (real *) g13, \
(real *) g23); \
CUDA_CHECK(cudaGetLastError());

◆ CASE_VECTOR_PADDED

#define CASE_VECTOR_PADDED(LX)
Value:
case LX: \
CASE_VECTOR_KSTEP_PADDED(LX); \
break

Function Documentation

◆ cuda_ax_helm()

void cuda_ax_helm(void *w, void *u,
                  void *dx, void *dy, void *dz,
                  void *dxt, void *dyt, void *dzt,
                  void *h1,
                  void *g11, void *g22, void *g33,
                  void *g12, void *g13, void *g23,
                  int *nelv, int *lx)

Fortran wrapper for the device (CUDA) Ax kernel.

Definition at line 63 of file ax_helm.cu.
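
All array arguments are opaque device pointers and the scalars nelv and lx are passed by reference, matching the Fortran calling convention. A minimal host-side call from C might look as follows, assuming d_w, d_u, ..., d_g23 are hypothetical buffers of size lx*lx*lx*nelv already resident on the device (names and sizes are illustrative):

/* Minimal usage sketch; d_* are hypothetical device buffers prepared elsewhere. */
int nelv = 512;   /* number of elements (illustrative) */
int lx = 8;       /* points per direction within an element (illustrative) */
cuda_ax_helm(d_w, d_u,
             d_dx, d_dy, d_dz,
             d_dxt, d_dyt, d_dzt,
             d_h1,
             d_g11, d_g22, d_g33,
             d_g12, d_g13, d_g23,
             &nelv, &lx);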


◆ cuda_ax_helm_vector()

void cuda_ax_helm_vector(void *au, void *av, void *aw,
                         void *u, void *v, void *w,
                         void *dx, void *dy, void *dz,
                         void *dxt, void *dyt, void *dzt,
                         void *h1,
                         void *g11, void *g22, void *g33,
                         void *g12, void *g13, void *g23,
                         int *nelv, int *lx)

Fortran wrapper for the device (CUDA) Ax kernel, vector version.

Definition at line 184 of file ax_helm.cu.


◆ cuda_ax_helm_vector_part2()

void cuda_ax_helm_vector_part2(void *au, void *av, void *aw,
                               void *u, void *v, void *w,
                               void *h2, void *B, int *n)

Fortran wrapper for the device (CUDA) Ax kernel, vector version, part 2.

Definition at line 254 of file ax_helm.cu.
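
Judging from the argument list (three output components au/av/aw, three input components u/v/w, a coefficient field h2, the mass matrix B and a point count n), part 2 adds a pointwise mass-matrix contribution to the operator. The kernel below is a sketch of that operation under this assumption; it is not the actual kernel implementation.

/* Assumed operation: au += h2 * B * u (and likewise for av/v, aw/w),
 * applied pointwise over all n values. Illustration only. */
__global__ void ax_helm_part2_sketch(real * __restrict__ au,
                                     real * __restrict__ av,
                                     real * __restrict__ aw,
                                     const real * __restrict__ u,
                                     const real * __restrict__ v,
                                     const real * __restrict__ w,
                                     const real * __restrict__ h2,
                                     const real * __restrict__ B,
                                     const int n) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    au[i] += h2[i] * B[i] * u[i];
    av[i] += h2[i] * B[i] * v[i];
    aw[i] += h2[i] * B[i] * w[i];
  }
}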


◆ tune()

template<const int>
int tune(void *w, void *u,
         void *dx, void *dy, void *dz,
         void *dxt, void *dyt, void *dzt,
         void *h1,
         void *g11, void *g22, void *g33,
         void *g12, void *g13, void *g23,
         int *nelv, int *lx)

Definition at line 270 of file ax_helm.cu.
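
tune (and its padded counterpart) pick between the 1d and k-step kernel variants at runtime; the return values 1 and 2 correspond to the branches taken in CASE once tuning is done. A common way to implement such a choice, shown below as a sketch rather than the actual implementation, is to time one launch of each variant with CUDA events and return the code of the faster one:

/* Illustrative autotuning body (fragment): time both variants once and
 * return 1 for the 1d kernel or 2 for the kstep kernel. */
float t_1d, t_kstep;
cudaEvent_t start, stop;
CUDA_CHECK(cudaEventCreate(&start));
CUDA_CHECK(cudaEventCreate(&stop));

CUDA_CHECK(cudaEventRecord(start, stream));
CASE_1D(LX);      /* launch the 1d variant */
CUDA_CHECK(cudaEventRecord(stop, stream));
CUDA_CHECK(cudaEventSynchronize(stop));
CUDA_CHECK(cudaEventElapsedTime(&t_1d, start, stop));

CUDA_CHECK(cudaEventRecord(start, stream));
CASE_KSTEP(LX);   /* launch the kstep variant */
CUDA_CHECK(cudaEventRecord(stop, stream));
CUDA_CHECK(cudaEventSynchronize(stop));
CUDA_CHECK(cudaEventElapsedTime(&t_kstep, start, stop));

CUDA_CHECK(cudaEventDestroy(start));
CUDA_CHECK(cudaEventDestroy(stop));
return (t_1d < t_kstep) ? 1 : 2;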


◆ tune_padded()

template<const int>
int tune_padded(void *w, void *u,
                void *dx, void *dy, void *dz,
                void *dxt, void *dyt, void *dzt,
                void *h1,
                void *g11, void *g22, void *g33,
                void *g12, void *g13, void *g23,
                int *nelv, int *lx)

Definition at line 348 of file ax_helm.cu.
