Neko  0.8.99
A portable framework for high-order spectral element flow simulations
opr_opgrad.cu File Reference
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "opgrad_kernel.h"
#include <device/device_config.h>
#include <device/cuda/check.h>
#include <common/neko_log.h>
Include dependency graph for opr_opgrad.cu:

Go to the source code of this file.

Macros

#define CASE_1D(LX)
 
#define CASE_KSTEP(LX)
 
#define CASE(LX)
 

Functions

template<const int >
int tune_opgrad (void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
 
void cuda_opgrad (void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
 

Macro Definition Documentation

◆ CASE

#define CASE (   LX)
Value:
case LX: \
if(autotune[LX] == 0 ) { \
autotune[LX]=tune_opgrad<LX>(ux, uy, uz, u, \
dx, dy, dz, \
drdx, dsdx, dtdx, \
drdy, dsdy, dtdy, \
drdz, dsdz, dtdz, \
w3, nel, lx); \
} else if (autotune[LX] == 1 ) { \
CASE_1D(LX); \
} else if (autotune[LX] == 2 ) { \
CASE_KSTEP(LX); \
} \
break
const T *__restrict__ w3
Definition: cdtp_kernel.h:113
const T *__restrict__ drdx
Definition: conv1_kernel.h:139
const T *__restrict__ dtdx
Definition: conv1_kernel.h:141
const T *__restrict__ dtdz
Definition: conv1_kernel.h:147
const T *__restrict__ u
Definition: conv1_kernel.h:132
const T *__restrict__ dx
Definition: conv1_kernel.h:136
const T *__restrict__ dsdx
Definition: conv1_kernel.h:140
const T *__restrict__ dtdy
Definition: conv1_kernel.h:144
const T *__restrict__ dsdz
Definition: conv1_kernel.h:146
const T *__restrict__ dsdy
Definition: conv1_kernel.h:143
const T *__restrict__ drdy
Definition: conv1_kernel.h:142
const T *__restrict__ dz
Definition: conv1_kernel.h:138
const T *__restrict__ drdz
Definition: conv1_kernel.h:145
const T *__restrict__ dy
Definition: conv1_kernel.h:137
T *__restrict__ uy
T *__restrict__ uz

◆ CASE_1D

#define CASE_1D (   LX)
Value:
opgrad_kernel_1d<real, LX, 1024> \
<<<nblcks, nthrds_1d, 0, stream>>> \
((real *) ux, (real *) uy, (real *) uz, (real *) u, \
(real *) dx, (real *) dy, (real *) dz, \
(real *) drdx, (real *) dsdx, (real *) dtdx, \
(real *) drdy, (real *) dsdy, (real *) dtdy, \
(real *) drdz, (real *) dsdz, (real *) dtdz, \
(real *) w3); \
CUDA_CHECK(cudaGetLastError());
double real
Definition: device_config.h:12

◆ CASE_KSTEP

#define CASE_KSTEP (   LX)
Value:
opgrad_kernel_kstep<real, LX> <<<nblcks, nthrds_kstep, 0, stream>>> \
((real *) ux, (real *) uy, (real *) uz, (real *) u, \
(real *) dx, (real *) dy, (real *) dz, \
(real *) drdx, (real *) dsdx, (real *) dtdx, \
(real *) drdy, (real *) dsdy, (real *) dtdy, \
(real *) drdz, (real *) dsdz, (real *) dtdz, \
(real *) w3); \
CUDA_CHECK(cudaGetLastError());

Function Documentation

◆ cuda_opgrad()

void cuda_opgrad ( void *  ux,
void *  uy,
void *  uz,
void *  u,
void *  dx,
void *  dy,
void *  dz,
void *  drdx,
void *  dsdx,
void *  dtdx,
void *  drdy,
void *  dsdy,
void *  dtdy,
void *  drdz,
void *  dsdz,
void *  dtdz,
void *  w3,
int *  nel,
int *  lx 
)

Fortran wrapper for the device CUDA opgrad operator

Definition at line 59 of file opr_opgrad.cu.

Here is the caller graph for this function:

◆ tune_opgrad()

template<const int >
int tune_opgrad ( void *  ux,
void *  uy,
void *  uz,
void *  u,
void *  dx,
void *  dy,
void *  dz,
void *  drdx,
void *  dsdx,
void *  dtdx,
void *  drdy,
void *  dsdy,
void *  dtdy,
void *  drdz,
void *  dsdz,
void *  dtdz,
void *  w3,
int *  nel,
int *  lx 
)

Definition at line 137 of file opr_opgrad.cu.

Here is the call graph for this function: