#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "dudxyz_kernel.h"
#include <device/device_config.h>
#include <device/cuda/check.h>
#include <common/neko_log.h>
Go to the source code of this file.
|
template<const int > |
int tune_dudxyz (void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)

void cuda_dudxyz (void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
|
◆ CASE
Value: case LX: \
if(autotune[LX] == 0 ) { \
autotune[LX]=tune_dudxyz<LX>(du, u, \
dr, ds, dt, \
dx, dy, dz, \
jacinv, nel, lx); \
} else if (autotune[LX] == 1 ) { \
CASE_1D(LX); \
} else if (autotune[LX] == 2 ) { \
CASE_KSTEP(LX); \
} \
break
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void const T *__restrict__ const T *__restrict__ dr
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
◆ CASE_1D
Value: dudxyz_kernel_1d<real, LX, 1024> \
<<<nblcks, nthrds_1d, 0, stream>>>((real *) du, (real *) u, \
/* ... remaining kernel arguments are truncated in this listing ... */ \
CUDA_CHECK(cudaGetLastError());
◆ CASE_KSTEP
Value: dudxyz_kernel_kstep<real, LX> \
<<<nblcks, nthrds_kstep, 0, stream>>>((real *) du, (real *) u, \
/* ... remaining kernel arguments are truncated in this listing ... */ \
CUDA_CHECK(cudaGetLastError());
◆ CASE_LARGE
Value: case LX: \
CASE_KSTEP(LX); \
break
◆ cuda_dudxyz()
void cuda_dudxyz (void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Fortran wrapper for the device CUDA derivative kernels.
Definition at line 57 of file opr_dudxyz.cu.
◆ tune_dudxyz()
template<const int >
int tune_dudxyz (void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)