1#ifndef __MATH_CDTP_KERNEL_H__ 
    2#define __MATH_CDTP_KERNEL_H__ 
   41template< 
typename T, const 
int LX, const 
int CHUNKS >
 
   71  while(l < (
LX * 
LX * 
LX)) {
 
   82  for (
int n = 0; n < 
nchunks; n++) {
 
   86    const int k = 
jk / 
LX;
 
   92      for (
int l = 0; l < 
LX; l++) {
 
 
  103template< 
typename T, const 
int LX >
 
  149  for (
int k = 0; 
k < 
LX; ++
k) {
 
  154    for (
int l = 0; l < 
LX; l++) {
 
  162    for (
int l = 0; l < 
LX; l++) {
 
__shared__ T shdyt[LX *LX]
 
__global__ void const T *__restrict__ const T *__restrict__ dr
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
 
__global__ void const T *__restrict__ x
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
 
__shared__ T shtas[LX *LX]
 
__global__ void __launch_bounds__(LX *LX, 3) cdtp_kernel_kstep(T *__restrict__ dtx
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
 
__shared__ T shdzt[LX *LX]
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
 
__shared__ T shtar[LX *LX]
 
__global__ void cdtp_kernel_1d(T *__restrict__ dtx, const T *__restrict__ x, const T *__restrict__ dr, const T *__restrict__ ds, const T *__restrict__ dt, const T *__restrict__ dxt, const T *__restrict__ dyt, const T *__restrict__ dzt, const T *__restrict__ w3)
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
 
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
 
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)