1#ifndef __MATH_AX_HELM_KERNEL_H__
2#define __MATH_AX_HELM_KERNEL_H__
41template<
typename T, const
int LX, const
int CHUNKS >
115 for (
int l = 0; l<
LX; l++){
137 for (
int l = 0; l<
LX; l++){
148template<
typename T, const
int LX >
185 for(
int k = 0;
k <
LX; ++
k){
193 for (
int k = 0;
k <
LX; ++
k){
204 for (
int l = 0; l <
LX; l++){
212 for (
int l = 0; l <
LX; l++){
233 for (
int l = 0; l <
LX; l++){
241 for (
int k = 0;
k <
LX; ++
k){
251template<
typename T, const
int LX >
289 for(
int k = 0;
k <
LX; ++
k){
297 for (
int k = 0;
k <
LX; ++
k){
308 for (
int l = 0; l <
LX; l++){
316 for (
int l = 0; l <
LX; l++){
337 for (
int l = 0; l <
LX; l++){
345 for (
int k = 0;
k <
LX; ++
k){
354template<
typename T, const
int LX >
411 for(
int k = 0;
k <
LX; ++
k){
425 for (
int k = 0;
k <
LX; ++
k){
440 for (
int l = 0; l <
LX; l++){
456 for (
int l = 0; l <
LX; l++){
512 for (
int l = 0; l <
LX; l++){
530 for (
int k = 0;
k <
LX; ++
k){
537template<
typename T, const
int LX >
595 for(
int k = 0;
k <
LX; ++
k){
609 for (
int k = 0;
k <
LX; ++
k){
624 for (
int l = 0; l <
LX; l++){
640 for (
int l = 0; l <
LX; l++){
696 for (
int l = 0; l <
LX; l++){
714 for (
int k = 0;
k <
LX; ++
k){
721template<
typename T >
735 for (
int i = idx;
i < n;
i +=
str) {
__shared__ T shdy[LX *LX]
__global__ void T *__restrict__ T *__restrict__ aw
__shared__ T shdz[LX *LX]
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__shared__ T shvs[LX *LX]
__shared__ T shws[LX *LX]
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ av
__shared__ T shwr[LX *LX]
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__shared__ T shus[LX *LX]
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ h1
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__shared__ T shvr[LX *LX]
__shared__ T shur[LX *LX]
__global__ void ax_helm_kernel_vector_kstep(T *__restrict__ au, T *__restrict__ av, T *__restrict__ aw, const T *__restrict__ u, const T *__restrict__ v, const T *__restrict__ w, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz, const T *__restrict__ h1, const T *__restrict__ g11, const T *__restrict__ g22, const T *__restrict__ g33, const T *__restrict__ g12, const T *__restrict__ g13, const T *__restrict__ g23)
__global__ void ax_helm_kernel_1d(T *__restrict__ w, const T *__restrict__ u, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz, const T *__restrict__ dxt, const T *__restrict__ dyt, const T *__restrict__ dzt, const T *__restrict__ h1, const T *__restrict__ g11, const T *__restrict__ g22, const T *__restrict__ g33, const T *__restrict__ g12, const T *__restrict__ g13, const T *__restrict__ g23)
__global__ void ax_helm_kernel_kstep(T *__restrict__ w, const T *__restrict__ u, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz, const T *__restrict__ h1, const T *__restrict__ g11, const T *__restrict__ g22, const T *__restrict__ g33, const T *__restrict__ g12, const T *__restrict__ g13, const T *__restrict__ g23)
__global__ void ax_helm_kernel_kstep_padded(T *__restrict__ w, const T *__restrict__ u, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz, const T *__restrict__ h1, const T *__restrict__ g11, const T *__restrict__ g22, const T *__restrict__ g33, const T *__restrict__ g12, const T *__restrict__ g13, const T *__restrict__ g23)
__global__ void ax_helm_kernel_vector_kstep_padded(T *__restrict__ au, T *__restrict__ av, T *__restrict__ aw, const T *__restrict__ u, const T *__restrict__ v, const T *__restrict__ w, const T *__restrict__ dx, const T *__restrict__ dy, const T *__restrict__ dz, const T *__restrict__ h1, const T *__restrict__ g11, const T *__restrict__ g22, const T *__restrict__ g33, const T *__restrict__ g12, const T *__restrict__ g13, const T *__restrict__ g23)
__global__ void ax_helm_kernel_vector_part2(T *__restrict__ au, T *__restrict__ av, T *__restrict__ aw, const T *__restrict__ u, const T *__restrict__ v, const T *__restrict__ w, const T *__restrict__ h2, const T *__restrict__ B, const int n)
__shared__ T shdyt[LX *LX]
__shared__ T shdzt[LX *LX]
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g23
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g22
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g13
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g12
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g33
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ g11