35 #include <hip/hip_runtime.h>
39 template<
typename T, const
int LX >
41 const T * __restrict__
dxt,
42 const T * __restrict__
dyt,
43 const T * __restrict__
dzt,
44 const T * __restrict__ G11,
45 const T * __restrict__ G22,
46 const T * __restrict__ G33,
47 const T * __restrict__ G12,
48 const T * __restrict__ G13,
49 const T * __restrict__ G23,
51 const int idx = threadIdx.x + blockIdx.x * blockDim.x;
52 const int e = idx / (LX*LX*LX);
53 const int ijk = idx -
e*LX*LX*LX;
54 const int jk = ijk / LX;
55 const int i = ijk - jk * LX;
56 const int k = jk / LX;
57 const int j = jk - k * LX;
66 for (
int l = 0; l < LX; l++) {
67 T g = G11[l + LX*
j + LX*LX*k + LX*LX*LX*
e];
72 for (
int l = 0; l < LX; l++) {
73 T g = G22[
i + LX*l + LX*LX*k + LX*LX*LX*
e];
78 for (
int l = 0; l < LX; l++) {
79 T g = G33[
i + LX*
j + LX*LX*l + LX*LX*LX*
e];
85 if (
i == 0 ||
i == LX-1) {
86 d += G12[
i + LX*
j + LX*LX*k + LX*LX*LX*
e] *
dxt[
i + LX*
i] *
dyt[
j + LX*
j];
87 d += G13[
i + LX*
j + LX*LX*k + LX*LX*LX*
e] *
dxt[
i + LX*
i] *
dzt[k + LX*k];
90 if (
j == 0 ||
j == LX-1) {
91 d += G12[
i + LX*
j + LX*LX*k + LX*LX*LX*
e] *
dyt[
j + LX*
j] *
dxt[
i + LX*
i];
92 d += G23[
i + LX*
j + LX*LX*k + LX*LX*LX*
e] *
dyt[
j + LX*
j] *
dzt[k + LX*k];
95 if (k == 0 || k == LX-1) {
96 d += G13[
i + LX*
j + LX*LX*k + LX*LX*LX*
e] *
dzt[k + LX*k] *
dxt[
i + LX*
i];
97 d += G23[
i + LX*
j + LX*LX*k + LX*LX*LX*
e] *
dzt[k + LX*k] *
dyt[
j + LX*
j];
108 void *G11,
void *G22,
void *G33,
109 void *G12,
void *G13,
void *G23,
110 int *nel,
int *lxp) {
113 const int threads = 1024;
114 const int blocks = ((*nel * lx*lx*lx) + threads - 1) / threads;
118 hipLaunchKernelGGL(HIP_KERNEL_NAME( jacobi_kernel<real, N> ),\
119 blocks, threads, 0, (hipStream_t) glb_cmd_queue,\
121 (real *) dxt, (real *) dyt, (real *) dzt,\
122 (real *) G11, (real *) G22, (real *) G33,\
123 (real *) G12, (real *) G13, (real *) G23,\
145 fprintf(stderr, __FILE__
": size not supported: %d\n", lx);
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dzt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dyt
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dxt
void hip_jacobi_update(void *d, void *dxt, void *dyt, void *dzt, void *G11, void *G22, void *G33, void *G12, void *G13, void *G23, int *nel, int *lxp)
__global__ void jacobi_kernel(T *__restrict__ du, const T *__restrict__ dxt, const T *__restrict__ dyt, const T *__restrict__ dzt, const T *__restrict__ G11, const T *__restrict__ G22, const T *__restrict__ G33, const T *__restrict__ G12, const T *__restrict__ G13, const T *__restrict__ G23, const int nel)