35 #include <hip/hip_runtime.h>
53 void *
u,
int *n,
void *gd,
int *nb,
54 void *b,
void *bo,
int *op,
57 if ((*m) == 0)
return;
59 const dim3 nthrds(1024, 1, 1);
60 const dim3 nblcks(((*m)+ 1024 - 1)/ 1024, 1, 1);
64 hipLaunchKernelGGL(HIP_KERNEL_NAME(gather_kernel_add<real>),
65 nblcks, nthrds, 0, stream,
66 (
real *)
v, *m, *o, (
int *) dg,
67 (
real *)
u, *n, (
int *) gd,
68 *nb, (
int *) b, (
int *) bo);
72 hipLaunchKernelGGL(HIP_KERNEL_NAME(gather_kernel_mul<real>),
73 nblcks, nthrds, 0, stream,
74 (
real *)
v, *m, *o, (
int *) dg,
75 (
real *)
u, *n, (
int *) gd,
76 *nb, (
int *) b, (
int *) bo);
80 hipLaunchKernelGGL(HIP_KERNEL_NAME(gather_kernel_min<real>),
81 nblcks, nthrds, 0, stream,
82 (
real *)
v, *m, *o, (
int *) dg,
83 (
real *)
u, *n, (
int *) gd,
84 *nb, (
int *) b, (
int *) bo);
88 hipLaunchKernelGGL(HIP_KERNEL_NAME(gather_kernel_max<real>),
89 nblcks, nthrds, 0, stream,
90 (
real *)
v, *m, *o, (
int *) dg,
91 (
real *)
u, *n, (
int *) gd,
92 *nb, (
int *) b, (
int *) bo);
102 void *
u,
int *n,
void *gd,
103 int *nb,
void *b,
void *bo,
104 hipStream_t stream) {
106 if ((*m) == 0)
return;
108 const dim3 nthrds(1024, 1, 1);
109 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
111 hipLaunchKernelGGL(HIP_KERNEL_NAME(scatter_kernel<real>),
112 nblcks, nthrds, 0, stream,
113 (
real *)
v, *m, (
int *) dg,
114 (
real *)
u, *n, (
int *) gd,
115 *nb, (
int *) b, (
int *) bo);
123 int offset,
int n, hipStream_t stream) {
125 const int nthrds = 1024;
126 const int nblcks = (n + nthrds - 1) / nthrds;
128 if (stream == NULL) {
129 hipLaunchKernelGGL(HIP_KERNEL_NAME(gs_pack_kernel<real>),
130 nblcks, nthrds, 0, 0,
131 (
real *) u_d, (
real *) buf_d + offset,
132 (
int *) dof_d + offset, n);
135 hipLaunchKernelGGL(HIP_KERNEL_NAME(gs_pack_kernel<real>),
136 nblcks, nthrds, 0, stream,
137 (
real *) u_d, (
real *) buf_d + offset,
138 (
int *) dof_d + offset, n);
147 int offset,
int n, hipStream_t stream) {
149 const int nthrds = 1024;
150 const int nblcks = (n + nthrds - 1) / nthrds;
154 if (stream == NULL) {
155 hipLaunchKernelGGL(HIP_KERNEL_NAME(gs_unpack_add_kernel<real>),
156 nblcks, nthrds, 0, 0,
161 hipLaunchKernelGGL(HIP_KERNEL_NAME(gs_unpack_add_kernel<real>),
162 nblcks, nthrds, 0, stream,
169 printf(
"%s: unknown gs op %d\n", __FILE__, op);
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
void hip_gs_unpack(real *u_d, int op, real *buf_d, int *dof_d, int offset, int n, hipStream_t stream)
void hip_scatter_kernel(void *v, int *m, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, hipStream_t stream)
void hip_gs_pack(void *u_d, void *buf_d, void *dof_d, int offset, int n, hipStream_t stream)
void hip_gather_kernel(void *v, int *m, int *o, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, int *op, hipStream_t stream)