Neko  0.8.99
A portable framework for high-order spectral element flow simulations
gs.hip
Go to the documentation of this file.
1 /*
2  Copyright (c) 2021-2022, The Neko Authors
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions
7  are met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above
13  copyright notice, this list of conditions and the following
14  disclaimer in the documentation and/or other materials provided
15  with the distribution.
16 
17  * Neither the name of the authors nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include <hip/hip_runtime.h>
36 #include <climits>
37 #include <cstdio>
38 #include <device/device_config.h>
39 #include <device/hip/check.h>
40 #include "gs_kernels.h"
41 
/* Gather-scatter operation selectors. The `op` arguments of
   hip_gather_kernel and hip_gs_unpack in this file are compared against
   these values; hip_gather_kernel dispatches on all four, while
   hip_gs_unpack only accepts GS_OP_ADD. */
42 #define GS_OP_ADD 1
43 #define GS_OP_MUL 2
44 #define GS_OP_MIN 3
45 #define GS_OP_MAX 4
46 
47 extern "C" {
48 
52  void hip_gather_kernel(void *v, int *m, int *o, void *dg,
53  void *u, int *n, void *gd, int *nb,
54  void *b, void *bo, int *op,
55  hipStream_t stream) {
56 
57  if ((*m) == 0) return;
58 
59  const dim3 nthrds(1024, 1, 1);
60  const dim3 nblcks(((*m)+ 1024 - 1)/ 1024, 1, 1);
61 
62  switch (*op) {
63  case GS_OP_ADD:
64  hipLaunchKernelGGL(HIP_KERNEL_NAME(gather_kernel_add<real>),
65  nblcks, nthrds, 0, stream,
66  (real *) v, *m, *o, (int *) dg,
67  (real *) u, *n, (int *) gd,
68  *nb, (int *) b, (int *) bo);
69  HIP_CHECK(hipGetLastError());
70  break;
71  case GS_OP_MUL:
72  hipLaunchKernelGGL(HIP_KERNEL_NAME(gather_kernel_mul<real>),
73  nblcks, nthrds, 0, stream,
74  (real *) v, *m, *o, (int *) dg,
75  (real *) u, *n, (int *) gd,
76  *nb, (int *) b, (int *) bo);
77  HIP_CHECK(hipGetLastError());
78  break;
79  case GS_OP_MIN:
80  hipLaunchKernelGGL(HIP_KERNEL_NAME(gather_kernel_min<real>),
81  nblcks, nthrds, 0, stream,
82  (real *) v, *m, *o, (int *) dg,
83  (real *) u, *n, (int *) gd,
84  *nb, (int *) b, (int *) bo);
85  HIP_CHECK(hipGetLastError());
86  break;
87  case GS_OP_MAX:
88  hipLaunchKernelGGL(HIP_KERNEL_NAME(gather_kernel_max<real>),
89  nblcks, nthrds, 0, stream,
90  (real *) v, *m, *o, (int *) dg,
91  (real *) u, *n, (int *) gd,
92  *nb, (int *) b, (int *) bo);
93  HIP_CHECK(hipGetLastError());
94  break;
95  }
96  }
97 
101  void hip_scatter_kernel(void *v, int *m, void *dg,
102  void *u, int *n, void *gd,
103  int *nb, void *b, void *bo,
104  hipStream_t stream) {
105 
106  if ((*m) == 0) return;
107 
108  const dim3 nthrds(1024, 1, 1);
109  const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
110 
111  hipLaunchKernelGGL(HIP_KERNEL_NAME(scatter_kernel<real>),
112  nblcks, nthrds, 0, stream,
113  (real *) v, *m, (int *) dg,
114  (real *) u, *n, (int *) gd,
115  *nb, (int *) b, (int *) bo);
116  HIP_CHECK(hipGetLastError());
117  }
118 
122  void hip_gs_pack(void *u_d, void *buf_d, void *dof_d,
123  int offset, int n, hipStream_t stream) {
124 
125  const int nthrds = 1024;
126  const int nblcks = (n + nthrds - 1) / nthrds;
127 
128  if (stream == NULL) {
129  hipLaunchKernelGGL(HIP_KERNEL_NAME(gs_pack_kernel<real>),
130  nblcks, nthrds, 0, 0,
131  (real *) u_d, (real *) buf_d + offset,
132  (int *) dof_d + offset, n);
133  }
134  else {
135  hipLaunchKernelGGL(HIP_KERNEL_NAME(gs_pack_kernel<real>),
136  nblcks, nthrds, 0, stream,
137  (real *) u_d, (real *) buf_d + offset,
138  (int *) dof_d + offset, n);
139  }
140  HIP_CHECK(hipGetLastError());
141  }
142 
146  void hip_gs_unpack(real *u_d, int op, real *buf_d, int *dof_d,
147  int offset, int n, hipStream_t stream) {
148 
149  const int nthrds = 1024;
150  const int nblcks = (n + nthrds - 1) / nthrds;
151 
152  switch (op) {
153  case GS_OP_ADD:
154  if (stream == NULL) {
155  hipLaunchKernelGGL(HIP_KERNEL_NAME(gs_unpack_add_kernel<real>),
156  nblcks, nthrds, 0, 0,
157  u_d, buf_d + offset,
158  dof_d + offset, n);
159  }
160  else {
161  hipLaunchKernelGGL(HIP_KERNEL_NAME(gs_unpack_add_kernel<real>),
162  nblcks, nthrds, 0, stream,
163  u_d, buf_d + offset,
164  dof_d + offset, n);
165 
166  }
167  break;
168  default:
169  printf("%s: unknown gs op %d\n", __FILE__, op);
170  abort();
171  }
172 
173  HIP_CHECK(hipGetLastError());
174  }
175 }
__global__ void const T *__restrict__ u
Definition: conv1_kernel.h:132
__global__ void const T *__restrict__ const T *__restrict__ v
double real
Definition: device_config.h:12
#define GS_OP_MAX
Definition: gs.hip:45
#define GS_OP_ADD
Definition: gs.hip:42
void hip_gs_unpack(real *u_d, int op, real *buf_d, int *dof_d, int offset, int n, hipStream_t stream)
Definition: gs.hip:146
void hip_scatter_kernel(void *v, int *m, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, hipStream_t stream)
Definition: gs.hip:101
void hip_gs_pack(void *u_d, void *buf_d, void *dof_d, int offset, int n, hipStream_t stream)
Definition: gs.hip:122
#define GS_OP_MUL
Definition: gs.hip:43
void hip_gather_kernel(void *v, int *m, int *o, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, int *op, hipStream_t stream)
Definition: gs.hip:52
#define GS_OP_MIN
Definition: gs.hip:44
#define HIP_CHECK(err)
Definition: check.h:8