Neko  0.9.99
A portable framework for high-order spectral element flow simulations
opr_conv1.hip
Go to the documentation of this file.
1 /*
2  Copyright (c) 2021-2023, The Neko Authors
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions
7  are met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above
13  copyright notice, this list of conditions and the following
14  disclaimer in the documentation and/or other materials provided
15  with the distribution.
16 
17  * Neither the name of the authors nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include <string.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <hip/hip_runtime.h>
39 #include <device/device_config.h>
40 #include <device/hip/check.h>
41 #include "conv1_kernel.h"
42 
43 extern "C" {
44  #include <common/neko_log.h>
45 }
46 
47 template < const int >
48 int tune_conv1(void *du, void *u,
49  void *vx, void *vy, void *vz,
50  void *dx, void *dy, void *dz,
51  void *drdx, void *dsdx, void *dtdx,
52  void *drdy, void *dsdy, void *dtdy,
53  void *drdz, void *dsdz, void *dtdz,
54  void *jacinv, int *nel, int *gdim, int *lx);
55 
56 extern "C" {
57 
61  void hip_conv1(void *du, void *u,
62  void *vx, void *vy, void *vz,
63  void *dx, void *dy, void *dz,
64  void *drdx, void *dsdx, void *dtdx,
65  void *drdy, void *dsdy, void *dtdy,
66  void *drdz, void *dsdz, void *dtdz,
67  void *jacinv, int *nel, int *gdim, int *lx) {
68 
69  static int autotune[17] = { 0 };
70 
71  const dim3 nthrds_1d(1024, 1, 1);
72  const dim3 nthrds_kstep((*lx), (*lx), 1);
73  const dim3 nblcks((*nel), 1, 1);
74 
75 #define CASE_1D(LX) \
76  hipLaunchKernelGGL( HIP_KERNEL_NAME(conv1_kernel_1d<real, LX, 1024> ), \
77  nblcks, nthrds_1d, 0, (hipStream_t) glb_cmd_queue, \
78  (real *) du, (real *) u, \
79  (real *) vx, (real *) vy, (real *) vz, \
80  (real *) dx, (real *) dy, (real *) dz, \
81  (real *) drdx, (real *) dsdx, (real *) dtdx, \
82  (real *) drdy, (real *) dsdy, (real *) dtdy, \
83  (real *) drdz, (real *) dsdz, (real *) dtdz, \
84  (real *) jacinv); \
85  HIP_CHECK(hipGetLastError());
86 
87 #define CASE_KSTEP(LX) \
88  hipLaunchKernelGGL( HIP_KERNEL_NAME(conv1_kernel_kstep<real, LX> ), \
89  nblcks, nthrds_kstep, 0, (hipStream_t) glb_cmd_queue, \
90  (real *) du, (real *) u, \
91  (real *) vx, (real *) vy, (real *) vz, \
92  (real *) dx, (real *) dy, (real *) dz, \
93  (real *) drdx, (real *) dsdx, (real *) dtdx, \
94  (real *) drdy, (real *) dsdy, (real *) dtdy, \
95  (real *) drdz, (real *) dsdz, (real *) dtdz, \
96  (real *) jacinv); \
97  HIP_CHECK(hipGetLastError());
98 
99 #define CASE(LX) \
100  case LX: \
101  if(autotune[LX] == 0 ) { \
102  autotune[LX]=tune_conv1<LX>(du, u, \
103  vx, vy, vz, \
104  dx, dy, dz, \
105  drdx, dsdx, dtdx, \
106  drdy, dsdy, dtdy, \
107  drdz, dsdz, dtdz, \
108  jacinv, nel, gdim, lx); \
109  } else if (autotune[LX] == 1 ) { \
110  CASE_1D(LX); \
111  } else if (autotune[LX] == 2 ) { \
112  CASE_KSTEP(LX); \
113  } \
114  break
115 
116 #define CASE_LARGE(LX) \
117  case LX: \
118  CASE_KSTEP(LX); \
119  break
120 
121 
122  if ((*lx) < 11) {
123  switch(*lx) {
124  CASE(2);
125  CASE(3);
126  CASE(4);
127  CASE(5);
128  CASE(6);
129  CASE(7);
130  CASE(8);
131  CASE(9);
132  CASE(10);
133  default:
134  {
135  fprintf(stderr, __FILE__ ": size not supported: %d\n", *lx);
136  exit(1);
137  }
138  }
139  }
140  else {
141  switch(*lx) {
142  CASE_LARGE(11);
143  CASE_LARGE(12);
144  CASE_LARGE(13);
145  CASE_LARGE(14);
146  CASE_LARGE(15);
147  CASE_LARGE(16);
148  default:
149  {
150  fprintf(stderr, __FILE__ ": size not supported: %d\n", *lx);
151  exit(1);
152  }
153  }
154  }
155  }
156 }
157 
158 template < const int LX >
159 int tune_conv1(void *du, void *u,
160  void *vx, void *vy, void *vz,
161  void *dx, void *dy, void *dz,
162  void *drdx, void *dsdx, void *dtdx,
163  void *drdy, void *dsdy, void *dtdy,
164  void *drdz, void *dsdz, void *dtdz,
165  void *jacinv, int *nel, int *gdim, int *lx) {
166  hipEvent_t start,stop;
167  float time1,time2;
168  int retval;
169 
170  const dim3 nthrds_1d(1024, 1, 1);
171  const dim3 nthrds_kstep((*lx), (*lx), 1);
172  const dim3 nblcks((*nel), 1, 1);
173 
174  char *env_value = NULL;
175  char neko_log_buf[80];
176 
177  env_value=getenv("NEKO_AUTOTUNE");
178 
179  sprintf(neko_log_buf, "Autotune conv1 (lx: %d)", *lx);
180  log_section(neko_log_buf);
181 
182  if(env_value) {
183  if( !strcmp(env_value,"1D") ) {
184  CASE_1D(LX);
185  sprintf(neko_log_buf,"Set by env : 1 (1D)");
186  log_message(neko_log_buf);
187  log_end_section();
188  return 1;
189  } else if( !strcmp(env_value,"KSTEP") ) {
190  CASE_KSTEP(LX);
191  sprintf(neko_log_buf,"Set by env : 2 (KSTEP)");
192  log_message(neko_log_buf);
193  log_end_section();
194  return 2;
195  } else {
196  sprintf(neko_log_buf, "Invalid value set for NEKO_AUTOTUNE");
197  log_error(neko_log_buf);
198  }
199  }
200 
201  HIP_CHECK(hipEventCreate(&start));
202  HIP_CHECK(hipEventCreate(&stop));
203 
204  HIP_CHECK(hipEventRecord(start,0));
205 
206  for(int i = 0; i < 100; i++) {
207  CASE_1D(LX);
208  }
209 
210  HIP_CHECK(hipEventRecord(stop,0));
211  HIP_CHECK(hipEventSynchronize(stop));
212  HIP_CHECK(hipEventElapsedTime(&time1, start, stop));
213 
214  HIP_CHECK(hipEventRecord(start,0));
215 
216  for(int i = 0; i < 100; i++) {
217  CASE_KSTEP(LX);
218  }
219 
220  HIP_CHECK(hipEventRecord(stop,0));
221  HIP_CHECK(hipEventSynchronize(stop));
222  HIP_CHECK(hipEventElapsedTime(&time2, start, stop));
223 
224  if(time1 < time2) {
225  retval = 1;
226  } else {
227  retval = 2;
228  }
229 
230  sprintf(neko_log_buf, "Chose : %d (%s)", retval,
231  (retval > 1 ? "KSTEP" : "1D"));
232  log_message(neko_log_buf);
233  log_end_section();
234  return retval;
235 }
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
const int i
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ vz
Definition: conv1_kernel.h:135
__global__ void const T *__restrict__ const T *__restrict__ vx
Definition: conv1_kernel.h:133
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ vy
Definition: conv1_kernel.h:134
#define HIP_CHECK(err)
Definition: check.h:8
void log_error(char *msg)
void log_message(char *msg)
void log_end_section()
void log_section(char *msg)
#define CASE(LX)
#define CASE_KSTEP(LX)
void hip_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition: opr_conv1.hip:61
#define CASE_LARGE(LX)
#define CASE_1D(LX)
int tune_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition: opr_conv1.hip:159