Neko  0.8.99
A portable framework for high-order spectral element flow simulations
opr_dudxyz.hip
Go to the documentation of this file.
1 /*
2  Copyright (c) 2021-2022, The Neko Authors
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions
7  are met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above
13  copyright notice, this list of conditions and the following
14  disclaimer in the documentation and/or other materials provided
15  with the distribution.
16 
17  * Neither the name of the authors nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include <string.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <hip/hip_runtime.h>
39 #include <device/device_config.h>
40 #include <device/hip/check.h>
41 #include "dudxyz_kernel.h"
42 
43 extern "C" {
44  #include <common/neko_log.h>
45 }
46 
47 template <const int >
48 int tune_dudxyz(void *du, void *u,
49  void *dr, void *ds, void *dt,
50  void *dx, void *dy, void *dz,
51  void *jacinv, int *nel, int *lx);
52 
53 extern "C" {
54 
58  void hip_dudxyz(void *du, void *u,
59  void *dr, void *ds, void *dt,
60  void *dx, void *dy, void *dz,
61  void *jacinv, int *nel, int *lx) {
62 
63  static int autotune[16] = { 0 };
64 
65  const dim3 nthrds_1d(1024, 1, 1);
66  const dim3 nthrds_kstep((*lx), (*lx), 1);
67  const dim3 nblcks((*nel), 1, 1);
68 
69 #define CASE_1D(LX) \
70  hipLaunchKernelGGL( HIP_KERNEL_NAME(dudxyz_kernel_1d<real, LX, 1024> ), \
71  nblcks, nthrds_1d, 0, (hipStream_t) glb_cmd_queue, \
72  (real *) du, (real *) u, \
73  (real *) dr, (real *) ds, (real *) dt, \
74  (real *) dx, (real *) dy, (real *) dz, \
75  (real *) jacinv); \
76  HIP_CHECK(hipGetLastError());
77 
78 #define CASE_KSTEP(LX) \
79  hipLaunchKernelGGL( HIP_KERNEL_NAME(dudxyz_kernel_kstep<real, LX> ), \
80  nblcks, nthrds_kstep, 0, (hipStream_t) glb_cmd_queue, \
81  (real *) du, (real *) u, \
82  (real *) dr, (real *) ds, (real *) dt, \
83  (real *) dx, (real *) dy, (real *) dz, \
84  (real *) jacinv); \
85  HIP_CHECK(hipGetLastError());
86 
87  #define CASE(LX) \
88  case LX: \
89  if(autotune[LX] == 0 ) { \
90  autotune[LX]=tune_dudxyz<LX>(du, u, \
91  dr, ds, dt, \
92  dx, dy, dz, \
93  jacinv, nel, lx); \
94  } else if (autotune[LX] == 1 ) { \
95  CASE_1D(LX); \
96  } else if (autotune[LX] == 2 ) { \
97  CASE_KSTEP(LX); \
98  } \
99  break
100 
101 #define CASE_LARGE(LX) \
102  case LX: \
103  CASE_KSTEP(LX); \
104  break
105 
106 
107  if ((*lx) < 11) {
108  switch(*lx) {
109  CASE(2);
110  CASE(3);
111  CASE(4);
112  CASE(5);
113  CASE(6);
114  CASE(7);
115  CASE(8);
116  CASE(9);
117  CASE(10);
118  default:
119  {
120  fprintf(stderr, __FILE__ ": size not supported: %d\n", *lx);
121  exit(1);
122  }
123  }
124  }
125  else {
126  switch(*lx) {
127  CASE_LARGE(11);
128  CASE_LARGE(12);
129  CASE_LARGE(13);
130  CASE_LARGE(14);
131  CASE_LARGE(15);
132  CASE_LARGE(16);
133  default:
134  {
135  fprintf(stderr, __FILE__ ": size not supported: %d\n", *lx);
136  exit(1);
137  }
138  }
139  }
140  }
141 }
142 
143 template < const int LX >
144 int tune_dudxyz(void *du, void *u,
145  void *dr, void *ds, void *dt,
146  void *dx, void *dy, void *dz,
147  void *jacinv, int *nel, int *lx) {
148  hipEvent_t start,stop;
149  float time1,time2;
150  int retval;
151 
152  const dim3 nthrds_1d(1024, 1, 1);
153  const dim3 nthrds_kstep((*lx), (*lx), 1);
154  const dim3 nblcks((*nel), 1, 1);
155 
156  char *env_value = NULL;
157  char neko_log_buf[80];
158 
159  env_value=getenv("NEKO_AUTOTUNE");
160 
161  sprintf(neko_log_buf, "Autotune dudxyz (lx: %d)", *lx);
162  log_section(neko_log_buf);
163 
164  if(env_value) {
165  if( !strcmp(env_value,"1D") ) {
166  CASE_1D(LX);
167  sprintf(neko_log_buf,"Set by env : 1 (1D)");
168  log_message(neko_log_buf);
169  log_end_section();
170  return 1;
171  } else if( !strcmp(env_value,"KSTEP") ) {
172  CASE_KSTEP(LX);
173  sprintf(neko_log_buf,"Set by env : 2 (KSTEP)");
174  log_message(neko_log_buf);
175  log_end_section();
176  return 2;
177  } else {
178  sprintf(neko_log_buf, "Invalid value set for NEKO_AUTOTUNE");
179  log_error(neko_log_buf);
180  }
181  }
182 
183  HIP_CHECK(hipEventCreate(&start));
184  HIP_CHECK(hipEventCreate(&stop));
185 
186  HIP_CHECK(hipEventRecord(start,0));
187 
188  for(int i = 0; i < 100; i++) {
189  CASE_1D(LX);
190  }
191 
192  HIP_CHECK(hipEventRecord(stop,0));
193  HIP_CHECK(hipEventSynchronize(stop));
194  HIP_CHECK(hipEventElapsedTime(&time1, start, stop));
195 
196  HIP_CHECK(hipEventRecord(start,0));
197 
198  for(int i = 0; i < 100; i++) {
199  CASE_KSTEP(LX);
200  }
201 
202  HIP_CHECK(hipEventRecord(stop,0));
203  HIP_CHECK(hipEventSynchronize(stop));
204  HIP_CHECK(hipEventElapsedTime(&time2, start, stop));
205 
206  if(time1 < time2) {
207  retval = 1;
208  } else {
209  retval = 2;
210  }
211 
212  sprintf(neko_log_buf, "Chose : %d (%s)", retval,
213  (retval > 1 ? "KSTEP" : "1D"));
214  log_message(neko_log_buf);
215  log_end_section();
216  return retval;
217 }
__global__ void const T *__restrict__ const T *__restrict__ dr
Definition: cdtp_kernel.h:107
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ ds
Definition: cdtp_kernel.h:108
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
Definition: cdtp_kernel.h:109
const int i
Definition: cdtp_kernel.h:128
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
Definition: conv1_kernel.h:148
__global__ void const T *__restrict__ u
Definition: conv1_kernel.h:132
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
Definition: conv1_kernel.h:136
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
Definition: conv1_kernel.h:138
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
Definition: conv1_kernel.h:137
#define HIP_CHECK(err)
Definition: check.h:8
void log_error(char *msg)
void log_message(char *msg)
void log_end_section()
void log_section(char *msg)
int tune_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition: opr_dudxyz.hip:144
#define CASE(LX)
#define CASE_KSTEP(LX)
#define CASE_LARGE(LX)
#define CASE_1D(LX)
void hip_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition: opr_dudxyz.hip:58