Neko  0.9.0
A portable framework for high-order spectral element flow simulations
opr_lambda2.hip
Go to the documentation of this file.
1 /*
2  Copyright (c) 2021-2023, The Neko Authors
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions
7  are met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above
13  copyright notice, this list of conditions and the following
14  disclaimer in the documentation and/or other materials provided
15  with the distribution.
16 
17  * Neither the name of the authors nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include <string.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <hip/hip_runtime.h>
39 #include "lambda2_kernel.h"
40 #include <device/device_config.h>
41 #include <device/hip/check.h>
42 
43 extern "C" {
44  #include <common/neko_log.h>
45 }
46 
/**
 * Forward declaration of the lambda2 autotuner (defined further down in this
 * file) so that hip_lambda2 can call it.
 *
 * Parameter names mirror the definition: the first pointer is the lambda2
 * array, followed by the three arrays u, v, w, the derivative arrays, the
 * geometric factor arrays and jacinv.
 *
 * @tparam LX  compile-time size used to instantiate the kernels.
 * @return 1 if the 1D kernel was selected, 2 if the KSTEP kernel was selected.
 */
template < const int LX >
int tune_lambda2(void *lambda2, void *u, void *v, void *w,
                 void *dx, void *dy, void *dz,
                 void *drdx, void *dsdx, void *dtdx,
                 void *drdy, void *dsdy, void *dtdy,
                 void *drdz, void *dsdz, void *dtdz,
                 void *jacinv, int *nel, int *lx);
54 
55 extern "C" {
56 
60  void hip_lambda2(void *lambda2, void *u, void *v, void *w,
61  void *dx, void *dy, void *dz,
62  void *drdx, void *dsdx, void *dtdx,
63  void *drdy, void *dsdy, void *dtdy,
64  void *drdz, void *dsdz, void *dtdz,
65  void *jacinv, int *nel, int *lx) {
66 
67  static int autotune[17] = { 0 };
68 
69  const dim3 nthrds_1d(1024, 1, 1);
70  const dim3 nthrds_kstep((*lx), (*lx), 1);
71  const dim3 nblcks((*nel), 1, 1);
72  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
73 
74 #define CASE_1D(LX) \
75  hipLaunchKernelGGL( HIP_KERNEL_NAME(lambda2_kernel_1d<real, LX, 1024> ), \
76  nblcks, nthrds_1d, 0, (hipStream_t) glb_cmd_queue, \
77  (real *) lambda2, (real *) u, (real *) v, (real *) w, \
78  (real *) dx, (real *) dy, (real *) dz, \
79  (real *) drdx, (real *) dsdx, (real *) dtdx, \
80  (real *) drdy, (real *) dsdy, (real *) dtdy, \
81  (real *) drdz, (real *) dsdz, (real *) dtdz, \
82  (real *) jacinv); \
83  HIP_CHECK(hipGetLastError());
84 
85 
86 
87 #define CASE_KSTEP(LX) \
88  hipLaunchKernelGGL( HIP_KERNEL_NAME(lambda2_kernel_kstep<real, LX> ), \
89  nblcks, nthrds_kstep, 0, (hipStream_t) glb_cmd_queue, \
90  (real *) lambda2, (real *) u, (real *) v, (real *) w, \
91  (real *) dx, (real *) dy, (real *) dz, \
92  (real *) drdx, (real *) dsdx, (real *) dtdx, \
93  (real *) drdy, (real *) dsdy, (real *) dtdy, \
94  (real *) drdz, (real *) dsdz, (real *) dtdz, \
95  (real *) jacinv); \
96  HIP_CHECK(hipGetLastError());
97 
98 #define CASE(LX) \
99  case LX: \
100  if(autotune[LX] == 0 ) { \
101  autotune[LX]=tune_lambda2<LX>(lambda2, u, v, w, \
102  dx, dy, dz, \
103  drdx, dsdx, dtdx, \
104  drdy, dsdy, dtdy, \
105  drdz, dsdz, dtdz, \
106  jacinv, nel, lx); \
107  } else if (autotune[LX] == 1 ) { \
108  CASE_1D(LX); \
109  } else if (autotune[LX] == 2 ) { \
110  CASE_KSTEP(LX); \
111  } \
112  break
113 
114  switch(*lx) {
115  CASE(2);
116  CASE(3);
117  CASE(4);
118  CASE(5);
119  CASE(6);
120  CASE(7);
121  CASE(8);
122  CASE(9);
123  CASE(10);
124  CASE(11);
125  CASE(12);
126  default:
127  {
128  fprintf(stderr, __FILE__ ": size not supported: %d\n", *lx);
129  exit(1);
130  }
131  }
132  }
133 }
134 
135 template < const int LX >
136 int tune_lambda2(void *lambda2, void *u, void *v, void *w,
137  void *dx, void *dy, void *dz,
138  void *drdx, void *dsdx, void *dtdx,
139  void *drdy, void *dsdy, void *dtdy,
140  void *drdz, void *dsdz, void *dtdz,
141  void *jacinv, int *nel, int *lx) {
142  hipEvent_t start,stop;
143  float time1,time2;
144  int retval;
145 
146  const dim3 nthrds_1d(1024, 1, 1);
147  const dim3 nthrds_kstep((*lx), (*lx), 1);
148  const dim3 nblcks((*nel), 1, 1);
149  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
150 
151  char *env_value = NULL;
152  char neko_log_buf[80];
153 
154  env_value=getenv("NEKO_AUTOTUNE");
155 
156  sprintf(neko_log_buf, "Autotune lambda2 (lx: %d)", *lx);
157  log_section(neko_log_buf);
158 
159  if(env_value) {
160  if( !strcmp(env_value,"1D") ) {
161  CASE_1D(LX);
162  sprintf(neko_log_buf,"Set by env : 1 (1D)");
163  log_message(neko_log_buf);
164  log_end_section();
165  return 1;
166  } else if( !strcmp(env_value,"KSTEP") ) {
167  CASE_KSTEP(LX);
168  sprintf(neko_log_buf,"Set by env : 2 (KSTEP)");
169  log_message(neko_log_buf);
170  log_end_section();
171  return 2;
172  } else {
173  sprintf(neko_log_buf, "Invalid value set for NEKO_AUTOTUNE");
174  log_error(neko_log_buf);
175  }
176  }
177 
178  HIP_CHECK(hipEventCreate(&start));
179  HIP_CHECK(hipEventCreate(&stop));
180 
181  HIP_CHECK(hipEventRecord(start,0));
182 
183  for(int i = 0; i < 100; i++) {
184  CASE_1D(LX);
185  }
186 
187  HIP_CHECK(hipEventRecord(stop,0));
188  HIP_CHECK(hipEventSynchronize(stop));
189  HIP_CHECK(hipEventElapsedTime(&time1, start, stop));
190 
191  HIP_CHECK(hipEventRecord(start,0));
192 
193  for(int i = 0; i < 100; i++) {
194  CASE_KSTEP(LX);
195  }
196 
197  HIP_CHECK(hipEventRecord(stop,0));
198  HIP_CHECK(hipEventSynchronize(stop));
199  HIP_CHECK(hipEventElapsedTime(&time2, start, stop));
200 
201  if(time1 < time2) {
202  retval = 1;
203  } else {
204  retval = 2;
205  }
206 
207  sprintf(neko_log_buf, "Chose : %d (%s)", retval,
208  (retval > 1 ? "KSTEP" : "1D"));
209  log_message(neko_log_buf);
210  log_end_section();
211  return retval;
212 }
213 
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
const int i
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
__global__ void T *__restrict__ uy
__global__ void T *__restrict__ T *__restrict__ uz
void * glb_cmd_queue
#define HIP_CHECK(err)
Definition: check.h:8
A simulation component that computes lambda2. The values are stored in the field registry under the na...
Definition: lambda2.f90:37
void log_error(char *msg)
void log_message(char *msg)
void log_end_section()
void log_section(char *msg)
#define CASE(LX)
#define CASE_KSTEP(LX)
void hip_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition: opr_lambda2.hip:60
#define CASE_1D(LX)
int tune_lambda2(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)