Neko  0.8.1
A portable framework for high-order spectral element flow simulations
coef.c
Go to the documentation of this file.
1 /*
2  Copyright (c) 2022-2023, The Neko Authors
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions
7  are met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above
13  copyright notice, this list of conditions and the following
14  disclaimer in the documentation and/or other materials provided
15  with the distribution.
16 
17  * Neither the name of the authors nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 
36 #ifdef __APPLE__
37 #include <OpenCL/cl.h>
38 #else
39 #include <CL/cl.h>
40 #endif
41 
42 #include <stdio.h>
43 #include <math.h>
44 #include <device/device_config.h>
45 #include <device/opencl/jit.h>
46 #include <device/opencl/prgm_lib.h>
47 #include <device/opencl/check.h>
48 
49 #include "coef_kernel.cl.h"
50 
54 void opencl_coef_generate_geo(void *G11, void *G12, void *G13,
55  void *G22, void *G23, void *G33,
56  void *drdx, void *drdy, void *drdz,
57  void *dsdx, void *dsdy, void *dsdz,
58  void *dtdx, void *dtdy, void *dtdz,
59  void *jacinv, void *w3, int *nel,
60  int *lx, int *gdim) {
61 
62  cl_int err;
63  int i;
64  if (coef_program == NULL)
65  opencl_kernel_jit(coef_kernel, (cl_program *) &coef_program);
66 
67  const size_t global_item_size = 256 * (*nel);
68  const size_t local_item_size = 256;
69 
70 #define STR(X) #X
71 #define GEO_CASE(LX) \
72  case LX: \
73  { \
74  cl_kernel kernel = clCreateKernel(coef_program, \
75  STR(coef_generate_geo_kernel_lx##LX), &err); \
76  CL_CHECK(err); \
77  \
78  CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &G11)); \
79  CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &G12)); \
80  CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &G13)); \
81  CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &G22)); \
82  CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &G23)); \
83  CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &G33)); \
84  CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &drdx)); \
85  CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &drdy)); \
86  CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &drdz)); \
87  CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dsdx)); \
88  CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dsdy)); \
89  CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &dsdz)); \
90  CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &dtdx)); \
91  CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &dtdy)); \
92  CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &dtdz)); \
93  CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_mem), (void *) &jacinv)); \
94  CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_mem), (void *) &w3)); \
95  CL_CHECK(clSetKernelArg(kernel, 17, sizeof(int), gdim)); \
96  \
97  CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
98  kernel, 1, NULL, &global_item_size, \
99  &local_item_size, 0, NULL, NULL)); \
100  } \
101  break
102 
103  switch(*lx) {
104  GEO_CASE(2);
105  GEO_CASE(3);
106  GEO_CASE(4);
107  GEO_CASE(5);
108  GEO_CASE(6);
109  GEO_CASE(7);
110  GEO_CASE(8);
111  GEO_CASE(9);
112  GEO_CASE(10);
113  GEO_CASE(11);
114  GEO_CASE(12);
115  GEO_CASE(13);
116  GEO_CASE(14);
117  GEO_CASE(15);
118  GEO_CASE(16);
119  }
120 }
121 
125 void opencl_coef_generate_dxyzdrst(void *drdx, void *drdy, void *drdz,
126  void *dsdx, void *dsdy, void *dsdz,
127  void *dtdx, void *dtdy, void *dtdz,
128  void *dxdr, void *dydr, void *dzdr,
129  void *dxds, void *dyds, void *dzds,
130  void *dxdt, void *dydt, void *dzdt,
131  void *dx, void *dy, void *dz,
132  void *x, void *y, void *z,
133  void *jacinv, void *jac,
134  int *lx, int *nel) {
135 
136  cl_int err;
137  int i;
138  if (coef_program == NULL)
139  opencl_kernel_jit(coef_kernel, (cl_program *) &coef_program);
140 
141  const int n = (*nel) * (*lx) * (*lx) * (*lx);
142  const size_t global_item_size_dxyz = 256 * (*nel);
143  const size_t global_item_size_drst = 256 * n;
144  const size_t local_item_size = 256;
145 
146 #define STR(X) #X
147 #define DXYZDRST_CASE(LX) \
148  case LX: \
149  { \
150  cl_kernel kernel = clCreateKernel(coef_program, \
151  STR(coef_generate_dxyz_kernel_lx##LX), &err); \
152  CL_CHECK(err); \
153  \
154  CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &dxdr)); \
155  CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &dydr)); \
156  CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &dzdr)); \
157  CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &dxds)); \
158  CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &dyds)); \
159  CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &dzds)); \
160  CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &dxdt)); \
161  CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &dydt)); \
162  CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &dzdt)); \
163  CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dx)); \
164  CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dy)); \
165  CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &dz)); \
166  CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &x)); \
167  CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &y)); \
168  CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &z)); \
169  \
170  CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, \
171  kernel, 1, NULL, &global_item_size_dxyz, \
172  &local_item_size, 0, NULL, NULL)); \
173  } \
174  break
175 
176  switch(*lx) {
177  DXYZDRST_CASE(2);
178  DXYZDRST_CASE(3);
179  DXYZDRST_CASE(4);
180  DXYZDRST_CASE(5);
181  DXYZDRST_CASE(6);
182  DXYZDRST_CASE(7);
183  DXYZDRST_CASE(8);
184  DXYZDRST_CASE(9);
185  DXYZDRST_CASE(10);
186  DXYZDRST_CASE(11);
187  DXYZDRST_CASE(12);
188  DXYZDRST_CASE(13);
189  DXYZDRST_CASE(14);
190  DXYZDRST_CASE(15);
191  DXYZDRST_CASE(16);
192  }
193 
194  cl_kernel kernel = clCreateKernel(coef_program,
195  "coef_generate_drst_kernel", &err);
196  CL_CHECK(err);
197 
198  CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &jac));
199  CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &jacinv));
200  CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &drdx));
201  CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &drdy));
202  CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &drdz));
203  CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &dsdx));
204  CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &dsdy));
205  CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &dsdz));
206  CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &dtdx));
207  CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &dtdy));
208  CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *) &dtdz));
209  CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *) &dxdr));
210  CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *) &dydr));
211  CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_mem), (void *) &dzdr));
212  CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_mem), (void *) &dxds));
213  CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_mem), (void *) &dyds));
214  CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_mem), (void *) &dzds));
215  CL_CHECK(clSetKernelArg(kernel, 17, sizeof(cl_mem), (void *) &dxdt));
216  CL_CHECK(clSetKernelArg(kernel, 18, sizeof(cl_mem), (void *) &dydt));
217  CL_CHECK(clSetKernelArg(kernel, 19, sizeof(cl_mem), (void *) &dzdt));
218  CL_CHECK(clSetKernelArg(kernel, 20, sizeof(int), &n));
219 
220  CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue,
221  kernel, 1, NULL, &global_item_size_drst,
222  &local_item_size, 0, NULL, NULL));
223 }
#define GEO_CASE(LX)
void opencl_coef_generate_geo(void *G11, void *G12, void *G13, void *G22, void *G23, void *G33, void *drdx, void *drdy, void *drdz, void *dsdx, void *dsdy, void *dsdz, void *dtdx, void *dtdy, void *dtdz, void *jacinv, void *w3, int *nel, int *lx, int *gdim)
Definition: coef.c:54
void opencl_coef_generate_dxyzdrst(void *drdx, void *drdy, void *drdz, void *dsdx, void *dsdy, void *dsdz, void *dtdx, void *dtdy, void *dtdz, void *dxdr, void *dydr, void *dzdr, void *dxds, void *dyds, void *dzds, void *dxdt, void *dydt, void *dzdt, void *dx, void *dy, void *dz, void *x, void *y, void *z, void *jacinv, void *jac, int *lx, int *nel)
Definition: coef.c:125
#define DXYZDRST_CASE(LX)
__global__ void const T *__restrict__ x
Definition: cdtp_kernel.h:109
const int i
Definition: cdtp_kernel.h:132
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jac
Definition: cdtp_kernel.h:117
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ jacinv
Definition: conv1_kernel.h:148
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dz
Definition: conv1_kernel.h:138
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dx
Definition: conv1_kernel.h:136
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdx
Definition: conv1_kernel.h:139
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdx
Definition: conv1_kernel.h:141
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdz
Definition: conv1_kernel.h:147
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdx
Definition: conv1_kernel.h:140
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dtdy
Definition: conv1_kernel.h:144
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdz
Definition: conv1_kernel.h:146
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dsdy
Definition: conv1_kernel.h:143
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdy
Definition: conv1_kernel.h:142
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dy
Definition: conv1_kernel.h:137
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ drdz
Definition: conv1_kernel.h:145
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
void * glb_cmd_queue
void opencl_kernel_jit(const char *kernel, cl_program *program)
Definition: jit.c:50
#define CL_CHECK(err)
Definition: check.h:12
void * coef_program