Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
euler_res.c
Go to the documentation of this file.
1/*
2 Copyright (c) 2025, The Neko Authors
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
16
17 * Neither the name of the authors nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#ifdef __APPLE__
36#include <OpenCL/cl.h>
37#else
38#include <CL/cl.h>
39#endif
40
41#include <stdio.h>
43#include <device/opencl/jit.h>
45#include <device/opencl/check.h>
46
47#include "euler_res_kernel.cl.h"
48
/*
 * Host-side launcher for the OpenCL kernel named
 * "euler_res_part_visc_kernel". This is the tail of
 * euler_res_part_visc_opencl; its first signature line (source line 49)
 * is absent from this listing.
 *
 * Binds rhs_u, Binv, lap_sol and effective_visc as cl_mem-sized kernel
 * arguments 0-3 and the int pointed to by n as argument 4, then computes
 * a global work size rounded up to a multiple of 256 and a local work
 * size of 256.
 *
 * NOTE(review): source lines 53-54, 56, 58, 70-71 and 73 are also absent
 * here; presumably they JIT-build euler_res_program, create `kernel`,
 * check `err` and enqueue the kernel -- confirm against the real file.
 */
50 void *effective_visc, int *n) {
51 cl_int err;
52
55
57 "euler_res_part_visc_kernel", &err);
59
/* Each buffer handle is passed by address with the size of a cl_mem. */
60 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &rhs_u));
61 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &Binv));
62 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &lap_sol));
63 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &effective_visc));
/* n is already a pointer, so the int it points to is what gets bound. */
64 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), n));
65
/*
 * Ceiling division: number of 256-item groups needed to cover *n items.
 * NOTE(review): (*n) + 256 - 1 and 256 * nb are evaluated in int before
 * the conversion to size_t -- confirm *n stays far enough below INT_MAX.
 */
66 const int nb = ((*n) + 256 - 1) / 256;
67 const size_t global_item_size = 256 * nb;
68 const size_t local_item_size = 256;
69
72 0, NULL, NULL));
74}
75
/*
 * Host-side launcher for the OpenCL kernel named
 * "euler_res_part_mx_flux_kernel".
 *
 * Binds f_x, f_y, f_z, m_x, m_y, m_z, rho_field and p as cl_mem-sized
 * kernel arguments 0-7 (in parameter order) and the int pointed to by n
 * as argument 8, then computes a global work size rounded up to a
 * multiple of 256 and a local work size of 256.
 *
 * NOTE(review): source lines 81-82, 84, 86, 102-103 and 105 are absent
 * from this listing; presumably they JIT-build euler_res_program, create
 * `kernel`, check `err` and enqueue the kernel -- confirm against the
 * real file.
 */
76void euler_res_part_mx_flux_opencl(void *f_x, void *f_y, void *f_z,
77 void *m_x, void *m_y, void *m_z,
78 void *rho_field, void *p, int *n) {
79 cl_int err;
80
83
85 "euler_res_part_mx_flux_kernel", &err);
87
/* Each buffer handle is passed by address with the size of a cl_mem. */
88 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &f_x));
89 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &f_y));
90 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &f_z));
91 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &m_x));
92 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &m_y));
93 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &m_z));
94 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &rho_field));
95 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &p));
/* n is already a pointer, so the int it points to is what gets bound. */
96 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), n));
97
/*
 * Ceiling division: number of 256-item groups needed to cover *n items.
 * NOTE(review): (*n) + 256 - 1 and 256 * nb are evaluated in int before
 * the conversion to size_t -- confirm *n stays far enough below INT_MAX.
 */
98 const int nb = ((*n) + 256 - 1) / 256;
99 const size_t global_item_size = 256 * nb;
100 const size_t local_item_size = 256;
101
104 0, NULL, NULL));
106}
107
/*
 * Host-side launcher for the OpenCL kernel named
 * "euler_res_part_my_flux_kernel".
 *
 * Binds f_x, f_y, f_z, m_x, m_y, m_z, rho_field and p as cl_mem-sized
 * kernel arguments 0-7 (in parameter order) and the int pointed to by n
 * as argument 8, then computes a global work size rounded up to a
 * multiple of 256 and a local work size of 256.
 *
 * NOTE(review): source lines 114, 116, 134-135 and 137 are absent from
 * this listing; presumably they JIT-build euler_res_program when it is
 * NULL, create `kernel` and enqueue it -- confirm against the real file.
 */
108void euler_res_part_my_flux_opencl(void *f_x, void *f_y, void *f_z,
109 void *m_x, void *m_y, void *m_z,
110 void *rho_field, void *p, int *n) {
111 cl_int err;
112
/* The statement guarded by this test (source line 114) is not shown. */
113 if (euler_res_program == NULL)
115
117 "euler_res_part_my_flux_kernel", &err);
118 CL_CHECK(err);
119
/* Each buffer handle is passed by address with the size of a cl_mem. */
120 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &f_x));
121 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &f_y));
122 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &f_z));
123 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &m_x));
124 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &m_y));
125 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &m_z));
126 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &rho_field));
127 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &p));
/* n is already a pointer, so the int it points to is what gets bound. */
128 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), n));
129
/*
 * Ceiling division: number of 256-item groups needed to cover *n items.
 * NOTE(review): (*n) + 256 - 1 and 256 * nb are evaluated in int before
 * the conversion to size_t -- confirm *n stays far enough below INT_MAX.
 */
130 const int nb = ((*n) + 256 - 1) / 256;
131 const size_t global_item_size = 256 * nb;
132 const size_t local_item_size = 256;
133
136 0, NULL, NULL));
138}
139
/*
 * Host-side launcher for the OpenCL kernel named
 * "euler_res_part_mz_flux_kernel".
 *
 * Binds f_x, f_y, f_z, m_x, m_y, m_z, rho_field and p as cl_mem-sized
 * kernel arguments 0-7 (in parameter order) and the int pointed to by n
 * as argument 8, then computes a global work size rounded up to a
 * multiple of 256 and a local work size of 256.
 *
 * NOTE(review): source lines 146, 148, 166-167 and 169 are absent from
 * this listing; presumably they JIT-build euler_res_program when it is
 * NULL, create `kernel` and enqueue it -- confirm against the real file.
 */
140void euler_res_part_mz_flux_opencl(void *f_x, void *f_y, void *f_z,
141 void *m_x, void *m_y, void *m_z,
142 void *rho_field, void *p, int *n) {
143 cl_int err;
144
/* The statement guarded by this test (source line 146) is not shown. */
145 if (euler_res_program == NULL)
147
149 "euler_res_part_mz_flux_kernel", &err);
150 CL_CHECK(err);
151
/* Each buffer handle is passed by address with the size of a cl_mem. */
152 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &f_x));
153 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &f_y));
154 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &f_z));
155 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &m_x));
156 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &m_y));
157 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &m_z));
158 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &rho_field));
159 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &p));
/* n is already a pointer, so the int it points to is what gets bound. */
160 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), n));
161
/*
 * Ceiling division: number of 256-item groups needed to cover *n items.
 * NOTE(review): (*n) + 256 - 1 and 256 * nb are evaluated in int before
 * the conversion to size_t -- confirm *n stays far enough below INT_MAX.
 */
162 const int nb = ((*n) + 256 - 1) / 256;
163 const size_t global_item_size = 256 * nb;
164 const size_t local_item_size = 256;
165
168 0, NULL, NULL));
170}
171
/*
 * Host-side launcher for the OpenCL kernel named
 * "euler_res_part_E_flux_kernel".
 *
 * Binds f_x, f_y, f_z, m_x, m_y, m_z and rho_field as cl_mem-sized kernel
 * arguments 0-6, E as argument 7, p as argument 8 and the int pointed to
 * by n as argument 9, then computes a global work size rounded up to a
 * multiple of 256 and a local work size of 256.
 *
 * NOTE(review): source lines 178, 180, 199-200 and 202 are absent from
 * this listing; presumably they JIT-build euler_res_program when it is
 * NULL, create `kernel` and enqueue it -- confirm against the real file.
 */
172void euler_res_part_E_flux_opencl(void *f_x, void *f_y, void *f_z,
173 void *m_x, void *m_y, void *m_z,
174 void *rho_field, void *p, void * E, int *n) {
175 cl_int err;
176
/* The statement guarded by this test (source line 178) is not shown. */
177 if (euler_res_program == NULL)
179
181 "euler_res_part_E_flux_kernel", &err);
182 CL_CHECK(err);
183
/* Each buffer handle is passed by address with the size of a cl_mem. */
184 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &f_x));
185 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &f_y));
186 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &f_z));
187 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &m_x));
188 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &m_y));
189 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &m_z));
190 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &rho_field));
/*
 * NOTE(review): E is bound at index 7 and p at index 8, the reverse of
 * their order in this function's parameter list (p, then E). Confirm
 * this matches the argument order of euler_res_part_E_flux_kernel;
 * if the kernel expects p before E, these two indices are swapped.
 */
191 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &E));
192 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &p));
/* n is already a pointer, so the int it points to is what gets bound. */
193 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), n));
194
/*
 * Ceiling division: number of 256-item groups needed to cover *n items.
 * NOTE(review): (*n) + 256 - 1 and 256 * nb are evaluated in int before
 * the conversion to size_t -- confirm *n stays far enough below INT_MAX.
 */
195 const int nb = ((*n) + 256 - 1) / 256;
196 const size_t global_item_size = 256 * nb;
197 const size_t local_item_size = 256;
198
201 0, NULL, NULL));
203}
204
/*
 * Host-side launcher for the OpenCL kernel named
 * "euler_res_part_coef_mult_kernel". This is the tail of
 * euler_res_part_coef_mult_opencl; its first signature line (source
 * line 205) is absent from this listing.
 *
 * Binds rhs_rho, rhs_m_x, rhs_m_y, rhs_m_z, rhs_E and mult as
 * cl_mem-sized kernel arguments 0-5 and the int pointed to by n as
 * argument 6, then computes a global work size rounded up to a multiple
 * of 256 and a local work size of 256.
 *
 * NOTE(review): source lines 211, 213, 229-230 and 232 are also absent
 * here; presumably they JIT-build euler_res_program when it is NULL,
 * create `kernel` and enqueue it -- confirm against the real file.
 */
206 void *rhs_m_y, void *rhs_m_z,
207 void *rhs_E, void *mult, int *n) {
208 cl_int err;
209
/* The statement guarded by this test (source line 211) is not shown. */
210 if (euler_res_program == NULL)
212
214 "euler_res_part_coef_mult_kernel", &err);
215 CL_CHECK(err);
216
/* Each buffer handle is passed by address with the size of a cl_mem. */
217 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &rhs_rho));
218 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &rhs_m_x));
219 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &rhs_m_y));
220 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &rhs_m_z));
221 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &rhs_E));
222 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &mult));
/* n is already a pointer, so the int it points to is what gets bound. */
223 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), n));
224
/*
 * Ceiling division: number of 256-item groups needed to cover *n items.
 * NOTE(review): (*n) + 256 - 1 and 256 * nb are evaluated in int before
 * the conversion to size_t -- confirm *n stays far enough below INT_MAX.
 */
225 const int nb = ((*n) + 256 - 1) / 256;
226 const size_t global_item_size = 256 * nb;
227 const size_t local_item_size = 256;
228
231 0, NULL, NULL));
233}
234
/*
 * Host-side launcher for the OpenCL kernel named
 * "euler_res_part_rk_sum_kernel".
 *
 * Binds rho, m_x, m_y, m_z, E and k_rho_i, k_m_x_i, k_m_y_i, k_m_z_i,
 * k_E_i as cl_mem-sized kernel arguments 0-9, the `real` values pointed
 * to by dt and c as arguments 10 and 11, and the int pointed to by n as
 * argument 12. It then computes a global work size rounded up to a
 * multiple of 256 and a local work size of 256.
 *
 * NOTE(review): source lines 242, 244, 266-267 and 269 are absent from
 * this listing; presumably they JIT-build euler_res_program when it is
 * NULL, create `kernel` and enqueue it -- confirm against the real file.
 */
235void euler_res_part_rk_sum_opencl(void *rho, void *m_x, void *m_y, void *m_z,
236 void *E, void *k_rho_i, void *k_m_x_i,
237 void *k_m_y_i, void *k_m_z_i, void *k_E_i,
238 real *dt, real *c, int *n) {
239 cl_int err;
240
/* The statement guarded by this test (source line 242) is not shown. */
241 if (euler_res_program == NULL)
243
245 "euler_res_part_rk_sum_kernel", &err);
246 CL_CHECK(err);
247
/* Each buffer handle is passed by address with the size of a cl_mem. */
248 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &rho));
249 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &m_x));
250 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &m_y));
251 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &m_z));
252 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &E));
253 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &k_rho_i));
254 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &k_m_x_i));
255 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *) &k_m_y_i));
256 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *) &k_m_z_i));
257 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *) &k_E_i));
/*
 * dt, c and n are already pointers, so the values they point to are
 * bound. The kernel-side scalar type must be the same size as the
 * host's `real` -- TODO confirm against the kernel source.
 */
258 CL_CHECK(clSetKernelArg(kernel, 10, sizeof(real), dt));
259 CL_CHECK(clSetKernelArg(kernel, 11, sizeof(real), c));
260 CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), n));
261
/*
 * Ceiling division: number of 256-item groups needed to cover *n items.
 * NOTE(review): (*n) + 256 - 1 and 256 * nb are evaluated in int before
 * the conversion to size_t -- confirm *n stays far enough below INT_MAX.
 */
262 const int nb = ((*n) + 256 - 1) / 256;
263 const size_t global_item_size = 256 * nb;
264 const size_t local_item_size = 256;
265
268 0, NULL, NULL));
270}
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ dt
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
double real
void euler_res_part_coef_mult_opencl(void *rhs_rho, void *rhs_m_x, void *rhs_m_y, void *rhs_m_z, void *rhs_E, void *mult, int *n)
Definition euler_res.c:205
void euler_res_part_visc_opencl(void *rhs_u, void *Binv, void *lap_sol, void *effective_visc, int *n)
Definition euler_res.c:49
void euler_res_part_E_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, void *E, int *n)
Definition euler_res.c:172
void euler_res_part_my_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:108
void euler_res_part_mx_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:76
void euler_res_part_mz_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:140
void euler_res_part_rk_sum_opencl(void *rho, void *m_x, void *m_y, void *m_z, void *E, void *k_rho_i, void *k_m_x_i, void *k_m_y_i, void *k_m_z_i, void *k_E_i, real *dt, real *c, int *n)
Definition euler_res.c:235
void opencl_kernel_jit(const char *kernel, cl_program *program)
Definition jit.c:50
#define CL_CHECK(err)
Definition check.h:12
void * euler_res_program