/*
 * Neko 1.99.2 -- A portable framework for high-order spectral element
 * flow simulations.
 *
 * File: mathops.c
 */
/*
 Copyright (c) 2021-2022, The Neko Authors
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:

   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

   * Redistributions in binary form must reproduce the above
     copyright notice, this list of conditions and the following
     disclaimer in the documentation and/or other materials provided
     with the distribution.

   * Neither the name of the authors nor the names of its
     contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
*/
34
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

#include <stdio.h>
#include <device/device_config.h>
#include <device/opencl/jit.h>
#include <device/opencl/prgm_lib.h>
#include <device/opencl/check.h>

#include "mathops_kernel.cl.h"
48
50void opencl_opchsign(void *a1, void *a2, void *a3, int *gdim, int *n) {
51 cl_int err;
52
53 if (mathops_program == NULL)
55
56 cl_kernel kernel = clCreateKernel(mathops_program, "opchsign_kernel", &err);
58
59 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &a1));
60 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &a2));
61 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &a3));
62 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), gdim));
63 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), n));
64
65 const int nb = ((*n) + 256 - 1) / 256;
66 const size_t global_item_size = 256 * nb;
67 const size_t local_item_size = 256;
68
71 0, NULL, NULL));
73}
74
76void opencl_opcolv(void *a1, void *a2, void *a3, void *c, int *gdim, int *n) {
77 cl_int err;
78
79 if (mathops_program == NULL)
81
84
85 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &a1));
86 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &a2));
87 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &a3));
88 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &c));
89 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), gdim));
90 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(int), n));
91
92 const int nb = ((*n) + 256 - 1) / 256;
93 const size_t global_item_size = 256 * nb;
94 const size_t local_item_size = 256;
95
98 0, NULL, NULL));
100}
101
103void opencl_opcolv3c(void *a1, void *a2, void *a3,
104 void *b1, void *b2, void *b3,
105 void *c, real *d, int *gdim, int *n) {
106 cl_int err;
107
108 if (mathops_program == NULL)
110
111 cl_kernel kernel = clCreateKernel(mathops_program, "opcolv3c_kernel", &err);
112 CL_CHECK(err);
113
114 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &a1));
115 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &a2));
116 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &a3));
117 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &b1));
118 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &b2));
119 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &b3));
120 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &c));
121 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(real), d));
122 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), gdim));
123 CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), n));
124
125 const int nb = ((*n) + 256 - 1) / 256;
126 const size_t global_item_size = 256 * nb;
127 const size_t local_item_size = 256;
128
131 0, NULL, NULL));
133}
134
136void opencl_opadd2cm(void *a1, void *a2, void *a3,
137 void *b1, void *b2, void *b3,
138 real *c, int *gdim, int *n) {
139 cl_int err;
140
141 if (mathops_program == NULL)
143
144 cl_kernel kernel = clCreateKernel(mathops_program, "opadd2cm_kernel", &err);
145 CL_CHECK(err);
146
147 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &a1));
148 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &a2));
149 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &a3));
150 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &b1));
151 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &b2));
152 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &b3));
153 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(real), c));
154 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), gdim));
155 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), n));
156
157 const int nb = ((*n) + 256 - 1) / 256;
158 const size_t global_item_size = 256 * nb;
159 const size_t local_item_size = 256;
160
163 0, NULL, NULL));
165}
166
168void opencl_opadd2col(void *a1, void *a2, void *a3,
169 void *b1, void *b2, void *b3,
170 void *c, int *gdim, int *n) {
171 cl_int err;
172
173 if (mathops_program == NULL)
175
176 cl_kernel kernel = clCreateKernel(mathops_program, "opadd2col_kernel", &err);
177 CL_CHECK(err);
178
179 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &a1));
180 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &a2));
181 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &a3));
182 CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &b1));
183 CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &b2));
184 CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *) &b3));
185 CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *) &c));
186 CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), gdim));
187 CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), n));
188
189 const int nb = ((*n) + 256 - 1) / 256;
190 const size_t global_item_size = 256 * nb;
191 const size_t local_item_size = 256;
192
195 0, NULL, NULL));
197}
/*
 * Cross-references listed by the generated documentation for this file:
 *
 *   __global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk,
 *       T *__restrict__ x, const T g, const int m)
 *   double real
 *   void opencl_kernel_jit(const char *kernel, cl_program *program)
 *       Definition: jit.c:50
 *   void opencl_opcolv3c(void *a1, void *a2, void *a3, void *b1, void *b2,
 *       void *b3, void *c, real *d, int *gdim, int *n)
 *       Definition: mathops.c:103
 *   void opencl_opadd2cm(void *a1, void *a2, void *a3, void *b1, void *b2,
 *       void *b3, real *c, int *gdim, int *n)
 *       Definition: mathops.c:136
 *   void opencl_opcolv(void *a1, void *a2, void *a3, void *c, int *gdim, int *n)
 *       Definition: mathops.c:76
 *   void opencl_opchsign(void *a1, void *a2, void *a3, int *gdim, int *n)
 *       Definition: mathops.c:50
 *   void opencl_opadd2col(void *a1, void *a2, void *a3, void *b1, void *b2,
 *       void *b3, void *c, int *gdim, int *n)
 *       Definition: mathops.c:168
 *   #define CL_CHECK(err)
 *       Definition: check.h:12
 *   void *mathops_program
 */