Neko  0.8.99
A portable framework for high-order spectral element flow simulations
mathops_kernel.h
Go to the documentation of this file.
1 #ifndef __MATH_MATHOPS_KERNEL_H__
2 #define __MATH_MATHOPS_KERNEL_H__
3 /*
4  Copyright (c) 2021, The Neko Authors
5  All rights reserved.
6 
7  Redistribution and use in source and binary forms, with or without
8  modification, are permitted provided that the following conditions
9  are met:
10 
11  * Redistributions of source code must retain the above copyright
12  notice, this list of conditions and the following disclaimer.
13 
14  * Redistributions in binary form must reproduce the above
15  copyright notice, this list of conditions and the following
16  disclaimer in the documentation and/or other materials provided
17  with the distribution.
18 
19  * Neither the name of the authors nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  POSSIBILITY OF SUCH DAMAGE.
35 */
36 
40 template< typename T>
41 __global__ void opchsign_kernel(T * __restrict__ a1,
42  T * __restrict__ a2,
43  T * __restrict__ a3,
44  const int gdim,
45  const int n) {
46 
47  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
48  const int str = blockDim.x * gridDim.x;
49 
50  if (gdim == 3) {
51  for (int i = idx; i < n; i += str) {
52  a1[i] = -a1[i];
53  a2[i] = -a2[i];
54  a3[i] = -a3[i];
55  }
56  }
57  else {
58  for (int i = idx; i < n; i += str) {
59  a1[i] = -a1[i];
60  a2[i] = -a2[i];
61  }
62  }
63 
64 }
65 
69 template< typename T>
70 __global__ void opcolv_kernel(T * __restrict__ a1,
71  T * __restrict__ a2,
72  T * __restrict__ a3,
73  const T * __restrict__ c,
74  const int gdim,
75  const int n) {
76 
77  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
78  const int str = blockDim.x * gridDim.x;
79 
80  if (gdim == 3) {
81  for (int i = idx; i < n; i += str) {
82  a1[i] = a1[i] * c[i];
83  a2[i] = a2[i] * c[i];
84  a3[i] = a3[i] * c[i];
85  }
86  }
87  else {
88  for (int i = idx; i < n; i += str) {
89  a1[i] = a1[i] * c[i];
90  a2[i] = a2[i] * c[i];
91  }
92  }
93 
94 }
95 
99 template< typename T>
100 __global__ void opcolv3c_kernel(T * __restrict__ a1,
101  T * __restrict__ a2,
102  T * __restrict__ a3,
103  const T * __restrict__ b1,
104  const T * __restrict__ b2,
105  const T * __restrict__ b3,
106  const T * __restrict__ c,
107  const T d,
108  const int gdim,
109  const int n) {
110 
111  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
112  const int str = blockDim.x * gridDim.x;
113 
114  if (gdim == 3) {
115  for (int i = idx; i < n; i += str) {
116  a1[i] = b1[i] * c[i] * d;
117  a2[i] = b2[i] * c[i] * d;
118  a3[i] = b3[i] * c[i] * d;
119  }
120  }
121  else {
122  for (int i = idx; i < n; i += str) {
123  a1[i] = b1[i] * c[i] * d;
124  a2[i] = b2[i] * c[i] * d;
125  }
126  }
127 
128 }
129 
133 template< typename T>
134 __global__ void opadd2cm_kernel(T * __restrict__ a1,
135  T * __restrict__ a2,
136  T * __restrict__ a3,
137  const T * __restrict__ b1,
138  const T * __restrict__ b2,
139  const T * __restrict__ b3,
140  const T c,
141  const int gdim,
142  const int n) {
143 
144  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
145  const int str = blockDim.x * gridDim.x;
146 
147  if (gdim == 3) {
148  for (int i = idx; i < n; i += str) {
149  a1[i] = a1[i] + b1[i] * c;
150  a2[i] = a2[i] + b2[i] * c;
151  a3[i] = a3[i] + b3[i] * c;
152  }
153  }
154  else {
155  for (int i = idx; i < n; i += str) {
156  a1[i] = a1[i] + b1[i] * c;
157  a2[i] = a2[i] + b2[i] * c;
158  }
159  }
160 
161 }
162 
163 
167 template< typename T>
168 __global__ void opadd2col_kernel(T * __restrict__ a1,
169  T * __restrict__ a2,
170  T * __restrict__ a3,
171  const T * __restrict__ b1,
172  const T * __restrict__ b2,
173  const T * __restrict__ b3,
174  const T * __restrict__ c,
175  const int gdim,
176  const int n) {
177 
178  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
179  const int str = blockDim.x * gridDim.x;
180 
181  if (gdim == 3) {
182  for (int i = idx; i < n; i += str) {
183  a1[i] = a1[i] + b1[i] * c[i];
184  a2[i] = a2[i] + b2[i] * c[i];
185  a3[i] = a3[i] + b3[i] * c[i];
186  }
187  }
188  else {
189  for (int i = idx; i < n; i += str) {
190  a1[i] = a1[i] + b1[i] * c[i];
191  a2[i] = a2[i] + b2[i] * c[i];
192  }
193  }
194 
195 }
196 
197 
198 #endif // __MATH_MATHOPS_KERNEL_H__
const int i
Definition: cdtp_kernel.h:128
__global__ void opcolv_kernel(T *__restrict__ a1, T *__restrict__ a2, T *__restrict__ a3, const T *__restrict__ c, const int gdim, const int n)
__global__ void opchsign_kernel(T *__restrict__ a1, T *__restrict__ a2, T *__restrict__ a3, const int gdim, const int n)
__global__ void opadd2col_kernel(T *__restrict__ a1, T *__restrict__ a2, T *__restrict__ a3, const T *__restrict__ b1, const T *__restrict__ b2, const T *__restrict__ b3, const T *__restrict__ c, const int gdim, const int n)
__global__ void opcolv3c_kernel(T *__restrict__ a1, T *__restrict__ a2, T *__restrict__ a3, const T *__restrict__ b1, const T *__restrict__ b2, const T *__restrict__ b3, const T *__restrict__ c, const T d, const int gdim, const int n)
__global__ void opadd2cm_kernel(T *__restrict__ a1, T *__restrict__ a2, T *__restrict__ a3, const T *__restrict__ b1, const T *__restrict__ b2, const T *__restrict__ b3, const T c, const int gdim, const int n)