Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
cuda_math.f90
Go to the documentation of this file.
! Copyright (c) 2025, The Neko Authors
! All rights reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
!
!   * Redistributions of source code must retain the above copyright
!     notice, this list of conditions and the following disclaimer.
!
!   * Redistributions in binary form must reproduce the above
!     copyright notice, this list of conditions and the following
!     disclaimer in the documentation and/or other materials provided
!     with the distribution.
!
!   * Neither the name of the authors nor the names of its
!     contributors may be used to endorse or promote products derived
!     from this software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!> Fortran interfaces to the C-bound `cuda_*` math routines.
!!
!! Every procedure in this module is an interface body bound with `bind(c)`
!! to the C symbol of the same name; the module holds no executable code.
!!
!! Calling convention, as fixed by the declarations below:
!!  - `type(c_ptr), value` dummies are handed to C as pointers by value.
!!  - Scalar dummies without the `value` attribute (`n`, `m`, `j`, `c`,
!!    `c1`, ...) are passed by reference, so the C routine receives their
!!    addresses.
!!  - Real scalars and function results use the kind `c_rp` from
!!    `num_types`.
!!
!! NOTE(review): the `_d` suffix presumably marks device-resident arrays and
!! `n` presumably is the number of entries to process; the mathematical
!! meaning of each routine is defined by the C/CUDA sources, not by this
!! file. No `intent` is declared because the C prototypes are not visible
!! from here.
module cuda_math
  ! `rp` is not referenced below; it stays in the only-list because this
  ! module is `public` and therefore re-exports it to its users.
  use num_types, only: rp, c_rp
  implicit none
  public

  interface

    !> Binds `cuda_copy`: pointers `a_d`, `b_d`; integer `n` by reference.
    subroutine cuda_copy(a_d, b_d, n) &
         bind(c, name = 'cuda_copy')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d
      integer(c_int) :: n
    end subroutine cuda_copy

    !> Binds `cuda_masked_copy`: pointers `a_d`, `b_d`, `mask_d`; integers
    !! `n` and `m` by reference.
    subroutine cuda_masked_copy(a_d, b_d, mask_d, n, m) &
         bind(c, name = 'cuda_masked_copy')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, mask_d
      integer(c_int) :: n, m
    end subroutine cuda_masked_copy

    !> Binds `cuda_masked_red_copy`: pointers `a_d`, `b_d`, `mask_d`;
    !! integers `n` and `m` by reference.
    subroutine cuda_masked_red_copy(a_d, b_d, mask_d, n, m) &
         bind(c, name = 'cuda_masked_red_copy')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, mask_d
      integer(c_int) :: n, m
    end subroutine cuda_masked_red_copy

    !> Binds `cuda_masked_atomic_reduction`: pointers `a_d`, `b_d`,
    !! `mask_d`; integers `n` and `m` by reference.
    subroutine cuda_masked_atomic_reduction(a_d, b_d, mask_d, n, m) &
         bind(c, name = 'cuda_masked_atomic_reduction')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, mask_d
      integer(c_int) :: n, m
    end subroutine cuda_masked_atomic_reduction

    !> Binds `cuda_cfill_mask`: pointers `a_d`, `mask_d`; real scalar `c`
    !! and integers `size`, `mask_size` by reference.
    !! The dummy name `size` coincides with the intrinsic of the same name;
    !! it is kept so that keyword calls continue to work.
    subroutine cuda_cfill_mask(a_d, c, size, mask_d, mask_size) &
         bind(c, name = 'cuda_cfill_mask')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d
      real(c_rp) :: c
      integer(c_int) :: size
      type(c_ptr), value :: mask_d
      integer(c_int) :: mask_size
    end subroutine cuda_cfill_mask

    !> Binds `cuda_cmult`: pointer `a_d`; real scalar `c` and integer `n`
    !! by reference.
    subroutine cuda_cmult(a_d, c, n) &
         bind(c, name = 'cuda_cmult')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d
      real(c_rp) :: c
      integer(c_int) :: n
    end subroutine cuda_cmult

    !> Binds `cuda_cmult2`: pointers `a_d`, `b_d`; real scalar `c` and
    !! integer `n` by reference.
    subroutine cuda_cmult2(a_d, b_d, c, n) &
         bind(c, name = 'cuda_cmult2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d
      real(c_rp) :: c
      integer(c_int) :: n
    end subroutine cuda_cmult2

    !> Binds `cuda_cadd`: pointer `a_d`; real scalar `c` and integer `n`
    !! by reference.
    subroutine cuda_cadd(a_d, c, n) &
         bind(c, name = 'cuda_cadd')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d
      real(c_rp) :: c
      integer(c_int) :: n
    end subroutine cuda_cadd

    !> Binds `cuda_cadd2`: pointers `a_d`, `b_d`; real scalar `c` and
    !! integer `n` by reference.
    subroutine cuda_cadd2(a_d, b_d, c, n) &
         bind(c, name = 'cuda_cadd2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d
      type(c_ptr), value :: b_d
      real(c_rp) :: c
      integer(c_int) :: n
    end subroutine cuda_cadd2

    !> Binds `cuda_cfill`: pointer `a_d`; real scalar `c` and integer `n`
    !! by reference.
    subroutine cuda_cfill(a_d, c, n) &
         bind(c, name = 'cuda_cfill')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d
      real(c_rp) :: c
      integer(c_int) :: n
    end subroutine cuda_cfill

    !> Binds `cuda_rzero`: pointer `a_d`; integer `n` by reference.
    subroutine cuda_rzero(a_d, n) &
         bind(c, name = 'cuda_rzero')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d
      integer(c_int) :: n
    end subroutine cuda_rzero

    !> Binds `cuda_add2`: pointers `a_d`, `b_d`; integer `n` by reference.
    subroutine cuda_add2(a_d, b_d, n) &
         bind(c, name = 'cuda_add2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d
      integer(c_int) :: n
    end subroutine cuda_add2

    !> Binds `cuda_add4`: pointers `a_d`, `b_d`, `c_d`, `d_d`; integer `n`
    !! by reference.
    subroutine cuda_add4(a_d, b_d, c_d, d_d, n) &
         bind(c, name = 'cuda_add4')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d, d_d
      integer(c_int) :: n
    end subroutine cuda_add4

    !> Binds `cuda_add2s1`: pointers `a_d`, `b_d`; real scalar `c1` and
    !! integer `n` by reference.
    subroutine cuda_add2s1(a_d, b_d, c1, n) &
         bind(c, name = 'cuda_add2s1')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d
      real(c_rp) :: c1
      integer(c_int) :: n
    end subroutine cuda_add2s1

    !> Binds `cuda_add2s2`: pointers `a_d`, `b_d`; real scalar `c1` and
    !! integer `n` by reference.
    subroutine cuda_add2s2(a_d, b_d, c1, n) &
         bind(c, name = 'cuda_add2s2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d
      real(c_rp) :: c1
      integer(c_int) :: n
    end subroutine cuda_add2s2

    !> Binds `cuda_addsqr2s2`: pointers `a_d`, `b_d`; real scalar `c1` and
    !! integer `n` by reference.
    subroutine cuda_addsqr2s2(a_d, b_d, c1, n) &
         bind(c, name = 'cuda_addsqr2s2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d
      real(c_rp) :: c1
      integer(c_int) :: n
    end subroutine cuda_addsqr2s2

    !> Binds `cuda_add3s2`: pointers `a_d`, `b_d`, `c_d`; real scalars `c1`,
    !! `c2` and integer `n` by reference.
    subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n) &
         bind(c, name = 'cuda_add3s2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d, c_d
      real(c_rp) :: c1, c2
      integer(c_int) :: n
    end subroutine cuda_add3s2

    !> Binds `cuda_invcol1`: pointer `a_d`; integer `n` by reference.
    subroutine cuda_invcol1(a_d, n) &
         bind(c, name = 'cuda_invcol1')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d
      integer(c_int) :: n
    end subroutine cuda_invcol1

    !> Binds `cuda_invcol2`: pointers `a_d`, `b_d`; integer `n` by
    !! reference.
    subroutine cuda_invcol2(a_d, b_d, n) &
         bind(c, name = 'cuda_invcol2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d
      integer(c_int) :: n
    end subroutine cuda_invcol2

    !> Binds `cuda_col2`: pointers `a_d`, `b_d`; integer `n` by reference.
    subroutine cuda_col2(a_d, b_d, n) &
         bind(c, name = 'cuda_col2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d
      integer(c_int) :: n
    end subroutine cuda_col2

    !> Binds `cuda_col3`: pointers `a_d`, `b_d`, `c_d`; integer `n` by
    !! reference.
    subroutine cuda_col3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_col3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d
      integer(c_int) :: n
    end subroutine cuda_col3

    !> Binds `cuda_subcol3`: pointers `a_d`, `b_d`, `c_d`; integer `n` by
    !! reference.
    subroutine cuda_subcol3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_subcol3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d
      integer(c_int) :: n
    end subroutine cuda_subcol3

    !> Binds `cuda_sub2`: pointers `a_d`, `b_d`; integer `n` by reference.
    subroutine cuda_sub2(a_d, b_d, n) &
         bind(c, name = 'cuda_sub2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d
      integer(c_int) :: n
    end subroutine cuda_sub2

    !> Binds `cuda_sub3`: pointers `a_d`, `b_d`, `c_d`; integer `n` by
    !! reference.
    subroutine cuda_sub3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_sub3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d
      integer(c_int) :: n
    end subroutine cuda_sub3

    !> Binds `cuda_add3`: pointers `a_d`, `b_d`, `c_d`; integer `n` by
    !! reference.
    subroutine cuda_add3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_add3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d
      integer(c_int) :: n
    end subroutine cuda_add3

    !> Binds `cuda_addcol3`: pointers `a_d`, `b_d`, `c_d`; integer `n` by
    !! reference.
    subroutine cuda_addcol3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_addcol3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d
      integer(c_int) :: n
    end subroutine cuda_addcol3

    !> Binds `cuda_addcol4`: pointers `a_d`, `b_d`, `c_d`, `d_d`; integer
    !! `n` by reference.
    subroutine cuda_addcol4(a_d, b_d, c_d, d_d, n) &
         bind(c, name = 'cuda_addcol4')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d, d_d
      integer(c_int) :: n
    end subroutine cuda_addcol4

    !> Binds `cuda_vdot3`: pointer `dot_d` plus the pointer triples
    !! `u1_d..u3_d` and `v1_d..v3_d`; integer `n` by reference.
    subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
         bind(c, name = 'cuda_vdot3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
      integer(c_int) :: n
    end subroutine cuda_vdot3

    !> Binds `cuda_vcross`: the pointer triples `u1_d..u3_d`, `v1_d..v3_d`
    !! and `w1_d..w3_d`; integer `n` by reference.
    subroutine cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
         w1_d, w2_d, w3_d, n) &
         bind(c, name = 'cuda_vcross')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: u1_d, u2_d, u3_d
      type(c_ptr), value :: v1_d, v2_d, v3_d
      type(c_ptr), value :: w1_d, w2_d, w3_d
      integer(c_int) :: n
    end subroutine cuda_vcross

    !> Binds the function `cuda_vlsc3`, which returns a `real(c_rp)` scalar:
    !! pointers `u_d`, `v_d`, `w_d`; integer `n` by reference.
    real(c_rp) function cuda_vlsc3(u_d, v_d, w_d, n) &
         bind(c, name = 'cuda_vlsc3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: u_d, v_d, w_d
      integer(c_int) :: n
    end function cuda_vlsc3

    !> Binds `cuda_add2s2_many`: pointers `y_d`, `x_d_d`, `a_d`; integers
    !! `j` and `n` by reference.
    subroutine cuda_add2s2_many(y_d, x_d_d, a_d, j, n) &
         bind(c, name = 'cuda_add2s2_many')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: y_d, x_d_d, a_d
      integer(c_int) :: j, n
    end subroutine cuda_add2s2_many

    !> Binds the function `cuda_glsc3`, which returns a `real(c_rp)` scalar:
    !! pointers `a_d`, `b_d`, `c_d`; integer `n` by reference.
    real(c_rp) function cuda_glsc3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_glsc3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d, c_d
      integer(c_int) :: n
    end function cuda_glsc3

    !> Binds `cuda_glsc3_many`: pointers `w_d`, `v_d_d`, `mult_d`; integers
    !! `j` and `n` by reference. Unlike the pointer arguments, `h` is an
    !! ordinary explicit-shape `real(c_rp)` array of extent `j`.
    subroutine cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n) &
         bind(c, name = 'cuda_glsc3_many')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: w_d, v_d_d, mult_d
      integer(c_int) :: j, n
      real(c_rp) :: h(j)
    end subroutine cuda_glsc3_many

    !> Binds the function `cuda_glsc2`, which returns a `real(c_rp)` scalar:
    !! pointers `a_d`, `b_d`; integer `n` by reference.
    real(c_rp) function cuda_glsc2(a_d, b_d, n) &
         bind(c, name = 'cuda_glsc2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d
      integer(c_int) :: n
    end function cuda_glsc2

    !> Binds the function `cuda_glsum`, which returns a `real(c_rp)` scalar:
    !! pointer `a_d`; integer `n` by reference.
    real(c_rp) function cuda_glsum(a_d, n) &
         bind(c, name = 'cuda_glsum')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d
      integer(c_int) :: n
    end function cuda_glsum

    !> Binds `cuda_absval`: pointer `a_d`; integer `n` by reference.
    subroutine cuda_absval(a_d, n) &
         bind(c, name = 'cuda_absval')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d
      integer(c_int) :: n
    end subroutine cuda_absval

  end interface

  ! ========================================================================== !
  ! Interfaces for the pointwise operations.
  !
  ! In the `*_sca2` / `*_sca3` bindings the dummy `c_d` is a `real(c_rp)`
  ! scalar passed by reference, not a `c_ptr`, despite its `_d` suffix. The
  ! name is kept so that keyword calls continue to work.

  interface

    !> Binds `cuda_pwmax_vec2`: pointers `a_d`, `b_d`; integer `n` by
    !! reference.
    subroutine cuda_pwmax_vec2(a_d, b_d, n) &
         bind(c, name = 'cuda_pwmax_vec2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d
      integer(c_int) :: n
    end subroutine cuda_pwmax_vec2

    !> Binds `cuda_pwmax_vec3`: pointers `a_d`, `b_d`, `c_d`; integer `n`
    !! by reference.
    subroutine cuda_pwmax_vec3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_pwmax_vec3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d
      integer(c_int) :: n
    end subroutine cuda_pwmax_vec3

    !> Binds `cuda_pwmax_sca2`: pointer `a_d`; real scalar `c_d` and
    !! integer `n` by reference.
    subroutine cuda_pwmax_sca2(a_d, c_d, n) &
         bind(c, name = 'cuda_pwmax_sca2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d
      real(c_rp) :: c_d
      integer(c_int) :: n
    end subroutine cuda_pwmax_sca2

    !> Binds `cuda_pwmax_sca3`: pointers `a_d`, `b_d`; real scalar `c_d`
    !! and integer `n` by reference.
    subroutine cuda_pwmax_sca3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_pwmax_sca3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d
      real(c_rp) :: c_d
      integer(c_int) :: n
    end subroutine cuda_pwmax_sca3

    !> Binds `cuda_pwmin_vec2`: pointers `a_d`, `b_d`; integer `n` by
    !! reference.
    subroutine cuda_pwmin_vec2(a_d, b_d, n) &
         bind(c, name = 'cuda_pwmin_vec2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d
      integer(c_int) :: n
    end subroutine cuda_pwmin_vec2

    !> Binds `cuda_pwmin_vec3`: pointers `a_d`, `b_d`, `c_d`; integer `n`
    !! by reference.
    subroutine cuda_pwmin_vec3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_pwmin_vec3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      type(c_ptr), value :: a_d, b_d, c_d
      integer(c_int) :: n
    end subroutine cuda_pwmin_vec3

    !> Binds `cuda_pwmin_sca2`: pointer `a_d`; real scalar `c_d` and
    !! integer `n` by reference.
    subroutine cuda_pwmin_sca2(a_d, c_d, n) &
         bind(c, name = 'cuda_pwmin_sca2')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d
      real(c_rp) :: c_d
      integer(c_int) :: n
    end subroutine cuda_pwmin_sca2

    !> Binds `cuda_pwmin_sca3`: pointers `a_d`, `b_d`; real scalar `c_d`
    !! and integer `n` by reference.
    subroutine cuda_pwmin_sca3(a_d, b_d, c_d, n) &
         bind(c, name = 'cuda_pwmin_sca3')
      use, intrinsic :: iso_c_binding, only: c_int, c_ptr
      import c_rp
      type(c_ptr), value :: a_d, b_d
      real(c_rp) :: c_d
      integer(c_int) :: n
    end subroutine cuda_pwmin_sca3

  end interface
end module cuda_math
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12