Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cuda_math.f90
Go to the documentation of this file.
1! Copyright (c) 2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
!> Explicit Fortran interfaces for the C-bound `cuda_*` routines declared below.
! The opening module statement is restored here; the matching
! `end module cuda_math` closes the unit at the end of the file.
module cuda_math
  use num_types, only : rp, c_rp
  implicit none
  public
37
38 interface
!> Explicit interface for the C routine exported as `cuda_copy`.
!! The three handles are C pointers passed by value; `n` carries no `value`
!! attribute, so the C side receives its address.
subroutine cuda_copy(a_d, b_d, n, strm) bind(c, name = 'cuda_copy')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: strm
  type(c_ptr), value :: a_d, b_d
end subroutine cuda_copy
45
!> Explicit interface for the C routine exported as `cuda_masked_copy`.
!! `a_d`, `b_d`, `mask_d` and `strm` are C pointers passed by value; the two
!! integer counts are passed by reference.
subroutine cuda_masked_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_copy')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: mask_d, strm
end subroutine cuda_masked_copy
52
!> Explicit interface for the C routine exported as
!! `cuda_masked_gather_copy`.
!! Pointer arguments are passed by value, `n` and `n_mask` by reference.
subroutine cuda_masked_gather_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_gather_copy')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: mask_d, strm
end subroutine cuda_masked_gather_copy
59
!> Explicit interface for the C routine exported as
!! `cuda_masked_gather_copy_aligned`.
!! Pointer arguments are passed by value, `n` and `n_mask` by reference.
subroutine cuda_masked_gather_copy_aligned(a_d, b_d, mask_d, n, n_mask, &
    strm) bind(c, name = 'cuda_masked_gather_copy_aligned')
  use, intrinsic :: iso_c_binding, only : c_int, c_ptr
  type(c_ptr), value :: a_d, b_d, mask_d, strm
  integer(c_int) :: n, n_mask
  ! The interface body must be closed before the next one begins.
end subroutine cuda_masked_gather_copy_aligned
66
!> Explicit interface for the C routine exported as
!! `cuda_masked_scatter_copy`.
!! Pointer arguments are passed by value, `n` and `n_mask` by reference.
subroutine cuda_masked_scatter_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_scatter_copy')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: mask_d, strm
end subroutine cuda_masked_scatter_copy
73
!> Explicit interface for the C routine exported as
!! `cuda_masked_atomic_reduction`.
!! Pointer arguments are passed by value; the integers `n` and `m` are
!! passed by reference.
subroutine cuda_masked_atomic_reduction(a_d, b_d, mask_d, n, m, strm) &
    bind(c, name = 'cuda_masked_atomic_reduction')
  use, intrinsic :: iso_c_binding, only : c_int, c_ptr
  integer(c_int) :: n
  integer(c_int) :: m
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: mask_d, strm
end subroutine cuda_masked_atomic_reduction
80
!> Explicit interface for the C routine exported as `cuda_cfill_mask`.
!! `a_d`, `mask_d` and `strm` are C pointers passed by value; the scalar `c`
!! (kind `c_rp`) and both integer counts are passed by reference.
subroutine cuda_cfill_mask(a_d, c, n, mask_d, n_mask, strm) &
    bind(c, name = 'cuda_cfill_mask')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c
  integer(c_int) :: n, n_mask
  type(c_ptr), value :: a_d, mask_d, strm
end subroutine cuda_cfill_mask
91
!> Explicit interface for the C routine exported as `cuda_cmult`.
!! `a_d` and `strm` are C pointers passed by value; the scalar `c` and the
!! count `n` are passed by reference.
subroutine cuda_cmult(a_d, c, n, strm) bind(c, name = 'cuda_cmult')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_cmult
100
!> Explicit interface for the C routine exported as `cuda_cmult2`.
!! Pointer arguments are passed by value; `c` (kind `c_rp`) and `n` are
!! passed by reference.
subroutine cuda_cmult2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cmult2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_cmult2
109
!> Explicit interface for the C routine exported as `cuda_cdiv`.
!! `a_d` and `strm` are C pointers passed by value; the scalar `c` and the
!! count `n` are passed by reference.
subroutine cuda_cdiv(a_d, c, n, strm) bind(c, name = 'cuda_cdiv')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_cdiv
118
!> Explicit interface for the C routine exported as `cuda_cdiv2`.
!! Pointer arguments are passed by value; `c` (kind `c_rp`) and `n` are
!! passed by reference.
subroutine cuda_cdiv2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cdiv2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_cdiv2
127
!> Explicit interface for the C routine exported as `cuda_radd`.
!! `a_d` and `strm` are C pointers passed by value; the real scalar `c` and
!! the count `n` are passed by reference.
subroutine cuda_radd(a_d, c, n, strm) bind(c, name = 'cuda_radd')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_radd
136
!> Explicit interface for the C routine exported as `cuda_cadd2`.
!! Pointer arguments are passed by value; `c` (kind `c_rp`) and `n` are
!! passed by reference.
subroutine cuda_cadd2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cadd2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, strm
end subroutine cuda_cadd2
147
!> Explicit interface for the C routine exported as `cuda_cfill`.
!! `a_d` and `strm` are C pointers passed by value; the scalar `c` and the
!! count `n` are passed by reference.
subroutine cuda_cfill(a_d, c, n, strm) bind(c, name = 'cuda_cfill')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_cfill
156
!> Explicit interface for the C routine exported as `cuda_rzero`.
!! `a_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_rzero(a_d, n, strm) bind(c, name = 'cuda_rzero')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_rzero
163
!> Explicit interface for the C routine exported as `cuda_add2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_add2(a_d, b_d, n, strm) &
    bind(c, name = 'cuda_add2')
  ! Only iso_c_binding kinds are needed: no real-kind entity is declared.
  use, intrinsic :: iso_c_binding, only : c_int, c_ptr
  type(c_ptr), value :: a_d, b_d, strm
  integer(c_int) :: n
end subroutine cuda_add2
171
!> Explicit interface for the C routine exported as `cuda_add4`.
!! The four `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_add4(a_d, b_d, c_d, d_d, n, strm) &
    bind(c, name = 'cuda_add4')
  ! Only iso_c_binding kinds are needed: no real-kind entity is declared.
  use, intrinsic :: iso_c_binding, only : c_int, c_ptr
  type(c_ptr), value :: a_d, b_d, c_d, d_d, strm
  integer(c_int) :: n
end subroutine cuda_add4
179
!> Explicit interface for the C routine exported as `cuda_add2s1`.
!! Pointer arguments are passed by value; the scalar `c1` (kind `c_rp`) and
!! the count `n` are passed by reference.
subroutine cuda_add2s1(a_d, b_d, c1, n, strm) bind(c, name = 'cuda_add2s1')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_add2s1
188
!> Explicit interface for the C routine exported as `cuda_add2s2`.
!! Pointer arguments are passed by value; the scalar `c1` (kind `c_rp`) and
!! the count `n` are passed by reference.
subroutine cuda_add2s2(a_d, b_d, c1, n, strm) bind(c, name = 'cuda_add2s2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_add2s2
197
!> Explicit interface for the C routine exported as `cuda_addsqr2s2`.
!! Pointer arguments are passed by value; the scalar `c1` (kind `c_rp`) and
!! the count `n` are passed by reference.
subroutine cuda_addsqr2s2(a_d, b_d, c1, n, strm) &
    bind(c, name = 'cuda_addsqr2s2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_addsqr2s2
206
!> Explicit interface for the C routine exported as `cuda_add3s2`.
!! Pointer arguments are passed by value; the scalars `c1`, `c2` (kind
!! `c_rp`) and the count `n` are passed by reference.
subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n, strm) &
    bind(c, name = 'cuda_add3s2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c1
  real(c_rp) :: c2
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_add3s2
215
!> Explicit interface for the C routine exported as `cuda_add4s3`.
!! Pointer arguments are passed by value; the scalars `c1`..`c3` (kind
!! `c_rp`) and the count `n` are passed by reference.
subroutine cuda_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm) &
    bind(c, name = 'cuda_add4s3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c1
  real(c_rp) :: c2
  real(c_rp) :: c3
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d, d_d
  type(c_ptr), value :: strm
end subroutine cuda_add4s3
224
!> Explicit interface for the C routine exported as `cuda_add5s4`.
!! Pointer arguments are passed by value; the scalars `c1`..`c4` (kind
!! `c_rp`) and the count `n` are passed by reference.
subroutine cuda_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm) &
    bind(c, name = 'cuda_add5s4')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c1, c2
  real(c_rp) :: c3, c4
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d, d_d, e_d
  type(c_ptr), value :: strm
end subroutine cuda_add5s4
233
!> Explicit interface for the C routine exported as `cuda_invcol1`.
!! `a_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_invcol1(a_d, n, strm) bind(c, name = 'cuda_invcol1')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_invcol1
240
!> Explicit interface for the C routine exported as `cuda_invcol2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_invcol2(a_d, b_d, n, strm) bind(c, name = 'cuda_invcol2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_invcol2
247
!> Explicit interface for the C routine exported as `cuda_invcol3`.
!! The `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_invcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_invcol3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_invcol3
254
!> Explicit interface for the C routine exported as `cuda_col2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_col2(a_d, b_d, n, strm) bind(c, name = 'cuda_col2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_col2
261
!> Explicit interface for the C routine exported as `cuda_col3`.
!! The `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_col3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_col3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_col3
268
!> Explicit interface for the C routine exported as `cuda_subcol3`.
!! The `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_subcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_subcol3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_subcol3
275
!> Explicit interface for the C routine exported as `cuda_sub2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_sub2(a_d, b_d, n, strm) bind(c, name = 'cuda_sub2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_sub2
282
!> Explicit interface for the C routine exported as `cuda_sub3`.
!! The `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_sub3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_sub3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_sub3
289
!> Explicit interface for the C routine exported as `cuda_add3`.
!! The `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_add3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_add3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_add3
296
!> Explicit interface for the C routine exported as `cuda_addcol3`.
!! The `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_addcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_addcol3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_addcol3
303
!> Explicit interface for the C routine exported as `cuda_addcol4`.
!! The four `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_addcol4(a_d, b_d, c_d, d_d, n, strm) &
    bind(c, name = 'cuda_addcol4')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d, d_d
  type(c_ptr), value :: strm
end subroutine cuda_addcol4
310
!> Explicit interface for the C routine exported as `cuda_addcol3s2`.
!! Pointer arguments are passed by value; the scalar `s` (kind `c_rp`) and
!! the count `n` are passed by reference.
subroutine cuda_addcol3s2(a_d, b_d, c_d, s, n, strm) &
    bind(c, name = 'cuda_addcol3s2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: s
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_addcol3s2
319
!> Explicit interface for the C routine exported as `cuda_vdot3`.
!! `dot_d`, the `u*_d` / `v*_d` handles and `strm` are C pointers passed by
!! value; `n` is passed by reference.
subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm) &
    bind(c, name = 'cuda_vdot3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: dot_d
  type(c_ptr), value :: u1_d, u2_d, u3_d
  type(c_ptr), value :: v1_d, v2_d, v3_d
  type(c_ptr), value :: strm
end subroutine cuda_vdot3
326
!> Explicit interface for the C routine exported as `cuda_vcross`.
!! All nine `u*_d` / `v*_d` / `w*_d` handles and `strm` are C pointers
!! passed by value; `n` is passed by reference.
subroutine cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
    w1_d, w2_d, w3_d, n, strm) bind(c, name = 'cuda_vcross')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
  type(c_ptr), value :: w1_d, w2_d, w3_d, strm
end subroutine cuda_vcross
337
!> Explicit interface for the C function exported as `cuda_vlsc3`.
!! Returns a real of kind `c_rp`. Pointer arguments are passed by value;
!! `n` is passed by reference.
function cuda_vlsc3(u_d, v_d, w_d, n, strm) result(res) &
    bind(c, name = 'cuda_vlsc3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  integer(c_int) :: n
  type(c_ptr), value :: u_d, v_d, w_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_vlsc3
345
!> Explicit interface for the C routine exported as `cuda_add2s2_many`.
!! `y_d`, `x_d_d`, `a_d` and `strm` are C pointers passed by value; the
!! integers `j` and `n` are passed by reference.
subroutine cuda_add2s2_many(y_d, x_d_d, a_d, j, n, strm) &
    bind(c, name = 'cuda_add2s2_many')
  ! Only iso_c_binding kinds are needed: no real-kind entity is declared.
  use, intrinsic :: iso_c_binding, only : c_int, c_ptr
  type(c_ptr), value :: y_d, x_d_d, a_d, strm
  integer(c_int) :: j, n
end subroutine cuda_add2s2_many
353
!> Explicit interface for the C function exported as `cuda_glsc3`.
!! Returns a real of kind `c_rp`. Pointer arguments are passed by value;
!! `n` is passed by reference.
function cuda_glsc3(a_d, b_d, c_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsc3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsc3
361
!> Explicit interface for the C routine exported as `cuda_glsc3_many`.
!! `h` is a host-visible Fortran array of `j` reals (kind `c_rp`); the
!! remaining handles are C pointers passed by value, and the integers `j`
!! and `n` are passed by reference.
subroutine cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm) &
    bind(c, name = 'cuda_glsc3_many')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  ! `j` must be declared before it is used as the extent of `h`.
  integer(c_int) :: j
  integer(c_int) :: n
  real(c_rp), dimension(j) :: h
  type(c_ptr), value :: w_d, v_d_d, mult_d
  type(c_ptr), value :: strm
end subroutine cuda_glsc3_many
370
!> Explicit interface for the C function exported as `cuda_glsc2`.
!! Returns a real of kind `c_rp`. Pointer arguments are passed by value;
!! `n` is passed by reference.
function cuda_glsc2(a_d, b_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsc2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsc2
378
!> Explicit interface for the C function exported as `cuda_glsubnorm2`.
!! Returns a real of kind `c_rp`. Pointer arguments are passed by value;
!! `n` is passed by reference.
function cuda_glsubnorm2(a_d, b_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsubnorm2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsubnorm2
386
!> Explicit interface for the C function exported as `cuda_glsum`.
!! Returns a real of kind `c_rp`. `a_d` and `strm` are C pointers passed by
!! value; `n` is passed by reference.
function cuda_glsum(a_d, n, strm) result(res) bind(c, name = 'cuda_glsum')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsum
394
!> Explicit interface for the C routine exported as `cuda_absval`.
!! `a_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_absval(a_d, n, strm) &
    bind(c, name = 'cuda_absval')
  ! Only iso_c_binding kinds are needed: no real-kind entity is declared.
  use, intrinsic :: iso_c_binding, only : c_int, c_ptr
  type(c_ptr), value :: a_d, strm
  integer(c_int) :: n
end subroutine cuda_absval
402 end interface
403
404 ! ========================================================================== !
405 ! Interfaces for the pointwise operations.
406
407 interface
!> Explicit interface for the C routine exported as `cuda_pwmax_vec2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_pwmax_vec2(a_d, b_d, n, strm) &
    bind(c, name = 'cuda_pwmax_vec2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_vec2
414
!> Explicit interface for the C routine exported as `cuda_pwmax_vec3`.
!! The `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_pwmax_vec3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_vec3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_vec3
421
!> Explicit interface for the C routine exported as `cuda_pwmax_sca2`.
!! `a_d` and `strm` are C pointers passed by value. Despite its `_d`
!! suffix, `c_d` is declared here as a real scalar of kind `c_rp` and, like
!! `n`, is passed by reference.
subroutine cuda_pwmax_sca2(a_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_sca2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_sca2
430
!> Explicit interface for the C routine exported as `cuda_pwmax_sca3`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value. Despite its `_d`
!! suffix, `c_d` is declared here as a real scalar of kind `c_rp` and, like
!! `n`, is passed by reference.
subroutine cuda_pwmax_sca3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_sca3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_sca3
439
!> Explicit interface for the C routine exported as `cuda_pwmin_vec2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is passed by
!! reference.
subroutine cuda_pwmin_vec2(a_d, b_d, n, strm) &
    bind(c, name = 'cuda_pwmin_vec2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_vec2
446
!> Explicit interface for the C routine exported as `cuda_pwmin_vec3`.
!! The `*_d` handles and `strm` are C pointers passed by value; `n` is
!! passed by reference.
subroutine cuda_pwmin_vec3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_vec3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, c_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_vec3
453
!> Explicit interface for the C routine exported as `cuda_pwmin_sca2`.
!! `a_d` and `strm` are C pointers passed by value. Despite its `_d`
!! suffix, `c_d` is declared here as a real scalar of kind `c_rp` and, like
!! `n`, is passed by reference.
subroutine cuda_pwmin_sca2(a_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_sca2')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_sca2
462
!> Explicit interface for the C routine exported as `cuda_pwmin_sca3`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value. Despite its `_d`
!! suffix, `c_d` is declared here as a real scalar of kind `c_rp` and, like
!! `n`, is passed by reference.
subroutine cuda_pwmin_sca3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_sca3')
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  import :: c_rp
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_sca3
471
472 end interface
473
474 ! ========================================================================== !
475 ! Interfaces for integer operations.
476
477 interface
478
!> Explicit interface for the C routine exported as `cuda_iadd`.
!! `a_d` and `strm` are C pointers passed by value; the integer scalar `c`
!! and the count `n` are passed by reference.
subroutine cuda_iadd(a_d, c, n, strm) &
    bind(c, name = 'cuda_iadd')
  ! Only iso_c_binding kinds are needed: the scalar is an integer, so no
  ! real kind has to be imported from the host.
  use, intrinsic :: iso_c_binding, only : c_int, c_ptr
  type(c_ptr), value :: a_d, strm
  integer(c_int) :: c
  integer(c_int) :: n
end subroutine cuda_iadd
487
488 end interface
489end module cuda_math
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12