Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cuda_math.f90
Go to the documentation of this file.
1! Copyright (c) 2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use num_types, only: rp, c_rp
35 implicit none
36 public
37
38 interface
!> Binding for the C symbol 'cuda_copy'.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute (the C side receives a pointer).
!! NOTE(review): presumably copies n entries from b_d into a_d - confirm.
subroutine cuda_copy(a_d, b_d, n, strm) bind(c, name = 'cuda_copy')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_copy
45
!> Binding for the C symbol 'cuda_masked_copy'.
!! a_d, b_d, mask_d and strm are C pointers passed by value; n and
!! n_mask are c_int arguments without the value attribute.
!! NOTE(review): presumably copies only the entries listed in mask_d - confirm.
subroutine cuda_masked_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_copy')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, mask_d
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: strm
end subroutine cuda_masked_copy
52
!> Binding for the C symbol 'cuda_masked_gather_copy'.
!! a_d, b_d, mask_d and strm are C pointers passed by value; n and
!! n_mask are c_int arguments without the value attribute.
!! NOTE(review): presumably gathers masked entries of b_d into a_d - confirm.
subroutine cuda_masked_gather_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_gather_copy')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, mask_d
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: strm
end subroutine cuda_masked_gather_copy
59
!> Binding for the C symbol 'cuda_masked_scatter_copy'.
!! a_d, b_d, mask_d and strm are C pointers passed by value; n and
!! n_mask are c_int arguments without the value attribute.
!! NOTE(review): presumably scatters b_d into masked entries of a_d - confirm.
subroutine cuda_masked_scatter_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_scatter_copy')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, mask_d
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: strm
end subroutine cuda_masked_scatter_copy
66
!> Binding for the C symbol 'cuda_masked_atomic_reduction'.
!! a_d, b_d, mask_d and strm are C pointers passed by value; n and m
!! are c_int arguments without the value attribute.
!! NOTE(review): the roles of n and m are not visible here - confirm
!! against the CUDA backend.
subroutine cuda_masked_atomic_reduction(a_d, b_d, mask_d, n, m, strm) &
    bind(c, name = 'cuda_masked_atomic_reduction')
  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
  type(c_ptr), value :: a_d, b_d, mask_d
  integer(c_int) :: n
  integer(c_int) :: m
  type(c_ptr), value :: strm
end subroutine cuda_masked_atomic_reduction
73
!> Binding for the C symbol 'cuda_cfill_mask'.
!! a_d, mask_d and strm are C pointers passed by value; the real(c_rp)
!! scalar c and the c_int arguments n, n_mask carry no value attribute.
!! NOTE(review): presumably sets the masked entries of a_d to c - confirm.
subroutine cuda_cfill_mask(a_d, c, n, mask_d, n_mask, strm) &
    bind(c, name = 'cuda_cfill_mask')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: mask_d
  integer(c_int) :: n_mask
  type(c_ptr), value :: strm
end subroutine cuda_cfill_mask
84
!> Binding for the C symbol 'cuda_cmult'.
!! a_d and strm are C pointers passed by value; the real(c_rp) scalar c
!! and the c_int n are declared without the value attribute.
!! NOTE(review): presumably a = c * a over n entries - confirm.
subroutine cuda_cmult(a_d, c, n, strm) bind(c, name = 'cuda_cmult')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_cmult
93
!> Binding for the C symbol 'cuda_cmult2'.
!! a_d, b_d and strm are C pointers passed by value; the real(c_rp)
!! scalar c and the c_int n are declared without the value attribute.
!! NOTE(review): presumably a = c * b over n entries - confirm.
subroutine cuda_cmult2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cmult2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_cmult2
102
!> Binding for the C symbol 'cuda_cdiv'.
!! a_d and strm are C pointers passed by value; the real(c_rp) scalar c
!! and the c_int n are declared without the value attribute.
!! NOTE(review): the exact division performed (c / a or a / c) is not
!! visible here - confirm against the CUDA backend.
subroutine cuda_cdiv(a_d, c, n, strm) bind(c, name = 'cuda_cdiv')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_cdiv
111
!> Binding for the C symbol 'cuda_cdiv2'.
!! a_d, b_d and strm are C pointers passed by value; the real(c_rp)
!! scalar c and the c_int n are declared without the value attribute.
!! NOTE(review): the exact division performed is not visible here -
!! confirm against the CUDA backend.
subroutine cuda_cdiv2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cdiv2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_cdiv2
120
!> Binding for the C symbol 'cuda_radd'.
!! a_d and strm are C pointers passed by value; the real(c_rp) scalar c
!! and the c_int n are declared without the value attribute.
!! NOTE(review): presumably adds the scalar c to n entries of a_d - confirm.
subroutine cuda_radd(a_d, c, n, strm) bind(c, name = 'cuda_radd')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_radd
129
!> Binding for the C symbol 'cuda_cadd2'.
!! a_d, b_d and strm are C pointers passed by value; the real(c_rp)
!! scalar c and the c_int n are declared without the value attribute.
!! NOTE(review): presumably a = b + c over n entries - confirm.
subroutine cuda_cadd2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cadd2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_cadd2
140
!> Binding for the C symbol 'cuda_cfill'.
!! a_d and strm are C pointers passed by value; the real(c_rp) scalar c
!! and the c_int n are declared without the value attribute.
!! NOTE(review): presumably sets n entries of a_d to c - confirm.
subroutine cuda_cfill(a_d, c, n, strm) bind(c, name = 'cuda_cfill')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_cfill
149
!> Binding for the C symbol 'cuda_rzero'.
!! a_d and strm are C pointers passed by value; n is a c_int declared
!! without the value attribute.
!! NOTE(review): presumably zeroes n entries of a_d - confirm.
subroutine cuda_rzero(a_d, n, strm) bind(c, name = 'cuda_rzero')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_rzero
156
!> Binding for the C symbol 'cuda_add2'.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! No real-kind argument appears in this signature, so c_rp is not imported.
!! NOTE(review): presumably a = a + b over n entries - confirm.
subroutine cuda_add2(a_d, b_d, n, strm) bind(c, name = 'cuda_add2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add2
164
!> Binding for the C symbol 'cuda_add4'.
!! a_d, b_d, c_d, d_d and strm are C pointers passed by value; n is a
!! c_int declared without the value attribute.
!! No real-kind argument appears in this signature, so c_rp is not imported.
!! NOTE(review): presumably a = b + c + d over n entries - confirm.
subroutine cuda_add4(a_d, b_d, c_d, d_d, n, strm) bind(c, name = 'cuda_add4')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d, d_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add4
172
!> Binding for the C symbol 'cuda_add2s1'.
!! a_d, b_d and strm are C pointers passed by value; the real(c_rp)
!! scalar c1 and the c_int n are declared without the value attribute.
!! NOTE(review): presumably a = c1 * a + b over n entries - confirm.
subroutine cuda_add2s1(a_d, b_d, c1, n, strm) bind(c, name = 'cuda_add2s1')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add2s1
181
!> Binding for the C symbol 'cuda_add2s2'.
!! a_d, b_d and strm are C pointers passed by value; the real(c_rp)
!! scalar c1 and the c_int n are declared without the value attribute.
!! NOTE(review): presumably a = a + c1 * b over n entries - confirm.
subroutine cuda_add2s2(a_d, b_d, c1, n, strm) bind(c, name = 'cuda_add2s2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add2s2
190
!> Binding for the C symbol 'cuda_addsqr2s2'.
!! a_d, b_d and strm are C pointers passed by value; the real(c_rp)
!! scalar c1 and the c_int n are declared without the value attribute.
!! NOTE(review): presumably a = a + c1 * b * b over n entries - confirm.
subroutine cuda_addsqr2s2(a_d, b_d, c1, n, strm) &
    bind(c, name = 'cuda_addsqr2s2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_addsqr2s2
199
!> Binding for the C symbol 'cuda_add3s2'.
!! a_d, b_d, c_d and strm are C pointers passed by value; the real(c_rp)
!! scalars c1, c2 and the c_int n are declared without the value attribute.
!! NOTE(review): presumably a = c1 * b + c2 * c over n entries - confirm.
subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n, strm) &
    bind(c, name = 'cuda_add3s2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d, c_d
  real(c_rp) :: c1
  real(c_rp) :: c2
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add3s2
208
!> Binding for the C symbol 'cuda_add4s3'.
!! a_d, b_d, c_d, d_d and strm are C pointers passed by value; the
!! real(c_rp) scalars c1, c2, c3 and the c_int n are declared without
!! the value attribute.
!! NOTE(review): presumably a = c1 * b + c2 * c + c3 * d - confirm.
subroutine cuda_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm) &
    bind(c, name = 'cuda_add4s3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d, c_d, d_d
  real(c_rp) :: c1
  real(c_rp) :: c2
  real(c_rp) :: c3
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add4s3
217
!> Binding for the C symbol 'cuda_add5s4'.
!! a_d, b_d, c_d, d_d, e_d and strm are C pointers passed by value; the
!! real(c_rp) scalars c1..c4 and the c_int n are declared without the
!! value attribute.
!! NOTE(review): the exact linear combination computed is not visible
!! here - confirm against the CUDA backend.
subroutine cuda_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm) &
    bind(c, name = 'cuda_add5s4')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d, c_d, d_d, e_d
  real(c_rp) :: c1
  real(c_rp) :: c2
  real(c_rp) :: c3
  real(c_rp) :: c4
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add5s4
226
!> Binding for the C symbol 'cuda_invcol1'.
!! a_d and strm are C pointers passed by value; n is a c_int declared
!! without the value attribute.
!! NOTE(review): presumably a = 1 / a over n entries - confirm.
subroutine cuda_invcol1(a_d, n, strm) bind(c, name = 'cuda_invcol1')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_invcol1
233
!> Binding for the C symbol 'cuda_invcol2'.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = a / b over n entries - confirm.
subroutine cuda_invcol2(a_d, b_d, n, strm) bind(c, name = 'cuda_invcol2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_invcol2
240
!> Binding for the C symbol 'cuda_invcol3'.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = b / c over n entries - confirm.
subroutine cuda_invcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_invcol3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_invcol3
247
!> Binding for the C symbol 'cuda_col2'.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = a * b over n entries - confirm.
subroutine cuda_col2(a_d, b_d, n, strm) bind(c, name = 'cuda_col2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_col2
254
!> Binding for the C symbol 'cuda_col3'.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = b * c over n entries - confirm.
subroutine cuda_col3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_col3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_col3
261
!> Binding for the C symbol 'cuda_subcol3'.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = a - b * c over n entries - confirm.
subroutine cuda_subcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_subcol3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_subcol3
268
!> Binding for the C symbol 'cuda_sub2'.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = a - b over n entries - confirm.
subroutine cuda_sub2(a_d, b_d, n, strm) bind(c, name = 'cuda_sub2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_sub2
275
!> Binding for the C symbol 'cuda_sub3'.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = b - c over n entries - confirm.
subroutine cuda_sub3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_sub3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_sub3
282
!> Binding for the C symbol 'cuda_add3'.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = b + c over n entries - confirm.
subroutine cuda_add3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_add3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add3
289
!> Binding for the C symbol 'cuda_addcol3'.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = a + b * c over n entries - confirm.
subroutine cuda_addcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_addcol3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_addcol3
296
!> Binding for the C symbol 'cuda_addcol4'.
!! a_d, b_d, c_d, d_d and strm are C pointers passed by value; n is a
!! c_int declared without the value attribute.
!! NOTE(review): presumably a = a + b * c * d over n entries - confirm.
subroutine cuda_addcol4(a_d, b_d, c_d, d_d, n, strm) &
    bind(c, name = 'cuda_addcol4')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d, d_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_addcol4
303
!> Binding for the C symbol 'cuda_addcol3s2'.
!! a_d, b_d, c_d and strm are C pointers passed by value; the real(c_rp)
!! scalar s and the c_int n are declared without the value attribute.
!! NOTE(review): presumably a = a + s * b * c over n entries - confirm.
subroutine cuda_addcol3s2(a_d, b_d, c_d, s, n, strm) &
    bind(c, name = 'cuda_addcol3s2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d, c_d
  real(c_rp) :: s
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_addcol3s2
312
!> Binding for the C symbol 'cuda_vdot3'.
!! dot_d, the u*_d and v*_d components and strm are C pointers passed by
!! value; n is a c_int declared without the value attribute.
!! NOTE(review): presumably dot = u1*v1 + u2*v2 + u3*v3 pointwise - confirm.
subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm) &
    bind(c, name = 'cuda_vdot3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: dot_d
  type(c_ptr), value :: u1_d, u2_d, u3_d
  type(c_ptr), value :: v1_d, v2_d, v3_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_vdot3
319
!> Binding for the C symbol 'cuda_vcross'.
!! The nine component pointers (u*, v*, w*) and strm are C pointers
!! passed by value; n is a c_int declared without the value attribute.
!! NOTE(review): which of u, v, w holds the result is not visible here -
!! confirm against the CUDA backend.
subroutine cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
    w1_d, w2_d, w3_d, n, strm) bind(c, name = 'cuda_vcross')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
  type(c_ptr), value :: w1_d, w2_d, w3_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_vcross
330
!> Binding for the C symbol 'cuda_vlsc3'; the result is a real(c_rp) scalar.
!! u_d, v_d, w_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a local (non-global) sum of u * v * w - confirm.
function cuda_vlsc3(u_d, v_d, w_d, n, strm) result(res) &
    bind(c, name = 'cuda_vlsc3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: u_d, v_d, w_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_vlsc3
338
!> Binding for the C symbol 'cuda_add2s2_many'.
!! y_d, x_d_d, a_d and strm are C pointers passed by value; j and n are
!! c_int arguments declared without the value attribute.
!! No real-kind argument appears in this signature, so c_rp is not imported.
!! NOTE(review): presumably x_d_d points to j device pointers and a_d to
!! j coefficients - confirm against the CUDA backend.
subroutine cuda_add2s2_many(y_d, x_d_d, a_d, j, n, strm) &
    bind(c, name = 'cuda_add2s2_many')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: y_d, x_d_d, a_d
  integer(c_int) :: j
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_add2s2_many
346
!> Binding for the C symbol 'cuda_glsc3'; the result is a real(c_rp) scalar.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a weighted inner product sum(a * b * c);
!! whether the reduction is global across ranks is not visible - confirm.
function cuda_glsc3(a_d, b_d, c_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsc3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsc3
354
!> Binding for the C symbol 'cuda_glsc3_many'.
!! h is a host-side real(c_rp) array of extent j; w_d, v_d_d, mult_d and
!! strm are C pointers passed by value; j and n are c_int arguments
!! declared without the value attribute.
!! NOTE(review): presumably h receives j inner products - confirm.
subroutine cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm) &
    bind(c, name = 'cuda_glsc3_many')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  ! j is declared before h because it appears in h's bounds.
  integer(c_int) :: j
  integer(c_int) :: n
  real(c_rp), dimension(j) :: h
  type(c_ptr), value :: w_d, v_d_d, mult_d
  type(c_ptr), value :: strm
end subroutine cuda_glsc3_many
363
!> Binding for the C symbol 'cuda_glsc2'; the result is a real(c_rp) scalar.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably the inner product sum(a * b) - confirm.
function cuda_glsc2(a_d, b_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsc2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsc2
371
!> Binding for the C symbol 'cuda_glsubnorm2'; the result is a
!! real(c_rp) scalar.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a norm of the difference a - b; whether the
!! value is squared is not visible here - confirm.
function cuda_glsubnorm2(a_d, b_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsubnorm2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsubnorm2
379
!> Binding for the C symbol 'cuda_glsum'; the result is a real(c_rp) scalar.
!! a_d and strm are C pointers passed by value; n is a c_int declared
!! without the value attribute.
!! NOTE(review): presumably the sum of n entries of a_d - confirm.
function cuda_glsum(a_d, n, strm) result(res) bind(c, name = 'cuda_glsum')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsum
387
!> Binding for the C symbol 'cuda_absval'.
!! a_d and strm are C pointers passed by value; n is a c_int declared
!! without the value attribute.
!! No real-kind argument appears in this signature, so c_rp is not imported.
!! NOTE(review): presumably a = abs(a) over n entries - confirm.
subroutine cuda_absval(a_d, n, strm) bind(c, name = 'cuda_absval')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_absval
395 end interface
396
397 ! ========================================================================== !
398 ! Interfaces for the pointwise operations.
399
400 interface
!> Binding for the C symbol 'cuda_pwmax_vec2'.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = max(a, b) pointwise - confirm.
subroutine cuda_pwmax_vec2(a_d, b_d, n, strm) &
    bind(c, name = 'cuda_pwmax_vec2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_vec2
407
!> Binding for the C symbol 'cuda_pwmax_vec3'.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = max(b, c) pointwise - confirm.
subroutine cuda_pwmax_vec3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_vec3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_vec3
414
!> Binding for the C symbol 'cuda_pwmax_sca2'.
!! a_d and strm are C pointers passed by value. Despite its _d suffix,
!! c_d is declared as a real(c_rp) scalar, not a C pointer; it and the
!! c_int n carry no value attribute.
!! NOTE(review): presumably a = max(a, c_d) pointwise - confirm.
subroutine cuda_pwmax_sca2(a_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_sca2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_sca2
423
!> Binding for the C symbol 'cuda_pwmax_sca3'.
!! a_d, b_d and strm are C pointers passed by value. Despite its _d
!! suffix, c_d is declared as a real(c_rp) scalar, not a C pointer; it
!! and the c_int n carry no value attribute.
!! NOTE(review): presumably a = max(b, c_d) pointwise - confirm.
subroutine cuda_pwmax_sca3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_sca3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_sca3
432
!> Binding for the C symbol 'cuda_pwmin_vec2'.
!! a_d, b_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = min(a, b) pointwise - confirm.
subroutine cuda_pwmin_vec2(a_d, b_d, n, strm) &
    bind(c, name = 'cuda_pwmin_vec2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_vec2
439
!> Binding for the C symbol 'cuda_pwmin_vec3'.
!! a_d, b_d, c_d and strm are C pointers passed by value; n is a c_int
!! declared without the value attribute.
!! NOTE(review): presumably a = min(b, c) pointwise - confirm.
subroutine cuda_pwmin_vec3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_vec3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d, b_d, c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_vec3
446
!> Binding for the C symbol 'cuda_pwmin_sca2'.
!! a_d and strm are C pointers passed by value. Despite its _d suffix,
!! c_d is declared as a real(c_rp) scalar, not a C pointer; it and the
!! c_int n carry no value attribute.
!! NOTE(review): presumably a = min(a, c_d) pointwise - confirm.
subroutine cuda_pwmin_sca2(a_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_sca2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_sca2
455
!> Binding for the C symbol 'cuda_pwmin_sca3'.
!! a_d, b_d and strm are C pointers passed by value. Despite its _d
!! suffix, c_d is declared as a real(c_rp) scalar, not a C pointer; it
!! and the c_int n carry no value attribute.
!! NOTE(review): presumably a = min(b, c_d) pointwise - confirm.
subroutine cuda_pwmin_sca3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_sca3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import :: c_rp
  type(c_ptr), value :: a_d, b_d
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_sca3
464
465 end interface
466
467 ! ========================================================================== !
468 ! Interfaces for integer operations.
469
470 interface
471
!> Binding for the C symbol 'cuda_iadd'.
!! a_d and strm are C pointers passed by value; the scalar c and the
!! count n are both c_int arguments declared without the value attribute.
!! Every argument is an integer or a pointer, so c_rp is not imported.
!! NOTE(review): presumably adds the integer c to n entries of a_d - confirm.
subroutine cuda_iadd(a_d, c, n, strm) bind(c, name = 'cuda_iadd')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  type(c_ptr), value :: a_d
  integer(c_int) :: c
  integer(c_int) :: n
  type(c_ptr), value :: strm
end subroutine cuda_iadd
480
481 end interface
482end module cuda_math
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12