Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cuda_math.f90
Go to the documentation of this file.
1! Copyright (c) 2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use num_types, only: rp, c_rp
35 implicit none
36 public
37
38 interface
!> Interface to the external C routine bound as `cuda_copy`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference (it carries no `value` attribute).
subroutine cuda_copy(a_d, b_d, n, strm) bind(c, name = 'cuda_copy')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_copy
45
!> Interface to the external C routine bound as `cuda_masked_copy`.
!! The four `c_ptr` arguments are passed by value; the two sizes `n` and
!! `n_mask` are C ints passed by reference.
subroutine cuda_masked_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_copy')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: mask_d
  type(c_ptr), value :: strm
end subroutine cuda_masked_copy
52
!> Interface to the external C routine bound as `cuda_masked_gather_copy`.
!! Pointer arguments (`a_d`, `b_d`, `mask_d`, `strm`) are passed by value;
!! `n` and `n_mask` are C ints passed by reference.
subroutine cuda_masked_gather_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_gather_copy')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: mask_d
  type(c_ptr), value :: strm
end subroutine cuda_masked_gather_copy
59
!> Interface to the external C routine bound as `cuda_masked_scatter_copy`.
!! Pointer arguments (`a_d`, `b_d`, `mask_d`, `strm`) are passed by value;
!! `n` and `n_mask` are C ints passed by reference.
subroutine cuda_masked_scatter_copy(a_d, b_d, mask_d, n, n_mask, strm) &
    bind(c, name = 'cuda_masked_scatter_copy')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  integer(c_int) :: n_mask
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: mask_d
  type(c_ptr), value :: strm
end subroutine cuda_masked_scatter_copy
66
!> Interface to the external C routine bound as
!! `cuda_masked_atomic_reduction`.
!! Pointer arguments are passed by value; `n` and `m` are C ints passed by
!! reference.
!! NOTE(review): `m` presumably plays the role that `n_mask` plays in the
!! other masked interfaces of this module; confirm against the C side.
subroutine cuda_masked_atomic_reduction(a_d, b_d, mask_d, n, m, strm) &
    bind(c, name = 'cuda_masked_atomic_reduction')
  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
  integer(c_int) :: n
  integer(c_int) :: m
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: mask_d
  type(c_ptr), value :: strm
end subroutine cuda_masked_atomic_reduction
73
!> Interface to the external C routine bound as `cuda_cfill_mask`.
!! `a_d`, `mask_d` and `strm` are C pointers passed by value. The scalar
!! `c` (kind `c_rp`) and the sizes `n`, `n_mask` are passed by reference.
subroutine cuda_cfill_mask(a_d, c, n, mask_d, n_mask, strm) &
    bind(c, name = 'cuda_cfill_mask')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c
  integer(c_int) :: n, n_mask
  type(c_ptr), value :: a_d, mask_d, strm
end subroutine cuda_cfill_mask
84
!> Interface to the external C routine bound as `cuda_cmult`.
!! `a_d` and `strm` are C pointers passed by value; the scalar `c`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_cmult(a_d, c, n, strm) bind(c, name = 'cuda_cmult')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_cmult
93
!> Interface to the external C routine bound as `cuda_cmult2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; the scalar `c`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_cmult2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cmult2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_cmult2
102
!> Interface to the external C routine bound as `cuda_cdiv`.
!! `a_d` and `strm` are C pointers passed by value; the scalar `c`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_cdiv(a_d, c, n, strm) bind(c, name = 'cuda_cdiv')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_cdiv
111
!> Interface to the external C routine bound as `cuda_cdiv2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; the scalar `c`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_cdiv2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cdiv2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_cdiv2
120
!> Interface to the external C routine bound as `cuda_radd`.
!! `a_d` and `strm` are C pointers passed by value; the scalar `c`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_radd(a_d, c, n, strm) bind(c, name = 'cuda_radd')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_radd
129
!> Interface to the external C routine bound as `cuda_cadd2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; the scalar `c`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_cadd2(a_d, b_d, c, n, strm) bind(c, name = 'cuda_cadd2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d, b_d, strm
end subroutine cuda_cadd2
140
!> Interface to the external C routine bound as `cuda_cfill`.
!! `a_d` and `strm` are C pointers passed by value; the scalar `c`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_cfill(a_d, c, n, strm) bind(c, name = 'cuda_cfill')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_cfill
149
!> Interface to the external C routine bound as `cuda_rzero`.
!! `a_d` and `strm` are C pointers passed by value; `n` is a C int passed
!! by reference.
subroutine cuda_rzero(a_d, n, strm) bind(c, name = 'cuda_rzero')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_rzero
156
!> Interface to the external C routine bound as `cuda_add2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference.
!! No `import` is needed here: the body declares no entity of kind `c_rp`.
subroutine cuda_add2(a_d, b_d, n, strm) &
    bind(c, name = 'cuda_add2')
  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
  type(c_ptr), value :: a_d, b_d, strm
  integer(c_int) :: n
end subroutine cuda_add2
164
!> Interface to the external C routine bound as `cuda_add4`.
!! `a_d`, `b_d`, `c_d`, `d_d` and `strm` are C pointers passed by value;
!! `n` is a C int passed by reference.
!! No `import` is needed here: the body declares no entity of kind `c_rp`.
subroutine cuda_add4(a_d, b_d, c_d, d_d, n, strm) &
    bind(c, name = 'cuda_add4')
  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
  type(c_ptr), value :: a_d, b_d, c_d, d_d, strm
  integer(c_int) :: n
end subroutine cuda_add4
172
!> Interface to the external C routine bound as `cuda_add2s1`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; the scalar `c1`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_add2s1(a_d, b_d, c1, n, strm) bind(c, name = 'cuda_add2s1')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_add2s1
181
!> Interface to the external C routine bound as `cuda_add2s2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; the scalar `c1`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_add2s2(a_d, b_d, c1, n, strm) bind(c, name = 'cuda_add2s2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_add2s2
190
!> Interface to the external C routine bound as `cuda_addsqr2s2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; the scalar `c1`
!! (kind `c_rp`) and the size `n` are passed by reference.
subroutine cuda_addsqr2s2(a_d, b_d, c1, n, strm) &
    bind(c, name = 'cuda_addsqr2s2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c1
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_addsqr2s2
199
!> Interface to the external C routine bound as `cuda_add3s2`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; the
!! scalars `c1`, `c2` (kind `c_rp`) and the size `n` are passed by
!! reference.
subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n, strm) &
    bind(c, name = 'cuda_add3s2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c1
  real(c_rp) :: c2
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_add3s2
208
!> Interface to the external C routine bound as `cuda_invcol1`.
!! `a_d` and `strm` are C pointers passed by value; `n` is a C int passed
!! by reference.
subroutine cuda_invcol1(a_d, n, strm) bind(c, name = 'cuda_invcol1')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_invcol1
215
!> Interface to the external C routine bound as `cuda_invcol2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference.
subroutine cuda_invcol2(a_d, b_d, n, strm) bind(c, name = 'cuda_invcol2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_invcol2
222
!> Interface to the external C routine bound as `cuda_invcol3`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; `n` is a
!! C int passed by reference.
subroutine cuda_invcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_invcol3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_invcol3
229
!> Interface to the external C routine bound as `cuda_col2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference.
subroutine cuda_col2(a_d, b_d, n, strm) bind(c, name = 'cuda_col2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_col2
236
!> Interface to the external C routine bound as `cuda_col3`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; `n` is a
!! C int passed by reference.
subroutine cuda_col3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_col3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_col3
243
!> Interface to the external C routine bound as `cuda_subcol3`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; `n` is a
!! C int passed by reference.
subroutine cuda_subcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_subcol3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_subcol3
250
!> Interface to the external C routine bound as `cuda_sub2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference.
subroutine cuda_sub2(a_d, b_d, n, strm) bind(c, name = 'cuda_sub2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_sub2
257
!> Interface to the external C routine bound as `cuda_sub3`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; `n` is a
!! C int passed by reference.
subroutine cuda_sub3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_sub3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_sub3
264
!> Interface to the external C routine bound as `cuda_add3`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; `n` is a
!! C int passed by reference.
subroutine cuda_add3(a_d, b_d, c_d, n, strm) bind(c, name = 'cuda_add3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_add3
271
!> Interface to the external C routine bound as `cuda_addcol3`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; `n` is a
!! C int passed by reference.
subroutine cuda_addcol3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_addcol3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_addcol3
278
!> Interface to the external C routine bound as `cuda_addcol4`.
!! `a_d`, `b_d`, `c_d`, `d_d` and `strm` are C pointers passed by value;
!! `n` is a C int passed by reference.
subroutine cuda_addcol4(a_d, b_d, c_d, d_d, n, strm) &
    bind(c, name = 'cuda_addcol4')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: d_d
  type(c_ptr), value :: strm
end subroutine cuda_addcol4
285
!> Interface to the external C routine bound as `cuda_vdot3`.
!! All pointer arguments (`dot_d`, the `u*_d` and `v*_d` triples, `strm`)
!! are passed by value; `n` is a C int passed by reference.
subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, &
    v1_d, v2_d, v3_d, n, strm) bind(c, name = 'cuda_vdot3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: dot_d
  type(c_ptr), value :: u1_d, u2_d, u3_d
  type(c_ptr), value :: v1_d, v2_d, v3_d
  type(c_ptr), value :: strm
end subroutine cuda_vdot3
292
!> Interface to the external C routine bound as `cuda_vcross`.
!! The nine vector-component pointers and `strm` are C pointers passed by
!! value; `n` is a C int passed by reference.
subroutine cuda_vcross(u1_d, u2_d, u3_d, &
    v1_d, v2_d, v3_d, &
    w1_d, w2_d, w3_d, n, strm) bind(c, name = 'cuda_vcross')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
  type(c_ptr), value :: w1_d, w2_d, w3_d, strm
end subroutine cuda_vcross
303
!> Interface to the external C function bound as `cuda_vlsc3`.
!! Returns a real of kind `c_rp`. `u_d`, `v_d`, `w_d` and `strm` are C
!! pointers passed by value; `n` is a C int passed by reference.
function cuda_vlsc3(u_d, v_d, w_d, n, strm) result(res) &
    bind(c, name = 'cuda_vlsc3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  integer(c_int) :: n
  type(c_ptr), value :: u_d
  type(c_ptr), value :: v_d
  type(c_ptr), value :: w_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_vlsc3
311
!> Interface to the external C routine bound as `cuda_add2s2_many`.
!! `y_d`, `x_d_d`, `a_d` and `strm` are C pointers passed by value; `j` and
!! `n` are C ints passed by reference.
!! No `import` is needed here: the body declares no entity of kind `c_rp`.
subroutine cuda_add2s2_many(y_d, x_d_d, a_d, j, n, strm) &
    bind(c, name = 'cuda_add2s2_many')
  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
  type(c_ptr), value :: y_d, x_d_d, a_d, strm
  integer(c_int) :: j, n
end subroutine cuda_add2s2_many
319
!> Interface to the external C function bound as `cuda_glsc3`.
!! Returns a real of kind `c_rp`. `a_d`, `b_d`, `c_d` and `strm` are C
!! pointers passed by value; `n` is a C int passed by reference.
function cuda_glsc3(a_d, b_d, c_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsc3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsc3
327
!> Interface to the external C routine bound as `cuda_glsc3_many`.
!! `h` is a host-side explicit-shape array of `j` reals of kind `c_rp`.
!! `w_d`, `v_d_d`, `mult_d` and `strm` are C pointers passed by value; `j`
!! and `n` are C ints passed by reference.
subroutine cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm) &
    bind(c, name = 'cuda_glsc3_many')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  ! `j` must be declared before it is used as the bound of `h`.
  integer(c_int) :: j
  integer(c_int) :: n
  real(c_rp) :: h(j)
  type(c_ptr), value :: w_d
  type(c_ptr), value :: v_d_d
  type(c_ptr), value :: mult_d
  type(c_ptr), value :: strm
end subroutine cuda_glsc3_many
336
!> Interface to the external C function bound as `cuda_glsc2`.
!! Returns a real of kind `c_rp`. `a_d`, `b_d` and `strm` are C pointers
!! passed by value; `n` is a C int passed by reference.
function cuda_glsc2(a_d, b_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsc2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsc2
344
!> Interface to the external C function bound as `cuda_glsubnorm2`.
!! Returns a real of kind `c_rp`. `a_d`, `b_d` and `strm` are C pointers
!! passed by value; `n` is a C int passed by reference.
function cuda_glsubnorm2(a_d, b_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsubnorm2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsubnorm2
352
!> Interface to the external C function bound as `cuda_glsum`.
!! Returns a real of kind `c_rp`. `a_d` and `strm` are C pointers passed
!! by value; `n` is a C int passed by reference.
function cuda_glsum(a_d, n, strm) result(res) &
    bind(c, name = 'cuda_glsum')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
  real(c_rp) :: res
end function cuda_glsum
360
!> Interface to the external C routine bound as `cuda_absval`.
!! `a_d` and `strm` are C pointers passed by value; `n` is a C int passed
!! by reference.
!! No `import` is needed here: the body declares no entity of kind `c_rp`.
subroutine cuda_absval(a_d, n, strm) &
    bind(c, name = 'cuda_absval')
  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
  type(c_ptr), value :: a_d, strm
  integer(c_int) :: n
end subroutine cuda_absval
368 end interface
369
370 ! ========================================================================== !
371 ! Interfaces for the pointwise operations.
372
373 interface
!> Interface to the external C routine bound as `cuda_pwmax_vec2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference.
subroutine cuda_pwmax_vec2(a_d, b_d, n, strm) &
    bind(c, name = 'cuda_pwmax_vec2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_vec2
380
!> Interface to the external C routine bound as `cuda_pwmax_vec3`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; `n` is a
!! C int passed by reference.
subroutine cuda_pwmax_vec3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_vec3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_vec3
387
!> Interface to the external C routine bound as `cuda_pwmax_sca2`.
!! `a_d` and `strm` are C pointers passed by value; `n` is a C int passed
!! by reference.
!! Despite its `_d` suffix, `c_d` is declared as a `real(c_rp)` scalar
!! passed by reference, not as a `c_ptr`.
subroutine cuda_pwmax_sca2(a_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_sca2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_sca2
396
!> Interface to the external C routine bound as `cuda_pwmax_sca3`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference.
!! Despite its `_d` suffix, `c_d` is declared as a `real(c_rp)` scalar
!! passed by reference, not as a `c_ptr`.
subroutine cuda_pwmax_sca3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmax_sca3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmax_sca3
405
!> Interface to the external C routine bound as `cuda_pwmin_vec2`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference.
subroutine cuda_pwmin_vec2(a_d, b_d, n, strm) &
    bind(c, name = 'cuda_pwmin_vec2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_vec2
412
!> Interface to the external C routine bound as `cuda_pwmin_vec3`.
!! `a_d`, `b_d`, `c_d` and `strm` are C pointers passed by value; `n` is a
!! C int passed by reference.
subroutine cuda_pwmin_vec3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_vec3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: c_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_vec3
419
!> Interface to the external C routine bound as `cuda_pwmin_sca2`.
!! `a_d` and `strm` are C pointers passed by value; `n` is a C int passed
!! by reference.
!! Despite its `_d` suffix, `c_d` is declared as a `real(c_rp)` scalar
!! passed by reference, not as a `c_ptr`.
subroutine cuda_pwmin_sca2(a_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_sca2')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_sca2
428
!> Interface to the external C routine bound as `cuda_pwmin_sca3`.
!! `a_d`, `b_d` and `strm` are C pointers passed by value; `n` is a C int
!! passed by reference.
!! Despite its `_d` suffix, `c_d` is declared as a `real(c_rp)` scalar
!! passed by reference, not as a `c_ptr`.
subroutine cuda_pwmin_sca3(a_d, b_d, c_d, n, strm) &
    bind(c, name = 'cuda_pwmin_sca3')
  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
  import c_rp
  real(c_rp) :: c_d
  integer(c_int) :: n
  type(c_ptr), value :: a_d
  type(c_ptr), value :: b_d
  type(c_ptr), value :: strm
end subroutine cuda_pwmin_sca3
437
438 end interface
439
440 ! ========================================================================== !
441 ! Interfaces for integer operations.
442
443 interface
444
!> Interface to the external C routine bound as `cuda_iadd`.
!! `a_d` and `strm` are C pointers passed by value; the integer scalar `c`
!! and the size `n` are C ints passed by reference.
!! No `import` is needed here: both scalars are `integer(c_int)`, so the
!! body declares no entity of kind `c_rp`.
subroutine cuda_iadd(a_d, c, n, strm) &
    bind(c, name = 'cuda_iadd')
  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
  type(c_ptr), value :: a_d, strm
  integer(c_int) :: c
  integer(c_int) :: n
end subroutine cuda_iadd
453
454 end interface
455end module cuda_math
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12