Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cuda_math.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use num_types, only: rp, c_rp
35 implicit none
36 public
37
38 interface
39 subroutine cuda_copy(a_d, b_d, n) &
40 bind(c, name = 'cuda_copy')
41 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
42 type(c_ptr), value :: a_d, b_d
43 integer(c_int) :: n
44 end subroutine cuda_copy
45
46 subroutine cuda_masked_copy(a_d, b_d, mask_d, n, m) &
47 bind(c, name = 'cuda_masked_copy')
48 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
49 type(c_ptr), value :: a_d, b_d, mask_d
50 integer(c_int) :: n, m
51 end subroutine cuda_masked_copy
52
53 subroutine cuda_masked_red_copy(a_d, b_d, mask_d, n, m) &
54 bind(c, name = 'cuda_masked_red_copy')
55 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
56 type(c_ptr), value :: a_d, b_d, mask_d
57 integer(c_int) :: n, m
58 end subroutine cuda_masked_red_copy
59
60 subroutine cuda_cfill_mask(a_d, c, size, mask_d, mask_size) &
61 bind(c, name = 'cuda_cfill_mask')
62 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
63 import c_rp
64 type(c_ptr), value :: a_d
65 real(c_rp) :: c
66 integer(c_int) :: size
67 type(c_ptr), value :: mask_d
68 integer(c_int) :: mask_size
69 end subroutine cuda_cfill_mask
70
71 subroutine cuda_cmult(a_d, c, n) &
72 bind(c, name = 'cuda_cmult')
73 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
74 import c_rp
75 type(c_ptr), value :: a_d
76 real(c_rp) :: c
77 integer(c_int) :: n
78 end subroutine cuda_cmult
79
80 subroutine cuda_cmult2(a_d, b_d, c, n) &
81 bind(c, name = 'cuda_cmult2')
82 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
83 import c_rp
84 type(c_ptr), value :: a_d, b_d
85 real(c_rp) :: c
86 integer(c_int) :: n
87 end subroutine cuda_cmult2
88
89 subroutine cuda_cadd(a_d, c, n) &
90 bind(c, name = 'cuda_cadd')
91 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
92 import c_rp
93 type(c_ptr), value :: a_d
94 real(c_rp) :: c
95 integer(c_int) :: n
96 end subroutine cuda_cadd
97
98 subroutine cuda_cadd2(a_d, b_d, c, n) &
99 bind(c, name = 'cuda_cadd2')
100 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
101 import c_rp
102 type(c_ptr), value :: a_d
103 type(c_ptr), value :: b_d
104 real(c_rp) :: c
105 integer(c_int) :: n
106 end subroutine cuda_cadd2
107
108 subroutine cuda_cfill(a_d, c, n) &
109 bind(c, name = 'cuda_cfill')
110 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
111 import c_rp
112 type(c_ptr), value :: a_d
113 real(c_rp) :: c
114 integer(c_int) :: n
115 end subroutine cuda_cfill
116
117 subroutine cuda_rzero(a_d, n) &
118 bind(c, name = 'cuda_rzero')
119 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
120 type(c_ptr), value :: a_d
121 integer(c_int) :: n
122 end subroutine cuda_rzero
123
124 subroutine cuda_add2(a_d, b_d, n) &
125 bind(c, name = 'cuda_add2')
126 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
127 import c_rp
128 type(c_ptr), value :: a_d, b_d
129 integer(c_int) :: n
130 end subroutine cuda_add2
131
132 subroutine cuda_add4(a_d, b_d, c_d, d_d, n) &
133 bind(c, name = 'cuda_add4')
134 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
135 import c_rp
136 type(c_ptr), value :: a_d, b_d, c_d, d_d
137 integer(c_int) :: n
138 end subroutine cuda_add4
139
140 subroutine cuda_add2s1(a_d, b_d, c1, n) &
141 bind(c, name = 'cuda_add2s1')
142 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
143 import c_rp
144 type(c_ptr), value :: a_d, b_d
145 real(c_rp) :: c1
146 integer(c_int) :: n
147 end subroutine cuda_add2s1
148
149 subroutine cuda_add2s2(a_d, b_d, c1, n) &
150 bind(c, name = 'cuda_add2s2')
151 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
152 import c_rp
153 type(c_ptr), value :: a_d, b_d
154 real(c_rp) :: c1
155 integer(c_int) :: n
156 end subroutine cuda_add2s2
157
158 subroutine cuda_addsqr2s2(a_d, b_d, c1, n) &
159 bind(c, name = 'cuda_addsqr2s2')
160 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
161 import c_rp
162 type(c_ptr), value :: a_d, b_d
163 real(c_rp) :: c1
164 integer(c_int) :: n
165 end subroutine cuda_addsqr2s2
166
167 subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n) &
168 bind(c, name = 'cuda_add3s2')
169 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
170 import c_rp
171 type(c_ptr), value :: a_d, b_d, c_d
172 real(c_rp) :: c1, c2
173 integer(c_int) :: n
174 end subroutine cuda_add3s2
175
176 subroutine cuda_invcol1(a_d, n) &
177 bind(c, name = 'cuda_invcol1')
178 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
179 type(c_ptr), value :: a_d
180 integer(c_int) :: n
181 end subroutine cuda_invcol1
182
183 subroutine cuda_invcol2(a_d, b_d, n) &
184 bind(c, name = 'cuda_invcol2')
185 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
186 type(c_ptr), value :: a_d, b_d
187 integer(c_int) :: n
188 end subroutine cuda_invcol2
189
190 subroutine cuda_col2(a_d, b_d, n) &
191 bind(c, name = 'cuda_col2')
192 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
193 type(c_ptr), value :: a_d, b_d
194 integer(c_int) :: n
195 end subroutine cuda_col2
196
197 subroutine cuda_col3(a_d, b_d, c_d, n) &
198 bind(c, name = 'cuda_col3')
199 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
200 type(c_ptr), value :: a_d, b_d, c_d
201 integer(c_int) :: n
202 end subroutine cuda_col3
203
204 subroutine cuda_subcol3(a_d, b_d, c_d, n) &
205 bind(c, name = 'cuda_subcol3')
206 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
207 type(c_ptr), value :: a_d, b_d, c_d
208 integer(c_int) :: n
209 end subroutine cuda_subcol3
210
211 subroutine cuda_sub2(a_d, b_d, n) &
212 bind(c, name = 'cuda_sub2')
213 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
214 type(c_ptr), value :: a_d, b_d
215 integer(c_int) :: n
216 end subroutine cuda_sub2
217
218 subroutine cuda_sub3(a_d, b_d, c_d, n) &
219 bind(c, name = 'cuda_sub3')
220 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
221 type(c_ptr), value :: a_d, b_d, c_d
222 integer(c_int) :: n
223 end subroutine cuda_sub3
224
225 subroutine cuda_add3(a_d, b_d, c_d, n) &
226 bind(c, name = 'cuda_add3')
227 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
228 type(c_ptr), value :: a_d, b_d, c_d
229 integer(c_int) :: n
230 end subroutine cuda_add3
231
232 subroutine cuda_addcol3(a_d, b_d, c_d, n) &
233 bind(c, name = 'cuda_addcol3')
234 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
235 type(c_ptr), value :: a_d, b_d, c_d
236 integer(c_int) :: n
237 end subroutine cuda_addcol3
238
239 subroutine cuda_addcol4(a_d, b_d, c_d, d_d, n) &
240 bind(c, name = 'cuda_addcol4')
241 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
242 type(c_ptr), value :: a_d, b_d, c_d, d_d
243 integer(c_int) :: n
244 end subroutine cuda_addcol4
245
246 subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
247 bind(c, name = 'cuda_vdot3')
248 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
249 type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
250 integer(c_int) :: n
251 end subroutine cuda_vdot3
252
253 subroutine cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
254 w1_d, w2_d, w3_d, n) &
255 bind(c, name = 'cuda_vcross')
256 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
257 type(c_ptr), value :: u1_d, u2_d, u3_d
258 type(c_ptr), value :: v1_d, v2_d, v3_d
259 type(c_ptr), value :: w1_d, w2_d, w3_d
260 integer(c_int) :: n
261 end subroutine cuda_vcross
262
263 real(c_rp) function cuda_vlsc3(u_d, v_d, w_d, n) &
264 bind(c, name = 'cuda_vlsc3')
265 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
266 import c_rp
267 type(c_ptr), value :: u_d, v_d, w_d
268 integer(c_int) :: n
269 end function cuda_vlsc3
270
271 subroutine cuda_add2s2_many(y_d, x_d_d, a_d, j, n) &
272 bind(c, name = 'cuda_add2s2_many')
273 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
274 import c_rp
275 type(c_ptr), value :: y_d, x_d_d, a_d
276 integer(c_int) :: j, n
277 end subroutine cuda_add2s2_many
278
279 real(c_rp) function cuda_glsc3(a_d, b_d, c_d, n) &
280 bind(c, name = 'cuda_glsc3')
281 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
282 import c_rp
283 type(c_ptr), value :: a_d, b_d, c_d
284 integer(c_int) :: n
285 end function cuda_glsc3
286
287 subroutine cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n) &
288 bind(c, name = 'cuda_glsc3_many')
289 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
290 import c_rp
291 type(c_ptr), value :: w_d, v_d_d, mult_d
292 integer(c_int) :: j, n
293 real(c_rp) :: h(j)
294 end subroutine cuda_glsc3_many
295
296 real(c_rp) function cuda_glsc2(a_d, b_d, n) &
297 bind(c, name = 'cuda_glsc2')
298 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
299 import c_rp
300 type(c_ptr), value :: a_d, b_d
301 integer(c_int) :: n
302 end function cuda_glsc2
303
304 real(c_rp) function cuda_glsum(a_d, n) &
305 bind(c, name = 'cuda_glsum')
306 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
307 import c_rp
308 type(c_ptr), value :: a_d
309 integer(c_int) :: n
310 end function cuda_glsum
311
312 subroutine cuda_absval(a_d, n) &
313 bind(c, name = 'cuda_absval')
314 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
315 import c_rp
316 type(c_ptr), value :: a_d
317 integer(c_int) :: n
318 end subroutine cuda_absval
319 end interface
320
321 ! ========================================================================== !
322 ! Interfaces for the pointwise operations.
323
324 interface
325 subroutine cuda_pwmax_vec2(a_d, b_d, n) &
326 bind(c, name = 'cuda_pwmax_vec2')
327 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
328 type(c_ptr), value :: a_d, b_d
329 integer(c_int) :: n
330 end subroutine cuda_pwmax_vec2
331
332 subroutine cuda_pwmax_vec3(a_d, b_d, c_d, n) &
333 bind(c, name = 'cuda_pwmax_vec3')
334 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
335 type(c_ptr), value :: a_d, b_d, c_d
336 integer(c_int) :: n
337 end subroutine cuda_pwmax_vec3
338
339 subroutine cuda_pwmax_sca2(a_d, c_d, n) &
340 bind(c, name = 'cuda_pwmax_sca2')
341 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
342 import c_rp
343 type(c_ptr), value :: a_d
344 real(c_rp) :: c_d
345 integer(c_int) :: n
346 end subroutine cuda_pwmax_sca2
347
348 subroutine cuda_pwmax_sca3(a_d, b_d, c_d, n) &
349 bind(c, name = 'cuda_pwmax_sca3')
350 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
351 import c_rp
352 type(c_ptr), value :: a_d, b_d
353 real(c_rp) :: c_d
354 integer(c_int) :: n
355 end subroutine cuda_pwmax_sca3
356
357 subroutine cuda_pwmin_vec2(a_d, b_d, n) &
358 bind(c, name = 'cuda_pwmin_vec2')
359 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
360 type(c_ptr), value :: a_d, b_d
361 integer(c_int) :: n
362 end subroutine cuda_pwmin_vec2
363
364 subroutine cuda_pwmin_vec3(a_d, b_d, c_d, n) &
365 bind(c, name = 'cuda_pwmin_vec3')
366 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
367 type(c_ptr), value :: a_d, b_d, c_d
368 integer(c_int) :: n
369 end subroutine cuda_pwmin_vec3
370
371 subroutine cuda_pwmin_sca2(a_d, c_d, n) &
372 bind(c, name = 'cuda_pwmin_sca2')
373 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
374 import c_rp
375 type(c_ptr), value :: a_d
376 real(c_rp) :: c_d
377 integer(c_int) :: n
378 end subroutine cuda_pwmin_sca2
379
380 subroutine cuda_pwmin_sca3(a_d, b_d, c_d, n) &
381 bind(c, name = 'cuda_pwmin_sca3')
382 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
383 import c_rp
384 type(c_ptr), value :: a_d, b_d
385 real(c_rp) :: c_d
386 integer(c_int) :: n
387 end subroutine cuda_pwmin_sca3
388
389 end interface
390end module cuda_math
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12