Neko  0.9.99
A portable framework for high-order spectral element flow simulations
cuda_math.f90
Go to the documentation of this file.
1 ! Copyright (c) 2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
33 module cuda_math
34  use num_types, only: rp, c_rp
35  implicit none
36  public
37 
38  interface
39  subroutine cuda_copy(a_d, b_d, n) &
40  bind(c, name = 'cuda_copy')
41  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
42  type(c_ptr), value :: a_d, b_d
43  integer(c_int) :: n
44  end subroutine cuda_copy
45 
46  subroutine cuda_masked_copy(a_d, b_d, mask_d, n, m) &
47  bind(c, name = 'cuda_masked_copy')
48  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
49  type(c_ptr), value :: a_d, b_d, mask_d
50  integer(c_int) :: n, m
51  end subroutine cuda_masked_copy
52 
53  subroutine cuda_masked_red_copy(a_d, b_d, mask_d, n, m) &
54  bind(c, name = 'cuda_masked_red_copy')
55  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
56  type(c_ptr), value :: a_d, b_d, mask_d
57  integer(c_int) :: n, m
58  end subroutine cuda_masked_red_copy
59 
60  subroutine cuda_cfill_mask(a_d, c, size, mask_d, mask_size) &
61  bind(c, name = 'cuda_cfill_mask')
62  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
63  import c_rp
64  type(c_ptr), value :: a_d
65  real(c_rp) :: c
66  integer(c_int) :: size
67  type(c_ptr), value :: mask_d
68  integer(c_int) :: mask_size
69  end subroutine cuda_cfill_mask
70 
71  subroutine cuda_cmult(a_d, c, n) &
72  bind(c, name = 'cuda_cmult')
73  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
74  import c_rp
75  type(c_ptr), value :: a_d
76  real(c_rp) :: c
77  integer(c_int) :: n
78  end subroutine cuda_cmult
79 
80  subroutine cuda_cmult2(a_d, b_d, c, n) &
81  bind(c, name = 'cuda_cmult2')
82  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
83  import c_rp
84  type(c_ptr), value :: a_d, b_d
85  real(c_rp) :: c
86  integer(c_int) :: n
87  end subroutine cuda_cmult2
88 
89  subroutine cuda_cadd(a_d, c, n) &
90  bind(c, name = 'cuda_cadd')
91  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
92  import c_rp
93  type(c_ptr), value :: a_d
94  real(c_rp) :: c
95  integer(c_int) :: n
96  end subroutine cuda_cadd
97 
98  subroutine cuda_cadd2(a_d, b_d, c, n) &
99  bind(c, name = 'cuda_cadd2')
100  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
101  import c_rp
102  type(c_ptr), value :: a_d
103  type(c_ptr), value :: b_d
104  real(c_rp) :: c
105  integer(c_int) :: n
106  end subroutine cuda_cadd2
107 
108  subroutine cuda_cfill(a_d, c, n) &
109  bind(c, name = 'cuda_cfill')
110  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
111  import c_rp
112  type(c_ptr), value :: a_d
113  real(c_rp) :: c
114  integer(c_int) :: n
115  end subroutine cuda_cfill
116 
117  subroutine cuda_rzero(a_d, n) &
118  bind(c, name = 'cuda_rzero')
119  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
120  type(c_ptr), value :: a_d
121  integer(c_int) :: n
122  end subroutine cuda_rzero
123 
124  subroutine cuda_add2(a_d, b_d, n) &
125  bind(c, name = 'cuda_add2')
126  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
127  import c_rp
128  type(c_ptr), value :: a_d, b_d
129  integer(c_int) :: n
130  end subroutine cuda_add2
131 
132  subroutine cuda_add4(a_d, b_d, c_d, d_d, n) &
133  bind(c, name = 'cuda_add4')
134  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
135  import c_rp
136  type(c_ptr), value :: a_d, b_d, c_d, d_d
137  integer(c_int) :: n
138  end subroutine cuda_add4
139 
140  subroutine cuda_add2s1(a_d, b_d, c1, n) &
141  bind(c, name = 'cuda_add2s1')
142  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
143  import c_rp
144  type(c_ptr), value :: a_d, b_d
145  real(c_rp) :: c1
146  integer(c_int) :: n
147  end subroutine cuda_add2s1
148 
149  subroutine cuda_add2s2(a_d, b_d, c1, n) &
150  bind(c, name = 'cuda_add2s2')
151  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
152  import c_rp
153  type(c_ptr), value :: a_d, b_d
154  real(c_rp) :: c1
155  integer(c_int) :: n
156  end subroutine cuda_add2s2
157 
158  subroutine cuda_addsqr2s2(a_d, b_d, c1, n) &
159  bind(c, name = 'cuda_addsqr2s2')
160  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
161  import c_rp
162  type(c_ptr), value :: a_d, b_d
163  real(c_rp) :: c1
164  integer(c_int) :: n
165  end subroutine cuda_addsqr2s2
166 
167  subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n) &
168  bind(c, name = 'cuda_add3s2')
169  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
170  import c_rp
171  type(c_ptr), value :: a_d, b_d, c_d
172  real(c_rp) :: c1, c2
173  integer(c_int) :: n
174  end subroutine cuda_add3s2
175 
176  subroutine cuda_invcol1(a_d, n) &
177  bind(c, name = 'cuda_invcol1')
178  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
179  type(c_ptr), value :: a_d
180  integer(c_int) :: n
181  end subroutine cuda_invcol1
182 
183  subroutine cuda_invcol2(a_d, b_d, n) &
184  bind(c, name = 'cuda_invcol2')
185  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
186  type(c_ptr), value :: a_d, b_d
187  integer(c_int) :: n
188  end subroutine cuda_invcol2
189 
190  subroutine cuda_col2(a_d, b_d, n) &
191  bind(c, name = 'cuda_col2')
192  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
193  type(c_ptr), value :: a_d, b_d
194  integer(c_int) :: n
195  end subroutine cuda_col2
196 
197  subroutine cuda_col3(a_d, b_d, c_d, n) &
198  bind(c, name = 'cuda_col3')
199  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
200  type(c_ptr), value :: a_d, b_d, c_d
201  integer(c_int) :: n
202  end subroutine cuda_col3
203 
204  subroutine cuda_subcol3(a_d, b_d, c_d, n) &
205  bind(c, name = 'cuda_subcol3')
206  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
207  type(c_ptr), value :: a_d, b_d, c_d
208  integer(c_int) :: n
209  end subroutine cuda_subcol3
210 
211  subroutine cuda_sub2(a_d, b_d, n) &
212  bind(c, name = 'cuda_sub2')
213  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
214  type(c_ptr), value :: a_d, b_d
215  integer(c_int) :: n
216  end subroutine cuda_sub2
217 
218  subroutine cuda_sub3(a_d, b_d, c_d, n) &
219  bind(c, name = 'cuda_sub3')
220  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
221  type(c_ptr), value :: a_d, b_d, c_d
222  integer(c_int) :: n
223  end subroutine cuda_sub3
224 
225  subroutine cuda_add3(a_d, b_d, c_d, n) &
226  bind(c, name = 'cuda_add3')
227  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
228  type(c_ptr), value :: a_d, b_d, c_d
229  integer(c_int) :: n
230  end subroutine cuda_add3
231 
232  subroutine cuda_addcol3(a_d, b_d, c_d, n) &
233  bind(c, name = 'cuda_addcol3')
234  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
235  type(c_ptr), value :: a_d, b_d, c_d
236  integer(c_int) :: n
237  end subroutine cuda_addcol3
238 
239  subroutine cuda_addcol4(a_d, b_d, c_d, d_d, n) &
240  bind(c, name = 'cuda_addcol4')
241  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
242  type(c_ptr), value :: a_d, b_d, c_d, d_d
243  integer(c_int) :: n
244  end subroutine cuda_addcol4
245 
246  subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
247  bind(c, name = 'cuda_vdot3')
248  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
249  type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
250  integer(c_int) :: n
251  end subroutine cuda_vdot3
252 
253  subroutine cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
254  w1_d, w2_d, w3_d, n) &
255  bind(c, name = 'cuda_vcross')
256  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
257  type(c_ptr), value :: u1_d, u2_d, u3_d
258  type(c_ptr), value :: v1_d, v2_d, v3_d
259  type(c_ptr), value :: w1_d, w2_d, w3_d
260  integer(c_int) :: n
261  end subroutine cuda_vcross
262 
263  real(c_rp) function cuda_vlsc3(u_d, v_d, w_d, n) &
264  bind(c, name = 'cuda_vlsc3')
265  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
266  import c_rp
267  type(c_ptr), value :: u_d, v_d, w_d
268  integer(c_int) :: n
269  end function cuda_vlsc3
270 
271  subroutine cuda_add2s2_many(y_d, x_d_d, a_d, j, n) &
272  bind(c, name = 'cuda_add2s2_many')
273  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
274  import c_rp
275  type(c_ptr), value :: y_d, x_d_d, a_d
276  integer(c_int) :: j, n
277  end subroutine cuda_add2s2_many
278 
279  real(c_rp) function cuda_glsc3(a_d, b_d, c_d, n) &
280  bind(c, name = 'cuda_glsc3')
281  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
282  import c_rp
283  type(c_ptr), value :: a_d, b_d, c_d
284  integer(c_int) :: n
285  end function cuda_glsc3
286 
287  subroutine cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n) &
288  bind(c, name = 'cuda_glsc3_many')
289  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
290  import c_rp
291  type(c_ptr), value :: w_d, v_d_d, mult_d
292  integer(c_int) :: j, n
293  real(c_rp) :: h(j)
294  end subroutine cuda_glsc3_many
295 
296  real(c_rp) function cuda_glsc2(a_d, b_d, n) &
297  bind(c, name = 'cuda_glsc2')
298  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
299  import c_rp
300  type(c_ptr), value :: a_d, b_d
301  integer(c_int) :: n
302  end function cuda_glsc2
303 
304  real(c_rp) function cuda_glsum(a_d, n) &
305  bind(c, name = 'cuda_glsum')
306  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
307  import c_rp
308  type(c_ptr), value :: a_d
309  integer(c_int) :: n
310  end function cuda_glsum
311 
312  subroutine cuda_absval(a_d, n) &
313  bind(c, name = 'cuda_absval')
314  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
315  import c_rp
316  type(c_ptr), value :: a_d
317  integer(c_int) :: n
318  end subroutine cuda_absval
319  end interface
320 
321  ! ========================================================================== !
322  ! Interfaces for the pointwise operations.
323 
324  interface
325  subroutine cuda_pwmax_vec2(a_d, b_d, n) &
326  bind(c, name = 'cuda_pwmax_vec2')
327  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
328  type(c_ptr), value :: a_d, b_d
329  integer(c_int) :: n
330  end subroutine cuda_pwmax_vec2
331 
332  subroutine cuda_pwmax_vec3(a_d, b_d, c_d, n) &
333  bind(c, name = 'cuda_pwmax_vec3')
334  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
335  type(c_ptr), value :: a_d, b_d, c_d
336  integer(c_int) :: n
337  end subroutine cuda_pwmax_vec3
338 
339  subroutine cuda_pwmax_sca2(a_d, c_d, n) &
340  bind(c, name = 'cuda_pwmax_sca2')
341  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
342  import c_rp
343  type(c_ptr), value :: a_d
344  real(c_rp) :: c_d
345  integer(c_int) :: n
346  end subroutine cuda_pwmax_sca2
347 
348  subroutine cuda_pwmax_sca3(a_d, b_d, c_d, n) &
349  bind(c, name = 'cuda_pwmax_sca3')
350  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
351  import c_rp
352  type(c_ptr), value :: a_d, b_d
353  real(c_rp) :: c_d
354  integer(c_int) :: n
355  end subroutine cuda_pwmax_sca3
356 
357  subroutine cuda_pwmin_vec2(a_d, b_d, n) &
358  bind(c, name = 'cuda_pwmin_vec2')
359  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
360  type(c_ptr), value :: a_d, b_d
361  integer(c_int) :: n
362  end subroutine cuda_pwmin_vec2
363 
364  subroutine cuda_pwmin_vec3(a_d, b_d, c_d, n) &
365  bind(c, name = 'cuda_pwmin_vec3')
366  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
367  type(c_ptr), value :: a_d, b_d, c_d
368  integer(c_int) :: n
369  end subroutine cuda_pwmin_vec3
370 
371  subroutine cuda_pwmin_sca2(a_d, c_d, n) &
372  bind(c, name = 'cuda_pwmin_sca2')
373  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
374  import c_rp
375  type(c_ptr), value :: a_d
376  real(c_rp) :: c_d
377  integer(c_int) :: n
378  end subroutine cuda_pwmin_sca2
379 
380  subroutine cuda_pwmin_sca3(a_d, b_d, c_d, n) &
381  bind(c, name = 'cuda_pwmin_sca3')
382  use, intrinsic :: iso_c_binding, only: c_int, c_ptr
383  import c_rp
384  type(c_ptr), value :: a_d, b_d
385  real(c_rp) :: c_d
386  integer(c_int) :: n
387  end subroutine cuda_pwmin_sca3
388 
389  end interface
390 end module cuda_math
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12