Neko  0.8.99
A portable framework for high-order spectral element flow simulations
device_math.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2023, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34  use comm
35  use utils, only : neko_error
36  use num_types, only : rp, c_rp
37  use, intrinsic :: iso_c_binding
38  implicit none
39  private
40 
41 #ifdef HAVE_HIP
42  interface
43  subroutine hip_copy(a_d, b_d, n) &
44  bind(c, name='hip_copy')
45  use, intrinsic :: iso_c_binding
46  type(c_ptr), value :: a_d, b_d
47  integer(c_int) :: n
48  end subroutine hip_copy
49  end interface
50 
51  interface
52  subroutine hip_masked_copy(a_d, b_d, mask_d, n, m) &
53  bind(c, name='hip_masked_copy')
54  use, intrinsic :: iso_c_binding
55  type(c_ptr), value :: a_d, b_d, mask_d
56  integer(c_int) :: n, m
57  end subroutine hip_masked_copy
58  end interface
59 
60  interface
61  subroutine hip_cfill_mask(a_d, c, size, mask_d, mask_size) &
62  bind(c, name='hip_cfill_mask')
63  use, intrinsic :: iso_c_binding
64  import c_rp
65  type(c_ptr), value :: a_d
66  real(c_rp) :: c
67  integer(c_int) :: size
68  type(c_ptr), value :: mask_d
69  integer(c_int) :: mask_size
70  end subroutine hip_cfill_mask
71  end interface
72 
73  interface
74  subroutine hip_cmult(a_d, c, n) &
75  bind(c, name='hip_cmult')
76  use, intrinsic :: iso_c_binding
77  import c_rp
78  type(c_ptr), value :: a_d
79  real(c_rp) :: c
80  integer(c_int) :: n
81  end subroutine hip_cmult
82  end interface
83 
84  interface
85  subroutine hip_cmult2(a_d, b_d, c, n) &
86  bind(c, name='hip_cmult2')
87  use, intrinsic :: iso_c_binding
88  import c_rp
89  type(c_ptr), value :: a_d, b_d
90  real(c_rp) :: c
91  integer(c_int) :: n
92  end subroutine hip_cmult2
93  end interface
94 
95  interface
96  subroutine hip_cadd(a_d, c, n) &
97  bind(c, name='hip_cadd')
98  use, intrinsic :: iso_c_binding
99  import c_rp
100  type(c_ptr), value :: a_d
101  real(c_rp) :: c
102  integer(c_int) :: n
103  end subroutine hip_cadd
104  end interface
105 
106  interface
107  subroutine hip_cadd2(a_d, b_d, c, n) &
108  bind(c, name='hip_cadd2')
109  use, intrinsic :: iso_c_binding
110  import c_rp
111  type(c_ptr), value :: a_d
112  type(c_ptr), value :: b_d
113  real(c_rp) :: c
114  integer(c_int) :: n
115  end subroutine hip_cadd2
116  end interface
117 
118  interface
119  subroutine hip_cfill(a_d, c, n) &
120  bind(c, name='hip_cfill')
121  use, intrinsic :: iso_c_binding
122  import c_rp
123  type(c_ptr), value :: a_d
124  real(c_rp) :: c
125  integer(c_int) :: n
126  end subroutine hip_cfill
127  end interface
128 
129  interface
130  subroutine hip_rzero(a_d, n) &
131  bind(c, name='hip_rzero')
132  use, intrinsic :: iso_c_binding
133  type(c_ptr), value :: a_d
134  integer(c_int) :: n
135  end subroutine hip_rzero
136  end interface
137 
138  interface
139  subroutine hip_add2(a_d, b_d, n) &
140  bind(c, name='hip_add2')
141  use, intrinsic :: iso_c_binding
142  import c_rp
143  implicit none
144  type(c_ptr), value :: a_d, b_d
145  integer(c_int) :: n
146  end subroutine hip_add2
147  end interface
148 
149  interface
150  subroutine hip_add2s1(a_d, b_d, c1, n) &
151  bind(c, name='hip_add2s1')
152  use, intrinsic :: iso_c_binding
153  import c_rp
154  implicit none
155  type(c_ptr), value :: a_d, b_d
156  real(c_rp) :: c1
157  integer(c_int) :: n
158  end subroutine hip_add2s1
159  end interface
160 
161  interface
162  subroutine hip_add2s2(a_d, b_d, c1, n) &
163  bind(c, name='hip_add2s2')
164  use, intrinsic :: iso_c_binding
165  import c_rp
166  implicit none
167  type(c_ptr), value :: a_d, b_d
168  real(c_rp) :: c1
169  integer(c_int) :: n
170  end subroutine hip_add2s2
171  end interface
172 
173  interface
174  subroutine hip_add2s2_many(y_d,x_d_d,a_d,j,n) &
175  bind(c, name='hip_add2s2_many')
176  use, intrinsic :: iso_c_binding
177  import c_rp
178  implicit none
179  type(c_ptr), value :: y_d, x_d_d, a_d
180  integer(c_int) :: j, n
181  end subroutine hip_add2s2_many
182  end interface
183 
184  interface
185  subroutine hip_addsqr2s2(a_d, b_d, c1, n) &
186  bind(c, name='hip_addsqr2s2')
187  use, intrinsic :: iso_c_binding
188  import c_rp
189  implicit none
190  type(c_ptr), value :: a_d, b_d
191  real(c_rp) :: c1
192  integer(c_int) :: n
193  end subroutine hip_addsqr2s2
194  end interface
195 
196  interface
197  subroutine hip_add3s2(a_d, b_d, c_d, c1, c2, n) &
198  bind(c, name='hip_add3s2')
199  use, intrinsic :: iso_c_binding
200  import c_rp
201  implicit none
202  type(c_ptr), value :: a_d, b_d, c_d
203  real(c_rp) :: c1, c2
204  integer(c_int) :: n
205  end subroutine hip_add3s2
206  end interface
207 
208  interface
209  subroutine hip_invcol1(a_d, n) &
210  bind(c, name='hip_invcol1')
211  use, intrinsic :: iso_c_binding
212  implicit none
213  type(c_ptr), value :: a_d
214  integer(c_int) :: n
215  end subroutine hip_invcol1
216  end interface
217 
218  interface
219  subroutine hip_invcol2(a_d, b_d, n) &
220  bind(c, name='hip_invcol2')
221  use, intrinsic :: iso_c_binding
222  implicit none
223  type(c_ptr), value :: a_d, b_d
224  integer(c_int) :: n
225  end subroutine hip_invcol2
226  end interface
227 
228  interface
229  subroutine hip_col2(a_d, b_d, n) &
230  bind(c, name='hip_col2')
231  use, intrinsic :: iso_c_binding
232  implicit none
233  type(c_ptr), value :: a_d, b_d
234  integer(c_int) :: n
235  end subroutine hip_col2
236  end interface
237 
238  interface
239  subroutine hip_col3(a_d, b_d, c_d, n) &
240  bind(c, name='hip_col3')
241  use, intrinsic :: iso_c_binding
242  implicit none
243  type(c_ptr), value :: a_d, b_d, c_d
244  integer(c_int) :: n
245  end subroutine hip_col3
246  end interface
247 
248  interface
249  subroutine hip_subcol3(a_d, b_d, c_d, n) &
250  bind(c, name='hip_subcol3')
251  use, intrinsic :: iso_c_binding
252  implicit none
253  type(c_ptr), value :: a_d, b_d, c_d
254  integer(c_int) :: n
255  end subroutine hip_subcol3
256  end interface
257 
258  interface
259  subroutine hip_sub2(a_d, b_d, n) &
260  bind(c, name='hip_sub2')
261  use, intrinsic :: iso_c_binding
262  implicit none
263  type(c_ptr), value :: a_d, b_d
264  integer(c_int) :: n
265  end subroutine hip_sub2
266  end interface
267 
268  interface
269  subroutine hip_sub3(a_d, b_d, c_d, n) &
270  bind(c, name='hip_sub3')
271  use, intrinsic :: iso_c_binding
272  implicit none
273  type(c_ptr), value :: a_d, b_d, c_d
274  integer(c_int) :: n
275  end subroutine hip_sub3
276  end interface
277 
278  interface
279  subroutine hip_add3(a_d, b_d, c_d, n) &
280  bind(c, name='hip_add3')
281  use, intrinsic :: iso_c_binding
282  implicit none
283  type(c_ptr), value :: a_d, b_d, c_d
284  integer(c_int) :: n
285  end subroutine hip_add3
286  end interface
287 
288  interface
289  subroutine hip_addcol3(a_d, b_d, c_d, n) &
290  bind(c, name='hip_addcol3')
291  use, intrinsic :: iso_c_binding
292  implicit none
293  type(c_ptr), value :: a_d, b_d, c_d
294  integer(c_int) :: n
295  end subroutine hip_addcol3
296  end interface
297 
298  interface
299  subroutine hip_addcol4(a_d, b_d, c_d, d_d, n) &
300  bind(c, name='hip_addcol4')
301  use, intrinsic :: iso_c_binding
302  implicit none
303  type(c_ptr), value :: a_d, b_d, c_d, d_d
304  integer(c_int) :: n
305  end subroutine hip_addcol4
306  end interface
307 
308  interface
309  subroutine hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
310  bind(c, name='hip_vdot3')
311  use, intrinsic :: iso_c_binding
312  implicit none
313  type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
314  integer(c_int) :: n
315  end subroutine hip_vdot3
316  end interface
317 
318  interface
319  real(c_rp) function hip_vlsc3(u_d, v_d, w_d, n) &
320  bind(c, name='hip_vlsc3')
321  use, intrinsic :: iso_c_binding
322  import c_rp
323  implicit none
324  type(c_ptr), value :: u_d, v_d, w_d
325  integer(c_int) :: n
326  end function hip_vlsc3
327  end interface
328 
329  interface
330  real(c_rp) function hip_glsc3(a_d, b_d, c_d, n) &
331  bind(c, name='hip_glsc3')
332  use, intrinsic :: iso_c_binding
333  import c_rp
334  implicit none
335  type(c_ptr), value :: a_d, b_d, c_d
336  integer(c_int) :: n
337  end function hip_glsc3
338  end interface
339 
340  interface
341  subroutine hip_glsc3_many(h,w_d,v_d_d,mult_d,j,n) &
342  bind(c, name='hip_glsc3_many')
343  use, intrinsic :: iso_c_binding
344  import c_rp
345  implicit none
346  type(c_ptr), value :: w_d, v_d_d, mult_d
347  integer(c_int) :: j, n
348  real(c_rp) :: h(j)
349  end subroutine hip_glsc3_many
350  end interface
351 
352  interface
353  real(c_rp) function hip_glsc2(a_d, b_d, n) &
354  bind(c, name='hip_glsc2')
355  use, intrinsic :: iso_c_binding
356  import c_rp
357  implicit none
358  type(c_ptr), value :: a_d, b_d
359  integer(c_int) :: n
360  end function hip_glsc2
361  end interface
362 
363  interface
364  real(c_rp) function hip_glsum(a_d, n) &
365  bind(c, name='hip_glsum')
366  use, intrinsic :: iso_c_binding
367  import c_rp
368  implicit none
369  type(c_ptr), value :: a_d
370  integer(c_int) :: n
371  end function hip_glsum
372  end interface
373 #elif HAVE_CUDA
374  interface
375  subroutine cuda_copy(a_d, b_d, n) &
376  bind(c, name='cuda_copy')
377  use, intrinsic :: iso_c_binding
378  type(c_ptr), value :: a_d, b_d
379  integer(c_int) :: n
380  end subroutine cuda_copy
381  end interface
382 
383  interface
384  subroutine cuda_masked_copy(a_d, b_d, mask_d, n, m) &
385  bind(c, name='cuda_masked_copy')
386  use, intrinsic :: iso_c_binding
387  type(c_ptr), value :: a_d, b_d, mask_d
388  integer(c_int) :: n, m
389  end subroutine cuda_masked_copy
390  end interface
391 
392  interface
393  subroutine cuda_cfill_mask(a_d, c, size, mask_d, mask_size) &
394  bind(c, name='cuda_cfill_mask')
395  use, intrinsic :: iso_c_binding
396  import c_rp
397  type(c_ptr), value :: a_d
398  real(c_rp) :: c
399  integer(c_int) :: size
400  type(c_ptr), value :: mask_d
401  integer(c_int) :: mask_size
402  end subroutine cuda_cfill_mask
403  end interface
404 
405  interface
406  subroutine cuda_cmult(a_d, c, n) &
407  bind(c, name='cuda_cmult')
408  use, intrinsic :: iso_c_binding
409  import c_rp
410  type(c_ptr), value :: a_d
411  real(c_rp) :: c
412  integer(c_int) :: n
413  end subroutine cuda_cmult
414  end interface
415 
416  interface
417  subroutine cuda_cmult2(a_d, b_d, c, n) &
418  bind(c, name='cuda_cmult2')
419  use, intrinsic :: iso_c_binding
420  import c_rp
421  type(c_ptr), value :: a_d, b_d
422  real(c_rp) :: c
423  integer(c_int) :: n
424  end subroutine cuda_cmult2
425  end interface
426 
427  interface
428  subroutine cuda_cadd(a_d, c, n) &
429  bind(c, name='cuda_cadd')
430  use, intrinsic :: iso_c_binding
431  import c_rp
432  type(c_ptr), value :: a_d
433  real(c_rp) :: c
434  integer(c_int) :: n
435  end subroutine cuda_cadd
436  end interface
437 
438  interface
439  subroutine cuda_cadd2(a_d, b_d, c, n) &
440  bind(c, name='cuda_cadd2')
441  use, intrinsic :: iso_c_binding
442  import c_rp
443  type(c_ptr), value :: a_d
444  type(c_ptr), value :: b_d
445  real(c_rp) :: c
446  integer(c_int) :: n
447  end subroutine cuda_cadd2
448  end interface
449 
450  interface
451  subroutine cuda_cfill(a_d, c, n) &
452  bind(c, name='cuda_cfill')
453  use, intrinsic :: iso_c_binding
454  import c_rp
455  type(c_ptr), value :: a_d
456  real(c_rp) :: c
457  integer(c_int) :: n
458  end subroutine cuda_cfill
459  end interface
460 
461  interface
462  subroutine cuda_rzero(a_d, n) &
463  bind(c, name='cuda_rzero')
464  use, intrinsic :: iso_c_binding
465  type(c_ptr), value :: a_d
466  integer(c_int) :: n
467  end subroutine cuda_rzero
468  end interface
469 
470  interface
471  subroutine cuda_add2(a_d, b_d, n) &
472  bind(c, name='cuda_add2')
473  use, intrinsic :: iso_c_binding
474  import c_rp
475  implicit none
476  type(c_ptr), value :: a_d, b_d
477  integer(c_int) :: n
478  end subroutine cuda_add2
479  end interface
480 
481  interface
482  subroutine cuda_add2s1(a_d, b_d, c1, n) &
483  bind(c, name='cuda_add2s1')
484  use, intrinsic :: iso_c_binding
485  import c_rp
486  implicit none
487  type(c_ptr), value :: a_d, b_d
488  real(c_rp) :: c1
489  integer(c_int) :: n
490  end subroutine cuda_add2s1
491  end interface
492 
493  interface
494  subroutine cuda_add2s2(a_d, b_d, c1, n) &
495  bind(c, name='cuda_add2s2')
496  use, intrinsic :: iso_c_binding
497  import c_rp
498  implicit none
499  type(c_ptr), value :: a_d, b_d
500  real(c_rp) :: c1
501  integer(c_int) :: n
502  end subroutine cuda_add2s2
503  end interface
504 
505  interface
506  subroutine cuda_addsqr2s2(a_d, b_d, c1, n) &
507  bind(c, name='cuda_addsqr2s2')
508  use, intrinsic :: iso_c_binding
509  import c_rp
510  implicit none
511  type(c_ptr), value :: a_d, b_d
512  real(c_rp) :: c1
513  integer(c_int) :: n
514  end subroutine cuda_addsqr2s2
515  end interface
516 
517  interface
518  subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n) &
519  bind(c, name='cuda_add3s2')
520  use, intrinsic :: iso_c_binding
521  import c_rp
522  implicit none
523  type(c_ptr), value :: a_d, b_d, c_d
524  real(c_rp) :: c1, c2
525  integer(c_int) :: n
526  end subroutine cuda_add3s2
527  end interface
528 
529  interface
530  subroutine cuda_invcol1(a_d, n) &
531  bind(c, name='cuda_invcol1')
532  use, intrinsic :: iso_c_binding
533  implicit none
534  type(c_ptr), value :: a_d
535  integer(c_int) :: n
536  end subroutine cuda_invcol1
537  end interface
538 
539  interface
540  subroutine cuda_invcol2(a_d, b_d, n) &
541  bind(c, name='cuda_invcol2')
542  use, intrinsic :: iso_c_binding
543  implicit none
544  type(c_ptr), value :: a_d, b_d
545  integer(c_int) :: n
546  end subroutine cuda_invcol2
547  end interface
548 
549  interface
550  subroutine cuda_col2(a_d, b_d, n) &
551  bind(c, name='cuda_col2')
552  use, intrinsic :: iso_c_binding
553  implicit none
554  type(c_ptr), value :: a_d, b_d
555  integer(c_int) :: n
556  end subroutine cuda_col2
557  end interface
558 
559  interface
560  subroutine cuda_col3(a_d, b_d, c_d, n) &
561  bind(c, name='cuda_col3')
562  use, intrinsic :: iso_c_binding
563  implicit none
564  type(c_ptr), value :: a_d, b_d, c_d
565  integer(c_int) :: n
566  end subroutine cuda_col3
567  end interface
568 
569  interface
570  subroutine cuda_subcol3(a_d, b_d, c_d, n) &
571  bind(c, name='cuda_subcol3')
572  use, intrinsic :: iso_c_binding
573  implicit none
574  type(c_ptr), value :: a_d, b_d, c_d
575  integer(c_int) :: n
576  end subroutine cuda_subcol3
577  end interface
578 
579  interface
580  subroutine cuda_sub2(a_d, b_d, n) &
581  bind(c, name='cuda_sub2')
582  use, intrinsic :: iso_c_binding
583  implicit none
584  type(c_ptr), value :: a_d, b_d
585  integer(c_int) :: n
586  end subroutine cuda_sub2
587  end interface
588 
589  interface
590  subroutine cuda_sub3(a_d, b_d, c_d, n) &
591  bind(c, name='cuda_sub3')
592  use, intrinsic :: iso_c_binding
593  implicit none
594  type(c_ptr), value :: a_d, b_d, c_d
595  integer(c_int) :: n
596  end subroutine cuda_sub3
597  end interface
598 
599  interface
600  subroutine cuda_add3(a_d, b_d, c_d, n) &
601  bind(c, name='cuda_add3')
602  use, intrinsic :: iso_c_binding
603  implicit none
604  type(c_ptr), value :: a_d, b_d, c_d
605  integer(c_int) :: n
606  end subroutine cuda_add3
607  end interface
608 
609  interface
610  subroutine cuda_addcol3(a_d, b_d, c_d, n) &
611  bind(c, name='cuda_addcol3')
612  use, intrinsic :: iso_c_binding
613  implicit none
614  type(c_ptr), value :: a_d, b_d, c_d
615  integer(c_int) :: n
616  end subroutine cuda_addcol3
617  end interface
618 
619  interface
620  subroutine cuda_addcol4(a_d, b_d, c_d, d_d, n) &
621  bind(c, name='cuda_addcol4')
622  use, intrinsic :: iso_c_binding
623  implicit none
624  type(c_ptr), value :: a_d, b_d, c_d, d_d
625  integer(c_int) :: n
626  end subroutine cuda_addcol4
627  end interface
628 
629  interface
630  subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
631  bind(c, name='cuda_vdot3')
632  use, intrinsic :: iso_c_binding
633  implicit none
634  type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
635  integer(c_int) :: n
636  end subroutine cuda_vdot3
637  end interface
638 
639  interface
640  real(c_rp) function cuda_vlsc3(u_d, v_d, w_d, n) &
641  bind(c, name='cuda_vlsc3')
642  use, intrinsic :: iso_c_binding
643  import c_rp
644  implicit none
645  type(c_ptr), value :: u_d, v_d, w_d
646  integer(c_int) :: n
647  end function cuda_vlsc3
648  end interface
649 
650  interface
651  subroutine cuda_add2s2_many(y_d,x_d_d,a_d,j,n) &
652  bind(c, name='cuda_add2s2_many')
653  use, intrinsic :: iso_c_binding
654  import c_rp
655  implicit none
656  type(c_ptr), value :: y_d, x_d_d, a_d
657  integer(c_int) :: j, n
658  end subroutine cuda_add2s2_many
659  end interface
660 
661  interface
662  real(c_rp) function cuda_glsc3(a_d, b_d, c_d, n) &
663  bind(c, name='cuda_glsc3')
664  use, intrinsic :: iso_c_binding
665  import c_rp
666  implicit none
667  type(c_ptr), value :: a_d, b_d, c_d
668  integer(c_int) :: n
669  end function cuda_glsc3
670  end interface
671 
672  interface
673  subroutine cuda_glsc3_many(h,w_d,v_d_d,mult_d,j,n) &
674  bind(c, name='cuda_glsc3_many')
675  use, intrinsic :: iso_c_binding
676  import c_rp
677  implicit none
678  type(c_ptr), value :: w_d, v_d_d, mult_d
679  integer(c_int) :: j, n
680  real(c_rp) :: h(j)
681  end subroutine cuda_glsc3_many
682  end interface
683 
684  interface
685  real(c_rp) function cuda_glsc2(a_d, b_d, n) &
686  bind(c, name='cuda_glsc2')
687  use, intrinsic :: iso_c_binding
688  import c_rp
689  implicit none
690  type(c_ptr), value :: a_d, b_d
691  integer(c_int) :: n
692  end function cuda_glsc2
693  end interface
694 
695  interface
696  real(c_rp) function cuda_glsum(a_d, n) &
697  bind(c, name='cuda_glsum')
698  use, intrinsic :: iso_c_binding
699  import c_rp
700  implicit none
701  type(c_ptr), value :: a_d
702  integer(c_int) :: n
703  end function cuda_glsum
704  end interface
705 #elif HAVE_OPENCL
706  interface
707  subroutine opencl_copy(a_d, b_d, n) &
708  bind(c, name='opencl_copy')
709  use, intrinsic :: iso_c_binding
710  type(c_ptr), value :: a_d, b_d
711  integer(c_int) :: n
712  end subroutine opencl_copy
713  end interface
714 
715  interface
716  subroutine opencl_masked_copy(a_d, b_d, mask_d, n, m) &
717  bind(c, name='opencl_masked_copy')
718  use, intrinsic :: iso_c_binding
719  type(c_ptr), value :: a_d, b_d, mask_d
720  integer(c_int) :: n, m
721  end subroutine opencl_masked_copy
722  end interface
723 
724  interface
725  subroutine opencl_cfill_mask(a_d, c, size, mask_d, mask_size) &
726  bind(c, name='opencl_cfill_mask')
727  use, intrinsic :: iso_c_binding
728  import c_rp
729  type(c_ptr), value :: a_d
730  real(c_rp) :: c
731  integer(c_int) :: size
732  type(c_ptr), value :: mask_d
733  integer(c_int) :: mask_size
734  end subroutine opencl_cfill_mask
735  end interface
736 
737  interface
738  subroutine opencl_cmult(a_d, c, n) &
739  bind(c, name='opencl_cmult')
740  use, intrinsic :: iso_c_binding
741  import c_rp
742  type(c_ptr), value :: a_d
743  real(c_rp) :: c
744  integer(c_int) :: n
745  end subroutine opencl_cmult
746  end interface
747 
748  interface
749  subroutine opencl_cmult2(a_d, b_d, c, n) &
750  bind(c, name='opencl_cmult2')
751  use, intrinsic :: iso_c_binding
752  import c_rp
753  type(c_ptr), value :: a_d, b_d
754  real(c_rp) :: c
755  integer(c_int) :: n
756  end subroutine opencl_cmult2
757  end interface
758 
759  interface
760  subroutine opencl_cadd(a_d, c, n) &
761  bind(c, name='opencl_cadd')
762  use, intrinsic :: iso_c_binding
763  import c_rp
764  type(c_ptr), value :: a_d
765  real(c_rp) :: c
766  integer(c_int) :: n
767  end subroutine opencl_cadd
768  end interface
769 
770  interface
771  subroutine opencl_cadd2(a_d, b_d, c, n) &
772  bind(c, name='opencl_cadd2')
773  use, intrinsic :: iso_c_binding
774  import c_rp
775  type(c_ptr), value :: a_d
776  type(c_ptr), value :: b_d
777  real(c_rp) :: c
778  integer(c_int) :: n
779  end subroutine opencl_cadd2
780  end interface
781 
782  interface
783  subroutine opencl_cfill(a_d, c, n) &
784  bind(c, name='opencl_cfill')
785  use, intrinsic :: iso_c_binding
786  import c_rp
787  type(c_ptr), value :: a_d
788  real(c_rp) :: c
789  integer(c_int) :: n
790  end subroutine opencl_cfill
791  end interface
792 
793  interface
794  subroutine opencl_rzero(a_d, n) &
795  bind(c, name='opencl_rzero')
796  use, intrinsic :: iso_c_binding
797  type(c_ptr), value :: a_d
798  integer(c_int) :: n
799  end subroutine opencl_rzero
800  end interface
801 
802  interface
803  subroutine opencl_rone(a_d, n) &
804  bind(c, name='opencl_rone')
805  use, intrinsic :: iso_c_binding
806  type(c_ptr), value :: a_d
807  integer(c_int) :: n
808  end subroutine opencl_rone
809  end interface
810 
811  interface
812  subroutine opencl_add2(a_d, b_d, n) &
813  bind(c, name='opencl_add2')
814  use, intrinsic :: iso_c_binding
815  implicit none
816  type(c_ptr), value :: a_d, b_d
817  integer(c_int) :: n
818  end subroutine opencl_add2
819  end interface
820 
821  interface
822  subroutine opencl_add2s1(a_d, b_d, c1, n) &
823  bind(c, name='opencl_add2s1')
824  use, intrinsic :: iso_c_binding
825  import c_rp
826  implicit none
827  type(c_ptr), value :: a_d, b_d
828  real(c_rp) :: c1
829  integer(c_int) :: n
830  end subroutine opencl_add2s1
831  end interface
832 
833  interface
834  subroutine opencl_add2s2(a_d, b_d, c1, n) &
835  bind(c, name='opencl_add2s2')
836  use, intrinsic :: iso_c_binding
837  import c_rp
838  implicit none
839  type(c_ptr), value :: a_d, b_d
840  real(c_rp) :: c1
841  integer(c_int) :: n
842  end subroutine opencl_add2s2
843  end interface
844 
845  interface
846  subroutine opencl_add2s2_many(y_d,x_d_d,a_d,j,n) &
847  bind(c, name='opencl_add2s2_many')
848  use, intrinsic :: iso_c_binding
849  import c_rp
850  implicit none
851  type(c_ptr), value :: y_d, x_d_d, a_d
852  integer(c_int) :: j, n
853  end subroutine opencl_add2s2_many
854  end interface
855 
856  interface
857  subroutine opencl_addsqr2s2(a_d, b_d, c1, n) &
858  bind(c, name='opencl_addsqr2s2')
859  use, intrinsic :: iso_c_binding
860  import c_rp
861  implicit none
862  type(c_ptr), value :: a_d, b_d
863  real(c_rp) :: c1
864  integer(c_int) :: n
865  end subroutine opencl_addsqr2s2
866  end interface
867 
868  interface
869  subroutine opencl_add3s2(a_d, b_d, c_d, c1, c2, n) &
870  bind(c, name='opencl_add3s2')
871  use, intrinsic :: iso_c_binding
872  import c_rp
873  implicit none
874  type(c_ptr), value :: a_d, b_d, c_d
875  real(c_rp) :: c1, c2
876  integer(c_int) :: n
877  end subroutine opencl_add3s2
878  end interface
879 
880  interface
881  subroutine opencl_invcol1(a_d, n) &
882  bind(c, name='opencl_invcol1')
883  use, intrinsic :: iso_c_binding
884  implicit none
885  type(c_ptr), value :: a_d
886  integer(c_int) :: n
887  end subroutine opencl_invcol1
888  end interface
889 
890  interface
891  subroutine opencl_invcol2(a_d, b_d, n) &
892  bind(c, name='opencl_invcol2')
893  use, intrinsic :: iso_c_binding
894  implicit none
895  type(c_ptr), value :: a_d, b_d
896  integer(c_int) :: n
897  end subroutine opencl_invcol2
898  end interface
899 
900  interface
901  subroutine opencl_col2(a_d, b_d, n) &
902  bind(c, name='opencl_col2')
903  use, intrinsic :: iso_c_binding
904  implicit none
905  type(c_ptr), value :: a_d, b_d
906  integer(c_int) :: n
907  end subroutine opencl_col2
908  end interface
909 
910  interface
911  subroutine opencl_col3(a_d, b_d, c_d, n) &
912  bind(c, name='opencl_col3')
913  use, intrinsic :: iso_c_binding
914  implicit none
915  type(c_ptr), value :: a_d, b_d, c_d
916  integer(c_int) :: n
917  end subroutine opencl_col3
918  end interface
919 
920  interface
921  subroutine opencl_subcol3(a_d, b_d, c_d, n) &
922  bind(c, name='opencl_subcol3')
923  use, intrinsic :: iso_c_binding
924  implicit none
925  type(c_ptr), value :: a_d, b_d, c_d
926  integer(c_int) :: n
927  end subroutine opencl_subcol3
928  end interface
929 
930  interface
931  subroutine opencl_sub2(a_d, b_d, n) &
932  bind(c, name='opencl_sub2')
933  use, intrinsic :: iso_c_binding
934  implicit none
935  type(c_ptr), value :: a_d, b_d
936  integer(c_int) :: n
937  end subroutine opencl_sub2
938  end interface
939 
940  interface
941  subroutine opencl_sub3(a_d, b_d, c_d, n) &
942  bind(c, name='opencl_sub3')
943  use, intrinsic :: iso_c_binding
944  implicit none
945  type(c_ptr), value :: a_d, b_d, c_d
946  integer(c_int) :: n
947  end subroutine opencl_sub3
948  end interface
949 
950  interface
951  subroutine opencl_add3(a_d, b_d, c_d, n) &
952  bind(c, name='opencl_add3')
953  use, intrinsic :: iso_c_binding
954  implicit none
955  type(c_ptr), value :: a_d, b_d, c_d
956  integer(c_int) :: n
957  end subroutine opencl_add3
958  end interface
959 
960  interface
961  subroutine opencl_addcol3(a_d, b_d, c_d, n) &
962  bind(c, name='opencl_addcol3')
963  use, intrinsic :: iso_c_binding
964  implicit none
965  type(c_ptr), value :: a_d, b_d, c_d
966  integer(c_int) :: n
967  end subroutine opencl_addcol3
968  end interface
969 
970  interface
971  subroutine opencl_addcol4(a_d, b_d, c_d, d_d, n) &
972  bind(c, name='opencl_addcol4')
973  use, intrinsic :: iso_c_binding
974  implicit none
975  type(c_ptr), value :: a_d, b_d, c_d, d_d
976  integer(c_int) :: n
977  end subroutine opencl_addcol4
978  end interface
979 
980  interface
981  subroutine opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
982  bind(c, name='opencl_vdot3')
983  use, intrinsic :: iso_c_binding
984  implicit none
985  type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
986  integer(c_int) :: n
987  end subroutine opencl_vdot3
988  end interface
989 
990  interface
991  real(c_rp) function opencl_glsc3(a_d, b_d, c_d, n) &
992  bind(c, name='opencl_glsc3')
993  use, intrinsic :: iso_c_binding
994  import c_rp
995  implicit none
996  type(c_ptr), value :: a_d, b_d, c_d
997  integer(c_int) :: n
998  end function opencl_glsc3
999  end interface
1000 
1001  interface
1002  subroutine opencl_glsc3_many(h, w_d, v_d_d, mult_d, j, n) &
1003  bind(c, name='opencl_glsc3_many')
1004  use, intrinsic :: iso_c_binding
1005  import c_rp
1006  implicit none
1007  integer(c_int) :: j, n
1008  type(c_ptr), value :: w_d, v_d_d, mult_d
1009  real(c_rp) :: h(j)
1010  end subroutine opencl_glsc3_many
1011  end interface
1012 
1013  interface
1014  real(c_rp) function opencl_glsc2(a_d, b_d, n) &
1015  bind(c, name='opencl_glsc2')
1016  use, intrinsic :: iso_c_binding
1017  import c_rp
1018  implicit none
1019  type(c_ptr), value :: a_d, b_d
1020  integer(c_int) :: n
1021  end function opencl_glsc2
1022  end interface
1023 
1024  interface
1025  real(c_rp) function opencl_glsum(a_d, n) &
1026  bind(c, name='opencl_glsum')
1027  use, intrinsic :: iso_c_binding
1028  import c_rp
1029  implicit none
1030  type(c_ptr), value :: a_d
1031  integer(c_int) :: n
1032  end function opencl_glsum
1033  end interface
1034 #endif
1035 
1043 
1044 contains
1045 
1047  subroutine device_copy(a_d, b_d, n)
1048  type(c_ptr) :: a_d, b_d
1049  integer :: n
1050 #ifdef HAVE_HIP
1051  call hip_copy(a_d, b_d, n)
1052 #elif HAVE_CUDA
1053  call cuda_copy(a_d, b_d, n)
1054 #elif HAVE_OPENCL
1055  call opencl_copy(a_d, b_d, n)
1056 #else
1057  call neko_error('no device backend configured')
1058 #endif
1059  end subroutine device_copy
1060 
1062  subroutine device_masked_copy(a_d, b_d, mask_d, n, m)
1063  type(c_ptr) :: a_d, b_d, mask_d
1064  integer :: n, m
1065 #ifdef HAVE_HIP
1066  call hip_masked_copy(a_d, b_d, mask_d, n, m)
1067 #elif HAVE_CUDA
1068  call cuda_masked_copy(a_d, b_d, mask_d, n, m)
1069 #elif HAVE_OPENCL
1070  call opencl_masked_copy(a_d, b_d, mask_d, n, m)
1071 #else
1072  call neko_error('no device backend configured')
1073 #endif
1074  end subroutine device_masked_copy
1075 
1078  subroutine device_cfill_mask(a_d, c, size, mask_d, mask_size)
1079  type(c_ptr) :: a_d
1080  real(kind=rp), intent(in) :: c
1081  integer :: size
1082  type(c_ptr) :: mask_d
1083  integer :: mask_size
1084 #ifdef HAVE_HIP
1085  call hip_cfill_mask(a_d, c, size, mask_d, mask_size)
1086 #elif HAVE_CUDA
1087  call cuda_cfill_mask(a_d, c, size, mask_d, mask_size)
1088 #elif HAVE_OPENCL
1089  call opencl_cfill_mask(a_d, c, size, mask_d, mask_size)
1090 #else
1091  call neko_error('No device backend configured')
1092 #endif
1093  end subroutine device_cfill_mask
1094 
1096  subroutine device_rzero(a_d, n)
1097  type(c_ptr) :: a_d
1098  integer :: n
1099 #ifdef HAVE_HIP
1100  call hip_rzero(a_d, n)
1101 #elif HAVE_CUDA
1102  call cuda_rzero(a_d, n)
1103 #elif HAVE_OPENCL
1104  call opencl_rzero(a_d, n)
1105 #else
1106  call neko_error('No device backend configured')
1107 #endif
1108  end subroutine device_rzero
1109 
1111  subroutine device_rone(a_d, n)
1112  type(c_ptr) :: a_d
1113  integer :: n
1114  real(kind=rp) :: one = 1.0_rp
1115 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
1116  call device_cfill(a_d, one, n)
1117 #elif HAVE_OPENCL
1118  call opencl_rone(a_d, n)
1119 #else
1120  call neko_error('No device backend configured')
1121 #endif
1122  end subroutine device_rone
1123 
1125  subroutine device_cmult(a_d, c, n)
1126  type(c_ptr) :: a_d
1127  real(kind=rp), intent(in) :: c
1128  integer :: n
1129 #ifdef HAVE_HIP
1130  call hip_cmult(a_d, c, n)
1131 #elif HAVE_CUDA
1132  call cuda_cmult(a_d, c, n)
1133 #elif HAVE_OPENCL
1134  call opencl_cmult(a_d, c, n)
1135 #else
1136  call neko_error('No device backend configured')
1137 #endif
1138  end subroutine device_cmult
1139 
1141  subroutine device_cmult2(a_d, b_d, c, n)
1142  type(c_ptr) :: a_d, b_d
1143  real(kind=rp), intent(in) :: c
1144  integer :: n
1145 #ifdef HAVE_HIP
1146  call hip_cmult2(a_d, b_d, c, n)
1147 #elif HAVE_CUDA
1148  call cuda_cmult2(a_d, b_d, c, n)
1149 #elif HAVE_OPENCL
1150  call opencl_cmult2(a_d, b_d, c, n)
1151 #else
1152  call neko_error('No device backend configured')
1153 #endif
1154  end subroutine device_cmult2
1155 
1157  subroutine device_cadd(a_d, c, n)
1158  type(c_ptr) :: a_d
1159  real(kind=rp), intent(in) :: c
1160  integer :: n
1161 #ifdef HAVE_HIP
1162  call hip_cadd(a_d, c, n)
1163 #elif HAVE_CUDA
1164  call cuda_cadd(a_d, c, n)
1165 #elif HAVE_OPENCL
1166  call opencl_cadd(a_d, c, n)
1167 #else
1168  call neko_error('No device backend configured')
1169 #endif
1170  end subroutine device_cadd
1171 
1173  subroutine device_cadd2(a_d, b_d, c, n)
1174  type(c_ptr) :: a_d
1175  type(c_ptr) :: b_d
1176  real(kind=rp), intent(in) :: c
1177  integer :: n
1178 #ifdef HAVE_HIP
1179  call hip_cadd2(a_d, b_d, c, n)
1180 #elif HAVE_CUDA
1181  call cuda_cadd2(a_d, b_d, c, n)
1182 #elif HAVE_OPENCL
1183  call opencl_cadd2(a_d, b_d, c, n)
1184 #else
1185  call neko_error('No device backend configured')
1186 #endif
1187  end subroutine device_cadd2
1188 
1190  subroutine device_cfill(a_d, c, n)
1191  type(c_ptr) :: a_d
1192  real(kind=rp), intent(in) :: c
1193  integer :: n
1194 #ifdef HAVE_HIP
1195  call hip_cfill(a_d, c, n)
1196 #elif HAVE_CUDA
1197  call cuda_cfill(a_d, c, n)
1198 #elif HAVE_OPENCL
1199  call opencl_cfill(a_d, c, n)
1200 #else
1201  call neko_error('No device backend configured')
1202 #endif
1203  end subroutine device_cfill
1204 
1206  subroutine device_add2(a_d, b_d, n)
1207  type(c_ptr) :: a_d, b_d
1208  integer :: n
1209 #ifdef HAVE_HIP
1210  call hip_add2(a_d, b_d, n)
1211 #elif HAVE_CUDA
1212  call cuda_add2(a_d, b_d, n)
1213 #elif HAVE_OPENCL
1214  call opencl_add2(a_d, b_d, n)
1215 #else
1216  call neko_error('No device backend configured')
1217 #endif
1218  end subroutine device_add2
1219 
1220  subroutine device_add2s1(a_d, b_d, c1, n)
1221  type(c_ptr) :: a_d, b_d
1222  real(kind=rp) :: c1
1223  integer :: n
1224 #ifdef HAVE_HIP
1225  call hip_add2s1(a_d, b_d, c1, n)
1226 #elif HAVE_CUDA
1227  call cuda_add2s1(a_d, b_d, c1, n)
1228 #elif HAVE_OPENCL
1229  call opencl_add2s1(a_d, b_d, c1, n)
1230 #else
1231  call neko_error('No device backend configured')
1232 #endif
1233  end subroutine device_add2s1
1234 
1237  subroutine device_add2s2(a_d, b_d, c1, n)
1238  type(c_ptr) :: a_d, b_d
1239  real(kind=rp) :: c1
1240  integer :: n
1241 #ifdef HAVE_HIP
1242  call hip_add2s2(a_d, b_d, c1, n)
1243 #elif HAVE_CUDA
1244  call cuda_add2s2(a_d, b_d, c1, n)
1245 #elif HAVE_OPENCL
1246  call opencl_add2s2(a_d, b_d, c1, n)
1247 #else
1248  call neko_error('No device backend configured')
1249 #endif
1250  end subroutine device_add2s2
1251 
1253  subroutine device_addsqr2s2(a_d, b_d, c1, n)
1254  type(c_ptr) :: a_d, b_d
1255  real(kind=rp) :: c1
1256  integer :: n
1257 #ifdef HAVE_HIP
1258  call hip_addsqr2s2(a_d, b_d, c1, n)
1259 #elif HAVE_CUDA
1260  call cuda_addsqr2s2(a_d, b_d, c1, n)
1261 #elif HAVE_OPENCL
1262  call opencl_addsqr2s2(a_d, b_d, c1, n)
1263 #else
1264  call neko_error('No device backend configured')
1265 #endif
1266  end subroutine device_addsqr2s2
1267 
1269  subroutine device_add3(a_d, b_d, c_d, n)
1270  type(c_ptr) :: a_d, b_d, c_d
1271  integer :: n
1272 #ifdef HAVE_HIP
1273  call hip_add3(a_d, b_d, c_d, n)
1274 #elif HAVE_CUDA
1275  call cuda_add3(a_d, b_d, c_d, n)
1276 #elif HAVE_OPENCL
1277  call opencl_add3(a_d, b_d, c_d, n)
1278 #else
1279  call neko_error('No device backend configured')
1280 #endif
1281  end subroutine device_add3
1282 
1284  subroutine device_add3s2(a_d, b_d, c_d, c1, c2 ,n)
1285  type(c_ptr) :: a_d, b_d, c_d
1286  real(kind=rp) :: c1, c2
1287  integer :: n
1288 #ifdef HAVE_HIP
1289  call hip_add3s2(a_d, b_d, c_d, c1, c2, n)
1290 #elif HAVE_CUDA
1291  call cuda_add3s2(a_d, b_d, c_d, c1, c2, n)
1292 #elif HAVE_OPENCL
1293  call opencl_add3s2(a_d, b_d, c_d, c1, c2, n)
1294 #else
1295  call neko_error('No device backend configured')
1296 #endif
1297  end subroutine device_add3s2
1298 
1300  subroutine device_invcol1(a_d, n)
1301  type(c_ptr) :: a_d
1302  integer :: n
1303 #ifdef HAVE_HIP
1304  call hip_invcol1(a_d, n)
1305 #elif HAVE_CUDA
1306  call cuda_invcol1(a_d, n)
1307 #elif HAVE_OPENCL
1308  call opencl_invcol1(a_d, n)
1309 #else
1310  call neko_error('No device backend configured')
1311 #endif
1312  end subroutine device_invcol1
1313 
1315  subroutine device_invcol2(a_d, b_d, n)
1316  type(c_ptr) :: a_d, b_d
1317  integer :: n
1318 #ifdef HAVE_HIP
1319  call hip_invcol2(a_d, b_d, n)
1320 #elif HAVE_CUDA
1321  call cuda_invcol2(a_d, b_d, n)
1322 #elif HAVE_OPENCL
1323  call opencl_invcol2(a_d, b_d, n)
1324 #else
1325  call neko_error('No device backend configured')
1326 #endif
1327  end subroutine device_invcol2
1328 
1330  subroutine device_col2(a_d, b_d, n)
1331  type(c_ptr) :: a_d, b_d
1332  integer :: n
1333 #ifdef HAVE_HIP
1334  call hip_col2(a_d, b_d, n)
1335 #elif HAVE_CUDA
1336  call cuda_col2(a_d, b_d, n)
1337 #elif HAVE_OPENCL
1338  call opencl_col2(a_d, b_d, n)
1339 #else
1340  call neko_error('No device backend configured')
1341 #endif
1342  end subroutine device_col2
1343 
1345  subroutine device_col3(a_d, b_d, c_d, n)
1346  type(c_ptr) :: a_d, b_d, c_d
1347  integer :: n
1348 #ifdef HAVE_HIP
1349  call hip_col3(a_d, b_d, c_d, n)
1350 #elif HAVE_CUDA
1351  call cuda_col3(a_d, b_d, c_d, n)
1352 #elif HAVE_OPENCL
1353  call opencl_col3(a_d, b_d, c_d, n)
1354 #else
1355  call neko_error('No device backend configured')
1356 #endif
1357  end subroutine device_col3
1358 
1360  subroutine device_subcol3(a_d, b_d, c_d, n)
1361  type(c_ptr) :: a_d, b_d, c_d
1362  integer :: n
1363 #ifdef HAVE_HIP
1364  call hip_subcol3(a_d, b_d, c_d, n)
1365 #elif HAVE_CUDA
1366  call cuda_subcol3(a_d, b_d, c_d, n)
1367 #elif HAVE_OPENCL
1368  call opencl_subcol3(a_d, b_d, c_d, n)
1369 #else
1370  call neko_error('No device backend configured')
1371 #endif
1372  end subroutine device_subcol3
1373 
1375  subroutine device_sub2(a_d, b_d, n)
1376  type(c_ptr) :: a_d, b_d
1377  integer :: n
1378 #ifdef HAVE_HIP
1379  call hip_sub2(a_d, b_d, n)
1380 #elif HAVE_CUDA
1381  call cuda_sub2(a_d, b_d, n)
1382 #elif HAVE_OPENCL
1383  call opencl_sub2(a_d, b_d, n)
1384 #else
1385  call neko_error('No device backend configured')
1386 #endif
1387  end subroutine device_sub2
1388 
1390  subroutine device_sub3(a_d, b_d, c_d, n)
1391  type(c_ptr) :: a_d, b_d, c_d
1392  integer :: n
1393 #ifdef HAVE_HIP
1394  call hip_sub3(a_d, b_d, c_d, n)
1395 #elif HAVE_CUDA
1396  call cuda_sub3(a_d, b_d, c_d, n)
1397 #elif HAVE_OPENCL
1398  call opencl_sub3(a_d, b_d, c_d, n)
1399 #else
1400  call neko_error('No device backend configured')
1401 #endif
1402  end subroutine device_sub3
1403 
1405  subroutine device_addcol3(a_d, b_d, c_d, n)
1406  type(c_ptr) :: a_d, b_d, c_d
1407  integer :: n
1408 #ifdef HAVE_HIP
1409  call hip_addcol3(a_d, b_d, c_d, n)
1410 #elif HAVE_CUDA
1411  call cuda_addcol3(a_d, b_d, c_d, n)
1412 #elif HAVE_OPENCL
1413  call opencl_addcol3(a_d, b_d, c_d, n)
1414 #else
1415  call neko_error('No device backend configured')
1416 #endif
1417  end subroutine device_addcol3
1418 
1420  subroutine device_addcol4(a_d, b_d, c_d, d_d, n)
1421  type(c_ptr) :: a_d, b_d, c_d, d_d
1422  integer :: n
1423 #ifdef HAVE_HIP
1424  call hip_addcol4(a_d, b_d, c_d, d_d, n)
1425 #elif HAVE_CUDA
1426  call cuda_addcol4(a_d, b_d, c_d, d_d, n)
1427 #elif HAVE_OPENCL
1428  call opencl_addcol4(a_d, b_d, c_d, d_d, n)
1429 #else
1430  call neko_error('No device backend configured')
1431 #endif
1432  end subroutine device_addcol4
1433 
1436  subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1437  type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
1438  integer :: n
1439 #ifdef HAVE_HIP
1440  call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1441 #elif HAVE_CUDA
1442  call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1443 #elif HAVE_OPENCL
1444  call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1445 #else
1446  call neko_error('No device backend configured')
1447 #endif
1448  end subroutine device_vdot3
1449 
1451  function device_vlsc3(u_d, v_d, w_d, n) result(res)
1452  type(c_ptr) :: u_d, v_d, w_d
1453  integer :: n
1454  real(kind=rp) :: res
1455  res = 0.0_rp
1456 #ifdef HAVE_HIP
1457  res = hip_vlsc3(u_d, v_d, w_d, n)
1458 #elif HAVE_CUDA
1459  res = cuda_vlsc3(u_d, v_d, w_d, n)
1460 #elif HAVE_OPENCL
1461  ! Same kernel as glsc3 (currently no device MPI for OpenCL)
1462  res = opencl_glsc3(u_d, v_d, w_d, n)
1463 #else
1464  call neko_error('No device backend configured')
1465 #endif
1466  end function device_vlsc3
1467 
1469  function device_glsc3(a_d, b_d, c_d, n) result(res)
1470  type(c_ptr) :: a_d, b_d, c_d
1471  integer :: n, ierr
1472  real(kind=rp) :: res
1473 #ifdef HAVE_HIP
1474  res = hip_glsc3(a_d, b_d, c_d, n)
1475 #elif HAVE_CUDA
1476  res = cuda_glsc3(a_d, b_d, c_d, n)
1477 #elif HAVE_OPENCL
1478  res = opencl_glsc3(a_d, b_d, c_d, n)
1479 #else
1480  call neko_error('No device backend configured')
1481 #endif
1482 
1483 #ifndef HAVE_DEVICE_MPI
1484  if (pe_size .gt. 1) then
1485  call mpi_allreduce(mpi_in_place, res, 1, &
1486  mpi_real_precision, mpi_sum, neko_comm, ierr)
1487  end if
1488 #endif
1489  end function device_glsc3
1490 
1491  subroutine device_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
1492  type(c_ptr), value :: w_d, v_d_d, mult_d
1493  integer(c_int) :: j, n
1494  real(c_rp) :: h(j)
1495  integer :: ierr
1496 #ifdef HAVE_HIP
1497  call hip_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
1498 #elif HAVE_CUDA
1499  call cuda_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
1500 #elif HAVE_OPENCL
1501  call opencl_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
1502 #else
1503  call neko_error('No device backend configured')
1504 #endif
1505 
1506 #ifndef HAVE_DEVICE_MPI
1507  if (pe_size .gt. 1) then
1508  call mpi_allreduce(mpi_in_place, h, j, &
1509  mpi_real_precision, mpi_sum, neko_comm, ierr)
1510  end if
1511 #endif
1512  end subroutine device_glsc3_many
1513 
1514  subroutine device_add2s2_many(y_d,x_d_d,a_d,j,n)
1515  type(c_ptr), value :: y_d, x_d_d, a_d
1516  integer(c_int) :: j, n
1517 #ifdef HAVE_HIP
1518  call hip_add2s2_many(y_d,x_d_d,a_d,j,n)
1519 #elif HAVE_CUDA
1520  call cuda_add2s2_many(y_d,x_d_d,a_d,j,n)
1521 #elif HAVE_OPENCL
1522  call opencl_add2s2_many(y_d,x_d_d,a_d,j,n)
1523 #else
1524  call neko_error('No device backend configured')
1525 #endif
1526  end subroutine device_add2s2_many
1527 
1529  function device_glsc2(a_d, b_d, n) result(res)
1530  type(c_ptr) :: a_d, b_d
1531  integer :: n, ierr
1532  real(kind=rp) :: res
1533 #ifdef HAVE_HIP
1534  res = hip_glsc2(a_d, b_d, n)
1535 #elif HAVE_CUDA
1536  res = cuda_glsc2(a_d, b_d, n)
1537 #elif HAVE_OPENCL
1538  res = opencl_glsc2(a_d, b_d, n)
1539 #else
1540  call neko_error('No device backend configured')
1541 #endif
1542 
1543 #ifndef HAVE_DEVICE_MPI
1544  if (pe_size .gt. 1) then
1545  call mpi_allreduce(mpi_in_place, res, 1, &
1546  mpi_real_precision, mpi_sum, neko_comm, ierr)
1547  end if
1548 #endif
1549  end function device_glsc2
1550 
1552  function device_glsum(a_d, n) result(res)
1553  type(c_ptr) :: a_d
1554  integer :: n, ierr
1555  real(kind=rp) :: res
1556 #ifdef HAVE_HIP
1557  res = hip_glsum(a_d, n)
1558 #elif HAVE_CUDA
1559  res = cuda_glsum(a_d, n)
1560 #elif HAVE_OPENCL
1561  res = opencl_glsum(a_d, n)
1562 #else
1563  call neko_error('No device backend configured')
1564 #endif
1565 
1566 #ifndef HAVE_DEVICE_MPI
1567  if (pe_size .gt. 1) then
1568  call mpi_allreduce(mpi_in_place, res, 1, &
1569  mpi_real_precision, mpi_sum, neko_comm, ierr)
1570  end if
1571 #endif
1572  end function device_glsum
1573 
1574 
1575 end module device_math
void opencl_add3(void *a, void *b, void *c, int *n)
Definition: math.c:298
void opencl_addcol3(void *a, void *b, void *c, int *n)
Definition: math.c:652
void opencl_invcol1(void *a, int *n)
Definition: math.c:469
void opencl_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
Definition: math.c:799
void opencl_addsqr2s2(void *a, void *b, real *c1, int *n)
Definition: math.c:413
void opencl_cmult(void *a, real *c, int *n)
Definition: math.c:170
void opencl_sub3(void *a, void *b, void *c, int *n)
Definition: math.c:625
void opencl_rone(void *a, int *n)
Definition: math.c:130
void opencl_cadd(void *a, real *c, int *n)
Definition: math.c:195
void opencl_cmult2(void *a, void *b, real *c, int *n)
Definition: math.c:143
real opencl_glsc3(void *a, void *b, void *c, int *n)
Definition: math.c:745
void opencl_add2s2(void *a, void *b, real *c1, int *n)
Definition: math.c:354
void opencl_rzero(void *a, int *n)
Definition: math.c:117
void opencl_sub2(void *a, void *b, int *n)
Definition: math.c:599
void opencl_col2(void *a, void *b, int *n)
Definition: math.c:519
void opencl_addcol4(void *a, void *b, void *c, void *d, int *n)
Definition: math.c:679
void opencl_col3(void *a, void *b, void *c, int *n)
Definition: math.c:545
void opencl_subcol3(void *a, void *b, void *c, int *n)
Definition: math.c:572
void opencl_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
Definition: math.c:440
void opencl_add2s2_many(void *x, void *p, void *alpha, int *j, int *n)
Definition: math.c:383
void opencl_invcol2(void *a, void *b, int *n)
Definition: math.c:493
void opencl_cadd2(void *a, void *b, real *c, int *n)
Definition: math.c:220
void opencl_add2(void *a, void *b, int *n)
Definition: math.c:272
void opencl_masked_copy(void *a, void *b, void *mask, int *n, int *m)
Definition: math.c:62
void opencl_cfill_mask(void *a, void *c, int *size, void *mask, int *mask_size)
Definition: math.c:90
void opencl_cfill(void *a, real *c, int *n)
Definition: math.c:246
void opencl_add2s1(void *a, void *b, real *c1, int *n)
Definition: math.c:326
void opencl_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
Definition: math.c:708
real opencl_glsc2(void *a, void *b, int *n)
Definition: math.c:863
real opencl_glsum(void *a, int *n)
Definition: math.c:912
void opencl_copy(void *a, void *b, int *n)
Definition: math.c:53
void cuda_invcol1(void *a, int *n)
Definition: math.cu:279
void cuda_add2s2_many(void *x, void **p, void *alpha, int *j, int *n)
Definition: math.cu:229
void cuda_cadd2(void *a, void *b, real *c, int *n)
Definition: math.cu:136
real cuda_vlsc3(void *u, void *v, void *w, int *n)
Definition: math.cu:434
void cuda_add2s2(void *a, void *b, real *c1, int *n)
Definition: math.cu:212
void cuda_masked_copy(void *a, void *b, void *mask, int *n, int *m)
Definition: math.cu:58
void cuda_add3(void *a, void *b, void *c, int *n)
Definition: math.cu:180
void cuda_col2(void *a, void *b, int *n)
Definition: math.cu:307
void cuda_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
Definition: math.cu:507
void cuda_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
Definition: math.cu:409
void cuda_addcol3(void *a, void *b, void *c, int *n)
Definition: math.cu:379
void cuda_subcol3(void *a, void *b, void *c, int *n)
Definition: math.cu:335
void cuda_cmult(void *a, real *c, int *n)
Definition: math.cu:93
void cuda_addsqr2s2(void *a, void *b, real *c1, int *n)
Definition: math.cu:246
void cuda_add2s1(void *a, void *b, real *c1, int *n)
Definition: math.cu:196
real cuda_glsum(void *a, int *n)
Definition: math.cu:590
real cuda_glsc2(void *a, void *b, int *n)
Definition: math.cu:549
void cuda_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
Definition: math.cu:263
void cuda_rzero(void *a, int *n)
Definition: math.cu:85
void cuda_addcol4(void *a, void *b, void *c, void *d, int *n)
Definition: math.cu:394
void cuda_add2(void *a, void *b, int *n)
Definition: math.cu:165
void cuda_copy(void *a, void *b, int *n)
Definition: math.cu:49
void cuda_cfill_mask(void *a, real *c, int *size, int *mask, int *mask_size)
Definition: math.cu:72
void cuda_invcol2(void *a, void *b, int *n)
Definition: math.cu:293
void cuda_col3(void *a, void *b, void *c, int *n)
Definition: math.cu:321
void cuda_cfill(void *a, real *c, int *n)
Definition: math.cu:150
void cuda_cadd(void *a, real *c, int *n)
Definition: math.cu:121
void cuda_sub2(void *a, void *b, int *n)
Definition: math.cu:350
real cuda_glsc3(void *a, void *b, void *c, int *n)
Definition: math.cu:468
void cuda_cmult2(void *a, void *b, real *c, int *n)
Definition: math.cu:107
void cuda_sub3(void *a, void *b, void *c, int *n)
Definition: math.cu:364
Definition: comm.F90:1
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_addcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication .
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_invcol1(a_d, n)
Invert a vector .
subroutine, public device_col3(a_d, b_d, c_d, n)
Vector multiplication with 3 vectors .
subroutine, public device_cadd(a_d, c, n)
Add a scalar to vector .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
Compute a dot product (3-d version) assuming vector components etc.
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_masked_copy(a_d, b_d, mask_d, n, m)
Copy a masked vector .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
subroutine, public device_cfill_mask(a_d, c, size, mask_d, mask_size)
Fill a constant to a masked vector. .
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
Returns .
subroutine, public device_subcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns .
subroutine, public device_invcol2(a_d, b_d, n)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n)
Returns .
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Utilities.
Definition: utils.f90:35