Neko  0.8.1
A portable framework for high-order spectral element flow simulations
device_math.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2023, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34  use comm
35  use utils, only : neko_error
36  use num_types, only : rp, c_rp
37  use, intrinsic :: iso_c_binding
38  implicit none
39  private
40 
41 #ifdef HAVE_HIP
42  interface
43  subroutine hip_copy(a_d, b_d, n) &
44  bind(c, name='hip_copy')
45  use, intrinsic :: iso_c_binding
46  type(c_ptr), value :: a_d, b_d
47  integer(c_int) :: n
48  end subroutine hip_copy
49  end interface
50 
51  interface
52  subroutine hip_masked_copy(a_d, b_d, mask_d, n, m) &
53  bind(c, name='hip_masked_copy')
54  use, intrinsic :: iso_c_binding
55  type(c_ptr), value :: a_d, b_d, mask_d
56  integer(c_int) :: n, m
57  end subroutine hip_masked_copy
58  end interface
59 
60  interface
61  subroutine hip_cmult(a_d, c, n) &
62  bind(c, name='hip_cmult')
63  use, intrinsic :: iso_c_binding
64  import c_rp
65  type(c_ptr), value :: a_d
66  real(c_rp) :: c
67  integer(c_int) :: n
68  end subroutine hip_cmult
69  end interface
70 
71  interface
72  subroutine hip_cmult2(a_d, b_d, c, n) &
73  bind(c, name='hip_cmult2')
74  use, intrinsic :: iso_c_binding
75  import c_rp
76  type(c_ptr), value :: a_d, b_d
77  real(c_rp) :: c
78  integer(c_int) :: n
79  end subroutine hip_cmult2
80  end interface
81 
82  interface
83  subroutine hip_cadd(a_d, c, n) &
84  bind(c, name='hip_cadd')
85  use, intrinsic :: iso_c_binding
86  import c_rp
87  type(c_ptr), value :: a_d
88  real(c_rp) :: c
89  integer(c_int) :: n
90  end subroutine hip_cadd
91  end interface
92 
93  interface
94  subroutine hip_cfill(a_d, c, n) &
95  bind(c, name='hip_cfill')
96  use, intrinsic :: iso_c_binding
97  import c_rp
98  type(c_ptr), value :: a_d
99  real(c_rp) :: c
100  integer(c_int) :: n
101  end subroutine hip_cfill
102  end interface
103 
104  interface
105  subroutine hip_rzero(a_d, n) &
106  bind(c, name='hip_rzero')
107  use, intrinsic :: iso_c_binding
108  type(c_ptr), value :: a_d
109  integer(c_int) :: n
110  end subroutine hip_rzero
111  end interface
112 
113  interface
114  subroutine hip_add2(a_d, b_d, n) &
115  bind(c, name='hip_add2')
116  use, intrinsic :: iso_c_binding
117  import c_rp
118  implicit none
119  type(c_ptr), value :: a_d, b_d
120  integer(c_int) :: n
121  end subroutine hip_add2
122  end interface
123 
124  interface
125  subroutine hip_add2s1(a_d, b_d, c1, n) &
126  bind(c, name='hip_add2s1')
127  use, intrinsic :: iso_c_binding
128  import c_rp
129  implicit none
130  type(c_ptr), value :: a_d, b_d
131  real(c_rp) :: c1
132  integer(c_int) :: n
133  end subroutine hip_add2s1
134  end interface
135 
136  interface
137  subroutine hip_add2s2(a_d, b_d, c1, n) &
138  bind(c, name='hip_add2s2')
139  use, intrinsic :: iso_c_binding
140  import c_rp
141  implicit none
142  type(c_ptr), value :: a_d, b_d
143  real(c_rp) :: c1
144  integer(c_int) :: n
145  end subroutine hip_add2s2
146  end interface
147 
148  interface
149  subroutine hip_add2s2_many(y_d,x_d_d,a_d,j,n) &
150  bind(c, name='hip_add2s2_many')
151  use, intrinsic :: iso_c_binding
152  import c_rp
153  implicit none
154  type(c_ptr), value :: y_d, x_d_d, a_d
155  integer(c_int) :: j, n
156  end subroutine hip_add2s2_many
157  end interface
158 
159  interface
160  subroutine hip_addsqr2s2(a_d, b_d, c1, n) &
161  bind(c, name='hip_addsqr2s2')
162  use, intrinsic :: iso_c_binding
163  import c_rp
164  implicit none
165  type(c_ptr), value :: a_d, b_d
166  real(c_rp) :: c1
167  integer(c_int) :: n
168  end subroutine hip_addsqr2s2
169  end interface
170 
171  interface
172  subroutine hip_add3s2(a_d, b_d, c_d, c1, c2, n) &
173  bind(c, name='hip_add3s2')
174  use, intrinsic :: iso_c_binding
175  import c_rp
176  implicit none
177  type(c_ptr), value :: a_d, b_d, c_d
178  real(c_rp) :: c1, c2
179  integer(c_int) :: n
180  end subroutine hip_add3s2
181  end interface
182 
183  interface
184  subroutine hip_invcol1(a_d, n) &
185  bind(c, name='hip_invcol1')
186  use, intrinsic :: iso_c_binding
187  implicit none
188  type(c_ptr), value :: a_d
189  integer(c_int) :: n
190  end subroutine hip_invcol1
191  end interface
192 
193  interface
194  subroutine hip_invcol2(a_d, b_d, n) &
195  bind(c, name='hip_invcol2')
196  use, intrinsic :: iso_c_binding
197  implicit none
198  type(c_ptr), value :: a_d, b_d
199  integer(c_int) :: n
200  end subroutine hip_invcol2
201  end interface
202 
203  interface
204  subroutine hip_col2(a_d, b_d, n) &
205  bind(c, name='hip_col2')
206  use, intrinsic :: iso_c_binding
207  implicit none
208  type(c_ptr), value :: a_d, b_d
209  integer(c_int) :: n
210  end subroutine hip_col2
211  end interface
212 
213  interface
214  subroutine hip_col3(a_d, b_d, c_d, n) &
215  bind(c, name='hip_col3')
216  use, intrinsic :: iso_c_binding
217  implicit none
218  type(c_ptr), value :: a_d, b_d, c_d
219  integer(c_int) :: n
220  end subroutine hip_col3
221  end interface
222 
223  interface
224  subroutine hip_subcol3(a_d, b_d, c_d, n) &
225  bind(c, name='hip_subcol3')
226  use, intrinsic :: iso_c_binding
227  implicit none
228  type(c_ptr), value :: a_d, b_d, c_d
229  integer(c_int) :: n
230  end subroutine hip_subcol3
231  end interface
232 
233  interface
234  subroutine hip_sub2(a_d, b_d, n) &
235  bind(c, name='hip_sub2')
236  use, intrinsic :: iso_c_binding
237  implicit none
238  type(c_ptr), value :: a_d, b_d
239  integer(c_int) :: n
240  end subroutine hip_sub2
241  end interface
242 
243  interface
244  subroutine hip_sub3(a_d, b_d, c_d, n) &
245  bind(c, name='hip_sub3')
246  use, intrinsic :: iso_c_binding
247  implicit none
248  type(c_ptr), value :: a_d, b_d, c_d
249  integer(c_int) :: n
250  end subroutine hip_sub3
251  end interface
252 
253  interface
254  subroutine hip_addcol3(a_d, b_d, c_d, n) &
255  bind(c, name='hip_addcol3')
256  use, intrinsic :: iso_c_binding
257  implicit none
258  type(c_ptr), value :: a_d, b_d, c_d
259  integer(c_int) :: n
260  end subroutine hip_addcol3
261  end interface
262 
263  interface
264  subroutine hip_addcol4(a_d, b_d, c_d, d_d, n) &
265  bind(c, name='hip_addcol4')
266  use, intrinsic :: iso_c_binding
267  implicit none
268  type(c_ptr), value :: a_d, b_d, c_d, d_d
269  integer(c_int) :: n
270  end subroutine hip_addcol4
271  end interface
272 
273  interface
274  subroutine hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
275  bind(c, name='hip_vdot3')
276  use, intrinsic :: iso_c_binding
277  implicit none
278  type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
279  integer(c_int) :: n
280  end subroutine hip_vdot3
281  end interface
282 
283  interface
284  real(c_rp) function hip_vlsc3(u_d, v_d, w_d, n) &
285  bind(c, name='hip_vlsc3')
286  use, intrinsic :: iso_c_binding
287  import c_rp
288  implicit none
289  type(c_ptr), value :: u_d, v_d, w_d
290  integer(c_int) :: n
291  end function hip_vlsc3
292  end interface
293 
294  interface
295  real(c_rp) function hip_glsc3(a_d, b_d, c_d, n) &
296  bind(c, name='hip_glsc3')
297  use, intrinsic :: iso_c_binding
298  import c_rp
299  implicit none
300  type(c_ptr), value :: a_d, b_d, c_d
301  integer(c_int) :: n
302  end function hip_glsc3
303  end interface
304 
305  interface
306  subroutine hip_glsc3_many(h,w_d,v_d_d,mult_d,j,n) &
307  bind(c, name='hip_glsc3_many')
308  use, intrinsic :: iso_c_binding
309  import c_rp
310  implicit none
311  type(c_ptr), value :: w_d, v_d_d, mult_d
312  integer(c_int) :: j, n
313  real(c_rp) :: h(j)
314  end subroutine hip_glsc3_many
315  end interface
316 
317  interface
318  real(c_rp) function hip_glsc2(a_d, b_d, n) &
319  bind(c, name='hip_glsc2')
320  use, intrinsic :: iso_c_binding
321  import c_rp
322  implicit none
323  type(c_ptr), value :: a_d, b_d
324  integer(c_int) :: n
325  end function hip_glsc2
326  end interface
327 
328  interface
329  real(c_rp) function hip_glsum(a_d, n) &
330  bind(c, name='hip_glsum')
331  use, intrinsic :: iso_c_binding
332  import c_rp
333  implicit none
334  type(c_ptr), value :: a_d
335  integer(c_int) :: n
336  end function hip_glsum
337  end interface
338 #elif HAVE_CUDA
339  interface
340  subroutine cuda_copy(a_d, b_d, n) &
341  bind(c, name='cuda_copy')
342  use, intrinsic :: iso_c_binding
343  type(c_ptr), value :: a_d, b_d
344  integer(c_int) :: n
345  end subroutine cuda_copy
346  end interface
347  interface
348  subroutine cuda_masked_copy(a_d, b_d, mask_d, n, m) &
349  bind(c, name='cuda_masked_copy')
350  use, intrinsic :: iso_c_binding
351  type(c_ptr), value :: a_d, b_d, mask_d
352  integer(c_int) :: n, m
353  end subroutine cuda_masked_copy
354  end interface
355  interface
356  subroutine cuda_cmult(a_d, c, n) &
357  bind(c, name='cuda_cmult')
358  use, intrinsic :: iso_c_binding
359  import c_rp
360  type(c_ptr), value :: a_d
361  real(c_rp) :: c
362  integer(c_int) :: n
363  end subroutine cuda_cmult
364  end interface
365 
366  interface
367  subroutine cuda_cmult2(a_d, b_d, c, n) &
368  bind(c, name='cuda_cmult2')
369  use, intrinsic :: iso_c_binding
370  import c_rp
371  type(c_ptr), value :: a_d, b_d
372  real(c_rp) :: c
373  integer(c_int) :: n
374  end subroutine cuda_cmult2
375  end interface
376 
377 
378  interface
379  subroutine cuda_cadd(a_d, c, n) &
380  bind(c, name='cuda_cadd')
381  use, intrinsic :: iso_c_binding
382  import c_rp
383  type(c_ptr), value :: a_d
384  real(c_rp) :: c
385  integer(c_int) :: n
386  end subroutine cuda_cadd
387  end interface
388 
389  interface
390  subroutine cuda_cfill(a_d, c, n) &
391  bind(c, name='cuda_cfill')
392  use, intrinsic :: iso_c_binding
393  import c_rp
394  type(c_ptr), value :: a_d
395  real(c_rp) :: c
396  integer(c_int) :: n
397  end subroutine cuda_cfill
398  end interface
399 
400  interface
401  subroutine cuda_rzero(a_d, n) &
402  bind(c, name='cuda_rzero')
403  use, intrinsic :: iso_c_binding
404  type(c_ptr), value :: a_d
405  integer(c_int) :: n
406  end subroutine cuda_rzero
407  end interface
408 
409  interface
410  subroutine cuda_add2(a_d, b_d, n) &
411  bind(c, name='cuda_add2')
412  use, intrinsic :: iso_c_binding
413  import c_rp
414  implicit none
415  type(c_ptr), value :: a_d, b_d
416  integer(c_int) :: n
417  end subroutine cuda_add2
418  end interface
419 
420  interface
421  subroutine cuda_add2s1(a_d, b_d, c1, n) &
422  bind(c, name='cuda_add2s1')
423  use, intrinsic :: iso_c_binding
424  import c_rp
425  implicit none
426  type(c_ptr), value :: a_d, b_d
427  real(c_rp) :: c1
428  integer(c_int) :: n
429  end subroutine cuda_add2s1
430  end interface
431 
432  interface
433  subroutine cuda_add2s2(a_d, b_d, c1, n) &
434  bind(c, name='cuda_add2s2')
435  use, intrinsic :: iso_c_binding
436  import c_rp
437  implicit none
438  type(c_ptr), value :: a_d, b_d
439  real(c_rp) :: c1
440  integer(c_int) :: n
441  end subroutine cuda_add2s2
442  end interface
443 
444  interface
445  subroutine cuda_addsqr2s2(a_d, b_d, c1, n) &
446  bind(c, name='cuda_addsqr2s2')
447  use, intrinsic :: iso_c_binding
448  import c_rp
449  implicit none
450  type(c_ptr), value :: a_d, b_d
451  real(c_rp) :: c1
452  integer(c_int) :: n
453  end subroutine cuda_addsqr2s2
454  end interface
455 
456  interface
457  subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n) &
458  bind(c, name='cuda_add3s2')
459  use, intrinsic :: iso_c_binding
460  import c_rp
461  implicit none
462  type(c_ptr), value :: a_d, b_d, c_d
463  real(c_rp) :: c1, c2
464  integer(c_int) :: n
465  end subroutine cuda_add3s2
466  end interface
467 
468  interface
469  subroutine cuda_invcol1(a_d, n) &
470  bind(c, name='cuda_invcol1')
471  use, intrinsic :: iso_c_binding
472  implicit none
473  type(c_ptr), value :: a_d
474  integer(c_int) :: n
475  end subroutine cuda_invcol1
476  end interface
477 
478  interface
479  subroutine cuda_invcol2(a_d, b_d, n) &
480  bind(c, name='cuda_invcol2')
481  use, intrinsic :: iso_c_binding
482  implicit none
483  type(c_ptr), value :: a_d, b_d
484  integer(c_int) :: n
485  end subroutine cuda_invcol2
486  end interface
487 
488  interface
489  subroutine cuda_col2(a_d, b_d, n) &
490  bind(c, name='cuda_col2')
491  use, intrinsic :: iso_c_binding
492  implicit none
493  type(c_ptr), value :: a_d, b_d
494  integer(c_int) :: n
495  end subroutine cuda_col2
496  end interface
497 
498  interface
499  subroutine cuda_col3(a_d, b_d, c_d, n) &
500  bind(c, name='cuda_col3')
501  use, intrinsic :: iso_c_binding
502  implicit none
503  type(c_ptr), value :: a_d, b_d, c_d
504  integer(c_int) :: n
505  end subroutine cuda_col3
506  end interface
507 
508  interface
509  subroutine cuda_subcol3(a_d, b_d, c_d, n) &
510  bind(c, name='cuda_subcol3')
511  use, intrinsic :: iso_c_binding
512  implicit none
513  type(c_ptr), value :: a_d, b_d, c_d
514  integer(c_int) :: n
515  end subroutine cuda_subcol3
516  end interface
517 
518  interface
519  subroutine cuda_sub2(a_d, b_d, n) &
520  bind(c, name='cuda_sub2')
521  use, intrinsic :: iso_c_binding
522  implicit none
523  type(c_ptr), value :: a_d, b_d
524  integer(c_int) :: n
525  end subroutine cuda_sub2
526  end interface
527 
528  interface
529  subroutine cuda_sub3(a_d, b_d, c_d, n) &
530  bind(c, name='cuda_sub3')
531  use, intrinsic :: iso_c_binding
532  implicit none
533  type(c_ptr), value :: a_d, b_d, c_d
534  integer(c_int) :: n
535  end subroutine cuda_sub3
536  end interface
537 
538  interface
539  subroutine cuda_addcol3(a_d, b_d, c_d, n) &
540  bind(c, name='cuda_addcol3')
541  use, intrinsic :: iso_c_binding
542  implicit none
543  type(c_ptr), value :: a_d, b_d, c_d
544  integer(c_int) :: n
545  end subroutine cuda_addcol3
546  end interface
547 
548  interface
549  subroutine cuda_addcol4(a_d, b_d, c_d, d_d, n) &
550  bind(c, name='cuda_addcol4')
551  use, intrinsic :: iso_c_binding
552  implicit none
553  type(c_ptr), value :: a_d, b_d, c_d, d_d
554  integer(c_int) :: n
555  end subroutine cuda_addcol4
556  end interface
557 
558  interface
559  subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
560  bind(c, name='cuda_vdot3')
561  use, intrinsic :: iso_c_binding
562  implicit none
563  type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
564  integer(c_int) :: n
565  end subroutine cuda_vdot3
566  end interface
567 
568  interface
569  real(c_rp) function cuda_vlsc3(u_d, v_d, w_d, n) &
570  bind(c, name='cuda_vlsc3')
571  use, intrinsic :: iso_c_binding
572  import c_rp
573  implicit none
574  type(c_ptr), value :: u_d, v_d, w_d
575  integer(c_int) :: n
576  end function cuda_vlsc3
577  end interface
578 
579  interface
580  subroutine cuda_add2s2_many(y_d,x_d_d,a_d,j,n) &
581  bind(c, name='cuda_add2s2_many')
582  use, intrinsic :: iso_c_binding
583  import c_rp
584  implicit none
585  type(c_ptr), value :: y_d, x_d_d, a_d
586  integer(c_int) :: j, n
587  end subroutine cuda_add2s2_many
588  end interface
589 
590  interface
591  real(c_rp) function cuda_glsc3(a_d, b_d, c_d, n) &
592  bind(c, name='cuda_glsc3')
593  use, intrinsic :: iso_c_binding
594  import c_rp
595  implicit none
596  type(c_ptr), value :: a_d, b_d, c_d
597  integer(c_int) :: n
598  end function cuda_glsc3
599  end interface
600  interface
601  subroutine cuda_glsc3_many(h,w_d,v_d_d,mult_d,j,n) &
602  bind(c, name='cuda_glsc3_many')
603  use, intrinsic :: iso_c_binding
604  import c_rp
605  implicit none
606  type(c_ptr), value :: w_d, v_d_d, mult_d
607  integer(c_int) :: j, n
608  real(c_rp) :: h(j)
609  end subroutine cuda_glsc3_many
610  end interface
611 
612  interface
613  real(c_rp) function cuda_glsc2(a_d, b_d, n) &
614  bind(c, name='cuda_glsc2')
615  use, intrinsic :: iso_c_binding
616  import c_rp
617  implicit none
618  type(c_ptr), value :: a_d, b_d
619  integer(c_int) :: n
620  end function cuda_glsc2
621  end interface
622 
623  interface
624  real(c_rp) function cuda_glsum(a_d, n) &
625  bind(c, name='cuda_glsum')
626  use, intrinsic :: iso_c_binding
627  import c_rp
628  implicit none
629  type(c_ptr), value :: a_d
630  integer(c_int) :: n
631  end function cuda_glsum
632  end interface
633 #elif HAVE_OPENCL
634  interface
635  subroutine opencl_copy(a_d, b_d, n) &
636  bind(c, name='opencl_copy')
637  use, intrinsic :: iso_c_binding
638  type(c_ptr), value :: a_d, b_d
639  integer(c_int) :: n
640  end subroutine opencl_copy
641  end interface
642 
643  interface
644  subroutine opencl_masked_copy(a_d, b_d, mask_d, n, m) &
645  bind(c, name='opencl_masked_copy')
646  use, intrinsic :: iso_c_binding
647  type(c_ptr), value :: a_d, b_d, mask_d
648  integer(c_int) :: n, m
649  end subroutine opencl_masked_copy
650  end interface
651 
652  interface
653  subroutine opencl_cmult(a_d, c, n) &
654  bind(c, name='opencl_cmult')
655  use, intrinsic :: iso_c_binding
656  import c_rp
657  type(c_ptr), value :: a_d
658  real(c_rp) :: c
659  integer(c_int) :: n
660  end subroutine opencl_cmult
661  end interface
662 
663  interface
664  subroutine opencl_cmult2(a_d, b_d, c, n) &
665  bind(c, name='opencl_cmult2')
666  use, intrinsic :: iso_c_binding
667  import c_rp
668  type(c_ptr), value :: a_d, b_d
669  real(c_rp) :: c
670  integer(c_int) :: n
671  end subroutine opencl_cmult2
672  end interface
673  interface
674  subroutine opencl_cadd(a_d, c, n) &
675  bind(c, name='opencl_cadd')
676  use, intrinsic :: iso_c_binding
677  import c_rp
678  type(c_ptr), value :: a_d
679  real(c_rp) :: c
680  integer(c_int) :: n
681  end subroutine opencl_cadd
682  end interface
683 
684  interface
685  subroutine opencl_cfill(a_d, c, n) &
686  bind(c, name='opencl_cfill')
687  use, intrinsic :: iso_c_binding
688  import c_rp
689  type(c_ptr), value :: a_d
690  real(c_rp) :: c
691  integer(c_int) :: n
692  end subroutine opencl_cfill
693  end interface
694 
695  interface
696  subroutine opencl_rzero(a_d, n) &
697  bind(c, name='opencl_rzero')
698  use, intrinsic :: iso_c_binding
699  type(c_ptr), value :: a_d
700  integer(c_int) :: n
701  end subroutine opencl_rzero
702  end interface
703 
704  interface
705  subroutine opencl_rone(a_d, n) &
706  bind(c, name='opencl_rone')
707  use, intrinsic :: iso_c_binding
708  type(c_ptr), value :: a_d
709  integer(c_int) :: n
710  end subroutine opencl_rone
711  end interface
712 
713  interface
714  subroutine opencl_add2(a_d, b_d, n) &
715  bind(c, name='opencl_add2')
716  use, intrinsic :: iso_c_binding
717  implicit none
718  type(c_ptr), value :: a_d, b_d
719  integer(c_int) :: n
720  end subroutine opencl_add2
721  end interface
722 
723  interface
724  subroutine opencl_add2s1(a_d, b_d, c1, n) &
725  bind(c, name='opencl_add2s1')
726  use, intrinsic :: iso_c_binding
727  import c_rp
728  implicit none
729  type(c_ptr), value :: a_d, b_d
730  real(c_rp) :: c1
731  integer(c_int) :: n
732  end subroutine opencl_add2s1
733  end interface
734 
735  interface
736  subroutine opencl_add2s2(a_d, b_d, c1, n) &
737  bind(c, name='opencl_add2s2')
738  use, intrinsic :: iso_c_binding
739  import c_rp
740  implicit none
741  type(c_ptr), value :: a_d, b_d
742  real(c_rp) :: c1
743  integer(c_int) :: n
744  end subroutine opencl_add2s2
745  end interface
746 
747  interface
748  subroutine opencl_add2s2_many(y_d,x_d_d,a_d,j,n) &
749  bind(c, name='opencl_add2s2_many')
750  use, intrinsic :: iso_c_binding
751  import c_rp
752  implicit none
753  type(c_ptr), value :: y_d, x_d_d, a_d
754  integer(c_int) :: j, n
755  end subroutine opencl_add2s2_many
756  end interface
757 
758  interface
759  subroutine opencl_addsqr2s2(a_d, b_d, c1, n) &
760  bind(c, name='opencl_addsqr2s2')
761  use, intrinsic :: iso_c_binding
762  import c_rp
763  implicit none
764  type(c_ptr), value :: a_d, b_d
765  real(c_rp) :: c1
766  integer(c_int) :: n
767  end subroutine opencl_addsqr2s2
768  end interface
769 
770  interface
771  subroutine opencl_add3s2(a_d, b_d, c_d, c1, c2, n) &
772  bind(c, name='opencl_add3s2')
773  use, intrinsic :: iso_c_binding
774  import c_rp
775  implicit none
776  type(c_ptr), value :: a_d, b_d, c_d
777  real(c_rp) :: c1, c2
778  integer(c_int) :: n
779  end subroutine opencl_add3s2
780  end interface
781 
782  interface
783  subroutine opencl_invcol1(a_d, n) &
784  bind(c, name='opencl_invcol1')
785  use, intrinsic :: iso_c_binding
786  implicit none
787  type(c_ptr), value :: a_d
788  integer(c_int) :: n
789  end subroutine opencl_invcol1
790  end interface
791 
792  interface
793  subroutine opencl_invcol2(a_d, b_d, n) &
794  bind(c, name='opencl_invcol2')
795  use, intrinsic :: iso_c_binding
796  implicit none
797  type(c_ptr), value :: a_d, b_d
798  integer(c_int) :: n
799  end subroutine opencl_invcol2
800  end interface
801 
802  interface
803  subroutine opencl_col2(a_d, b_d, n) &
804  bind(c, name='opencl_col2')
805  use, intrinsic :: iso_c_binding
806  implicit none
807  type(c_ptr), value :: a_d, b_d
808  integer(c_int) :: n
809  end subroutine opencl_col2
810  end interface
811 
812  interface
813  subroutine opencl_col3(a_d, b_d, c_d, n) &
814  bind(c, name='opencl_col3')
815  use, intrinsic :: iso_c_binding
816  implicit none
817  type(c_ptr), value :: a_d, b_d, c_d
818  integer(c_int) :: n
819  end subroutine opencl_col3
820  end interface
821 
822  interface
823  subroutine opencl_subcol3(a_d, b_d, c_d, n) &
824  bind(c, name='opencl_subcol3')
825  use, intrinsic :: iso_c_binding
826  implicit none
827  type(c_ptr), value :: a_d, b_d, c_d
828  integer(c_int) :: n
829  end subroutine opencl_subcol3
830  end interface
831 
832  interface
833  subroutine opencl_sub2(a_d, b_d, n) &
834  bind(c, name='opencl_sub2')
835  use, intrinsic :: iso_c_binding
836  implicit none
837  type(c_ptr), value :: a_d, b_d
838  integer(c_int) :: n
839  end subroutine opencl_sub2
840  end interface
841 
842  interface
843  subroutine opencl_sub3(a_d, b_d, c_d, n) &
844  bind(c, name='opencl_sub3')
845  use, intrinsic :: iso_c_binding
846  implicit none
847  type(c_ptr), value :: a_d, b_d, c_d
848  integer(c_int) :: n
849  end subroutine opencl_sub3
850  end interface
851 
852  interface
853  subroutine opencl_addcol3(a_d, b_d, c_d, n) &
854  bind(c, name='opencl_addcol3')
855  use, intrinsic :: iso_c_binding
856  implicit none
857  type(c_ptr), value :: a_d, b_d, c_d
858  integer(c_int) :: n
859  end subroutine opencl_addcol3
860  end interface
861 
862  interface
863  subroutine opencl_addcol4(a_d, b_d, c_d, d_d, n) &
864  bind(c, name='opencl_addcol4')
865  use, intrinsic :: iso_c_binding
866  implicit none
867  type(c_ptr), value :: a_d, b_d, c_d, d_d
868  integer(c_int) :: n
869  end subroutine opencl_addcol4
870  end interface
871 
872  interface
873  subroutine opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
874  bind(c, name='opencl_vdot3')
875  use, intrinsic :: iso_c_binding
876  implicit none
877  type(c_ptr), value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
878  integer(c_int) :: n
879  end subroutine opencl_vdot3
880  end interface
881 
882  interface
883  real(c_rp) function opencl_glsc3(a_d, b_d, c_d, n) &
884  bind(c, name='opencl_glsc3')
885  use, intrinsic :: iso_c_binding
886  import c_rp
887  implicit none
888  type(c_ptr), value :: a_d, b_d, c_d
889  integer(c_int) :: n
890  end function opencl_glsc3
891  end interface
892 
893  interface
894  subroutine opencl_glsc3_many(h, w_d, v_d_d, mult_d, j, n) &
895  bind(c, name='opencl_glsc3_many')
896  use, intrinsic :: iso_c_binding
897  import c_rp
898  implicit none
899  integer(c_int) :: j, n
900  type(c_ptr), value :: w_d, v_d_d, mult_d
901  real(c_rp) :: h(j)
902  end subroutine opencl_glsc3_many
903  end interface
904 
905  interface
906  real(c_rp) function opencl_glsc2(a_d, b_d, n) &
907  bind(c, name='opencl_glsc2')
908  use, intrinsic :: iso_c_binding
909  import c_rp
910  implicit none
911  type(c_ptr), value :: a_d, b_d
912  integer(c_int) :: n
913  end function opencl_glsc2
914  end interface
915 
916  interface
917  real(c_rp) function opencl_glsum(a_d, n) &
918  bind(c, name='opencl_glsum')
919  use, intrinsic :: iso_c_binding
920  import c_rp
921  implicit none
922  type(c_ptr), value :: a_d
923  integer(c_int) :: n
924  end function opencl_glsum
925  end interface
926 #endif
927 
935 
936 contains
937 
938  subroutine device_copy(a_d, b_d, n)
939  type(c_ptr) :: a_d, b_d
940  integer :: n
941 #ifdef HAVE_HIP
942  call hip_copy(a_d, b_d, n)
943 #elif HAVE_CUDA
944  call cuda_copy(a_d, b_d, n)
945 #elif HAVE_OPENCL
946  call opencl_copy(a_d, b_d, n)
947 #else
948  call neko_error('no device backend configured')
949 #endif
950  end subroutine device_copy
951 
952  subroutine device_masked_copy(a_d, b_d, mask_d, n, m)
953  type(c_ptr) :: a_d, b_d, mask_d
954  integer :: n, m
955 #ifdef HAVE_HIP
956  call hip_masked_copy(a_d, b_d, mask_d, n, m)
957 #elif HAVE_CUDA
958  call cuda_masked_copy(a_d, b_d, mask_d, n, m)
959 #elif HAVE_OPENCL
960  call opencl_masked_copy(a_d, b_d, mask_d, n, m)
961 #else
962  call neko_error('no device backend configured')
963 #endif
964  end subroutine device_masked_copy
965 
966 
967  subroutine device_rzero(a_d, n)
968  type(c_ptr) :: a_d
969  integer :: n
970 #ifdef HAVE_HIP
971  call hip_rzero(a_d, n)
972 #elif HAVE_CUDA
973  call cuda_rzero(a_d, n)
974 #elif HAVE_OPENCL
975  call opencl_rzero(a_d, n)
976 #else
977  call neko_error('No device backend configured')
978 #endif
979  end subroutine device_rzero
980 
981  subroutine device_rone(a_d, n)
982  type(c_ptr) :: a_d
983  integer :: n
984  real(kind=rp) :: one = 1.0_rp
985 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
986  call device_cfill(a_d, one, n)
987 #elif HAVE_OPENCL
988  call opencl_rone(a_d, n)
989 #else
990  call neko_error('No device backend configured')
991 #endif
992  end subroutine device_rone
993 
994  subroutine device_cmult(a_d, c, n)
995  type(c_ptr) :: a_d
996  real(kind=rp), intent(in) :: c
997  integer :: n
998 #ifdef HAVE_HIP
999  call hip_cmult(a_d, c, n)
1000 #elif HAVE_CUDA
1001  call cuda_cmult(a_d, c, n)
1002 #elif HAVE_OPENCL
1003  call opencl_cmult(a_d, c, n)
1004 #else
1005  call neko_error('No device backend configured')
1006 #endif
1007  end subroutine device_cmult
1008 
1009  subroutine device_cmult2(a_d, b_d, c, n)
1010  type(c_ptr) :: a_d, b_d
1011  real(kind=rp), intent(in) :: c
1012  integer :: n
1013 #ifdef HAVE_HIP
1014  call hip_cmult2(a_d, b_d, c, n)
1015 #elif HAVE_CUDA
1016  call cuda_cmult2(a_d, b_d, c, n)
1017 #elif HAVE_OPENCL
1018  call opencl_cmult2(a_d, b_d, c, n)
1019 #else
1020  call neko_error('No device backend configured')
1021 #endif
1022  end subroutine device_cmult2
1023 
1024 
1025  subroutine device_cadd(a_d, c, n)
1026  type(c_ptr) :: a_d
1027  real(kind=rp), intent(in) :: c
1028  integer :: n
1029 #ifdef HAVE_HIP
1030  call hip_cadd(a_d, c, n)
1031 #elif HAVE_CUDA
1032  call cuda_cadd(a_d, c, n)
1033 #elif HAVE_OPENCL
1034  call opencl_cadd(a_d, c, n)
1035 #else
1036  call neko_error('No device backend configured')
1037 #endif
1038  end subroutine device_cadd
1039 
1040  subroutine device_cfill(a_d, c, n)
1041  type(c_ptr) :: a_d
1042  real(kind=rp), intent(in) :: c
1043  integer :: n
1044 #ifdef HAVE_HIP
1045  call hip_cfill(a_d, c, n)
1046 #elif HAVE_CUDA
1047  call cuda_cfill(a_d, c, n)
1048 #elif HAVE_OPENCL
1049  call opencl_cfill(a_d, c, n)
1050 #else
1051  call neko_error('No device backend configured')
1052 #endif
1053  end subroutine device_cfill
1054 
1055  subroutine device_add2(a_d, b_d, n)
1056  type(c_ptr) :: a_d, b_d
1057  integer :: n
1058 #ifdef HAVE_HIP
1059  call hip_add2(a_d, b_d, n)
1060 #elif HAVE_CUDA
1061  call cuda_add2(a_d, b_d, n)
1062 #elif HAVE_OPENCL
1063  call opencl_add2(a_d, b_d, n)
1064 #else
1065  call neko_error('No device backend configured')
1066 #endif
1067  end subroutine device_add2
1068 
1069  subroutine device_add2s1(a_d, b_d, c1, n)
1070  type(c_ptr) :: a_d, b_d
1071  real(kind=rp) :: c1
1072  integer :: n
1073 #ifdef HAVE_HIP
1074  call hip_add2s1(a_d, b_d, c1, n)
1075 #elif HAVE_CUDA
1076  call cuda_add2s1(a_d, b_d, c1, n)
1077 #elif HAVE_OPENCL
1078  call opencl_add2s1(a_d, b_d, c1, n)
1079 #else
1080  call neko_error('No device backend configured')
1081 #endif
1082  end subroutine device_add2s1
1083 
1084  subroutine device_add2s2(a_d, b_d, c1, n)
1085  type(c_ptr) :: a_d, b_d
1086  real(kind=rp) :: c1
1087  integer :: n
1088 #ifdef HAVE_HIP
1089  call hip_add2s2(a_d, b_d, c1, n)
1090 #elif HAVE_CUDA
1091  call cuda_add2s2(a_d, b_d, c1, n)
1092 #elif HAVE_OPENCL
1093  call opencl_add2s2(a_d, b_d, c1, n)
1094 #else
1095  call neko_error('No device backend configured')
1096 #endif
1097  end subroutine device_add2s2
1098 
1099  subroutine device_addsqr2s2(a_d, b_d, c1, n)
1100  type(c_ptr) :: a_d, b_d
1101  real(kind=rp) :: c1
1102  integer :: n
1103 #ifdef HAVE_HIP
1104  call hip_addsqr2s2(a_d, b_d, c1, n)
1105 #elif HAVE_CUDA
1106  call cuda_addsqr2s2(a_d, b_d, c1, n)
1107 #elif HAVE_OPENCL
1108  call opencl_addsqr2s2(a_d, b_d, c1, n)
1109 #else
1110  call neko_error('No device backend configured')
1111 #endif
1112  end subroutine device_addsqr2s2
1113 
1114  subroutine device_add3s2(a_d, b_d, c_d, c1, c2 ,n)
1115  type(c_ptr) :: a_d, b_d, c_d
1116  real(kind=rp) :: c1, c2
1117  integer :: n
1118 #ifdef HAVE_HIP
1119  call hip_add3s2(a_d, b_d, c_d, c1, c2, n)
1120 #elif HAVE_CUDA
1121  call cuda_add3s2(a_d, b_d, c_d, c1, c2, n)
1122 #elif HAVE_OPENCL
1123  call opencl_add3s2(a_d, b_d, c_d, c1, c2, n)
1124 #else
1125  call neko_error('No device backend configured')
1126 #endif
1127  end subroutine device_add3s2
1128 
1129  subroutine device_invcol1(a_d, n)
1130  type(c_ptr) :: a_d
1131  integer :: n
1132 #ifdef HAVE_HIP
1133  call hip_invcol1(a_d, n)
1134 #elif HAVE_CUDA
1135  call cuda_invcol1(a_d, n)
1136 #elif HAVE_OPENCL
1137  call opencl_invcol1(a_d, n)
1138 #else
1139  call neko_error('No device backend configured')
1140 #endif
1141  end subroutine device_invcol1
1142 
1143  subroutine device_invcol2(a_d, b_d, n)
1144  type(c_ptr) :: a_d, b_d
1145  integer :: n
1146 #ifdef HAVE_HIP
1147  call hip_invcol2(a_d, b_d, n)
1148 #elif HAVE_CUDA
1149  call cuda_invcol2(a_d, b_d, n)
1150 #elif HAVE_OPENCL
1151  call opencl_invcol2(a_d, b_d, n)
1152 #else
1153  call neko_error('No device backend configured')
1154 #endif
1155  end subroutine device_invcol2
1156 
1157  subroutine device_col2(a_d, b_d, n)
1158  type(c_ptr) :: a_d, b_d
1159  integer :: n
1160 #ifdef HAVE_HIP
1161  call hip_col2(a_d, b_d, n)
1162 #elif HAVE_CUDA
1163  call cuda_col2(a_d, b_d, n)
1164 #elif HAVE_OPENCL
1165  call opencl_col2(a_d, b_d, n)
1166 #else
1167  call neko_error('No device backend configured')
1168 #endif
1169  end subroutine device_col2
1170 
1171  subroutine device_col3(a_d, b_d, c_d, n)
1172  type(c_ptr) :: a_d, b_d, c_d
1173  integer :: n
1174 #ifdef HAVE_HIP
1175  call hip_col3(a_d, b_d, c_d, n)
1176 #elif HAVE_CUDA
1177  call cuda_col3(a_d, b_d, c_d, n)
1178 #elif HAVE_OPENCL
1179  call opencl_col3(a_d, b_d, c_d, n)
1180 #else
1181  call neko_error('No device backend configured')
1182 #endif
1183  end subroutine device_col3
1184 
1185  subroutine device_subcol3(a_d, b_d, c_d, n)
1186  type(c_ptr) :: a_d, b_d, c_d
1187  integer :: n
1188 #ifdef HAVE_HIP
1189  call hip_subcol3(a_d, b_d, c_d, n)
1190 #elif HAVE_CUDA
1191  call cuda_subcol3(a_d, b_d, c_d, n)
1192 #elif HAVE_OPENCL
1193  call opencl_subcol3(a_d, b_d, c_d, n)
1194 #else
1195  call neko_error('No device backend configured')
1196 #endif
1197  end subroutine device_subcol3
1198 
1199  subroutine device_sub2(a_d, b_d, n)
1200  type(c_ptr) :: a_d, b_d
1201  integer :: n
1202 #ifdef HAVE_HIP
1203  call hip_sub2(a_d, b_d, n)
1204 #elif HAVE_CUDA
1205  call cuda_sub2(a_d, b_d, n)
1206 #elif HAVE_OPENCL
1207  call opencl_sub2(a_d, b_d, n)
1208 #else
1209  call neko_error('No device backend configured')
1210 #endif
1211  end subroutine device_sub2
1212 
1213  subroutine device_sub3(a_d, b_d, c_d, n)
1214  type(c_ptr) :: a_d, b_d, c_d
1215  integer :: n
1216 #ifdef HAVE_HIP
1217  call hip_sub3(a_d, b_d, c_d, n)
1218 #elif HAVE_CUDA
1219  call cuda_sub3(a_d, b_d, c_d, n)
1220 #elif HAVE_OPENCL
1221  call opencl_sub3(a_d, b_d, c_d, n)
1222 #else
1223  call neko_error('No device backend configured')
1224 #endif
1225  end subroutine device_sub3
1226 
1227  subroutine device_addcol3(a_d, b_d, c_d, n)
1228  type(c_ptr) :: a_d, b_d, c_d
1229  integer :: n
1230 #ifdef HAVE_HIP
1231  call hip_addcol3(a_d, b_d, c_d, n)
1232 #elif HAVE_CUDA
1233  call cuda_addcol3(a_d, b_d, c_d, n)
1234 #elif HAVE_OPENCL
1235  call opencl_addcol3(a_d, b_d, c_d, n)
1236 #else
1237  call neko_error('No device backend configured')
1238 #endif
1239  end subroutine device_addcol3
1240 
1241  subroutine device_addcol4(a_d, b_d, c_d, d_d, n)
1242  type(c_ptr) :: a_d, b_d, c_d, d_d
1243  integer :: n
1244 #ifdef HAVE_HIP
1245  call hip_addcol4(a_d, b_d, c_d, d_d, n)
1246 #elif HAVE_CUDA
1247  call cuda_addcol4(a_d, b_d, c_d, d_d, n)
1248 #elif HAVE_OPENCL
1249  call opencl_addcol4(a_d, b_d, c_d, d_d, n)
1250 #else
1251  call neko_error('No device backend configured')
1252 #endif
1253  end subroutine device_addcol4
1254 
1255  subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1256  type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
1257  integer :: n
1258 #ifdef HAVE_HIP
1259  call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1260 #elif HAVE_CUDA
1261  call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1262 #elif HAVE_OPENCL
1263  call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1264 #else
1265  call neko_error('No device backend configured')
1266 #endif
1267  end subroutine device_vdot3
1268 
1269  function device_vlsc3(u_d, v_d, w_d, n) result(res)
1270  type(c_ptr) :: u_d, v_d, w_d
1271  integer :: n
1272  real(kind=rp) :: res
1273  res = 0.0_rp
1274 #ifdef HAVE_HIP
1275  res = hip_vlsc3(u_d, v_d, w_d, n)
1276 #elif HAVE_CUDA
1277  res = cuda_vlsc3(u_d, v_d, w_d, n)
1278 #elif HAVE_OPENCL
1279  ! Same kernel as glsc3 (currently no device MPI for OpenCL)
1280  res = opencl_glsc3(u_d, v_d, w_d, n)
1281 #else
1282  call neko_error('No device backend configured')
1283 #endif
1284  end function device_vlsc3
1285 
1286  function device_glsc3(a_d, b_d, c_d, n) result(res)
1287  type(c_ptr) :: a_d, b_d, c_d
1288  integer :: n, ierr
1289  real(kind=rp) :: res
1290 #ifdef HAVE_HIP
1291  res = hip_glsc3(a_d, b_d, c_d, n)
1292 #elif HAVE_CUDA
1293  res = cuda_glsc3(a_d, b_d, c_d, n)
1294 #elif HAVE_OPENCL
1295  res = opencl_glsc3(a_d, b_d, c_d, n)
1296 #else
1297  call neko_error('No device backend configured')
1298 #endif
1299 
1300 #ifndef HAVE_DEVICE_MPI
1301  if (pe_size .gt. 1) then
1302  call mpi_allreduce(mpi_in_place, res, 1, &
1303  mpi_real_precision, mpi_sum, neko_comm, ierr)
1304  end if
1305 #endif
1306  end function device_glsc3
1307 
1308  subroutine device_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
1309  type(c_ptr), value :: w_d, v_d_d, mult_d
1310  integer(c_int) :: j, n
1311  real(c_rp) :: h(j)
1312  integer :: ierr
1313 #ifdef HAVE_HIP
1314  call hip_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
1315 #elif HAVE_CUDA
1316  call cuda_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
1317 #elif HAVE_OPENCL
1318  call opencl_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
1319 #else
1320  call neko_error('No device backend configured')
1321 #endif
1322 
1323 #ifndef HAVE_DEVICE_MPI
1324  if (pe_size .gt. 1) then
1325  call mpi_allreduce(mpi_in_place, h, j, &
1326  mpi_real_precision, mpi_sum, neko_comm, ierr)
1327  end if
1328 #endif
1329  end subroutine device_glsc3_many
1330 
1331  subroutine device_add2s2_many(y_d,x_d_d,a_d,j,n)
1332  type(c_ptr), value :: y_d, x_d_d, a_d
1333  integer(c_int) :: j, n
1334 #ifdef HAVE_HIP
1335  call hip_add2s2_many(y_d,x_d_d,a_d,j,n)
1336 #elif HAVE_CUDA
1337  call cuda_add2s2_many(y_d,x_d_d,a_d,j,n)
1338 #elif HAVE_OPENCL
1339  call opencl_add2s2_many(y_d,x_d_d,a_d,j,n)
1340 #else
1341  call neko_error('No device backend configured')
1342 #endif
1343  end subroutine device_add2s2_many
1344 
1345  function device_glsc2(a_d, b_d, n) result(res)
1346  type(c_ptr) :: a_d, b_d
1347  integer :: n, ierr
1348  real(kind=rp) :: res
1349 #ifdef HAVE_HIP
1350  res = hip_glsc2(a_d, b_d, n)
1351 #elif HAVE_CUDA
1352  res = cuda_glsc2(a_d, b_d, n)
1353 #elif HAVE_OPENCL
1354  res = opencl_glsc2(a_d, b_d, n)
1355 #else
1356  call neko_error('No device backend configured')
1357 #endif
1358 
1359 #ifndef HAVE_DEVICE_MPI
1360  if (pe_size .gt. 1) then
1361  call mpi_allreduce(mpi_in_place, res, 1, &
1362  mpi_real_precision, mpi_sum, neko_comm, ierr)
1363  end if
1364 #endif
1365  end function device_glsc2
1366 
1367  function device_glsum(a_d, n) result(res)
1368  type(c_ptr) :: a_d
1369  integer :: n, ierr
1370  real(kind=rp) :: res
1371 #ifdef HAVE_HIP
1372  res = hip_glsum(a_d, n)
1373 #elif HAVE_CUDA
1374  res = cuda_glsum(a_d, n)
1375 #elif HAVE_OPENCL
1376  res = opencl_glsum(a_d, n)
1377 #else
1378  call neko_error('No device backend configured')
1379 #endif
1380 
1381 #ifndef HAVE_DEVICE_MPI
1382  if (pe_size .gt. 1) then
1383  call mpi_allreduce(mpi_in_place, res, 1, &
1384  mpi_real_precision, mpi_sum, neko_comm, ierr)
1385  end if
1386 #endif
1387  end function device_glsum
1388 
1389 
1390 end module device_math
void opencl_addcol3(void *a, void *b, void *c, int *n)
Definition: math.c:572
void opencl_invcol1(void *a, int *n)
Definition: math.c:389
void opencl_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
Definition: math.c:719
void opencl_addsqr2s2(void *a, void *b, real *c1, int *n)
Definition: math.c:333
void opencl_cmult(void *a, real *c, int *n)
Definition: math.c:143
void opencl_sub3(void *a, void *b, void *c, int *n)
Definition: math.c:545
void opencl_rone(void *a, int *n)
Definition: math.c:103
void opencl_cadd(void *a, real *c, int *n)
Definition: math.c:168
void opencl_cmult2(void *a, void *b, real *c, int *n)
Definition: math.c:116
real opencl_glsc3(void *a, void *b, void *c, int *n)
Definition: math.c:665
void opencl_add2s2(void *a, void *b, real *c1, int *n)
Definition: math.c:274
void opencl_rzero(void *a, int *n)
Definition: math.c:90
void opencl_sub2(void *a, void *b, int *n)
Definition: math.c:519
void opencl_col2(void *a, void *b, int *n)
Definition: math.c:439
void opencl_addcol4(void *a, void *b, void *c, void *d, int *n)
Definition: math.c:599
void opencl_col3(void *a, void *b, void *c, int *n)
Definition: math.c:465
void opencl_subcol3(void *a, void *b, void *c, int *n)
Definition: math.c:492
void opencl_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
Definition: math.c:360
void opencl_add2s2_many(void *x, void *p, void *alpha, int *j, int *n)
Definition: math.c:303
void opencl_invcol2(void *a, void *b, int *n)
Definition: math.c:413
void opencl_add2(void *a, void *b, int *n)
Definition: math.c:219
void opencl_masked_copy(void *a, void *b, void *mask, int *n, int *m)
Definition: math.c:62
void opencl_cfill(void *a, real *c, int *n)
Definition: math.c:193
void opencl_add2s1(void *a, void *b, real *c1, int *n)
Definition: math.c:246
void opencl_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
Definition: math.c:628
real opencl_glsc2(void *a, void *b, int *n)
Definition: math.c:783
real opencl_glsum(void *a, int *n)
Definition: math.c:832
void opencl_copy(void *a, void *b, int *n)
Definition: math.c:53
void cuda_invcol1(void *a, int *n)
Definition: math.cu:238
void cuda_add2s2_many(void *x, void **p, void *alpha, int *j, int *n)
Definition: math.cu:188
real cuda_vlsc3(void *u, void *v, void *w, int *n)
Definition: math.cu:393
void cuda_add2s2(void *a, void *b, real *c1, int *n)
Definition: math.cu:171
void cuda_masked_copy(void *a, void *b, void *mask, int *n, int *m)
Definition: math.cu:58
void cuda_col2(void *a, void *b, int *n)
Definition: math.cu:266
void cuda_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
Definition: math.cu:466
void cuda_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
Definition: math.cu:368
void cuda_addcol3(void *a, void *b, void *c, int *n)
Definition: math.cu:338
void cuda_subcol3(void *a, void *b, void *c, int *n)
Definition: math.cu:294
void cuda_cmult(void *a, real *c, int *n)
Definition: math.cu:81
void cuda_addsqr2s2(void *a, void *b, real *c1, int *n)
Definition: math.cu:205
void cuda_add2s1(void *a, void *b, real *c1, int *n)
Definition: math.cu:155
real cuda_glsum(void *a, int *n)
Definition: math.cu:549
real cuda_glsc2(void *a, void *b, int *n)
Definition: math.cu:508
void cuda_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
Definition: math.cu:222
void cuda_rzero(void *a, int *n)
Definition: math.cu:73
void cuda_addcol4(void *a, void *b, void *c, void *d, int *n)
Definition: math.cu:353
void cuda_add2(void *a, void *b, int *n)
Definition: math.cu:139
void cuda_copy(void *a, void *b, int *n)
Definition: math.cu:49
void cuda_invcol2(void *a, void *b, int *n)
Definition: math.cu:252
void cuda_col3(void *a, void *b, void *c, int *n)
Definition: math.cu:280
void cuda_cfill(void *a, real *c, int *n)
Definition: math.cu:124
void cuda_cadd(void *a, real *c, int *n)
Definition: math.cu:109
void cuda_sub2(void *a, void *b, int *n)
Definition: math.cu:309
real cuda_glsc3(void *a, void *b, void *c, int *n)
Definition: math.cu:427
void cuda_cmult2(void *a, void *b, real *c, int *n)
Definition: math.cu:95
void cuda_sub3(void *a, void *b, void *c, int *n)
Definition: math.cu:323
Definition: comm.F90:1
subroutine, public device_add2(a_d, b_d, n)
subroutine, public device_addcol3(a_d, b_d, c_d, n)
subroutine, public device_col2(a_d, b_d, n)
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
subroutine, public device_rone(a_d, n)
subroutine, public device_add2s2(a_d, b_d, c1, n)
subroutine, public device_invcol1(a_d, n)
subroutine, public device_col3(a_d, b_d, c_d, n)
subroutine, public device_cadd(a_d, c, n)
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
subroutine, public device_cmult2(a_d, b_d, c, n)
subroutine, public device_cmult(a_d, c, n)
subroutine, public device_masked_copy(a_d, b_d, mask_d, n, m)
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
subroutine, public device_sub3(a_d, b_d, c_d, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
real(kind=rp) function, public device_glsum(a_d, n)
subroutine, public device_copy(a_d, b_d, n)
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
subroutine, public device_subcol3(a_d, b_d, c_d, n)
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
subroutine, public device_sub2(a_d, b_d, n)
subroutine, public device_cfill(a_d, c, n)
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
subroutine, public device_invcol2(a_d, b_d, n)
subroutine, public device_addsqr2s2(a_d, b_d, c1, n)
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Utilities.
Definition: utils.f90:35