Neko  0.9.99
A portable framework for high-order spectral element flow simulations
device_math.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34  use, intrinsic :: iso_c_binding, only: c_ptr, c_int
35  use num_types, only: rp, c_rp
36  use utils, only: neko_error
38  use mpi_f08, only: mpi_sum, mpi_in_place, mpi_allreduce
39 
40  ! ========================================================================== !
41  ! Device math interfaces
42 
43  use hip_math
44  use cuda_math
45  use opencl_math
46 
47  implicit none
48  private
49 
50  interface device_pwmax
51  module procedure device_pwmax_vec2, device_pwmax_vec3, &
53  end interface device_pwmax
54 
55  interface device_pwmin
56  module procedure device_pwmin_vec2, device_pwmin_vec3, &
58  end interface device_pwmin
59 
60 
71 
72 contains
73 
75  subroutine device_copy(a_d, b_d, n)
76  type(c_ptr) :: a_d, b_d
77  integer :: n
78 #if HAVE_HIP
79  call hip_copy(a_d, b_d, n)
80 #elif HAVE_CUDA
81  call cuda_copy(a_d, b_d, n)
82 #elif HAVE_OPENCL
83  call opencl_copy(a_d, b_d, n)
84 #else
85  call neko_error('no device backend configured')
86 #endif
87  end subroutine device_copy
88 
90  subroutine device_masked_copy(a_d, b_d, mask_d, n, m)
91  type(c_ptr) :: a_d, b_d, mask_d
92  integer :: n, m
93 #if HAVE_HIP
94  call hip_masked_copy(a_d, b_d, mask_d, n, m)
95 #elif HAVE_CUDA
96  call cuda_masked_copy(a_d, b_d, mask_d, n, m)
97 #elif HAVE_OPENCL
98  call opencl_masked_copy(a_d, b_d, mask_d, n, m)
99 #else
100  call neko_error('no device backend configured')
101 #endif
102  end subroutine device_masked_copy
103 
104  subroutine device_masked_red_copy(a_d, b_d, mask_d, n, m)
105  type(c_ptr) :: a_d, b_d, mask_d
106  integer :: n, m
107 #if HAVE_HIP
108  call hip_masked_red_copy(a_d, b_d, mask_d, n, m)
109 #elif HAVE_CUDA
110  call cuda_masked_red_copy(a_d, b_d, mask_d, n, m)
111 #elif HAVE_OPENCL
112  call neko_error('No OpenCL bcknd, masked red copy')
113 #else
114  call neko_error('no device backend configured')
115 #endif
116  end subroutine device_masked_red_copy
117 
120  subroutine device_cfill_mask(a_d, c, size, mask_d, mask_size)
121  type(c_ptr) :: a_d
122  real(kind=rp), intent(in) :: c
123  integer :: size
124  type(c_ptr) :: mask_d
125  integer :: mask_size
126 #if HAVE_HIP
127  call hip_cfill_mask(a_d, c, size, mask_d, mask_size)
128 #elif HAVE_CUDA
129  call cuda_cfill_mask(a_d, c, size, mask_d, mask_size)
130 #elif HAVE_OPENCL
131  call opencl_cfill_mask(a_d, c, size, mask_d, mask_size)
132 #else
133  call neko_error('No device backend configured')
134 #endif
135  end subroutine device_cfill_mask
136 
138  subroutine device_rzero(a_d, n)
139  type(c_ptr) :: a_d
140  integer :: n
141 #if HAVE_HIP
142  call hip_rzero(a_d, n)
143 #elif HAVE_CUDA
144  call cuda_rzero(a_d, n)
145 #elif HAVE_OPENCL
146  call opencl_rzero(a_d, n)
147 #else
148  call neko_error('No device backend configured')
149 #endif
150  end subroutine device_rzero
151 
153  subroutine device_rone(a_d, n)
154  type(c_ptr) :: a_d
155  integer :: n
156  real(kind=rp), parameter :: one = 1.0_rp
157 #if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
158  call device_cfill(a_d, one, n)
159 #else
160  call neko_error('No device backend configured')
161 #endif
162  end subroutine device_rone
163 
165  subroutine device_cmult(a_d, c, n)
166  type(c_ptr) :: a_d
167  real(kind=rp), intent(in) :: c
168  integer :: n
169 #if HAVE_HIP
170  call hip_cmult(a_d, c, n)
171 #elif HAVE_CUDA
172  call cuda_cmult(a_d, c, n)
173 #elif HAVE_OPENCL
174  call opencl_cmult(a_d, c, n)
175 #else
176  call neko_error('No device backend configured')
177 #endif
178  end subroutine device_cmult
179 
181  subroutine device_cmult2(a_d, b_d, c, n)
182  type(c_ptr) :: a_d, b_d
183  real(kind=rp), intent(in) :: c
184  integer :: n
185 #if HAVE_HIP
186  call hip_cmult2(a_d, b_d, c, n)
187 #elif HAVE_CUDA
188  call cuda_cmult2(a_d, b_d, c, n)
189 #elif HAVE_OPENCL
190  call opencl_cmult2(a_d, b_d, c, n)
191 #else
192  call neko_error('No device backend configured')
193 #endif
194  end subroutine device_cmult2
195 
197  subroutine device_cadd(a_d, c, n)
198  type(c_ptr) :: a_d
199  real(kind=rp), intent(in) :: c
200  integer :: n
201 #if HAVE_HIP
202  call hip_cadd(a_d, c, n)
203 #elif HAVE_CUDA
204  call cuda_cadd(a_d, c, n)
205 #elif HAVE_OPENCL
206  call opencl_cadd(a_d, c, n)
207 #else
208  call neko_error('No device backend configured')
209 #endif
210  end subroutine device_cadd
211 
213  subroutine device_cadd2(a_d, b_d, c, n)
214  type(c_ptr) :: a_d
215  type(c_ptr) :: b_d
216  real(kind=rp), intent(in) :: c
217  integer :: n
218 #if HAVE_HIP
219  call hip_cadd2(a_d, b_d, c, n)
220 #elif HAVE_CUDA
221  call cuda_cadd2(a_d, b_d, c, n)
222 #elif HAVE_OPENCL
223  call opencl_cadd2(a_d, b_d, c, n)
224 #else
225  call neko_error('No device backend configured')
226 #endif
227  end subroutine device_cadd2
228 
230  subroutine device_cfill(a_d, c, n)
231  type(c_ptr) :: a_d
232  real(kind=rp), intent(in) :: c
233  integer :: n
234 #if HAVE_HIP
235  call hip_cfill(a_d, c, n)
236 #elif HAVE_CUDA
237  call cuda_cfill(a_d, c, n)
238 #elif HAVE_OPENCL
239  call opencl_cfill(a_d, c, n)
240 #else
241  call neko_error('No device backend configured')
242 #endif
243  end subroutine device_cfill
244 
246  subroutine device_add2(a_d, b_d, n)
247  type(c_ptr) :: a_d, b_d
248  integer :: n
249 #if HAVE_HIP
250  call hip_add2(a_d, b_d, n)
251 #elif HAVE_CUDA
252  call cuda_add2(a_d, b_d, n)
253 #elif HAVE_OPENCL
254  call opencl_add2(a_d, b_d, n)
255 #else
256  call neko_error('No device backend configured')
257 #endif
258  end subroutine device_add2
259 
260  subroutine device_add4(a_d, b_d, c_d, d_d, n)
261  type(c_ptr) :: a_d, b_d, c_d, d_d
262  integer :: n
263 #if HAVE_HIP
264  call hip_add4(a_d, b_d, c_d, d_d, n)
265 #elif HAVE_CUDA
266  call cuda_add4(a_d, b_d, c_d, d_d, n)
267 #elif HAVE_OPENCL
268  call opencl_add4(a_d, b_d, c_d, d_d, n)
269 #else
270  call neko_error('No device backend configured')
271 #endif
272  end subroutine device_add4
273 
274  subroutine device_add2s1(a_d, b_d, c1, n)
275  type(c_ptr) :: a_d, b_d
276  real(kind=rp) :: c1
277  integer :: n
278 #if HAVE_HIP
279  call hip_add2s1(a_d, b_d, c1, n)
280 #elif HAVE_CUDA
281  call cuda_add2s1(a_d, b_d, c1, n)
282 #elif HAVE_OPENCL
283  call opencl_add2s1(a_d, b_d, c1, n)
284 #else
285  call neko_error('No device backend configured')
286 #endif
287  end subroutine device_add2s1
288 
291  subroutine device_add2s2(a_d, b_d, c1, n)
292  type(c_ptr) :: a_d, b_d
293  real(kind=rp) :: c1
294  integer :: n
295 #if HAVE_HIP
296  call hip_add2s2(a_d, b_d, c1, n)
297 #elif HAVE_CUDA
298  call cuda_add2s2(a_d, b_d, c1, n)
299 #elif HAVE_OPENCL
300  call opencl_add2s2(a_d, b_d, c1, n)
301 #else
302  call neko_error('No device backend configured')
303 #endif
304  end subroutine device_add2s2
305 
307  subroutine device_addsqr2s2(a_d, b_d, c1, n)
308  type(c_ptr) :: a_d, b_d
309  real(kind=rp) :: c1
310  integer :: n
311 #if HAVE_HIP
312  call hip_addsqr2s2(a_d, b_d, c1, n)
313 #elif HAVE_CUDA
314  call cuda_addsqr2s2(a_d, b_d, c1, n)
315 #elif HAVE_OPENCL
316  call opencl_addsqr2s2(a_d, b_d, c1, n)
317 #else
318  call neko_error('No device backend configured')
319 #endif
320  end subroutine device_addsqr2s2
321 
323  subroutine device_add3(a_d, b_d, c_d, n)
324  type(c_ptr) :: a_d, b_d, c_d
325  integer :: n
326 #if HAVE_HIP
327  call hip_add3(a_d, b_d, c_d, n)
328 #elif HAVE_CUDA
329  call cuda_add3(a_d, b_d, c_d, n)
330 #elif HAVE_OPENCL
331  call opencl_add3(a_d, b_d, c_d, n)
332 #else
333  call neko_error('No device backend configured')
334 #endif
335  end subroutine device_add3
336 
338  subroutine device_add3s2(a_d, b_d, c_d, c1, c2 , n)
339  type(c_ptr) :: a_d, b_d, c_d
340  real(kind=rp) :: c1, c2
341  integer :: n
342 #if HAVE_HIP
343  call hip_add3s2(a_d, b_d, c_d, c1, c2, n)
344 #elif HAVE_CUDA
345  call cuda_add3s2(a_d, b_d, c_d, c1, c2, n)
346 #elif HAVE_OPENCL
347  call opencl_add3s2(a_d, b_d, c_d, c1, c2, n)
348 #else
349  call neko_error('No device backend configured')
350 #endif
351  end subroutine device_add3s2
352 
354  subroutine device_invcol1(a_d, n)
355  type(c_ptr) :: a_d
356  integer :: n
357 #if HAVE_HIP
358  call hip_invcol1(a_d, n)
359 #elif HAVE_CUDA
360  call cuda_invcol1(a_d, n)
361 #elif HAVE_OPENCL
362  call opencl_invcol1(a_d, n)
363 #else
364  call neko_error('No device backend configured')
365 #endif
366  end subroutine device_invcol1
367 
369  subroutine device_invcol2(a_d, b_d, n)
370  type(c_ptr) :: a_d, b_d
371  integer :: n
372 #if HAVE_HIP
373  call hip_invcol2(a_d, b_d, n)
374 #elif HAVE_CUDA
375  call cuda_invcol2(a_d, b_d, n)
376 #elif HAVE_OPENCL
377  call opencl_invcol2(a_d, b_d, n)
378 #else
379  call neko_error('No device backend configured')
380 #endif
381  end subroutine device_invcol2
382 
384  subroutine device_col2(a_d, b_d, n)
385  type(c_ptr) :: a_d, b_d
386  integer :: n
387 #if HAVE_HIP
388  call hip_col2(a_d, b_d, n)
389 #elif HAVE_CUDA
390  call cuda_col2(a_d, b_d, n)
391 #elif HAVE_OPENCL
392  call opencl_col2(a_d, b_d, n)
393 #else
394  call neko_error('No device backend configured')
395 #endif
396  end subroutine device_col2
397 
399  subroutine device_col3(a_d, b_d, c_d, n)
400  type(c_ptr) :: a_d, b_d, c_d
401  integer :: n
402 #if HAVE_HIP
403  call hip_col3(a_d, b_d, c_d, n)
404 #elif HAVE_CUDA
405  call cuda_col3(a_d, b_d, c_d, n)
406 #elif HAVE_OPENCL
407  call opencl_col3(a_d, b_d, c_d, n)
408 #else
409  call neko_error('No device backend configured')
410 #endif
411  end subroutine device_col3
412 
414  subroutine device_subcol3(a_d, b_d, c_d, n)
415  type(c_ptr) :: a_d, b_d, c_d
416  integer :: n
417 #if HAVE_HIP
418  call hip_subcol3(a_d, b_d, c_d, n)
419 #elif HAVE_CUDA
420  call cuda_subcol3(a_d, b_d, c_d, n)
421 #elif HAVE_OPENCL
422  call opencl_subcol3(a_d, b_d, c_d, n)
423 #else
424  call neko_error('No device backend configured')
425 #endif
426  end subroutine device_subcol3
427 
429  subroutine device_sub2(a_d, b_d, n)
430  type(c_ptr) :: a_d, b_d
431  integer :: n
432 #if HAVE_HIP
433  call hip_sub2(a_d, b_d, n)
434 #elif HAVE_CUDA
435  call cuda_sub2(a_d, b_d, n)
436 #elif HAVE_OPENCL
437  call opencl_sub2(a_d, b_d, n)
438 #else
439  call neko_error('No device backend configured')
440 #endif
441  end subroutine device_sub2
442 
444  subroutine device_sub3(a_d, b_d, c_d, n)
445  type(c_ptr) :: a_d, b_d, c_d
446  integer :: n
447 #if HAVE_HIP
448  call hip_sub3(a_d, b_d, c_d, n)
449 #elif HAVE_CUDA
450  call cuda_sub3(a_d, b_d, c_d, n)
451 #elif HAVE_OPENCL
452  call opencl_sub3(a_d, b_d, c_d, n)
453 #else
454  call neko_error('No device backend configured')
455 #endif
456  end subroutine device_sub3
457 
459  subroutine device_addcol3(a_d, b_d, c_d, n)
460  type(c_ptr) :: a_d, b_d, c_d
461  integer :: n
462 #if HAVE_HIP
463  call hip_addcol3(a_d, b_d, c_d, n)
464 #elif HAVE_CUDA
465  call cuda_addcol3(a_d, b_d, c_d, n)
466 #elif HAVE_OPENCL
467  call opencl_addcol3(a_d, b_d, c_d, n)
468 #else
469  call neko_error('No device backend configured')
470 #endif
471  end subroutine device_addcol3
472 
474  subroutine device_addcol4(a_d, b_d, c_d, d_d, n)
475  type(c_ptr) :: a_d, b_d, c_d, d_d
476  integer :: n
477 #if HAVE_HIP
478  call hip_addcol4(a_d, b_d, c_d, d_d, n)
479 #elif HAVE_CUDA
480  call cuda_addcol4(a_d, b_d, c_d, d_d, n)
481 #elif HAVE_OPENCL
482  call opencl_addcol4(a_d, b_d, c_d, d_d, n)
483 #else
484  call neko_error('No device backend configured')
485 #endif
486  end subroutine device_addcol4
487 
490  subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
491  type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
492  integer :: n
493 #if HAVE_HIP
494  call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
495 #elif HAVE_CUDA
496  call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
497 #elif HAVE_OPENCL
498  call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
499 #else
500  call neko_error('No device backend configured')
501 #endif
502  end subroutine device_vdot3
503 
506  subroutine device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
507  w1_d, w2_d, w3_d, n)
508  type(c_ptr) :: u1_d, u2_d, u3_d
509  type(c_ptr) :: v1_d, v2_d, v3_d
510  type(c_ptr) :: w1_d, w2_d, w3_d
511  integer :: n
512 #if HAVE_HIP
513  call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
514  w1_d, w2_d, w3_d, n)
515 #elif HAVE_CUDA
516  call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
517  w1_d, w2_d, w3_d, n)
518 #elif HAVE_OPENCL
519  call neko_error("no opencl backedn vcross")
520 #else
521  call neko_error('No device backend configured')
522 #endif
523  end subroutine device_vcross
524 
525 
527  function device_vlsc3(u_d, v_d, w_d, n) result(res)
528  type(c_ptr) :: u_d, v_d, w_d
529  integer :: n
530  real(kind=rp) :: res
531  res = 0.0_rp
532 #if HAVE_HIP
533  res = hip_vlsc3(u_d, v_d, w_d, n)
534 #elif HAVE_CUDA
535  res = cuda_vlsc3(u_d, v_d, w_d, n)
536 #elif HAVE_OPENCL
537  ! Same kernel as glsc3 (currently no device MPI for OpenCL)
538  res = opencl_glsc3(u_d, v_d, w_d, n)
539 #else
540  call neko_error('No device backend configured')
541 #endif
542  end function device_vlsc3
543 
545  function device_glsc3(a_d, b_d, c_d, n) result(res)
546  type(c_ptr) :: a_d, b_d, c_d
547  integer :: n, ierr
548  real(kind=rp) :: res
549 #if HAVE_HIP
550  res = hip_glsc3(a_d, b_d, c_d, n)
551 #elif HAVE_CUDA
552  res = cuda_glsc3(a_d, b_d, c_d, n)
553 #elif HAVE_OPENCL
554  res = opencl_glsc3(a_d, b_d, c_d, n)
555 #else
556  call neko_error('No device backend configured')
557 #endif
558 
559 #ifndef HAVE_DEVICE_MPI
560  if (pe_size .gt. 1) then
561  call mpi_allreduce(mpi_in_place, res, 1, &
562  mpi_real_precision, mpi_sum, neko_comm, ierr)
563  end if
564 #endif
565  end function device_glsc3
566 
567  subroutine device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
568  type(c_ptr), value :: w_d, v_d_d, mult_d
569  integer(c_int) :: j, n
570  real(c_rp) :: h(j)
571  integer :: ierr
572 #if HAVE_HIP
573  call hip_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
574 #elif HAVE_CUDA
575  call cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
576 #elif HAVE_OPENCL
577  call opencl_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
578 #else
579  call neko_error('No device backend configured')
580 #endif
581 
582 #ifndef HAVE_DEVICE_MPI
583  if (pe_size .gt. 1) then
584  call mpi_allreduce(mpi_in_place, h, j, &
585  mpi_real_precision, mpi_sum, neko_comm, ierr)
586  end if
587 #endif
588  end subroutine device_glsc3_many
589 
590  subroutine device_add2s2_many(y_d, x_d_d, a_d, j, n)
591  type(c_ptr), value :: y_d, x_d_d, a_d
592  integer(c_int) :: j, n
593 #if HAVE_HIP
594  call hip_add2s2_many(y_d, x_d_d, a_d, j, n)
595 #elif HAVE_CUDA
596  call cuda_add2s2_many(y_d, x_d_d, a_d, j, n)
597 #elif HAVE_OPENCL
598  call opencl_add2s2_many(y_d, x_d_d, a_d, j, n)
599 #else
600  call neko_error('No device backend configured')
601 #endif
602  end subroutine device_add2s2_many
603 
605  function device_glsc2(a_d, b_d, n) result(res)
606  type(c_ptr) :: a_d, b_d
607  integer :: n, ierr
608  real(kind=rp) :: res
609 #if HAVE_HIP
610  res = hip_glsc2(a_d, b_d, n)
611 #elif HAVE_CUDA
612  res = cuda_glsc2(a_d, b_d, n)
613 #elif HAVE_OPENCL
614  res = opencl_glsc2(a_d, b_d, n)
615 #else
616  call neko_error('No device backend configured')
617 #endif
618 
619 #ifndef HAVE_DEVICE_MPI
620  if (pe_size .gt. 1) then
621  call mpi_allreduce(mpi_in_place, res, 1, &
622  mpi_real_precision, mpi_sum, neko_comm, ierr)
623  end if
624 #endif
625  end function device_glsc2
626 
628  function device_glsum(a_d, n) result(res)
629  type(c_ptr) :: a_d
630  integer :: n, ierr
631  real(kind=rp) :: res
632 #if HAVE_HIP
633  res = hip_glsum(a_d, n)
634 #elif HAVE_CUDA
635  res = cuda_glsum(a_d, n)
636 #elif HAVE_OPENCL
637  res = opencl_glsum(a_d, n)
638 #else
639  call neko_error('No device backend configured')
640 #endif
641 
642 #ifndef HAVE_DEVICE_MPI
643  if (pe_size .gt. 1) then
644  call mpi_allreduce(mpi_in_place, res, 1, &
645  mpi_real_precision, mpi_sum, neko_comm, ierr)
646  end if
647 #endif
648  end function device_glsum
649 
650  subroutine device_absval(a_d, n)
651  integer, intent(in) :: n
652  type(c_ptr) :: a_d
653 #ifdef HAVE_HIP
654  call hip_absval(a_d, n)
655 #elif HAVE_CUDA
656  call cuda_absval(a_d, n)
657 #elif HAVE_OPENCL
658  call neko_error('OPENCL is not implemented for device_absval')
659 #else
660  call neko_error('No device backend configured')
661 #endif
662 
663  end subroutine device_absval
664 
665  ! ========================================================================== !
666  ! Device point-wise max
667 
670  subroutine device_pwmax_vec2(a_d, b_d, n)
671  type(c_ptr) :: a_d, b_d
672  integer :: n
673 
674 #if HAVE_HIP
675  call neko_error('No HIP backend for device_pwmax_vec2')
676 #elif HAVE_CUDA
677  call cuda_pwmax_vec2(a_d, b_d, n)
678 #elif HAVE_OPENCL
679  call neko_error('No OpenCL backend for device_pwmax_vec2')
680 #else
681  call neko_error('No device backend configured')
682 #endif
683  end subroutine device_pwmax_vec2
684 
687  subroutine device_pwmax_vec3(a_d, b_d, c_d, n)
688  type(c_ptr) :: a_d, b_d, c_d
689  integer :: n
690 
691 #if HAVE_HIP
692  call neko_error('No HIP backend for device_pwmax_vec3')
693 #elif HAVE_CUDA
694  call cuda_pwmax_vec3(a_d, b_d, c_d, n)
695 #elif HAVE_OPENCL
696  call neko_error('No OpenCL backend for device_pwmax_vec3')
697 #else
698  call neko_error('No device backend configured')
699 #endif
700 
701  end subroutine device_pwmax_vec3
702 
705  subroutine device_pwmax_sca2(a_d, c, n)
706  type(c_ptr) :: a_d
707  real(kind=rp), intent(in) :: c
708  integer :: n
709 
710 #if HAVE_HIP
711  call neko_error('No HIP backend for device_pwmax_sca2')
712 #elif HAVE_CUDA
713  call cuda_pwmax_sca2(a_d, c, n)
714 #elif HAVE_OPENCL
715  call neko_error('No OpenCL backend for device_pwmax_sca2')
716 #else
717  call neko_error('No device backend configured')
718 #endif
719 
720  end subroutine device_pwmax_sca2
721 
724  subroutine device_pwmax_sca3(a_d, b_d, c, n)
725  type(c_ptr) :: a_d, b_d
726  real(kind=rp), intent(in) :: c
727  integer :: n
728 
729 #if HAVE_HIP
730  call neko_error('No HIP backend for device_pwmax_sca3')
731 #elif HAVE_CUDA
732  call cuda_pwmax_sca3(a_d, b_d, c, n)
733 #elif HAVE_OPENCL
734  call neko_error('No OpenCL backend for device_pwmax_sca3')
735 #else
736  call neko_error('No device backend configured')
737 #endif
738 
739  end subroutine device_pwmax_sca3
740 
741  ! ========================================================================== !
742  ! Device point-wise min
743 
746  subroutine device_pwmin_vec2(a_d, b_d, n)
747  type(c_ptr) :: a_d, b_d
748  integer :: n
749 
750 #if HAVE_HIP
751  call neko_error('No HIP backend for device_pwmin_vec2')
752 #elif HAVE_CUDA
753  call cuda_pwmin_vec2(a_d, b_d, n)
754 #elif HAVE_OPENCL
755  call neko_error('No OpenCL backend for device_pwmin_vec2')
756 #else
757  call neko_error('No device backend configured')
758 #endif
759  end subroutine device_pwmin_vec2
760 
763  subroutine device_pwmin_vec3(a_d, b_d, c_d, n)
764  type(c_ptr) :: a_d, b_d, c_d
765  integer :: n
766 
767 #if HAVE_HIP
768  call neko_error('No HIP backend for device_pwmin_vec3')
769 #elif HAVE_CUDA
770  call cuda_pwmin_vec3(a_d, b_d, c_d, n)
771 #elif HAVE_OPENCL
772  call neko_error('No OpenCL backend for device_pwmin_vec3')
773 #else
774  call neko_error('No device backend configured')
775 #endif
776 
777  end subroutine device_pwmin_vec3
778 
781  subroutine device_pwmin_sca2(a_d, c, n)
782  type(c_ptr) :: a_d
783  real(kind=rp), intent(in) :: c
784  integer :: n
785 
786 #if HAVE_HIP
787  call neko_error('No HIP backend for device_pwmin_sca2')
788 #elif HAVE_CUDA
789  call cuda_pwmin_sca2(a_d, c, n)
790 #elif HAVE_OPENCL
791  call neko_error('No OpenCL backend for device_pwmin_sca2')
792 #else
793  call neko_error('No device backend configured')
794 #endif
795 
796  end subroutine device_pwmin_sca2
797 
800  subroutine device_pwmin_sca3(a_d, b_d, c, n)
801  type(c_ptr) :: a_d, b_d
802  real(kind=rp), intent(in) :: c
803  integer :: n
804 
805 #if HAVE_HIP
806  call neko_error('No HIP backend for device_pwmin_sca3')
807 #elif HAVE_CUDA
808  call cuda_pwmin_sca3(a_d, b_d, c, n)
809 #elif HAVE_OPENCL
810  call neko_error('No OpenCL backend for device_pwmin_sca3')
811 #else
812  call neko_error('No device backend configured')
813 #endif
814 
815  end subroutine device_pwmin_sca3
816 
817 end module device_math
Definition: comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition: comm.F90:23
integer pe_size
MPI size of communicator.
Definition: comm.F90:31
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine device_pwmax_sca2(a_d, c, n)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_addcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication .
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine device_pwmax_vec2(a_d, b_d, n)
Compute the point-wise maximum of two vectors .
subroutine, public device_invcol1(a_d, n)
Invert a vector .
subroutine, public device_col3(a_d, b_d, c_d, n)
Vector multiplication with 3 vectors .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n)
subroutine, public device_cadd(a_d, c, n)
Add a scalar to vector .
subroutine, public device_masked_red_copy(a_d, b_d, mask_d, n, m)
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
Compute a dot product (3-d version) assuming vector components etc.
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n)
Compute a cross product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine device_pwmax_sca3(a_d, b_d, c, n)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_absval(a_d, n)
subroutine, public device_masked_copy(a_d, b_d, mask_d, n, m)
Copy a masked vector .
Definition: device_math.F90:91
subroutine device_pwmax_vec3(a_d, b_d, c_d, n)
Compute the point-wise maximum of two vectors .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
subroutine device_pwmin_sca3(a_d, b_d, c, n)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_cfill_mask(a_d, c, size, mask_d, mask_size)
Fill a constant to a masked vector. .
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition .
subroutine device_pwmin_vec3(a_d, b_d, c_d, n)
Compute the point-wise minimum of two vectors .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Definition: device_math.F90:76
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
Returns .
subroutine, public device_subcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns .
subroutine device_pwmin_sca2(a_d, c, n)
Compute the point-wise minimum of a vector and a scalar .
subroutine device_pwmin_vec2(a_d, b_d, n)
Compute the point-wise minimum of two vectors .
subroutine, public device_invcol2(a_d, b_d, n)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n)
Returns .
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Utilities.
Definition: utils.f90:35