Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device_math.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use, intrinsic :: iso_c_binding, only: c_ptr, c_int
35 use num_types, only: rp, c_rp
36 use utils, only: neko_error
38 use mpi_f08, only: mpi_sum, mpi_in_place, mpi_allreduce
39
40 ! ========================================================================== !
41 ! Device math interfaces
42
43 use hip_math
44 use cuda_math
45 use opencl_math
46
47 implicit none
48 private
49
53 end interface device_pwmax
54
58 end interface device_pwmin
59
60
71
72contains
73
75 subroutine device_copy(a_d, b_d, n)
76 type(c_ptr) :: a_d, b_d
77 integer :: n
78#if HAVE_HIP
79 call hip_copy(a_d, b_d, n)
80#elif HAVE_CUDA
81 call cuda_copy(a_d, b_d, n)
82#elif HAVE_OPENCL
83 call opencl_copy(a_d, b_d, n)
84#else
85 call neko_error('no device backend configured')
86#endif
87 end subroutine device_copy
88
90 subroutine device_masked_copy(a_d, b_d, mask_d, n, m)
91 type(c_ptr) :: a_d, b_d, mask_d
92 integer :: n, m
93#if HAVE_HIP
94 call hip_masked_copy(a_d, b_d, mask_d, n, m)
95#elif HAVE_CUDA
96 call cuda_masked_copy(a_d, b_d, mask_d, n, m)
97#elif HAVE_OPENCL
98 call opencl_masked_copy(a_d, b_d, mask_d, n, m)
99#else
100 call neko_error('no device backend configured')
101#endif
102 end subroutine device_masked_copy
103
104 subroutine device_masked_red_copy(a_d, b_d, mask_d, n, m)
105 type(c_ptr) :: a_d, b_d, mask_d
106 integer :: n, m
107#if HAVE_HIP
108 call hip_masked_red_copy(a_d, b_d, mask_d, n, m)
109#elif HAVE_CUDA
110 call cuda_masked_red_copy(a_d, b_d, mask_d, n, m)
111#elif HAVE_OPENCL
112 call neko_error('No OpenCL bcknd, masked red copy')
113#else
114 call neko_error('no device backend configured')
115#endif
116 end subroutine device_masked_red_copy
117
120 subroutine device_cfill_mask(a_d, c, size, mask_d, mask_size)
121 type(c_ptr) :: a_d
122 real(kind=rp), intent(in) :: c
123 integer :: size
124 type(c_ptr) :: mask_d
125 integer :: mask_size
126#if HAVE_HIP
127 call hip_cfill_mask(a_d, c, size, mask_d, mask_size)
128#elif HAVE_CUDA
129 call cuda_cfill_mask(a_d, c, size, mask_d, mask_size)
130#elif HAVE_OPENCL
131 call opencl_cfill_mask(a_d, c, size, mask_d, mask_size)
132#else
133 call neko_error('No device backend configured')
134#endif
135 end subroutine device_cfill_mask
136
138 subroutine device_rzero(a_d, n)
139 type(c_ptr) :: a_d
140 integer :: n
141#if HAVE_HIP
142 call hip_rzero(a_d, n)
143#elif HAVE_CUDA
144 call cuda_rzero(a_d, n)
145#elif HAVE_OPENCL
146 call opencl_rzero(a_d, n)
147#else
148 call neko_error('No device backend configured')
149#endif
150 end subroutine device_rzero
151
153 subroutine device_rone(a_d, n)
154 type(c_ptr) :: a_d
155 integer :: n
156 real(kind=rp), parameter :: one = 1.0_rp
157#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
158 call device_cfill(a_d, one, n)
159#else
160 call neko_error('No device backend configured')
161#endif
162 end subroutine device_rone
163
165 subroutine device_cmult(a_d, c, n)
166 type(c_ptr) :: a_d
167 real(kind=rp), intent(in) :: c
168 integer :: n
169#if HAVE_HIP
170 call hip_cmult(a_d, c, n)
171#elif HAVE_CUDA
172 call cuda_cmult(a_d, c, n)
173#elif HAVE_OPENCL
174 call opencl_cmult(a_d, c, n)
175#else
176 call neko_error('No device backend configured')
177#endif
178 end subroutine device_cmult
179
181 subroutine device_cmult2(a_d, b_d, c, n)
182 type(c_ptr) :: a_d, b_d
183 real(kind=rp), intent(in) :: c
184 integer :: n
185#if HAVE_HIP
186 call hip_cmult2(a_d, b_d, c, n)
187#elif HAVE_CUDA
188 call cuda_cmult2(a_d, b_d, c, n)
189#elif HAVE_OPENCL
190 call opencl_cmult2(a_d, b_d, c, n)
191#else
192 call neko_error('No device backend configured')
193#endif
194 end subroutine device_cmult2
195
197 subroutine device_cadd(a_d, c, n)
198 type(c_ptr) :: a_d
199 real(kind=rp), intent(in) :: c
200 integer :: n
201#if HAVE_HIP
202 call hip_cadd(a_d, c, n)
203#elif HAVE_CUDA
204 call cuda_cadd(a_d, c, n)
205#elif HAVE_OPENCL
206 call opencl_cadd(a_d, c, n)
207#else
208 call neko_error('No device backend configured')
209#endif
210 end subroutine device_cadd
211
213 subroutine device_cadd2(a_d, b_d, c, n)
214 type(c_ptr) :: a_d
215 type(c_ptr) :: b_d
216 real(kind=rp), intent(in) :: c
217 integer :: n
218#if HAVE_HIP
219 call hip_cadd2(a_d, b_d, c, n)
220#elif HAVE_CUDA
221 call cuda_cadd2(a_d, b_d, c, n)
222#elif HAVE_OPENCL
223 call opencl_cadd2(a_d, b_d, c, n)
224#else
225 call neko_error('No device backend configured')
226#endif
227 end subroutine device_cadd2
228
230 subroutine device_cfill(a_d, c, n)
231 type(c_ptr) :: a_d
232 real(kind=rp), intent(in) :: c
233 integer :: n
234#if HAVE_HIP
235 call hip_cfill(a_d, c, n)
236#elif HAVE_CUDA
237 call cuda_cfill(a_d, c, n)
238#elif HAVE_OPENCL
239 call opencl_cfill(a_d, c, n)
240#else
241 call neko_error('No device backend configured')
242#endif
243 end subroutine device_cfill
244
246 subroutine device_add2(a_d, b_d, n)
247 type(c_ptr) :: a_d, b_d
248 integer :: n
249#if HAVE_HIP
250 call hip_add2(a_d, b_d, n)
251#elif HAVE_CUDA
252 call cuda_add2(a_d, b_d, n)
253#elif HAVE_OPENCL
254 call opencl_add2(a_d, b_d, n)
255#else
256 call neko_error('No device backend configured')
257#endif
258 end subroutine device_add2
259
260 subroutine device_add4(a_d, b_d, c_d, d_d, n)
261 type(c_ptr) :: a_d, b_d, c_d, d_d
262 integer :: n
263#if HAVE_HIP
264 call hip_add4(a_d, b_d, c_d, d_d, n)
265#elif HAVE_CUDA
266 call cuda_add4(a_d, b_d, c_d, d_d, n)
267#elif HAVE_OPENCL
268 call opencl_add4(a_d, b_d, c_d, d_d, n)
269#else
270 call neko_error('No device backend configured')
271#endif
272 end subroutine device_add4
273
274 subroutine device_add2s1(a_d, b_d, c1, n)
275 type(c_ptr) :: a_d, b_d
276 real(kind=rp) :: c1
277 integer :: n
278#if HAVE_HIP
279 call hip_add2s1(a_d, b_d, c1, n)
280#elif HAVE_CUDA
281 call cuda_add2s1(a_d, b_d, c1, n)
282#elif HAVE_OPENCL
283 call opencl_add2s1(a_d, b_d, c1, n)
284#else
285 call neko_error('No device backend configured')
286#endif
287 end subroutine device_add2s1
288
291 subroutine device_add2s2(a_d, b_d, c1, n)
292 type(c_ptr) :: a_d, b_d
293 real(kind=rp) :: c1
294 integer :: n
295#if HAVE_HIP
296 call hip_add2s2(a_d, b_d, c1, n)
297#elif HAVE_CUDA
298 call cuda_add2s2(a_d, b_d, c1, n)
299#elif HAVE_OPENCL
300 call opencl_add2s2(a_d, b_d, c1, n)
301#else
302 call neko_error('No device backend configured')
303#endif
304 end subroutine device_add2s2
305
307 subroutine device_addsqr2s2(a_d, b_d, c1, n)
308 type(c_ptr) :: a_d, b_d
309 real(kind=rp) :: c1
310 integer :: n
311#if HAVE_HIP
312 call hip_addsqr2s2(a_d, b_d, c1, n)
313#elif HAVE_CUDA
314 call cuda_addsqr2s2(a_d, b_d, c1, n)
315#elif HAVE_OPENCL
316 call opencl_addsqr2s2(a_d, b_d, c1, n)
317#else
318 call neko_error('No device backend configured')
319#endif
320 end subroutine device_addsqr2s2
321
323 subroutine device_add3(a_d, b_d, c_d, n)
324 type(c_ptr) :: a_d, b_d, c_d
325 integer :: n
326#if HAVE_HIP
327 call hip_add3(a_d, b_d, c_d, n)
328#elif HAVE_CUDA
329 call cuda_add3(a_d, b_d, c_d, n)
330#elif HAVE_OPENCL
331 call opencl_add3(a_d, b_d, c_d, n)
332#else
333 call neko_error('No device backend configured')
334#endif
335 end subroutine device_add3
336
338 subroutine device_add3s2(a_d, b_d, c_d, c1, c2 , n)
339 type(c_ptr) :: a_d, b_d, c_d
340 real(kind=rp) :: c1, c2
341 integer :: n
342#if HAVE_HIP
343 call hip_add3s2(a_d, b_d, c_d, c1, c2, n)
344#elif HAVE_CUDA
345 call cuda_add3s2(a_d, b_d, c_d, c1, c2, n)
346#elif HAVE_OPENCL
347 call opencl_add3s2(a_d, b_d, c_d, c1, c2, n)
348#else
349 call neko_error('No device backend configured')
350#endif
351 end subroutine device_add3s2
352
354 subroutine device_invcol1(a_d, n)
355 type(c_ptr) :: a_d
356 integer :: n
357#if HAVE_HIP
358 call hip_invcol1(a_d, n)
359#elif HAVE_CUDA
360 call cuda_invcol1(a_d, n)
361#elif HAVE_OPENCL
362 call opencl_invcol1(a_d, n)
363#else
364 call neko_error('No device backend configured')
365#endif
366 end subroutine device_invcol1
367
369 subroutine device_invcol2(a_d, b_d, n)
370 type(c_ptr) :: a_d, b_d
371 integer :: n
372#if HAVE_HIP
373 call hip_invcol2(a_d, b_d, n)
374#elif HAVE_CUDA
375 call cuda_invcol2(a_d, b_d, n)
376#elif HAVE_OPENCL
377 call opencl_invcol2(a_d, b_d, n)
378#else
379 call neko_error('No device backend configured')
380#endif
381 end subroutine device_invcol2
382
384 subroutine device_col2(a_d, b_d, n)
385 type(c_ptr) :: a_d, b_d
386 integer :: n
387#if HAVE_HIP
388 call hip_col2(a_d, b_d, n)
389#elif HAVE_CUDA
390 call cuda_col2(a_d, b_d, n)
391#elif HAVE_OPENCL
392 call opencl_col2(a_d, b_d, n)
393#else
394 call neko_error('No device backend configured')
395#endif
396 end subroutine device_col2
397
399 subroutine device_col3(a_d, b_d, c_d, n)
400 type(c_ptr) :: a_d, b_d, c_d
401 integer :: n
402#if HAVE_HIP
403 call hip_col3(a_d, b_d, c_d, n)
404#elif HAVE_CUDA
405 call cuda_col3(a_d, b_d, c_d, n)
406#elif HAVE_OPENCL
407 call opencl_col3(a_d, b_d, c_d, n)
408#else
409 call neko_error('No device backend configured')
410#endif
411 end subroutine device_col3
412
414 subroutine device_subcol3(a_d, b_d, c_d, n)
415 type(c_ptr) :: a_d, b_d, c_d
416 integer :: n
417#if HAVE_HIP
418 call hip_subcol3(a_d, b_d, c_d, n)
419#elif HAVE_CUDA
420 call cuda_subcol3(a_d, b_d, c_d, n)
421#elif HAVE_OPENCL
422 call opencl_subcol3(a_d, b_d, c_d, n)
423#else
424 call neko_error('No device backend configured')
425#endif
426 end subroutine device_subcol3
427
429 subroutine device_sub2(a_d, b_d, n)
430 type(c_ptr) :: a_d, b_d
431 integer :: n
432#if HAVE_HIP
433 call hip_sub2(a_d, b_d, n)
434#elif HAVE_CUDA
435 call cuda_sub2(a_d, b_d, n)
436#elif HAVE_OPENCL
437 call opencl_sub2(a_d, b_d, n)
438#else
439 call neko_error('No device backend configured')
440#endif
441 end subroutine device_sub2
442
444 subroutine device_sub3(a_d, b_d, c_d, n)
445 type(c_ptr) :: a_d, b_d, c_d
446 integer :: n
447#if HAVE_HIP
448 call hip_sub3(a_d, b_d, c_d, n)
449#elif HAVE_CUDA
450 call cuda_sub3(a_d, b_d, c_d, n)
451#elif HAVE_OPENCL
452 call opencl_sub3(a_d, b_d, c_d, n)
453#else
454 call neko_error('No device backend configured')
455#endif
456 end subroutine device_sub3
457
459 subroutine device_addcol3(a_d, b_d, c_d, n)
460 type(c_ptr) :: a_d, b_d, c_d
461 integer :: n
462#if HAVE_HIP
463 call hip_addcol3(a_d, b_d, c_d, n)
464#elif HAVE_CUDA
465 call cuda_addcol3(a_d, b_d, c_d, n)
466#elif HAVE_OPENCL
467 call opencl_addcol3(a_d, b_d, c_d, n)
468#else
469 call neko_error('No device backend configured')
470#endif
471 end subroutine device_addcol3
472
474 subroutine device_addcol4(a_d, b_d, c_d, d_d, n)
475 type(c_ptr) :: a_d, b_d, c_d, d_d
476 integer :: n
477#if HAVE_HIP
478 call hip_addcol4(a_d, b_d, c_d, d_d, n)
479#elif HAVE_CUDA
480 call cuda_addcol4(a_d, b_d, c_d, d_d, n)
481#elif HAVE_OPENCL
482 call opencl_addcol4(a_d, b_d, c_d, d_d, n)
483#else
484 call neko_error('No device backend configured')
485#endif
486 end subroutine device_addcol4
487
490 subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
491 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
492 integer :: n
493#if HAVE_HIP
494 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
495#elif HAVE_CUDA
496 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
497#elif HAVE_OPENCL
498 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
499#else
500 call neko_error('No device backend configured')
501#endif
502 end subroutine device_vdot3
503
506 subroutine device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
507 w1_d, w2_d, w3_d, n)
508 type(c_ptr) :: u1_d, u2_d, u3_d
509 type(c_ptr) :: v1_d, v2_d, v3_d
510 type(c_ptr) :: w1_d, w2_d, w3_d
511 integer :: n
512#if HAVE_HIP
513 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
514 w1_d, w2_d, w3_d, n)
515#elif HAVE_CUDA
516 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
517 w1_d, w2_d, w3_d, n)
518#elif HAVE_OPENCL
519 call neko_error("no opencl backedn vcross")
520#else
521 call neko_error('No device backend configured')
522#endif
523 end subroutine device_vcross
524
525
527 function device_vlsc3(u_d, v_d, w_d, n) result(res)
528 type(c_ptr) :: u_d, v_d, w_d
529 integer :: n
530 real(kind=rp) :: res
531 res = 0.0_rp
532#if HAVE_HIP
533 res = hip_vlsc3(u_d, v_d, w_d, n)
534#elif HAVE_CUDA
535 res = cuda_vlsc3(u_d, v_d, w_d, n)
536#elif HAVE_OPENCL
537 ! Same kernel as glsc3 (currently no device MPI for OpenCL)
538 res = opencl_glsc3(u_d, v_d, w_d, n)
539#else
540 call neko_error('No device backend configured')
541#endif
542 end function device_vlsc3
543
545 function device_glsc3(a_d, b_d, c_d, n) result(res)
546 type(c_ptr) :: a_d, b_d, c_d
547 integer :: n, ierr
548 real(kind=rp) :: res
549#if HAVE_HIP
550 res = hip_glsc3(a_d, b_d, c_d, n)
551#elif HAVE_CUDA
552 res = cuda_glsc3(a_d, b_d, c_d, n)
553#elif HAVE_OPENCL
554 res = opencl_glsc3(a_d, b_d, c_d, n)
555#else
556 call neko_error('No device backend configured')
557#endif
558
559#ifndef HAVE_DEVICE_MPI
560 if (pe_size .gt. 1) then
561 call mpi_allreduce(mpi_in_place, res, 1, &
562 mpi_real_precision, mpi_sum, neko_comm, ierr)
563 end if
564#endif
565 end function device_glsc3
566
567 subroutine device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
568 type(c_ptr), value :: w_d, v_d_d, mult_d
569 integer(c_int) :: j, n
570 real(c_rp) :: h(j)
571 integer :: ierr
572#if HAVE_HIP
573 call hip_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
574#elif HAVE_CUDA
575 call cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
576#elif HAVE_OPENCL
577 call opencl_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
578#else
579 call neko_error('No device backend configured')
580#endif
581
582#ifndef HAVE_DEVICE_MPI
583 if (pe_size .gt. 1) then
584 call mpi_allreduce(mpi_in_place, h, j, &
585 mpi_real_precision, mpi_sum, neko_comm, ierr)
586 end if
587#endif
588 end subroutine device_glsc3_many
589
590 subroutine device_add2s2_many(y_d, x_d_d, a_d, j, n)
591 type(c_ptr), value :: y_d, x_d_d, a_d
592 integer(c_int) :: j, n
593#if HAVE_HIP
594 call hip_add2s2_many(y_d, x_d_d, a_d, j, n)
595#elif HAVE_CUDA
596 call cuda_add2s2_many(y_d, x_d_d, a_d, j, n)
597#elif HAVE_OPENCL
598 call opencl_add2s2_many(y_d, x_d_d, a_d, j, n)
599#else
600 call neko_error('No device backend configured')
601#endif
602 end subroutine device_add2s2_many
603
605 function device_glsc2(a_d, b_d, n) result(res)
606 type(c_ptr) :: a_d, b_d
607 integer :: n, ierr
608 real(kind=rp) :: res
609#if HAVE_HIP
610 res = hip_glsc2(a_d, b_d, n)
611#elif HAVE_CUDA
612 res = cuda_glsc2(a_d, b_d, n)
613#elif HAVE_OPENCL
614 res = opencl_glsc2(a_d, b_d, n)
615#else
616 call neko_error('No device backend configured')
617#endif
618
619#ifndef HAVE_DEVICE_MPI
620 if (pe_size .gt. 1) then
621 call mpi_allreduce(mpi_in_place, res, 1, &
622 mpi_real_precision, mpi_sum, neko_comm, ierr)
623 end if
624#endif
625 end function device_glsc2
626
628 function device_glsum(a_d, n) result(res)
629 type(c_ptr) :: a_d
630 integer :: n, ierr
631 real(kind=rp) :: res
632#if HAVE_HIP
633 res = hip_glsum(a_d, n)
634#elif HAVE_CUDA
635 res = cuda_glsum(a_d, n)
636#elif HAVE_OPENCL
637 res = opencl_glsum(a_d, n)
638#else
639 call neko_error('No device backend configured')
640#endif
641
642#ifndef HAVE_DEVICE_MPI
643 if (pe_size .gt. 1) then
644 call mpi_allreduce(mpi_in_place, res, 1, &
645 mpi_real_precision, mpi_sum, neko_comm, ierr)
646 end if
647#endif
648 end function device_glsum
649
650 subroutine device_absval(a_d, n)
651 integer, intent(in) :: n
652 type(c_ptr) :: a_d
653#ifdef HAVE_HIP
654 call hip_absval(a_d, n)
655#elif HAVE_CUDA
656 call cuda_absval(a_d, n)
657#elif HAVE_OPENCL
658 call neko_error('OPENCL is not implemented for device_absval')
659#else
660 call neko_error('No device backend configured')
661#endif
662
663 end subroutine device_absval
664
665 ! ========================================================================== !
666 ! Device point-wise max
667
670 subroutine device_pwmax_vec2(a_d, b_d, n)
671 type(c_ptr) :: a_d, b_d
672 integer :: n
673
674#if HAVE_HIP
675 call neko_error('No HIP backend for device_pwmax_vec2')
676#elif HAVE_CUDA
677 call cuda_pwmax_vec2(a_d, b_d, n)
678#elif HAVE_OPENCL
679 call neko_error('No OpenCL backend for device_pwmax_vec2')
680#else
681 call neko_error('No device backend configured')
682#endif
683 end subroutine device_pwmax_vec2
684
687 subroutine device_pwmax_vec3(a_d, b_d, c_d, n)
688 type(c_ptr) :: a_d, b_d, c_d
689 integer :: n
690
691#if HAVE_HIP
692 call neko_error('No HIP backend for device_pwmax_vec3')
693#elif HAVE_CUDA
694 call cuda_pwmax_vec3(a_d, b_d, c_d, n)
695#elif HAVE_OPENCL
696 call neko_error('No OpenCL backend for device_pwmax_vec3')
697#else
698 call neko_error('No device backend configured')
699#endif
700
701 end subroutine device_pwmax_vec3
702
705 subroutine device_pwmax_sca2(a_d, c, n)
706 type(c_ptr) :: a_d
707 real(kind=rp), intent(in) :: c
708 integer :: n
709
710#if HAVE_HIP
711 call neko_error('No HIP backend for device_pwmax_sca2')
712#elif HAVE_CUDA
713 call cuda_pwmax_sca2(a_d, c, n)
714#elif HAVE_OPENCL
715 call neko_error('No OpenCL backend for device_pwmax_sca2')
716#else
717 call neko_error('No device backend configured')
718#endif
719
720 end subroutine device_pwmax_sca2
721
724 subroutine device_pwmax_sca3(a_d, b_d, c, n)
725 type(c_ptr) :: a_d, b_d
726 real(kind=rp), intent(in) :: c
727 integer :: n
728
729#if HAVE_HIP
730 call neko_error('No HIP backend for device_pwmax_sca3')
731#elif HAVE_CUDA
732 call cuda_pwmax_sca3(a_d, b_d, c, n)
733#elif HAVE_OPENCL
734 call neko_error('No OpenCL backend for device_pwmax_sca3')
735#else
736 call neko_error('No device backend configured')
737#endif
738
739 end subroutine device_pwmax_sca3
740
741 ! ========================================================================== !
742 ! Device point-wise min
743
746 subroutine device_pwmin_vec2(a_d, b_d, n)
747 type(c_ptr) :: a_d, b_d
748 integer :: n
749
750#if HAVE_HIP
751 call neko_error('No HIP backend for device_pwmin_vec2')
752#elif HAVE_CUDA
753 call cuda_pwmin_vec2(a_d, b_d, n)
754#elif HAVE_OPENCL
755 call neko_error('No OpenCL backend for device_pwmin_vec2')
756#else
757 call neko_error('No device backend configured')
758#endif
759 end subroutine device_pwmin_vec2
760
763 subroutine device_pwmin_vec3(a_d, b_d, c_d, n)
764 type(c_ptr) :: a_d, b_d, c_d
765 integer :: n
766
767#if HAVE_HIP
768 call neko_error('No HIP backend for device_pwmin_vec3')
769#elif HAVE_CUDA
770 call cuda_pwmin_vec3(a_d, b_d, c_d, n)
771#elif HAVE_OPENCL
772 call neko_error('No OpenCL backend for device_pwmin_vec3')
773#else
774 call neko_error('No device backend configured')
775#endif
776
777 end subroutine device_pwmin_vec3
778
781 subroutine device_pwmin_sca2(a_d, c, n)
782 type(c_ptr) :: a_d
783 real(kind=rp), intent(in) :: c
784 integer :: n
785
786#if HAVE_HIP
787 call neko_error('No HIP backend for device_pwmin_sca2')
788#elif HAVE_CUDA
789 call cuda_pwmin_sca2(a_d, c, n)
790#elif HAVE_OPENCL
791 call neko_error('No OpenCL backend for device_pwmin_sca2')
792#else
793 call neko_error('No device backend configured')
794#endif
795
796 end subroutine device_pwmin_sca2
797
800 subroutine device_pwmin_sca3(a_d, b_d, c, n)
801 type(c_ptr) :: a_d, b_d
802 real(kind=rp), intent(in) :: c
803 integer :: n
804
805#if HAVE_HIP
806 call neko_error('No HIP backend for device_pwmin_sca3')
807#elif HAVE_CUDA
808 call cuda_pwmin_sca3(a_d, b_d, c, n)
809#elif HAVE_OPENCL
810 call neko_error('No OpenCL backend for device_pwmin_sca3')
811#else
812 call neko_error('No device backend configured')
813#endif
814
815 end subroutine device_pwmin_sca3
816
817end module device_math
Definition comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:23
integer pe_size
MPI size of communicator.
Definition comm.F90:31
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine device_pwmax_sca2(a_d, c, n)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_addcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication .
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine device_pwmax_vec2(a_d, b_d, n)
Compute the point-wise maximum of two vectors .
subroutine, public device_invcol1(a_d, n)
Invert a vector .
subroutine, public device_col3(a_d, b_d, c_d, n)
Vector multiplication with 3 vectors .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n)
subroutine, public device_cadd(a_d, c, n)
Add a scalar to vector .
subroutine, public device_masked_red_copy(a_d, b_d, mask_d, n, m)
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
Compute a dot product (3-d version) assuming vector components etc.
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n)
Compute a cross product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine device_pwmax_sca3(a_d, b_d, c, n)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_absval(a_d, n)
subroutine, public device_masked_copy(a_d, b_d, mask_d, n, m)
Copy a masked vector .
subroutine device_pwmax_vec3(a_d, b_d, c_d, n)
Compute the point-wise maximum of two vectors .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
subroutine device_pwmin_sca3(a_d, b_d, c, n)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_cfill_mask(a_d, c, size, mask_d, mask_size)
Fill a constant to a masked vector. .
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition .
subroutine device_pwmin_vec3(a_d, b_d, c_d, n)
Compute the point-wise minimum of two vectors .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
Returns .
subroutine, public device_subcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns .
subroutine device_pwmin_sca2(a_d, c, n)
Compute the point-wise minimum of a vector and a scalar .
subroutine device_pwmin_vec2(a_d, b_d, n)
Compute the point-wise minimum of two vectors .
subroutine, public device_invcol2(a_d, b_d, n)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n)
Returns .
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35