Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
device_math.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use, intrinsic :: iso_c_binding, only: c_ptr, c_int
35 use num_types, only: rp, c_rp
36 use utils, only: neko_error
38 use mpi_f08, only: mpi_sum, mpi_in_place, mpi_allreduce
39
40 ! ========================================================================== !
41 ! Device math interfaces
42
43 use hip_math
44 use cuda_math
45 use opencl_math
46
47 implicit none
48 private
49
53 end interface device_pwmax
54
58 end interface device_pwmin
59
60
71
72contains
73
75 subroutine device_copy(a_d, b_d, n)
76 type(c_ptr) :: a_d, b_d
77 integer :: n
78#if HAVE_HIP
79 call hip_copy(a_d, b_d, n)
80#elif HAVE_CUDA
81 call cuda_copy(a_d, b_d, n)
82#elif HAVE_OPENCL
83 call opencl_copy(a_d, b_d, n)
84#else
85 call neko_error('no device backend configured')
86#endif
87 end subroutine device_copy
88
90 subroutine device_masked_copy(a_d, b_d, mask_d, n, m)
91 type(c_ptr) :: a_d, b_d, mask_d
92 integer :: n, m
93#if HAVE_HIP
94 call hip_masked_copy(a_d, b_d, mask_d, n, m)
95#elif HAVE_CUDA
96 call cuda_masked_copy(a_d, b_d, mask_d, n, m)
97#elif HAVE_OPENCL
98 call opencl_masked_copy(a_d, b_d, mask_d, n, m)
99#else
100 call neko_error('no device backend configured')
101#endif
102 end subroutine device_masked_copy
103
104 subroutine device_masked_red_copy(a_d, b_d, mask_d, n, m)
105 type(c_ptr) :: a_d, b_d, mask_d
106 integer :: n, m
107#if HAVE_HIP
108 call hip_masked_red_copy(a_d, b_d, mask_d, n, m)
109#elif HAVE_CUDA
110 call cuda_masked_red_copy(a_d, b_d, mask_d, n, m)
111#elif HAVE_OPENCL
112 call neko_error('No OpenCL bcknd, masked red copy')
113#else
114 call neko_error('no device backend configured')
115#endif
116 end subroutine device_masked_red_copy
117
118 subroutine device_masked_atomic_reduction(a_d, b_d, mask_d, n, m)
119 type(c_ptr) :: a_d, b_d, mask_d
120 integer :: n, m
121#if HAVE_HIP
122 call hip_masked_atomic_reduction(a_d, b_d, mask_d, n, m)
123#elif HAVE_CUDA
124 call cuda_masked_atomic_reduction(a_d, b_d, mask_d, n, m)
125#elif HAVE_OPENCL
126 call neko_error('No OpenCL bcknd, masked atomic reduction')
127#else
128 call neko_error('no device backend configured')
129#endif
130 end subroutine device_masked_atomic_reduction
131
134 subroutine device_cfill_mask(a_d, c, size, mask_d, mask_size)
135 type(c_ptr) :: a_d
136 real(kind=rp), intent(in) :: c
137 integer :: size
138 type(c_ptr) :: mask_d
139 integer :: mask_size
140#if HAVE_HIP
141 call hip_cfill_mask(a_d, c, size, mask_d, mask_size)
142#elif HAVE_CUDA
143 call cuda_cfill_mask(a_d, c, size, mask_d, mask_size)
144#elif HAVE_OPENCL
145 call opencl_cfill_mask(a_d, c, size, mask_d, mask_size)
146#else
147 call neko_error('No device backend configured')
148#endif
149 end subroutine device_cfill_mask
150
152 subroutine device_rzero(a_d, n)
153 type(c_ptr) :: a_d
154 integer :: n
155#if HAVE_HIP
156 call hip_rzero(a_d, n)
157#elif HAVE_CUDA
158 call cuda_rzero(a_d, n)
159#elif HAVE_OPENCL
160 call opencl_rzero(a_d, n)
161#else
162 call neko_error('No device backend configured')
163#endif
164 end subroutine device_rzero
165
167 subroutine device_rone(a_d, n)
168 type(c_ptr) :: a_d
169 integer :: n
170 real(kind=rp), parameter :: one = 1.0_rp
171#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
172 call device_cfill(a_d, one, n)
173#else
174 call neko_error('No device backend configured')
175#endif
176 end subroutine device_rone
177
179 subroutine device_cmult(a_d, c, n)
180 type(c_ptr) :: a_d
181 real(kind=rp), intent(in) :: c
182 integer :: n
183#if HAVE_HIP
184 call hip_cmult(a_d, c, n)
185#elif HAVE_CUDA
186 call cuda_cmult(a_d, c, n)
187#elif HAVE_OPENCL
188 call opencl_cmult(a_d, c, n)
189#else
190 call neko_error('No device backend configured')
191#endif
192 end subroutine device_cmult
193
195 subroutine device_cmult2(a_d, b_d, c, n)
196 type(c_ptr) :: a_d, b_d
197 real(kind=rp), intent(in) :: c
198 integer :: n
199#if HAVE_HIP
200 call hip_cmult2(a_d, b_d, c, n)
201#elif HAVE_CUDA
202 call cuda_cmult2(a_d, b_d, c, n)
203#elif HAVE_OPENCL
204 call opencl_cmult2(a_d, b_d, c, n)
205#else
206 call neko_error('No device backend configured')
207#endif
208 end subroutine device_cmult2
209
211 subroutine device_cadd(a_d, c, n)
212 type(c_ptr) :: a_d
213 real(kind=rp), intent(in) :: c
214 integer :: n
215#if HAVE_HIP
216 call hip_cadd(a_d, c, n)
217#elif HAVE_CUDA
218 call cuda_cadd(a_d, c, n)
219#elif HAVE_OPENCL
220 call opencl_cadd(a_d, c, n)
221#else
222 call neko_error('No device backend configured')
223#endif
224 end subroutine device_cadd
225
227 subroutine device_cadd2(a_d, b_d, c, n)
228 type(c_ptr) :: a_d
229 type(c_ptr) :: b_d
230 real(kind=rp), intent(in) :: c
231 integer :: n
232#if HAVE_HIP
233 call hip_cadd2(a_d, b_d, c, n)
234#elif HAVE_CUDA
235 call cuda_cadd2(a_d, b_d, c, n)
236#elif HAVE_OPENCL
237 call opencl_cadd2(a_d, b_d, c, n)
238#else
239 call neko_error('No device backend configured')
240#endif
241 end subroutine device_cadd2
242
244 subroutine device_cfill(a_d, c, n)
245 type(c_ptr) :: a_d
246 real(kind=rp), intent(in) :: c
247 integer :: n
248#if HAVE_HIP
249 call hip_cfill(a_d, c, n)
250#elif HAVE_CUDA
251 call cuda_cfill(a_d, c, n)
252#elif HAVE_OPENCL
253 call opencl_cfill(a_d, c, n)
254#else
255 call neko_error('No device backend configured')
256#endif
257 end subroutine device_cfill
258
260 subroutine device_add2(a_d, b_d, n)
261 type(c_ptr) :: a_d, b_d
262 integer :: n
263#if HAVE_HIP
264 call hip_add2(a_d, b_d, n)
265#elif HAVE_CUDA
266 call cuda_add2(a_d, b_d, n)
267#elif HAVE_OPENCL
268 call opencl_add2(a_d, b_d, n)
269#else
270 call neko_error('No device backend configured')
271#endif
272 end subroutine device_add2
273
274 subroutine device_add4(a_d, b_d, c_d, d_d, n)
275 type(c_ptr) :: a_d, b_d, c_d, d_d
276 integer :: n
277#if HAVE_HIP
278 call hip_add4(a_d, b_d, c_d, d_d, n)
279#elif HAVE_CUDA
280 call cuda_add4(a_d, b_d, c_d, d_d, n)
281#elif HAVE_OPENCL
282 call opencl_add4(a_d, b_d, c_d, d_d, n)
283#else
284 call neko_error('No device backend configured')
285#endif
286 end subroutine device_add4
287
288 subroutine device_add2s1(a_d, b_d, c1, n)
289 type(c_ptr) :: a_d, b_d
290 real(kind=rp) :: c1
291 integer :: n
292#if HAVE_HIP
293 call hip_add2s1(a_d, b_d, c1, n)
294#elif HAVE_CUDA
295 call cuda_add2s1(a_d, b_d, c1, n)
296#elif HAVE_OPENCL
297 call opencl_add2s1(a_d, b_d, c1, n)
298#else
299 call neko_error('No device backend configured')
300#endif
301 end subroutine device_add2s1
302
305 subroutine device_add2s2(a_d, b_d, c1, n)
306 type(c_ptr) :: a_d, b_d
307 real(kind=rp) :: c1
308 integer :: n
309#if HAVE_HIP
310 call hip_add2s2(a_d, b_d, c1, n)
311#elif HAVE_CUDA
312 call cuda_add2s2(a_d, b_d, c1, n)
313#elif HAVE_OPENCL
314 call opencl_add2s2(a_d, b_d, c1, n)
315#else
316 call neko_error('No device backend configured')
317#endif
318 end subroutine device_add2s2
319
321 subroutine device_addsqr2s2(a_d, b_d, c1, n)
322 type(c_ptr) :: a_d, b_d
323 real(kind=rp) :: c1
324 integer :: n
325#if HAVE_HIP
326 call hip_addsqr2s2(a_d, b_d, c1, n)
327#elif HAVE_CUDA
328 call cuda_addsqr2s2(a_d, b_d, c1, n)
329#elif HAVE_OPENCL
330 call opencl_addsqr2s2(a_d, b_d, c1, n)
331#else
332 call neko_error('No device backend configured')
333#endif
334 end subroutine device_addsqr2s2
335
337 subroutine device_add3(a_d, b_d, c_d, n)
338 type(c_ptr) :: a_d, b_d, c_d
339 integer :: n
340#if HAVE_HIP
341 call hip_add3(a_d, b_d, c_d, n)
342#elif HAVE_CUDA
343 call cuda_add3(a_d, b_d, c_d, n)
344#elif HAVE_OPENCL
345 call opencl_add3(a_d, b_d, c_d, n)
346#else
347 call neko_error('No device backend configured')
348#endif
349 end subroutine device_add3
350
352 subroutine device_add3s2(a_d, b_d, c_d, c1, c2 , n)
353 type(c_ptr) :: a_d, b_d, c_d
354 real(kind=rp) :: c1, c2
355 integer :: n
356#if HAVE_HIP
357 call hip_add3s2(a_d, b_d, c_d, c1, c2, n)
358#elif HAVE_CUDA
359 call cuda_add3s2(a_d, b_d, c_d, c1, c2, n)
360#elif HAVE_OPENCL
361 call opencl_add3s2(a_d, b_d, c_d, c1, c2, n)
362#else
363 call neko_error('No device backend configured')
364#endif
365 end subroutine device_add3s2
366
368 subroutine device_invcol1(a_d, n)
369 type(c_ptr) :: a_d
370 integer :: n
371#if HAVE_HIP
372 call hip_invcol1(a_d, n)
373#elif HAVE_CUDA
374 call cuda_invcol1(a_d, n)
375#elif HAVE_OPENCL
376 call opencl_invcol1(a_d, n)
377#else
378 call neko_error('No device backend configured')
379#endif
380 end subroutine device_invcol1
381
383 subroutine device_invcol2(a_d, b_d, n)
384 type(c_ptr) :: a_d, b_d
385 integer :: n
386#if HAVE_HIP
387 call hip_invcol2(a_d, b_d, n)
388#elif HAVE_CUDA
389 call cuda_invcol2(a_d, b_d, n)
390#elif HAVE_OPENCL
391 call opencl_invcol2(a_d, b_d, n)
392#else
393 call neko_error('No device backend configured')
394#endif
395 end subroutine device_invcol2
396
398 subroutine device_col2(a_d, b_d, n)
399 type(c_ptr) :: a_d, b_d
400 integer :: n
401#if HAVE_HIP
402 call hip_col2(a_d, b_d, n)
403#elif HAVE_CUDA
404 call cuda_col2(a_d, b_d, n)
405#elif HAVE_OPENCL
406 call opencl_col2(a_d, b_d, n)
407#else
408 call neko_error('No device backend configured')
409#endif
410 end subroutine device_col2
411
413 subroutine device_col3(a_d, b_d, c_d, n)
414 type(c_ptr) :: a_d, b_d, c_d
415 integer :: n
416#if HAVE_HIP
417 call hip_col3(a_d, b_d, c_d, n)
418#elif HAVE_CUDA
419 call cuda_col3(a_d, b_d, c_d, n)
420#elif HAVE_OPENCL
421 call opencl_col3(a_d, b_d, c_d, n)
422#else
423 call neko_error('No device backend configured')
424#endif
425 end subroutine device_col3
426
428 subroutine device_subcol3(a_d, b_d, c_d, n)
429 type(c_ptr) :: a_d, b_d, c_d
430 integer :: n
431#if HAVE_HIP
432 call hip_subcol3(a_d, b_d, c_d, n)
433#elif HAVE_CUDA
434 call cuda_subcol3(a_d, b_d, c_d, n)
435#elif HAVE_OPENCL
436 call opencl_subcol3(a_d, b_d, c_d, n)
437#else
438 call neko_error('No device backend configured')
439#endif
440 end subroutine device_subcol3
441
443 subroutine device_sub2(a_d, b_d, n)
444 type(c_ptr) :: a_d, b_d
445 integer :: n
446#if HAVE_HIP
447 call hip_sub2(a_d, b_d, n)
448#elif HAVE_CUDA
449 call cuda_sub2(a_d, b_d, n)
450#elif HAVE_OPENCL
451 call opencl_sub2(a_d, b_d, n)
452#else
453 call neko_error('No device backend configured')
454#endif
455 end subroutine device_sub2
456
458 subroutine device_sub3(a_d, b_d, c_d, n)
459 type(c_ptr) :: a_d, b_d, c_d
460 integer :: n
461#if HAVE_HIP
462 call hip_sub3(a_d, b_d, c_d, n)
463#elif HAVE_CUDA
464 call cuda_sub3(a_d, b_d, c_d, n)
465#elif HAVE_OPENCL
466 call opencl_sub3(a_d, b_d, c_d, n)
467#else
468 call neko_error('No device backend configured')
469#endif
470 end subroutine device_sub3
471
473 subroutine device_addcol3(a_d, b_d, c_d, n)
474 type(c_ptr) :: a_d, b_d, c_d
475 integer :: n
476#if HAVE_HIP
477 call hip_addcol3(a_d, b_d, c_d, n)
478#elif HAVE_CUDA
479 call cuda_addcol3(a_d, b_d, c_d, n)
480#elif HAVE_OPENCL
481 call opencl_addcol3(a_d, b_d, c_d, n)
482#else
483 call neko_error('No device backend configured')
484#endif
485 end subroutine device_addcol3
486
488 subroutine device_addcol4(a_d, b_d, c_d, d_d, n)
489 type(c_ptr) :: a_d, b_d, c_d, d_d
490 integer :: n
491#if HAVE_HIP
492 call hip_addcol4(a_d, b_d, c_d, d_d, n)
493#elif HAVE_CUDA
494 call cuda_addcol4(a_d, b_d, c_d, d_d, n)
495#elif HAVE_OPENCL
496 call opencl_addcol4(a_d, b_d, c_d, d_d, n)
497#else
498 call neko_error('No device backend configured')
499#endif
500 end subroutine device_addcol4
501
504 subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
505 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
506 integer :: n
507#if HAVE_HIP
508 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
509#elif HAVE_CUDA
510 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
511#elif HAVE_OPENCL
512 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
513#else
514 call neko_error('No device backend configured')
515#endif
516 end subroutine device_vdot3
517
520 subroutine device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
521 w1_d, w2_d, w3_d, n)
522 type(c_ptr) :: u1_d, u2_d, u3_d
523 type(c_ptr) :: v1_d, v2_d, v3_d
524 type(c_ptr) :: w1_d, w2_d, w3_d
525 integer :: n
526#if HAVE_HIP
527 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
528 w1_d, w2_d, w3_d, n)
529#elif HAVE_CUDA
530 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
531 w1_d, w2_d, w3_d, n)
532#elif HAVE_OPENCL
533 call neko_error("no opencl backedn vcross")
534#else
535 call neko_error('No device backend configured')
536#endif
537 end subroutine device_vcross
538
539
541 function device_vlsc3(u_d, v_d, w_d, n) result(res)
542 type(c_ptr) :: u_d, v_d, w_d
543 integer :: n
544 real(kind=rp) :: res
545 res = 0.0_rp
546#if HAVE_HIP
547 res = hip_vlsc3(u_d, v_d, w_d, n)
548#elif HAVE_CUDA
549 res = cuda_vlsc3(u_d, v_d, w_d, n)
550#elif HAVE_OPENCL
551 ! Same kernel as glsc3 (currently no device MPI for OpenCL)
552 res = opencl_glsc3(u_d, v_d, w_d, n)
553#else
554 call neko_error('No device backend configured')
555#endif
556 end function device_vlsc3
557
559 function device_glsc3(a_d, b_d, c_d, n) result(res)
560 type(c_ptr) :: a_d, b_d, c_d
561 integer :: n, ierr
562 real(kind=rp) :: res
563#if HAVE_HIP
564 res = hip_glsc3(a_d, b_d, c_d, n)
565#elif HAVE_CUDA
566 res = cuda_glsc3(a_d, b_d, c_d, n)
567#elif HAVE_OPENCL
568 res = opencl_glsc3(a_d, b_d, c_d, n)
569#else
570 call neko_error('No device backend configured')
571#endif
572
573#ifndef HAVE_DEVICE_MPI
574 if (pe_size .gt. 1) then
575 call mpi_allreduce(mpi_in_place, res, 1, &
576 mpi_real_precision, mpi_sum, neko_comm, ierr)
577 end if
578#endif
579 end function device_glsc3
580
581 subroutine device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
582 type(c_ptr), value :: w_d, v_d_d, mult_d
583 integer(c_int) :: j, n
584 real(c_rp) :: h(j)
585 integer :: ierr
586#if HAVE_HIP
587 call hip_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
588#elif HAVE_CUDA
589 call cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
590#elif HAVE_OPENCL
591 call opencl_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
592#else
593 call neko_error('No device backend configured')
594#endif
595
596#ifndef HAVE_DEVICE_MPI
597 if (pe_size .gt. 1) then
598 call mpi_allreduce(mpi_in_place, h, j, &
599 mpi_real_precision, mpi_sum, neko_comm, ierr)
600 end if
601#endif
602 end subroutine device_glsc3_many
603
604 subroutine device_add2s2_many(y_d, x_d_d, a_d, j, n)
605 type(c_ptr), value :: y_d, x_d_d, a_d
606 integer(c_int) :: j, n
607#if HAVE_HIP
608 call hip_add2s2_many(y_d, x_d_d, a_d, j, n)
609#elif HAVE_CUDA
610 call cuda_add2s2_many(y_d, x_d_d, a_d, j, n)
611#elif HAVE_OPENCL
612 call opencl_add2s2_many(y_d, x_d_d, a_d, j, n)
613#else
614 call neko_error('No device backend configured')
615#endif
616 end subroutine device_add2s2_many
617
619 function device_glsc2(a_d, b_d, n) result(res)
620 type(c_ptr) :: a_d, b_d
621 integer :: n, ierr
622 real(kind=rp) :: res
623#if HAVE_HIP
624 res = hip_glsc2(a_d, b_d, n)
625#elif HAVE_CUDA
626 res = cuda_glsc2(a_d, b_d, n)
627#elif HAVE_OPENCL
628 res = opencl_glsc2(a_d, b_d, n)
629#else
630 call neko_error('No device backend configured')
631#endif
632
633#ifndef HAVE_DEVICE_MPI
634 if (pe_size .gt. 1) then
635 call mpi_allreduce(mpi_in_place, res, 1, &
636 mpi_real_precision, mpi_sum, neko_comm, ierr)
637 end if
638#endif
639 end function device_glsc2
640
642 function device_glsum(a_d, n) result(res)
643 type(c_ptr) :: a_d
644 integer :: n, ierr
645 real(kind=rp) :: res
646#if HAVE_HIP
647 res = hip_glsum(a_d, n)
648#elif HAVE_CUDA
649 res = cuda_glsum(a_d, n)
650#elif HAVE_OPENCL
651 res = opencl_glsum(a_d, n)
652#else
653 call neko_error('No device backend configured')
654#endif
655
656#ifndef HAVE_DEVICE_MPI
657 if (pe_size .gt. 1) then
658 call mpi_allreduce(mpi_in_place, res, 1, &
659 mpi_real_precision, mpi_sum, neko_comm, ierr)
660 end if
661#endif
662 end function device_glsum
663
664 subroutine device_absval(a_d, n)
665 integer, intent(in) :: n
666 type(c_ptr) :: a_d
667#ifdef HAVE_HIP
668 call hip_absval(a_d, n)
669#elif HAVE_CUDA
670 call cuda_absval(a_d, n)
671#elif HAVE_OPENCL
672 call neko_error('OPENCL is not implemented for device_absval')
673#else
674 call neko_error('No device backend configured')
675#endif
676
677 end subroutine device_absval
678
679 ! ========================================================================== !
680 ! Device point-wise max
681
684 subroutine device_pwmax_vec2(a_d, b_d, n)
685 type(c_ptr) :: a_d, b_d
686 integer :: n
687
688#if HAVE_HIP
689 call neko_error('No HIP backend for device_pwmax_vec2')
690#elif HAVE_CUDA
691 call cuda_pwmax_vec2(a_d, b_d, n)
692#elif HAVE_OPENCL
693 call neko_error('No OpenCL backend for device_pwmax_vec2')
694#else
695 call neko_error('No device backend configured')
696#endif
697 end subroutine device_pwmax_vec2
698
701 subroutine device_pwmax_vec3(a_d, b_d, c_d, n)
702 type(c_ptr) :: a_d, b_d, c_d
703 integer :: n
704
705#if HAVE_HIP
706 call neko_error('No HIP backend for device_pwmax_vec3')
707#elif HAVE_CUDA
708 call cuda_pwmax_vec3(a_d, b_d, c_d, n)
709#elif HAVE_OPENCL
710 call neko_error('No OpenCL backend for device_pwmax_vec3')
711#else
712 call neko_error('No device backend configured')
713#endif
714
715 end subroutine device_pwmax_vec3
716
719 subroutine device_pwmax_sca2(a_d, c, n)
720 type(c_ptr) :: a_d
721 real(kind=rp), intent(in) :: c
722 integer :: n
723
724#if HAVE_HIP
725 call neko_error('No HIP backend for device_pwmax_sca2')
726#elif HAVE_CUDA
727 call cuda_pwmax_sca2(a_d, c, n)
728#elif HAVE_OPENCL
729 call neko_error('No OpenCL backend for device_pwmax_sca2')
730#else
731 call neko_error('No device backend configured')
732#endif
733
734 end subroutine device_pwmax_sca2
735
738 subroutine device_pwmax_sca3(a_d, b_d, c, n)
739 type(c_ptr) :: a_d, b_d
740 real(kind=rp), intent(in) :: c
741 integer :: n
742
743#if HAVE_HIP
744 call neko_error('No HIP backend for device_pwmax_sca3')
745#elif HAVE_CUDA
746 call cuda_pwmax_sca3(a_d, b_d, c, n)
747#elif HAVE_OPENCL
748 call neko_error('No OpenCL backend for device_pwmax_sca3')
749#else
750 call neko_error('No device backend configured')
751#endif
752
753 end subroutine device_pwmax_sca3
754
755 ! ========================================================================== !
756 ! Device point-wise min
757
760 subroutine device_pwmin_vec2(a_d, b_d, n)
761 type(c_ptr) :: a_d, b_d
762 integer :: n
763
764#if HAVE_HIP
765 call neko_error('No HIP backend for device_pwmin_vec2')
766#elif HAVE_CUDA
767 call cuda_pwmin_vec2(a_d, b_d, n)
768#elif HAVE_OPENCL
769 call neko_error('No OpenCL backend for device_pwmin_vec2')
770#else
771 call neko_error('No device backend configured')
772#endif
773 end subroutine device_pwmin_vec2
774
777 subroutine device_pwmin_vec3(a_d, b_d, c_d, n)
778 type(c_ptr) :: a_d, b_d, c_d
779 integer :: n
780
781#if HAVE_HIP
782 call neko_error('No HIP backend for device_pwmin_vec3')
783#elif HAVE_CUDA
784 call cuda_pwmin_vec3(a_d, b_d, c_d, n)
785#elif HAVE_OPENCL
786 call neko_error('No OpenCL backend for device_pwmin_vec3')
787#else
788 call neko_error('No device backend configured')
789#endif
790
791 end subroutine device_pwmin_vec3
792
795 subroutine device_pwmin_sca2(a_d, c, n)
796 type(c_ptr) :: a_d
797 real(kind=rp), intent(in) :: c
798 integer :: n
799
800#if HAVE_HIP
801 call neko_error('No HIP backend for device_pwmin_sca2')
802#elif HAVE_CUDA
803 call cuda_pwmin_sca2(a_d, c, n)
804#elif HAVE_OPENCL
805 call neko_error('No OpenCL backend for device_pwmin_sca2')
806#else
807 call neko_error('No device backend configured')
808#endif
809
810 end subroutine device_pwmin_sca2
811
814 subroutine device_pwmin_sca3(a_d, b_d, c, n)
815 type(c_ptr) :: a_d, b_d
816 real(kind=rp), intent(in) :: c
817 integer :: n
818
819#if HAVE_HIP
820 call neko_error('No HIP backend for device_pwmin_sca3')
821#elif HAVE_CUDA
822 call cuda_pwmin_sca3(a_d, b_d, c, n)
823#elif HAVE_OPENCL
824 call neko_error('No OpenCL backend for device_pwmin_sca3')
825#else
826 call neko_error('No device backend configured')
827#endif
828
829 end subroutine device_pwmin_sca3
830
831end module device_math
Definition comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:38
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:46
integer pe_size
MPI size of communicator.
Definition comm.F90:54
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine device_pwmax_sca2(a_d, c, n)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_addcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication .
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine device_pwmax_vec2(a_d, b_d, n)
Compute the point-wise maximum of two vectors .
subroutine, public device_invcol1(a_d, n)
Invert a vector .
subroutine, public device_col3(a_d, b_d, c_d, n)
Vector multiplication with 3 vectors .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n)
subroutine, public device_cadd(a_d, c, n)
Add a scalar to vector .
subroutine, public device_masked_red_copy(a_d, b_d, mask_d, n, m)
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
Compute a dot product (3-d version) assuming vector components etc.
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n)
Compute a cross product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine device_pwmax_sca3(a_d, b_d, c, n)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_absval(a_d, n)
subroutine, public device_masked_copy(a_d, b_d, mask_d, n, m)
Copy a masked vector .
subroutine device_pwmax_vec3(a_d, b_d, c_d, n)
Compute the point-wise maximum of two vectors .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
subroutine device_pwmin_sca3(a_d, b_d, c, n)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_masked_atomic_reduction(a_d, b_d, mask_d, n, m)
subroutine, public device_cfill_mask(a_d, c, size, mask_d, mask_size)
Fill a constant to a masked vector. .
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition .
subroutine device_pwmin_vec3(a_d, b_d, c_d, n)
Compute the point-wise minimum of two vectors .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
Returns .
subroutine, public device_subcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns .
subroutine device_pwmin_sca2(a_d, c, n)
Compute the point-wise minimum of a vector and a scalar .
subroutine device_pwmin_vec2(a_d, b_d, n)
Compute the point-wise minimum of two vectors .
subroutine, public device_invcol2(a_d, b_d, n)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n)
Returns .
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35