Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device_math.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use, intrinsic :: iso_c_binding, only: c_ptr, c_int
35 use num_types, only : rp, c_rp
36 use utils, only : neko_error
38 use mpi_f08, only : mpi_sum, mpi_in_place, mpi_allreduce
39 use device, only : glb_cmd_queue
40 ! ========================================================================== !
41 ! Device math interfaces
42
43 use hip_math
44 use cuda_math
45 use opencl_math
46
47 implicit none
48 private
49
53 end interface device_pwmax
54
58 end interface device_pwmin
59
60 interface device_cadd
61 module procedure device_radd, device_iadd
62 end interface device_cadd
63
76
77contains
78
80 subroutine device_copy(a_d, b_d, n, strm)
81 type(c_ptr) :: a_d, b_d
82 integer :: n
83 type(c_ptr), optional :: strm
84 type(c_ptr) :: strm_
85
86 if (n .lt. 1) return
87
88 if (present(strm)) then
89 strm_ = strm
90 else
91 strm_ = glb_cmd_queue
92 end if
93
94#if HAVE_HIP
95 call hip_copy(a_d, b_d, n, strm_)
96#elif HAVE_CUDA
97 call cuda_copy(a_d, b_d, n, strm_)
98#elif HAVE_OPENCL
99 call opencl_copy(a_d, b_d, n, strm_)
100#else
101 call neko_error('no device backend configured')
102#endif
103 end subroutine device_copy
104
106 subroutine device_masked_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
107 type(c_ptr) :: a_d, b_d, mask_d
108 integer :: n, n_mask
109 type(c_ptr), optional :: strm
110 type(c_ptr) :: strm_
111
112 if (n .lt. 1 .or. n_mask .lt. 1) return
113
114 if (present(strm)) then
115 strm_ = strm
116 else
117 strm_ = glb_cmd_queue
118 end if
119
120#if HAVE_HIP
121 call hip_masked_copy(a_d, b_d, mask_d, n, n_mask, strm_)
122#elif HAVE_CUDA
123 call cuda_masked_copy(a_d, b_d, mask_d, n, n_mask, strm_)
124#elif HAVE_OPENCL
125 call opencl_masked_copy(a_d, b_d, mask_d, n, n_mask, strm_)
126#else
127 call neko_error('no device backend configured')
128#endif
129 end subroutine device_masked_copy_0
130
132 subroutine device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
133 type(c_ptr) :: a_d, b_d, mask_d
134 integer :: n, n_mask
135 type(c_ptr), optional :: strm
136 type(c_ptr) :: strm_
137
138 if (n .lt. 1 .or. n_mask .lt. 1) return
139
140 if (present(strm)) then
141 strm_ = strm
142 else
143 strm_ = glb_cmd_queue
144 end if
145
146#if HAVE_HIP
147 call hip_masked_gather_copy(a_d, b_d, mask_d, n, n_mask, strm_)
148#elif HAVE_CUDA
149 call cuda_masked_gather_copy(a_d, b_d, mask_d, n, n_mask, strm_)
150#elif HAVE_OPENCL
151 call opencl_masked_gather_copy(a_d, b_d, mask_d, n, n_mask, strm_)
152#else
153 call neko_error('no device backend configured')
154#endif
155 end subroutine device_masked_gather_copy_0
156
158 subroutine device_masked_scatter_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
159 type(c_ptr) :: a_d, b_d, mask_d
160 integer :: n, n_mask
161 type(c_ptr), optional :: strm
162 type(c_ptr) :: strm_
163
164 if (n .lt. 1 .or. n_mask .lt. 1) return
165
166 if (present(strm)) then
167 strm_ = strm
168 else
169 strm_ = glb_cmd_queue
170 end if
171
172#if HAVE_HIP
173 call hip_masked_scatter_copy(a_d, b_d, mask_d, n, n_mask, strm_)
174#elif HAVE_CUDA
175 call cuda_masked_scatter_copy(a_d, b_d, mask_d, n, n_mask, strm_)
176#elif HAVE_OPENCL
177 call opencl_masked_scatter_copy(a_d, b_d, mask_d, n, n_mask, strm_)
178#else
179 call neko_error('no device backend configured')
180#endif
181 end subroutine device_masked_scatter_copy_0
182
183 subroutine device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
184 type(c_ptr) :: a_d, b_d, mask_d
185 integer :: n, n_mask
186 type(c_ptr), optional :: strm
187 type(c_ptr) :: strm_
188
189 if (n .lt. 1 .or. n_mask .lt. 1) return
190
191 if (present(strm)) then
192 strm_ = strm
193 else
194 strm_ = glb_cmd_queue
195 end if
196
197#if HAVE_HIP
198 call hip_masked_atomic_reduction(a_d, b_d, mask_d, n, n_mask, strm_)
199#elif HAVE_CUDA
200 call cuda_masked_atomic_reduction(a_d, b_d, mask_d, n, n_mask, strm_)
201#elif HAVE_OPENCL
202 call neko_error('No OpenCL bcknd, masked atomic reduction')
203#else
204 call neko_error('no device backend configured')
205#endif
207
210 subroutine device_cfill_mask(a_d, c, n, mask_d, n_mask, strm)
211 type(c_ptr) :: a_d
212 real(kind=rp), intent(in) :: c
213 integer :: n
214 type(c_ptr) :: mask_d
215 integer :: n_mask
216 type(c_ptr), optional :: strm
217 type(c_ptr) :: strm_
218
219 if (n .lt. 1 .or. n_mask .lt. 1) return
220
221 if (present(strm)) then
222 strm_ = strm
223 else
224 strm_ = glb_cmd_queue
225 end if
226
227#if HAVE_HIP
228 call hip_cfill_mask(a_d, c, n, mask_d, n_mask, strm_)
229#elif HAVE_CUDA
230 call cuda_cfill_mask(a_d, c, n, mask_d, n_mask, strm_)
231#elif HAVE_OPENCL
232 call opencl_cfill_mask(a_d, c, n, mask_d, n_mask, strm_)
233#else
234 call neko_error('No device backend configured')
235#endif
236 end subroutine device_cfill_mask
237
239 subroutine device_rzero(a_d, n, strm)
240 type(c_ptr) :: a_d
241 integer :: n
242 type(c_ptr), optional :: strm
243 type(c_ptr) :: strm_
244
245 if (n .lt. 1) return
246
247 if (present(strm)) then
248 strm_ = strm
249 else
250 strm_ = glb_cmd_queue
251 end if
252
253#if HAVE_HIP
254 call hip_rzero(a_d, n, strm_)
255#elif HAVE_CUDA
256 call cuda_rzero(a_d, n, strm_)
257#elif HAVE_OPENCL
258 call opencl_rzero(a_d, n, strm_)
259#else
260 call neko_error('No device backend configured')
261#endif
262 end subroutine device_rzero
263
265 subroutine device_rone(a_d, n, strm)
266 type(c_ptr) :: a_d
267 integer :: n
268 type(c_ptr), optional :: strm
269 type(c_ptr) :: strm_
270 real(kind=rp), parameter :: one = 1.0_rp
271
272 if (n .lt. 1) return
273
274 if (present(strm)) then
275 strm_ = strm
276 else
277 strm_ = glb_cmd_queue
278 end if
279
280#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
281 call device_cfill(a_d, one, n, strm_)
282#else
283 call neko_error('No device backend configured')
284#endif
285 end subroutine device_rone
286
288 subroutine device_cmult(a_d, c, n, strm)
289 type(c_ptr) :: a_d
290 real(kind=rp), intent(in) :: c
291 integer :: n
292 type(c_ptr), optional :: strm
293 type(c_ptr) :: strm_
294
295 if (n .lt. 1) return
296
297 if (present(strm)) then
298 strm_ = strm
299 else
300 strm_ = glb_cmd_queue
301 end if
302
303#if HAVE_HIP
304 call hip_cmult(a_d, c, n, strm_)
305#elif HAVE_CUDA
306 call cuda_cmult(a_d, c, n, strm_)
307#elif HAVE_OPENCL
308 call opencl_cmult(a_d, c, n, strm_)
309#else
310 call neko_error('No device backend configured')
311#endif
312 end subroutine device_cmult
313
315 subroutine device_cmult2(a_d, b_d, c, n, strm)
316 type(c_ptr) :: a_d, b_d
317 real(kind=rp), intent(in) :: c
318 integer :: n
319 type(c_ptr), optional :: strm
320 type(c_ptr) :: strm_
321
322 if (n .lt. 1) return
323
324 if (present(strm)) then
325 strm_ = strm
326 else
327 strm_ = glb_cmd_queue
328 end if
329
330#if HAVE_HIP
331 call hip_cmult2(a_d, b_d, c, n, strm_)
332#elif HAVE_CUDA
333 call cuda_cmult2(a_d, b_d, c, n, strm_)
334#elif HAVE_OPENCL
335 call opencl_cmult2(a_d, b_d, c, n, strm_)
336#else
337 call neko_error('No device backend configured')
338#endif
339 end subroutine device_cmult2
340
342 subroutine device_cdiv(a_d, c, n, strm)
343 type(c_ptr) :: a_d
344 real(kind=rp), intent(in) :: c
345 integer :: n
346 type(c_ptr), optional :: strm
347 type(c_ptr) :: strm_
348
349 if (present(strm)) then
350 strm_ = strm
351 else
352 strm_ = glb_cmd_queue
353 end if
354
355#if HAVE_HIP
356 call hip_cdiv(a_d, c, n, strm_)
357#elif HAVE_CUDA
358 call cuda_cdiv(a_d, c, n, strm_)
359#elif HAVE_OPENCL
360 call opencl_cdiv(a_d, c, n, strm_)
361#else
362 call neko_error('No device backend configured')
363#endif
364 end subroutine device_cdiv
365
367 subroutine device_cdiv2(a_d, b_d, c, n, strm)
368 type(c_ptr) :: a_d, b_d
369 real(kind=rp), intent(in) :: c
370 integer :: n
371 type(c_ptr), optional :: strm
372 type(c_ptr) :: strm_
373
374 if (present(strm)) then
375 strm_ = strm
376 else
377 strm_ = glb_cmd_queue
378 end if
379
380#if HAVE_HIP
381 call hip_cdiv2(a_d, b_d, c, n, strm_)
382#elif HAVE_CUDA
383 call cuda_cdiv2(a_d, b_d, c, n, strm_)
384#elif HAVE_OPENCL
385 call opencl_cdiv2(a_d, b_d, c, n, strm_)
386#else
387 call neko_error('No device backend configured')
388#endif
389 end subroutine device_cdiv2
390
392 subroutine device_radd(a_d, c, n, strm)
393 type(c_ptr) :: a_d
394 real(kind=rp), intent(in) :: c
395 integer :: n
396 type(c_ptr), optional :: strm
397 type(c_ptr) :: strm_
398
399 if (n .lt. 1) return
400
401 if (present(strm)) then
402 strm_ = strm
403 else
404 strm_ = glb_cmd_queue
405 end if
406
407#if HAVE_HIP
408 call hip_radd(a_d, c, n, strm_)
409#elif HAVE_CUDA
410 call cuda_radd(a_d, c, n, strm_)
411#elif HAVE_OPENCL
412 call opencl_radd(a_d, c, n, strm_)
413#else
414 call neko_error('No device backend configured')
415#endif
416 end subroutine device_radd
417
419 subroutine device_cadd2(a_d, b_d, c, n, strm)
420 type(c_ptr) :: a_d
421 type(c_ptr) :: b_d
422 real(kind=rp), intent(in) :: c
423 integer :: n
424 type(c_ptr), optional :: strm
425 type(c_ptr) :: strm_
426
427 if (n .lt. 1) return
428
429 if (present(strm)) then
430 strm_ = strm
431 else
432 strm_ = glb_cmd_queue
433 end if
434
435#if HAVE_HIP
436 call hip_cadd2(a_d, b_d, c, n, strm_)
437#elif HAVE_CUDA
438 call cuda_cadd2(a_d, b_d, c, n, strm_)
439#elif HAVE_OPENCL
440 call opencl_cadd2(a_d, b_d, c, n, strm_)
441#else
442 call neko_error('No device backend configured')
443#endif
444 end subroutine device_cadd2
445
447 subroutine device_cfill(a_d, c, n, strm)
448 type(c_ptr) :: a_d
449 real(kind=rp), intent(in) :: c
450 integer :: n
451 type(c_ptr), optional ::strm
452 type(c_ptr) :: strm_
453
454 if (n .lt. 1) return
455
456 if (present(strm)) then
457 strm_ = strm
458 else
459 strm_ = glb_cmd_queue
460 end if
461
462#if HAVE_HIP
463 call hip_cfill(a_d, c, n, strm_)
464#elif HAVE_CUDA
465 call cuda_cfill(a_d, c, n, strm_)
466#elif HAVE_OPENCL
467 call opencl_cfill(a_d, c, n, strm_)
468#else
469 call neko_error('No device backend configured')
470#endif
471 end subroutine device_cfill
472
474 subroutine device_add2(a_d, b_d, n, strm)
475 type(c_ptr) :: a_d, b_d
476 integer :: n
477 type(c_ptr), optional :: strm
478 type(c_ptr) :: strm_
479
480 if (n .lt. 1) return
481
482 if (present(strm)) then
483 strm_ = strm
484 else
485 strm_ = glb_cmd_queue
486 end if
487
488#if HAVE_HIP
489 call hip_add2(a_d, b_d, n, strm_)
490#elif HAVE_CUDA
491 call cuda_add2(a_d, b_d, n, strm_)
492#elif HAVE_OPENCL
493 call opencl_add2(a_d, b_d, n, strm_)
494#else
495 call neko_error('No device backend configured')
496#endif
497 end subroutine device_add2
498
499 subroutine device_add4(a_d, b_d, c_d, d_d, n, strm)
500 type(c_ptr) :: a_d, b_d, c_d, d_d
501 integer :: n
502 type(c_ptr), optional :: strm
503 type(c_ptr) :: strm_
504
505 if (n .lt. 1) return
506
507 if (present(strm)) then
508 strm_ = strm
509 else
510 strm_ = glb_cmd_queue
511 end if
512
513#if HAVE_HIP
514 call hip_add4(a_d, b_d, c_d, d_d, n, strm_)
515#elif HAVE_CUDA
516 call cuda_add4(a_d, b_d, c_d, d_d, n, strm_)
517#elif HAVE_OPENCL
518 call opencl_add4(a_d, b_d, c_d, d_d, n, strm_)
519#else
520 call neko_error('No device backend configured')
521#endif
522 end subroutine device_add4
523
524 subroutine device_add2s1(a_d, b_d, c1, n, strm)
525 type(c_ptr) :: a_d, b_d
526 real(kind=rp) :: c1
527 integer :: n
528 type(c_ptr), optional :: strm
529 type(c_ptr) :: strm_
530
531 if (n .lt. 1) return
532
533 if (present(strm)) then
534 strm_ = strm
535 else
536 strm_ = glb_cmd_queue
537 end if
538
539#if HAVE_HIP
540 call hip_add2s1(a_d, b_d, c1, n, strm_)
541#elif HAVE_CUDA
542 call cuda_add2s1(a_d, b_d, c1, n, strm_)
543#elif HAVE_OPENCL
544 call opencl_add2s1(a_d, b_d, c1, n, strm_)
545#else
546 call neko_error('No device backend configured')
547#endif
548 end subroutine device_add2s1
549
552 subroutine device_add2s2(a_d, b_d, c1, n, strm)
553 type(c_ptr) :: a_d, b_d
554 real(kind=rp) :: c1
555 integer :: n
556 type(c_ptr), optional :: strm
557 type(c_ptr) :: strm_
558
559 if (n .lt. 1) return
560
561 if (present(strm)) then
562 strm_ = strm
563 else
564 strm_ = glb_cmd_queue
565 end if
566
567#if HAVE_HIP
568 call hip_add2s2(a_d, b_d, c1, n, strm_)
569#elif HAVE_CUDA
570 call cuda_add2s2(a_d, b_d, c1, n, strm_)
571#elif HAVE_OPENCL
572 call opencl_add2s2(a_d, b_d, c1, n, strm_)
573#else
574 call neko_error('No device backend configured')
575#endif
576 end subroutine device_add2s2
577
579 subroutine device_addsqr2s2(a_d, b_d, c1, n, strm)
580 type(c_ptr) :: a_d, b_d
581 real(kind=rp) :: c1
582 integer :: n
583 type(c_ptr), optional :: strm
584 type(c_ptr) :: strm_
585
586 if (n .lt. 1) return
587
588 if (present(strm)) then
589 strm_ = strm
590 else
591 strm_ = glb_cmd_queue
592 end if
593
594#if HAVE_HIP
595 call hip_addsqr2s2(a_d, b_d, c1, n, strm_)
596#elif HAVE_CUDA
597 call cuda_addsqr2s2(a_d, b_d, c1, n, strm_)
598#elif HAVE_OPENCL
599 call opencl_addsqr2s2(a_d, b_d, c1, n, strm_)
600#else
601 call neko_error('No device backend configured')
602#endif
603 end subroutine device_addsqr2s2
604
606 subroutine device_add3(a_d, b_d, c_d, n, strm)
607 type(c_ptr) :: a_d, b_d, c_d
608 integer :: n
609 type(c_ptr), optional :: strm
610 type(c_ptr) :: strm_
611
612 if (n .lt. 1) return
613
614 if (present(strm)) then
615 strm_ = strm
616 else
617 strm_ = glb_cmd_queue
618 end if
619
620#if HAVE_HIP
621 call hip_add3(a_d, b_d, c_d, n, strm_)
622#elif HAVE_CUDA
623 call cuda_add3(a_d, b_d, c_d, n, strm_)
624#elif HAVE_OPENCL
625 call opencl_add3(a_d, b_d, c_d, n, strm_)
626#else
627 call neko_error('No device backend configured')
628#endif
629 end subroutine device_add3
630
632 subroutine device_add3s2(a_d, b_d, c_d, c1, c2 , n, strm)
633 type(c_ptr) :: a_d, b_d, c_d
634 real(kind=rp) :: c1, c2
635 integer :: n
636 type(c_ptr), optional :: strm
637 type(c_ptr) :: strm_
638
639 if (n .lt. 1) return
640
641 if (present(strm)) then
642 strm_ = strm
643 else
644 strm_ = glb_cmd_queue
645 end if
646
647#if HAVE_HIP
648 call hip_add3s2(a_d, b_d, c_d, c1, c2, n, strm_)
649#elif HAVE_CUDA
650 call cuda_add3s2(a_d, b_d, c_d, c1, c2, n, strm_)
651#elif HAVE_OPENCL
652 call opencl_add3s2(a_d, b_d, c_d, c1, c2, n, strm_)
653#else
654 call neko_error('No device backend configured')
655#endif
656 end subroutine device_add3s2
657
659 subroutine device_invcol1(a_d, n, strm)
660 type(c_ptr) :: a_d
661 integer :: n
662 type(c_ptr), optional :: strm
663 type(c_ptr) :: strm_
664
665 if (n .lt. 1) return
666
667 if (present(strm)) then
668 strm_ = strm
669 else
670 strm_ = glb_cmd_queue
671 end if
672
673#if HAVE_HIP
674 call hip_invcol1(a_d, n, strm_)
675#elif HAVE_CUDA
676 call cuda_invcol1(a_d, n, strm_)
677#elif HAVE_OPENCL
678 call opencl_invcol1(a_d, n, strm_)
679#else
680 call neko_error('No device backend configured')
681#endif
682 end subroutine device_invcol1
683
685 subroutine device_invcol2(a_d, b_d, n, strm)
686 type(c_ptr) :: a_d, b_d
687 integer :: n
688 type(c_ptr), optional :: strm
689 type(c_ptr) :: strm_
690
691 if (n .lt. 1) return
692
693 if (present(strm)) then
694 strm_ = strm
695 else
696 strm_ = glb_cmd_queue
697 end if
698
699#if HAVE_HIP
700 call hip_invcol2(a_d, b_d, n, strm_)
701#elif HAVE_CUDA
702 call cuda_invcol2(a_d, b_d, n, strm_)
703#elif HAVE_OPENCL
704 call opencl_invcol2(a_d, b_d, n, strm_)
705#else
706 call neko_error('No device backend configured')
707#endif
708 end subroutine device_invcol2
709
711 subroutine device_invcol3(a_d, b_d, c_d, n, strm)
712 type(c_ptr) :: a_d, b_d, c_d
713 integer :: n
714 type(c_ptr), optional :: strm
715 type(c_ptr) :: strm_
716
717 if (present(strm)) then
718 strm_ = strm
719 else
720 strm_ = glb_cmd_queue
721 end if
722
723#ifdef HAVE_HIP
724 call hip_invcol3(a_d, b_d, c_d, n, strm_)
725#elif HAVE_CUDA
726 call cuda_invcol3(a_d, b_d, c_d, n, strm_)
727#elif HAVE_OPENCL
728 ! call opencl_invcol3(a_d, b_d, c_d, n)
729 call neko_error('opencl_invcol3 not implemented')
730#else
731 call neko_error('No device backend configured')
732#endif
733 end subroutine device_invcol3
734
736 subroutine device_col2(a_d, b_d, n, strm)
737 type(c_ptr) :: a_d, b_d
738 integer :: n
739 type(c_ptr), optional :: strm
740 type(c_ptr) :: strm_
741
742 if (present(strm)) then
743 strm_ = strm
744 else
745 strm_ = glb_cmd_queue
746 end if
747
748 if (n .lt. 1) return
749#if HAVE_HIP
750 call hip_col2(a_d, b_d, n, strm_)
751#elif HAVE_CUDA
752 call cuda_col2(a_d, b_d, n, strm_)
753#elif HAVE_OPENCL
754 call opencl_col2(a_d, b_d, n, strm_)
755#else
756 call neko_error('No device backend configured')
757#endif
758 end subroutine device_col2
759
761 subroutine device_col3(a_d, b_d, c_d, n, strm)
762 type(c_ptr) :: a_d, b_d, c_d
763 integer :: n
764 type(c_ptr), optional :: strm
765 type(c_ptr) :: strm_
766
767 if (n .lt. 1) return
768
769 if (present(strm)) then
770 strm_ = strm
771 else
772 strm_ = glb_cmd_queue
773 end if
774
775#if HAVE_HIP
776 call hip_col3(a_d, b_d, c_d, n, strm_)
777#elif HAVE_CUDA
778 call cuda_col3(a_d, b_d, c_d, n, strm_)
779#elif HAVE_OPENCL
780 call opencl_col3(a_d, b_d, c_d, n, strm_)
781#else
782 call neko_error('No device backend configured')
783#endif
784 end subroutine device_col3
785
787 subroutine device_subcol3(a_d, b_d, c_d, n, strm)
788 type(c_ptr) :: a_d, b_d, c_d
789 integer :: n
790 type(c_ptr), optional :: strm
791 type(c_ptr) :: strm_
792
793 if (n .lt. 1) return
794
795 if (present(strm)) then
796 strm_ = strm
797 else
798 strm_ = glb_cmd_queue
799 end if
800
801#if HAVE_HIP
802 call hip_subcol3(a_d, b_d, c_d, n, strm_)
803#elif HAVE_CUDA
804 call cuda_subcol3(a_d, b_d, c_d, n, strm_)
805#elif HAVE_OPENCL
806 call opencl_subcol3(a_d, b_d, c_d, n, strm_)
807#else
808 call neko_error('No device backend configured')
809#endif
810 end subroutine device_subcol3
811
813 subroutine device_sub2(a_d, b_d, n, strm)
814 type(c_ptr) :: a_d, b_d
815 integer :: n
816 type(c_ptr), optional :: strm
817 type(c_ptr) :: strm_
818
819 if (n .lt. 1) return
820
821 if (present(strm)) then
822 strm_ = strm
823 else
824 strm_ = glb_cmd_queue
825 end if
826
827#if HAVE_HIP
828 call hip_sub2(a_d, b_d, n, strm_)
829#elif HAVE_CUDA
830 call cuda_sub2(a_d, b_d, n, strm_)
831#elif HAVE_OPENCL
832 call opencl_sub2(a_d, b_d, n, strm_)
833#else
834 call neko_error('No device backend configured')
835#endif
836 end subroutine device_sub2
837
839 subroutine device_sub3(a_d, b_d, c_d, n, strm)
840 type(c_ptr) :: a_d, b_d, c_d
841 integer :: n
842 type(c_ptr), optional :: strm
843 type(c_ptr) :: strm_
844
845 if (n .lt. 1) return
846
847 if (present(strm)) then
848 strm_ = strm
849 else
850 strm_ = glb_cmd_queue
851 end if
852
853#if HAVE_HIP
854 call hip_sub3(a_d, b_d, c_d, n, strm_)
855#elif HAVE_CUDA
856 call cuda_sub3(a_d, b_d, c_d, n, strm_)
857#elif HAVE_OPENCL
858 call opencl_sub3(a_d, b_d, c_d, n, strm_)
859#else
860 call neko_error('No device backend configured')
861#endif
862 end subroutine device_sub3
863
865 subroutine device_addcol3(a_d, b_d, c_d, n, strm)
866 type(c_ptr) :: a_d, b_d, c_d
867 integer :: n
868 type(c_ptr), optional :: strm
869 type(c_ptr) :: strm_
870
871 if (n .lt. 1) return
872
873 if (present(strm)) then
874 strm_ = strm
875 else
876 strm_ = glb_cmd_queue
877 end if
878
879#if HAVE_HIP
880 call hip_addcol3(a_d, b_d, c_d, n, strm_)
881#elif HAVE_CUDA
882 call cuda_addcol3(a_d, b_d, c_d, n, strm_)
883#elif HAVE_OPENCL
884 call opencl_addcol3(a_d, b_d, c_d, n, strm_)
885#else
886 call neko_error('No device backend configured')
887#endif
888 end subroutine device_addcol3
889
891 subroutine device_addcol4(a_d, b_d, c_d, d_d, n, strm)
892 type(c_ptr) :: a_d, b_d, c_d, d_d
893 integer :: n
894 type(c_ptr), optional :: strm
895 type(c_ptr) :: strm_
896
897 if (n .lt. 1) return
898
899 if (present(strm)) then
900 strm_ = strm
901 else
902 strm_ = glb_cmd_queue
903 end if
904
905#if HAVE_HIP
906 call hip_addcol4(a_d, b_d, c_d, d_d, n, strm_)
907#elif HAVE_CUDA
908 call cuda_addcol4(a_d, b_d, c_d, d_d, n, strm_)
909#elif HAVE_OPENCL
910 call opencl_addcol4(a_d, b_d, c_d, d_d, n, strm_)
911#else
912 call neko_error('No device backend configured')
913#endif
914 end subroutine device_addcol4
915
918 subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
919 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
920 integer :: n
921 type(c_ptr), optional :: strm
922 type(c_ptr) :: strm_
923
924 if (n .lt. 1) return
925
926 if (present(strm)) then
927 strm_ = strm
928 else
929 strm_ = glb_cmd_queue
930 end if
931
932#if HAVE_HIP
933 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
934#elif HAVE_CUDA
935 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
936#elif HAVE_OPENCL
937 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
938#else
939 call neko_error('No device backend configured')
940#endif
941 end subroutine device_vdot3
942
945 subroutine device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
946 w1_d, w2_d, w3_d, n, strm)
947 type(c_ptr) :: u1_d, u2_d, u3_d
948 type(c_ptr) :: v1_d, v2_d, v3_d
949 type(c_ptr) :: w1_d, w2_d, w3_d
950 integer :: n
951 type(c_ptr), optional :: strm
952 type(c_ptr) :: strm_
953
954 if (n .lt. 1) return
955
956 if (present(strm)) then
957 strm_ = strm
958 else
959 strm_ = glb_cmd_queue
960 end if
961
962#if HAVE_HIP
963 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
964 w1_d, w2_d, w3_d, n, strm_)
965#elif HAVE_CUDA
966 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
967 w1_d, w2_d, w3_d, n, strm_)
968#elif HAVE_OPENCL
969 call opencl_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
970 w1_d, w2_d, w3_d, n, strm_)
971#else
972 call neko_error('No device backend configured')
973#endif
974 end subroutine device_vcross
975
976
978 function device_vlsc3(u_d, v_d, w_d, n, strm) result(res)
979 type(c_ptr) :: u_d, v_d, w_d
980 integer :: n
981 type(c_ptr), optional :: strm
982 type(c_ptr) :: strm_
983 real(kind=rp) :: res
984
985 res = 0.0_rp
986
987 if (n .lt. 1) return
988
989 if (present(strm)) then
990 strm_ = strm
991 else
992 strm_ = glb_cmd_queue
993 end if
994
995#if HAVE_HIP
996 res = hip_vlsc3(u_d, v_d, w_d, n, strm_)
997#elif HAVE_CUDA
998 res = cuda_vlsc3(u_d, v_d, w_d, n, strm_)
999#elif HAVE_OPENCL
1000 ! Same kernel as glsc3 (currently no device MPI for OpenCL)
1001 res = opencl_glsc3(u_d, v_d, w_d, n, strm_)
1002#else
1003 call neko_error('No device backend configured')
1004#endif
1005 end function device_vlsc3
1006
1008 function device_glsc3(a_d, b_d, c_d, n, strm) result(res)
1009 type(c_ptr) :: a_d, b_d, c_d
1010 integer :: n, ierr
1011 type(c_ptr), optional :: strm
1012 type(c_ptr) :: strm_
1013 real(kind=rp) :: res
1014
1015 if (present(strm)) then
1016 strm_ = strm
1017 else
1018 strm_ = glb_cmd_queue
1019 end if
1020
1021 res = 0.0_rp
1022#if HAVE_HIP
1023 res = hip_glsc3(a_d, b_d, c_d, n, strm_)
1024#elif HAVE_CUDA
1025 res = cuda_glsc3(a_d, b_d, c_d, n, strm_)
1026#elif HAVE_OPENCL
1027 res = opencl_glsc3(a_d, b_d, c_d, n, strm_)
1028#else
1029 call neko_error('No device backend configured')
1030#endif
1031
1032#ifndef HAVE_DEVICE_MPI
1033 if (pe_size .gt. 1) then
1034 call mpi_allreduce(mpi_in_place, res, 1, &
1035 mpi_real_precision, mpi_sum, neko_comm, ierr)
1036 end if
1037#endif
1038 end function device_glsc3
1039
1040 subroutine device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
1041 type(c_ptr), value :: w_d, v_d_d, mult_d
1042 integer(c_int) :: j, n
1043 real(c_rp) :: h(j)
1044 type(c_ptr), optional :: strm
1045 type(c_ptr) :: strm_
1046 integer :: ierr
1047
1048 if (present(strm)) then
1049 strm_ = strm
1050 else
1051 strm_ = glb_cmd_queue
1052 end if
1053
1054#if HAVE_HIP
1055 call hip_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm_)
1056#elif HAVE_CUDA
1057 call cuda_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm_)
1058#elif HAVE_OPENCL
1059 call opencl_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm_)
1060#else
1061 call neko_error('No device backend configured')
1062#endif
1063
1064#ifndef HAVE_DEVICE_MPI
1065 if (pe_size .gt. 1) then
1066 call mpi_allreduce(mpi_in_place, h, j, &
1067 mpi_real_precision, mpi_sum, neko_comm, ierr)
1068 end if
1069#endif
1070 end subroutine device_glsc3_many
1071
1072 subroutine device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
1073 type(c_ptr), value :: y_d, x_d_d, a_d
1074 integer(c_int) :: j, n
1075 type(c_ptr), optional :: strm
1076 type(c_ptr) :: strm_
1077
1078 if (n .lt. 1) return
1079
1080 if (present(strm)) then
1081 strm_ = strm
1082 else
1083 strm_ = glb_cmd_queue
1084 end if
1085
1086#if HAVE_HIP
1087 call hip_add2s2_many(y_d, x_d_d, a_d, j, n, strm_)
1088#elif HAVE_CUDA
1089 call cuda_add2s2_many(y_d, x_d_d, a_d, j, n, strm_)
1090#elif HAVE_OPENCL
1091 call opencl_add2s2_many(y_d, x_d_d, a_d, j, n, strm_)
1092#else
1093 call neko_error('No device backend configured')
1094#endif
1095 end subroutine device_add2s2_many
1096
1098 function device_glsc2(a_d, b_d, n, strm) result(res)
1099 type(c_ptr) :: a_d, b_d
1100 integer :: n, ierr
1101 real(kind=rp) :: res
1102 type(c_ptr), optional :: strm
1103 type(c_ptr) :: strm_
1104
1105 if (present(strm)) then
1106 strm_ = strm
1107 else
1108 strm_ = glb_cmd_queue
1109 end if
1110
1111 res = 0.0_rp
1112#if HAVE_HIP
1113 res = hip_glsc2(a_d, b_d, n, strm_)
1114#elif HAVE_CUDA
1115 res = cuda_glsc2(a_d, b_d, n, strm_)
1116#elif HAVE_OPENCL
1117 res = opencl_glsc2(a_d, b_d, n, strm_)
1118#else
1119 call neko_error('No device backend configured')
1120#endif
1121
1122#ifndef HAVE_DEVICE_MPI
1123 if (pe_size .gt. 1) then
1124 call mpi_allreduce(mpi_in_place, res, 1, &
1125 mpi_real_precision, mpi_sum, neko_comm, ierr)
1126 end if
1127#endif
1128 end function device_glsc2
1129
1132 function device_glsubnorm(a_d, b_d, n, strm) result(res)
1133 type(c_ptr), intent(in) :: a_d, b_d
1134 integer, intent(in) :: n
1135 integer :: ierr
1136 real(kind=rp) :: res
1137 type(c_ptr), optional :: strm
1138 type(c_ptr) :: strm_
1139
1140 if (present(strm)) then
1141 strm_ = strm
1142 else
1143 strm_ = glb_cmd_queue
1144 end if
1145
1146 res = 0.0_rp
1147#if HAVE_HIP
1148 res = hip_glsubnorm2(a_d, b_d, n, strm_)
1149#elif HAVE_CUDA
1150 res = cuda_glsubnorm2(a_d, b_d, n, strm_)
1151#elif HAVE_OPENCL
1152 res = opencl_glsubnorm2(a_d, b_d, n, strm_)
1153#else
1154 call neko_error('No device backend configured')
1155#endif
1156
1157#ifndef HAVE_DEVICE_MPI
1158 if (pe_size .gt. 1) then
1159 call mpi_allreduce(mpi_in_place, res, 1, &
1160 mpi_real_precision, mpi_sum, neko_comm, ierr)
1161 end if
1162#endif
1163
1164 res = sqrt(res)
1165 end function device_glsubnorm
1166
1168 function device_glsum(a_d, n, strm) result(res)
1169 type(c_ptr) :: a_d
1170 integer :: n, ierr
1171 real(kind=rp) :: res
1172 type(c_ptr), optional :: strm
1173 type(c_ptr) :: strm_
1174
1175 if (present(strm)) then
1176 strm_ = strm
1177 else
1178 strm_ = glb_cmd_queue
1179 end if
1180
1181 res = 0.0_rp
1182#if HAVE_HIP
1183 res = hip_glsum(a_d, n, strm_)
1184#elif HAVE_CUDA
1185 res = cuda_glsum(a_d, n, strm_)
1186#elif HAVE_OPENCL
1187 res = opencl_glsum(a_d, n, strm_)
1188#else
1189 call neko_error('No device backend configured')
1190#endif
1191
1192#ifndef HAVE_DEVICE_MPI
1193 if (pe_size .gt. 1) then
1194 call mpi_allreduce(mpi_in_place, res, 1, &
1195 mpi_real_precision, mpi_sum, neko_comm, ierr)
1196 end if
1197#endif
1198 end function device_glsum
1199
1200 subroutine device_absval(a_d, n, strm)
1201 integer, intent(in) :: n
1202 type(c_ptr) :: a_d
1203 type(c_ptr), optional :: strm
1204 type(c_ptr) :: strm_
1205
1206 if (n .lt. 1) return
1207
1208 if (present(strm)) then
1209 strm_ = strm
1210 else
1211 strm_ = glb_cmd_queue
1212 end if
1213
1214#ifdef HAVE_HIP
1215 call hip_absval(a_d, n, strm_)
1216#elif HAVE_CUDA
1217 call cuda_absval(a_d, n, strm_)
1218#elif HAVE_OPENCL
1219 call neko_error('OPENCL is not implemented for device_absval')
1220#else
1221 call neko_error('No device backend configured')
1222#endif
1223
1224 end subroutine device_absval
1225
1226 ! ========================================================================== !
1227 ! Device point-wise max
1228
1231 subroutine device_pwmax_vec2(a_d, b_d, n, strm)
1232 type(c_ptr) :: a_d, b_d
1233 integer :: n
1234 type(c_ptr), optional :: strm
1235 type(c_ptr) :: strm_
1236
1237 if (n .lt. 1) return
1238
1239 if (present(strm)) then
1240 strm_ = strm
1241 else
1242 strm_ = glb_cmd_queue
1243 end if
1244
1245#if HAVE_HIP
1246 call hip_pwmax_vec2(a_d, b_d, n, strm_)
1247#elif HAVE_CUDA
1248 call cuda_pwmax_vec2(a_d, b_d, n, strm_)
1249#elif HAVE_OPENCL
1250 call neko_error('No OpenCL backend for device_pwmax_vec2')
1251#else
1252 call neko_error('No device backend configured')
1253#endif
1254 end subroutine device_pwmax_vec2
1255
1258 subroutine device_pwmax_vec3(a_d, b_d, c_d, n, strm)
1259 type(c_ptr) :: a_d, b_d, c_d
1260 integer :: n
1261 type(c_ptr), optional :: strm
1262 type(c_ptr) :: strm_
1263
1264 if (n .lt. 1) return
1265
1266 if (present(strm)) then
1267 strm_ = strm
1268 else
1269 strm_ = glb_cmd_queue
1270 end if
1271
1272#if HAVE_HIP
1273 call hip_pwmax_vec3(a_d, b_d, c_d, n, strm_)
1274#elif HAVE_CUDA
1275 call cuda_pwmax_vec3(a_d, b_d, c_d, n, strm_)
1276#elif HAVE_OPENCL
1277 call neko_error('No OpenCL backend for device_pwmax_vec3')
1278#else
1279 call neko_error('No device backend configured')
1280#endif
1281
1282 end subroutine device_pwmax_vec3
1283
1286 subroutine device_pwmax_sca2(a_d, c, n, strm)
1287 type(c_ptr) :: a_d
1288 real(kind=rp), intent(in) :: c
1289 integer :: n
1290 type(c_ptr), optional :: strm
1291 type(c_ptr) :: strm_
1292
1293 if (n .lt. 1) return
1294
1295 if (present(strm)) then
1296 strm_ = strm
1297 else
1298 strm_ = glb_cmd_queue
1299 end if
1300
1301#if HAVE_HIP
1302 call hip_pwmax_sca2(a_d, c, n, strm_)
1303#elif HAVE_CUDA
1304 call cuda_pwmax_sca2(a_d, c, n, strm_)
1305#elif HAVE_OPENCL
1306 call neko_error('No OpenCL backend for device_pwmax_sca2')
1307#else
1308 call neko_error('No device backend configured')
1309#endif
1310
1311 end subroutine device_pwmax_sca2
1312
1315 subroutine device_pwmax_sca3(a_d, b_d, c, n, strm)
1316 type(c_ptr) :: a_d, b_d
1317 real(kind=rp), intent(in) :: c
1318 integer :: n
1319 type(c_ptr), optional :: strm
1320 type(c_ptr) :: strm_
1321
1322 if (n .lt. 1) return
1323
1324 if (present(strm)) then
1325 strm_ = strm
1326 else
1327 strm_ = glb_cmd_queue
1328 end if
1329
1330#if HAVE_HIP
1331 call hip_pwmax_sca3(a_d, b_d, c, n, strm_)
1332#elif HAVE_CUDA
1333 call cuda_pwmax_sca3(a_d, b_d, c, n, strm_)
1334#elif HAVE_OPENCL
1335 call neko_error('No OpenCL backend for device_pwmax_sca3')
1336#else
1337 call neko_error('No device backend configured')
1338#endif
1339
1340 end subroutine device_pwmax_sca3
1341
1342 ! ========================================================================== !
1343 ! Device point-wise min
1344
1347 subroutine device_pwmin_vec2(a_d, b_d, n, strm)
1348 type(c_ptr) :: a_d, b_d
1349 integer :: n
1350 type(c_ptr), optional :: strm
1351 type(c_ptr) :: strm_
1352
1353 if (n .lt. 1) return
1354
1355 if (present(strm)) then
1356 strm_ = strm
1357 else
1358 strm_ = glb_cmd_queue
1359 end if
1360
1361#if HAVE_HIP
1362 call hip_pwmin_vec2(a_d, b_d, n, strm_)
1363#elif HAVE_CUDA
1364 call cuda_pwmin_vec2(a_d, b_d, n, strm_)
1365#elif HAVE_OPENCL
1366 call neko_error('No OpenCL backend for device_pwmin_vec2')
1367#else
1368 call neko_error('No device backend configured')
1369#endif
1370 end subroutine device_pwmin_vec2
1371
1374 subroutine device_pwmin_vec3(a_d, b_d, c_d, n, strm)
1375 type(c_ptr) :: a_d, b_d, c_d
1376 integer :: n
1377 type(c_ptr), optional :: strm
1378 type(c_ptr) :: strm_
1379
1380 if (n .lt. 1) return
1381
1382 if (present(strm)) then
1383 strm_ = strm
1384 else
1385 strm_ = glb_cmd_queue
1386 end if
1387
1388#if HAVE_HIP
1389 call hip_pwmin_vec3(a_d, b_d, c_d, n, strm_)
1390#elif HAVE_CUDA
1391 call cuda_pwmin_vec3(a_d, b_d, c_d, n, strm_)
1392#elif HAVE_OPENCL
1393 call neko_error('No OpenCL backend for device_pwmin_vec3')
1394#else
1395 call neko_error('No device backend configured')
1396#endif
1397
1398 end subroutine device_pwmin_vec3
1399
1402 subroutine device_pwmin_sca2(a_d, c, n, strm)
1403 type(c_ptr) :: a_d
1404 real(kind=rp), intent(in) :: c
1405 integer :: n
1406 type(c_ptr), optional :: strm
1407 type(c_ptr) :: strm_
1408
1409 if (n .lt. 1) return
1410
1411 if (present(strm)) then
1412 strm_ = strm
1413 else
1414 strm_ = glb_cmd_queue
1415 end if
1416
1417#if HAVE_HIP
1418 call hip_pwmin_sca2(a_d, c, n, strm_)
1419#elif HAVE_CUDA
1420 call cuda_pwmin_sca2(a_d, c, n, strm_)
1421#elif HAVE_OPENCL
1422 call neko_error('No OpenCL backend for device_pwmin_sca2')
1423#else
1424 call neko_error('No device backend configured')
1425#endif
1426
1427 end subroutine device_pwmin_sca2
1428
1431 subroutine device_pwmin_sca3(a_d, b_d, c, n, strm)
1432 type(c_ptr) :: a_d, b_d
1433 real(kind=rp), intent(in) :: c
1434 integer :: n
1435 type(c_ptr), optional :: strm
1436 type(c_ptr) :: strm_
1437
1438 if (n .lt. 1) return
1439
1440 if (present(strm)) then
1441 strm_ = strm
1442 else
1443 strm_ = glb_cmd_queue
1444 end if
1445
1446#if HAVE_HIP
1447 call hip_pwmin_sca3(a_d, b_d, c, n, strm_)
1448#elif HAVE_CUDA
1449 call cuda_pwmin_sca3(a_d, b_d, c, n, strm_)
1450#elif HAVE_OPENCL
1451 call neko_error('No OpenCL backend for device_pwmin_sca3')
1452#else
1453 call neko_error('No device backend configured')
1454#endif
1455
1456 end subroutine device_pwmin_sca3
1457
1458 ! ========================================================================== !
1459 ! Integer operations
1460
1462 subroutine device_iadd(a_d, c, n, strm)
1463 type(c_ptr), intent(inout) :: a_d
1464 integer, intent(in) :: c
1465 integer, intent(in) :: n
1466 type(c_ptr), optional :: strm
1467 type(c_ptr) :: strm_
1468 if (n .lt. 1) return
1469
1470 if (present(strm)) then
1471 strm_ = strm
1472 else
1473 strm_ = glb_cmd_queue
1474 end if
1475
1476#if HAVE_HIP
1477 call hip_iadd(a_d, c, n, strm_)
1478#elif HAVE_CUDA
1479 call cuda_iadd(a_d, c, n, strm_)
1480#elif HAVE_OPENCL
1481 call opencl_iadd(a_d, c, n, strm_)
1482#else
1483 call neko_error('No device backend configured')
1484#endif
1485 end subroutine device_iadd
1486
1487
1488end module device_math
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:50
integer, public pe_size
MPI size of communicator.
Definition comm.F90:58
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:42
subroutine device_pwmax_sca2(a_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine device_pwmin_sca2(a_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_masked_scatter_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Scatter a masked vector .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_addcol3(a_d, b_d, c_d, n, strm)
Returns .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n, strm)
Compute a cross product (3-d version) assuming vector components etc.
subroutine device_pwmin_sca3(a_d, b_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
Compute a dot product (3-d version) assuming vector components etc.
real(kind=rp) function, public device_glsubnorm(a_d, b_d, n, strm)
Returns the norm of the difference of two vectors .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_cfill_mask(a_d, c, n, mask_d, n_mask, strm)
Fill a constant to a masked vector. .
subroutine, public device_cadd2(a_d, b_d, c, n, strm)
Add a scalar to vector .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine device_pwmax_vec3(a_d, b_d, c_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine device_pwmin_vec3(a_d, b_d, c_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
subroutine device_pwmax_vec2(a_d, b_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine, public device_cdiv2(a_d, b_d, c, n, strm)
Division of constant c by array .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n, strm)
subroutine, public device_masked_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Copy a masked vector .
subroutine, public device_subcol3(a_d, b_d, c_d, n, strm)
Returns .
subroutine, public device_cdiv(a_d, c, n, strm)
Division of constant c by array .
subroutine, public device_absval(a_d, n, strm)
subroutine device_iadd(a_d, c, n, strm)
Add an integer scalar to vector .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n, strm)
Returns .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine device_pwmin_vec2(a_d, b_d, n, strm)
Compute the point-wise minimum of two vectors .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_col3(a_d, b_d, c_d, n, strm)
Vector multiplication with 3 vectors .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n, strm)
Returns .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_add3(a_d, b_d, c_d, n, strm)
Vector addition .
subroutine device_pwmax_sca3(a_d, b_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine device_radd(a_d, c, n, strm)
Add a scalar to vector .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35