Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.hip
Go to the documentation of this file.
1/*
2 Copyright (c) 2021-2025, The Neko Authors
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
16
17 * Neither the name of the authors nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#include <hip/hip_runtime.h>
37#include <device/hip/check.h>
38#include "math_kernel.h"
39
40extern "C" {
41
44
45#ifdef HAVE_RCCL
48#endif
49
53 void hip_copy(void *a, void *b, int *n, hipStream_t strm) {
54 HIP_CHECK(hipMemcpyAsync(a, b, (*n) * sizeof(real),
56 }
57
61 void hip_masked_copy(void *a, void *b, void *mask,
62 int *n, int *m, hipStream_t strm) {
63
64 const dim3 nthrds(1024, 1, 1);
65 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
66
68 nblcks, nthrds, 0, strm, (real *) a,
69 (real *) b, (int *) mask, *n, *m);
70
72
73 }
74
78 void hip_masked_gather_copy(void *a, void *b, void *mask,
79 int *n, int *m, hipStream_t strm) {
80
81 const dim3 nthrds(1024, 1, 1);
82 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
83
85 nblcks, nthrds, 0, strm, (real *) a,
86 (real *) b, (int *) mask, *n, *m);
88
89 }
90
94 void hip_masked_gather_copy_aligned(void *a, void *b, void *mask,
95 int *n, int *m, hipStream_t strm) {
96
97 const dim3 nthrds(1024, 1, 1);
98 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
99
101 nblcks, nthrds, 0, strm, (real *) a,
102 (real *) b, (int *) mask, *n, *m);
104
105 }
106
107
111 void hip_masked_scatter_copy(void *a, void *b, void *mask,
112 int *n, int *m, hipStream_t strm) {
113
114 const dim3 nthrds(1024, 1, 1);
115 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
116
118 nblcks, nthrds, 0, strm, (real *) a,
119 (real *) b, (int *) mask, *n, *m);
120
122
123 }
124
128 void hip_masked_atomic_reduction(void *a, void *b, void *mask,
129 int *n, int *m, hipStream_t strm) {
130
131 const dim3 nthrds(1024, 1, 1);
132 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
133
135 nblcks, nthrds, 0, strm, (real *) a,
136 (real *) b, (int *) mask, *n, *m);
137
139
140 }
141
145 void hip_cfill_mask(void* a, real* c, int* size, void* mask, int* mask_size,
147
148 const dim3 nthrds(1024, 1, 1);
149 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
150
152 nblcks, nthrds, 0, strm, (real*)a,
153 *c, *size, (int*)mask, *mask_size);
154
156 }
157
161 void hip_rzero(void *a, int *n, hipStream_t strm) {
162 HIP_CHECK(hipMemsetAsync(a, 0, (*n) * sizeof(real), strm));
163 }
164
168 void hip_cmult(void *a, real *c, int *n, hipStream_t strm) {
169
170 const dim3 nthrds(1024, 1, 1);
171 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
172
174 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
176
177 }
178
182 void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm) {
183
184 const dim3 nthrds(1024, 1, 1);
185 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
186
188 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
190
191 }
192
196 void hip_cdiv(void *a, real *c, int *n, hipStream_t strm) {
197
198 const dim3 nthrds(1024, 1, 1);
199 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
200
202 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
204
205 }
206
210 void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm) {
211
212 const dim3 nthrds(1024, 1, 1);
213 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
214
216 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
218
219 }
220
224 void hip_radd(void *a, real *c, int *n, hipStream_t strm) {
225
226 const dim3 nthrds(1024, 1, 1);
227 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
228
230 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
232 }
233
238 void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm) {
239
240 const dim3 nthrds(1024, 1, 1);
241 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
242
244 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *c, *n);
246 }
247
251 void hip_cfill(void *a, real *c, int *n, hipStream_t strm) {
252
253 const dim3 nthrds(1024, 1, 1);
254 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
255
256 if (*n > 0) {
258 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
260 }
261
262 }
263
268 void hip_add2(void *a, void *b, int *n, hipStream_t strm) {
269
270 const dim3 nthrds(1024, 1, 1);
271 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
272
274 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
276
277 }
278
283 void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm) {
284
285 const dim3 nthrds(1024, 1, 1);
286 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
287
289 nblcks, nthrds, 0, strm, (real *) a,
290 (real *) b, (real *) c, *n);
292 }
293
298 void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm) {
299
300 const dim3 nthrds(1024, 1, 1);
301 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
302
304 nblcks, nthrds, 0, strm, (real *) a,
305 (real *) b, (real *) c, (real *) d, *n);
307 }
308
314 void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm) {
315
316 const dim3 nthrds(1024, 1, 1);
317 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
318
320 nblcks, nthrds, 0, strm, (real *) a,
321 (real *) b, *c1, *n);
323 }
324
330 void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
331
332 const dim3 nthrds(1024, 1, 1);
333 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
334
336 nblcks, nthrds, 0, strm, (real *) a, (real *) b,
337 *c1, *n);
339 }
340
347 void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n,
349
350 const dim3 nthrds(1024, 1, 1);
351 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
352
354 nblcks, nthrds, 0, strm, (real *) x,
355 (const real **) p, (real *) alpha, *j, *n);
357
358 }
359
365 void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
366
367 const dim3 nthrds(1024, 1, 1);
368 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
369
371 nblcks, nthrds, 0, strm, (real *) a,
372 (real *) b, *c1, *n);
374 }
375
381 void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n,
383
384 const dim3 nthrds(1024, 1, 1);
385 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
386
388 nblcks, nthrds, 0, strm, (real *) a,
389 (real *) b, (real *) c, *c1, *c2, *n);
391 }
392
398 void hip_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2,
399 real *c3, int *n, hipStream_t strm) {
400
401 const dim3 nthrds(1024, 1, 1);
402 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
403
405 nblcks, nthrds, 0, strm, (real *) a,
406 (real *) b, (real *) c, (real *) d, *c1, *c2, *c3, *n);
408 }
409
415 void hip_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1,
416 real *c2, real *c3, real *c4, int *n, hipStream_t strm) {
417
418 const dim3 nthrds(1024, 1, 1);
419 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
420
422 nblcks, nthrds, 0, strm, (real *) a,
423 (real *) b, (real *) c, (real *) d, (real *) e,
424 *c1, *c2, *c3, *c4, *n);
426 }
427
432 void hip_invcol1(void *a, int *n, hipStream_t strm) {
433
434 const dim3 nthrds(1024, 1, 1);
435 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
436
438 nblcks, nthrds, 0, strm, (real *) a, *n);
440 }
441
446 void hip_invcol2(void *a, void *b, int *n, hipStream_t strm) {
447
448 const dim3 nthrds(1024, 1, 1);
449 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
450
452 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
454 }
455
460 void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
461
462 const dim3 nthrds(1024, 1, 1);
463 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
464
466 nblcks, nthrds, 0, strm, (real *) a,
467 (real *) b, (real *) c, *n);
469 }
470
475 void hip_col2(void *a, void *b, int *n, hipStream_t strm) {
476
477 const dim3 nthrds(1024, 1, 1);
478 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
479
481 nblcks, nthrds, 0, strm, (real *) a,
482 (real *) b, *n);
484 }
485
490 void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm) {
491
492 const dim3 nthrds(1024, 1, 1);
493 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
494
496 nblcks, nthrds, 0, strm, (real *) a,
497 (real *) b, (real *) c, *n);
499 }
500
505 void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
506
507 const dim3 nthrds(1024, 1, 1);
508 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
509
511 nblcks, nthrds, 0, strm, (real *) a,
512 (real *) b, (real *) c, *n);
514 }
515
520 void hip_sub2(void *a, void *b, int *n, hipStream_t strm) {
521
522 const dim3 nthrds(1024, 1, 1);
523 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
524
526 nblcks, nthrds, 0, strm, (real *) a,
527 (real *) b, *n);
529 }
530
535 void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm) {
536
537 const dim3 nthrds(1024, 1, 1);
538 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
539
541 nblcks, nthrds, 0, strm, (real *) a,
542 (real *) b, (real *) c, *n);
544 }
545
550 void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
551
552 const dim3 nthrds(1024, 1, 1);
553 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
554
556 nblcks, nthrds, 0, strm, (real *) a,
557 (real *) b, (real *) c, *n);
559 }
560
565 void hip_addcol4(void *a, void *b, void *c, void *d, int *n,
567
568 const dim3 nthrds(1024, 1, 1);
569 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
570
572 nblcks, nthrds, 0, strm, (real *) a,
573 (real *) b, (real *) c, (real *) d, *n);
575 }
576
581 void hip_addcol3s2(void *a, void *b, void *c, real *s, int *n,
583
584 const dim3 nthrds(1024, 1, 1);
585 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
586
588 nblcks, nthrds, 0, strm, (real *) a,
589 (real *) b, (real *) c, *s, *n);
591 }
592
597 void hip_vdot3(void *dot, void *u1, void *u2, void *u3,
598 void *v1, void *v2, void *v3, int *n,
600
601 const dim3 nthrds(1024, 1, 1);
602 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
603
605 nblcks, nthrds, 0, strm,
606 (real *) dot, (real *) u1, (real *) u2, (real *) u3,
607 (real *) v1, (real *) v2, (real *) v3, *n);
609 }
610
615 void hip_vcross(void *u1, void *u2, void *u3,
616 void *v1, void *v2, void *v3,
617 void *w1, void *w2, void *w3,
618 int *n, hipStream_t strm) {
619
620 const dim3 nthrds(1024, 1, 1);
621 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
622
624 nblcks, nthrds, 0, strm,
625 (real *) u1, (real *) u2, (real *) u3,
626 (real *) v1, (real *) v2, (real *) v3,
627 (real *) w1, (real *) w2, (real *) w3, *n);
629 }
630
631
632 /*
633 * Reduction buffer
634 */
635 int red_s = 0;
638
640 if ( nb >= red_s) {
641 red_s = nb+1;
642 if (bufred != NULL) {
645 }
648 }
649 }
650
655 const hipStream_t stream) {
656 #ifdef HAVE_RCCL
658 DEVICE_NCCL_SUM, stream);
660 hipMemcpyDeviceToHost, stream));
662 #elif HAVE_DEVICE_MPI
665 #else
667 hipMemcpyDeviceToHost, stream));
669 #endif
670 }
671
672
677 const hipStream_t stream) {
678 #ifdef HAVE_RCCL
680 DEVICE_NCCL_MAX, stream);
682 hipMemcpyDeviceToHost, stream));
684 #elif HAVE_DEVICE_MPI
687 #else
689 hipMemcpyDeviceToHost, stream));
691 #endif
692 }
693
698 const hipStream_t stream) {
699 #ifdef HAVE_RCCL
701 DEVICE_NCCL_MIN, stream);
703 hipMemcpyDeviceToHost, stream));
705 #elif HAVE_DEVICE_MPI
708 #else
710 hipMemcpyDeviceToHost, stream));
712 #endif
713 }
714
719 real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream) {
720
721 const dim3 nthrds(1024, 1, 1);
722 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
723 const int nb = ((*n) + 1024 - 1)/ 1024;
724
726
727
729 nblcks, nthrds, 0, stream,
730 (real *) u, (real *) v,
731 (real *) w, bufred_d, *n);
734 1, 1024, 0, stream, bufred_d, nb);
736
738 hipMemcpyDeviceToHost, stream));
740
741 return bufred[0];
742 }
743
748 real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream) {
749
750 const dim3 nthrds(1024, 1, 1);
751 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
752 const int nb = ((*n) + 1024 - 1)/ 1024;
753
755
756 if (*n > 0) {
758 nblcks, nthrds, 0, stream,
759 (real *) a, (real *) b,
760 (real *) c, bufred_d, *n);
763 1, 1024, 0, stream, bufred_d, nb);
765 }
766 else {
767 hip_rzero(bufred_d, &red_s, stream);
768 }
770
771 return bufred[0];
772 }
773
778 void hip_glsc3_many(real *h, void * w, void *v,void *mult, int *j, int *n,
779 hipStream_t stream){
780 int pow2 = 1;
781 while(pow2 < (*j)){
782 pow2 = 2*pow2;
783 }
784 const int nt = 1024/pow2;
785 const dim3 nthrds(pow2, nt, 1);
786 const dim3 nblcks(((*n)+nt - 1)/nt, 1, 1);
787 const dim3 nthrds_red(1024,1,1);
788 const dim3 nblcks_red( (*j),1,1);
789 const int nb = ((*n) + nt - 1)/nt;
790
792
793 if (*n > 0) {
795 nblcks, nthrds, 0, stream,
796 (const real *) w, (const real **) v,
797 (const real *)mult, bufred_d, *j, *n);
799
801 nblcks_red, nthrds_red, 0, stream,
802 bufred_d, nb, *j);
804 }
805 else {
806 hip_rzero(bufred_d, &red_s, stream);
807 }
808 hip_global_reduce_add(h, bufred_d, (*j), stream);
809 }
810
815 real hip_glsc2(void *a, void *b, int *n, hipStream_t stream) {
816
817 const dim3 nthrds(1024, 1, 1);
818 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
819 const int nb = ((*n) + 1024 - 1)/ 1024;
820
821
823
824 if( *n > 0) {
826 nblcks, nthrds, 0, stream,
827 (real *) a, (real *) b, bufred_d, *n);
830 1, 1024, 0, stream, bufred_d, nb);
832 }
833 else {
834 hip_rzero(bufred_d, &red_s, stream);
835 }
837
838 return bufred[0];
839 }
840
845 real hip_glsubnorm2(void* a, void* b, int* n, hipStream_t stream) {
846
847 const dim3 nthrds(1024, 1, 1);
848 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
849 const int nb = ((*n) + 1024 - 1) / 1024;
850
852
853 if (*n > 0) {
855 nblcks, nthrds, 0, stream,
856 (real*)a, (real*)b, bufred_d, *n);
859 1, 1024, 0, stream, bufred_d, nb);
861 }
862 else {
863 hip_rzero(bufred_d, &red_s, stream);
864 }
866
867 return bufred[0];
868 }
869
874 real hip_glsum(void *a, int *n, hipStream_t stream) {
875 const dim3 nthrds(1024, 1, 1);
876 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
877 const int nb = ((*n) + 1024 - 1)/ 1024;
878
880 if( *n > 0) {
882 nblcks, nthrds, 0, stream,
883 (real *) a, bufred_d, *n);
886 1, 1024, 0, stream, bufred_d, nb);
888 }
889 else {
890 hip_rzero(bufred_d, &red_s, stream);
891 }
892
894
895 return bufred[0];
896 }
897
898
903 real hip_glmax(void *a, real *ninf, int *n, hipStream_t stream) {
904 const dim3 nthrds(1024, 1, 1);
905 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
906 const int nb = ((*n) + 1024 - 1)/ 1024;
907
909 if( *n > 0) {
911 nblcks, nthrds, 0, stream,
912 (real *) a, *ninf, bufred_d, *n);
915 1, 1024, 0, stream, bufred_d, *ninf, nb);
917 }
918 else {
919 hip_rzero(bufred_d, &red_s, stream);
920 }
921
923
924 return bufred[0];
925 }
926
931 real hip_glmin(void *a, real *pinf, int *n, hipStream_t stream) {
932 const dim3 nthrds(1024, 1, 1);
933 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
934 const int nb = ((*n) + 1024 - 1)/ 1024;
935
937 if( *n > 0) {
939 nblcks, nthrds, 0, stream,
940 (real *) a, *pinf, bufred_d, *n);
943 1, 1024, 0, stream, bufred_d, *pinf, nb);
945 }
946 else {
947 hip_rzero(bufred_d, &red_s, stream);
948 }
949
951
952 return bufred[0];
953 }
954
959 void hip_absval(void *a, int *n, hipStream_t stream) {
960
961 const dim3 nthrds(1024, 1, 1);
962 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
963
965 nblcks, nthrds, 0, stream, (real *) a, *n);
967
968}
969
970 // ======================================================================== //
971 // Point-wise operations.
972
977 void hip_pwmax_vec2(void* a, void* b, int* n, hipStream_t stream) {
978
979 const dim3 nthrds(1024, 1, 1);
980 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
981
983 nblcks, nthrds, 0, stream, (real*)a, (real*)b, *n);
985}
986
991 void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
992
993 const dim3 nthrds(1024, 1, 1);
994 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
995
997 nblcks, nthrds, 0, stream,
998 (real *)a, (real *)b, (real *)c, *n);
1000 }
1001
1006 void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream) {
1007
1008 const dim3 nthrds(1024, 1, 1);
1009 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1010
1012 nblcks, nthrds, 0, stream, (real *)a, *c, *n);
1014 }
1015
1020 void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
1021
1022 const dim3 nthrds(1024, 1, 1);
1023 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1024
1026 nblcks, nthrds, 0, stream,
1027 (real *)a, (real *)b, *c, *n);
1029 }
1030
1035 void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream) {
1036
1037 const dim3 nthrds(1024, 1, 1);
1038 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1039
1041 nblcks, nthrds, 0, stream,
1042 (real *)a, (real *)b, *n);
1044 }
1045
1050 void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
1051
1052 const dim3 nthrds(1024, 1, 1);
1053 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1054
1056 nblcks, nthrds, 0, stream,
1057 (real *)a, (real *)b, (real *)c, *n);
1059 }
1060
1065 void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream) {
1066
1067 const dim3 nthrds(1024, 1, 1);
1068 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1069
1071 nblcks, nthrds, 0, stream,
1072 (real *)a, *c, *n);
1074 }
1075
1080 void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
1081
1082 const dim3 nthrds(1024, 1, 1);
1083 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1084
1086 nblcks, nthrds, 0, stream,
1087 (real *)a, (real *)b, *c, *n);
1089 }
1090
1091 // ======================================================================== //
1092
1096 void hip_iadd(void *a, int *c, int *n, hipStream_t stream) {
1097
1098 const dim3 nthrds(1024, 1, 1);
1099 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
1100
1102 nblcks, nthrds, 0, stream,
1103 (int *) a, *c, *n);
1105 }
1106
1107} /* extern "C" */
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
const int e
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
const int j
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
double real
#define DEVICE_MPI_MAX
#define DEVICE_MPI_MIN
#define DEVICE_MPI_SUM
void device_mpi_allreduce(void *buf_d, void *buf, int count, int nbytes, int op)
#define DEVICE_NCCL_MAX
#define DEVICE_NCCL_MIN
#define DEVICE_NCCL_SUM
void device_nccl_allreduce(void *sbuf_d, void *rbuf_d, int count, int nbytes, int op, void *stream)
#define HIP_CHECK(err)
Definition check.h:8
void hip_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n, hipStream_t strm)
Definition math.hip:597
void hip_global_reduce_add(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:654
void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:490
void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:365
real hip_glmax(void *a, real *ninf, int *n, hipStream_t stream)
Definition math.hip:903
void hip_rzero(void *a, int *n, hipStream_t strm)
Definition math.hip:161
void hip_cfill_mask(void *a, real *c, int *size, void *mask, int *mask_size, hipStream_t strm)
Definition math.hip:145
void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:1020
void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:460
void hip_absval(void *a, int *n, hipStream_t stream)
Definition math.hip:959
void hip_masked_gather_copy_aligned(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:94
void hip_vcross(void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, void *w1, void *w2, void *w3, int *n, hipStream_t strm)
Definition math.hip:615
real hip_glsubnorm2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:845
void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:550
void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n, hipStream_t strm)
Definition math.hip:381
void hip_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1, real *c2, real *c3, real *c4, int *n, hipStream_t strm)
Definition math.hip:415
void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:330
real hip_glsum(void *a, int *n, hipStream_t stream)
Definition math.hip:874
void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:298
void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:238
real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream)
Definition math.hip:719
void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:210
void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:1065
void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:182
real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:748
void hip_invcol1(void *a, int *n, hipStream_t strm)
Definition math.hip:432
void hip_invcol2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:446
void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:61
void hip_global_reduce_max(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:676
void hip_cdiv(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:196
void hip_sub2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:520
real hip_glsc2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:815
real * bufred
Definition math.hip:636
void hip_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2, real *c3, int *n, hipStream_t strm)
Definition math.hip:398
void hip_col2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:475
real hip_glmin(void *a, real *pinf, int *n, hipStream_t stream)
Definition math.hip:931
void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:1050
void hip_addcol3s2(void *a, void *b, void *c, real *s, int *n, hipStream_t strm)
Definition math.hip:581
void hip_masked_gather_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:78
void hip_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n, hipStream_t stream)
Definition math.hip:778
int red_s
Definition math.hip:635
void hip_cfill(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:251
void hip_masked_atomic_reduction(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:128
void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:283
void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:991
void hip_add2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:268
void hip_copy(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:53
void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:505
void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:1006
void hip_iadd(void *a, int *c, int *n, hipStream_t stream)
Definition math.hip:1096
void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:1080
void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:314
void hip_radd(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:224
void hip_redbuf_check_alloc(int nb)
Definition math.hip:639
void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n, hipStream_t strm)
Definition math.hip:347
void hip_addcol4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:565
void hip_pwmax_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:977
void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:1035
void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:535
void hip_masked_scatter_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:111
void hip_cmult(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:168
real * bufred_d
Definition math.hip:637
void hip_global_reduce_min(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:697
Object for handling masks in Neko.
Definition mask.f90:34