Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.hip
Go to the documentation of this file.
1/*
2 Copyright (c) 2021-2025, The Neko Authors
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
16
17 * Neither the name of the authors nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#include <hip/hip_runtime.h>
37#include <device/hip/check.h>
38#include "math_kernel.h"
39
40extern "C" {
41
44
45#ifdef HAVE_RCCL
48#endif
49
53 void hip_copy(void *a, void *b, int *n, hipStream_t strm) {
54 HIP_CHECK(hipMemcpyAsync(a, b, (*n) * sizeof(real),
56 }
57
61 void hip_masked_copy(void *a, void *b, void *mask,
62 int *n, int *m, hipStream_t strm) {
63
64 const dim3 nthrds(1024, 1, 1);
65 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
66
68 nblcks, nthrds, 0, strm, (real *) a,
69 (real *) b, (int *) mask, *n, *m);
70
72
73 }
74
78 void hip_masked_gather_copy(void *a, void *b, void *mask,
79 int *n, int *m, hipStream_t strm) {
80
81 const dim3 nthrds(1024, 1, 1);
82 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
83
85 nblcks, nthrds, 0, strm, (real *) a,
86 (real *) b, (int *) mask, *n, *m);
88
89 }
90
94 void hip_masked_gather_copy_aligned(void *a, void *b, void *mask,
95 int *n, int *m, hipStream_t strm) {
96
97 const dim3 nthrds(1024, 1, 1);
98 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
99
101 nblcks, nthrds, 0, strm, (real *) a,
102 (real *) b, (int *) mask, *n, *m);
104
105 }
106
110 void hip_face_masked_gather_copy(void *a, void *b, void *mask,
111 void *facet, int *n1, int *n2, int *lx,
112 int *ly, int *lz, int *m,
114
115 const dim3 nthrds(1024, 1, 1);
116 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
117
119 nblcks, nthrds, 0, strm, (real *) a, (real *) b,
120 (int *) mask, (int *) facet, *n1, *n2, *lx, *ly, *lz,
121 *m);
123
124 }
125
126
130 void hip_masked_scatter_copy(void *a, void *b, void *mask,
131 int *n, int *m, hipStream_t strm) {
132
133 const dim3 nthrds(1024, 1, 1);
134 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
135
137 nblcks, nthrds, 0, strm, (real *) a,
138 (real *) b, (int *) mask, *n, *m);
139
141
142 }
143
147 void hip_masked_scatter_copy_aligned(void *a, void *b, void *mask,
148 int *n, int *m, hipStream_t strm) {
149
150 const dim3 nthrds(1024, 1, 1);
151 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
152
154 nblcks, nthrds, 0, strm, (real *) a,
155 (real *) b, (int *) mask, *n, *m);
156
158
159 }
160
164 void hip_masked_atomic_reduction(void *a, void *b, void *mask,
165 int *n, int *m, hipStream_t strm) {
166
167 const dim3 nthrds(1024, 1, 1);
168 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
169
171 nblcks, nthrds, 0, strm, (real *) a,
172 (real *) b, (int *) mask, *n, *m);
173
175
176 }
177
181 void hip_cfill_mask(void* a, real* c, int* size, void* mask, int* mask_size,
183
184 const dim3 nthrds(1024, 1, 1);
185 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
186
188 nblcks, nthrds, 0, strm, (real*)a,
189 *c, *size, (int*)mask, *mask_size);
190
192 }
193
197 void hip_rzero(void *a, int *n, hipStream_t strm) {
198 HIP_CHECK(hipMemsetAsync(a, 0, (*n) * sizeof(real), strm));
199 }
200
204 void hip_cmult(void *a, real *c, int *n, hipStream_t strm) {
205
206 const dim3 nthrds(1024, 1, 1);
207 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
208
210 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
212
213 }
214
218 void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm) {
219
220 const dim3 nthrds(1024, 1, 1);
221 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
222
224 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
226
227 }
228
232 void hip_cdiv(void *a, real *c, int *n, hipStream_t strm) {
233
234 const dim3 nthrds(1024, 1, 1);
235 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
236
238 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
240
241 }
242
246 void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm) {
247
248 const dim3 nthrds(1024, 1, 1);
249 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
250
252 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
254
255 }
256
260 void hip_radd(void *a, real *c, int *n, hipStream_t strm) {
261
262 const dim3 nthrds(1024, 1, 1);
263 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
264
266 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
268 }
269
274 void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm) {
275
276 const dim3 nthrds(1024, 1, 1);
277 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
278
280 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *c, *n);
282 }
283
288 void hip_cwrap(void *a, real *min_val, real *max_val, int *n,
290
291 const dim3 nthrds(1024, 1, 1);
292 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
293
295 nblcks, nthrds, 0, strm, (real *) a,
296 *min_val, *max_val, *n);
298 }
299
303 void hip_cfill(void *a, real *c, int *n, hipStream_t strm) {
304
305 const dim3 nthrds(1024, 1, 1);
306 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
307
308 if (*n > 0) {
310 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
312 }
313
314 }
315
320 void hip_add2(void *a, void *b, int *n, hipStream_t strm) {
321
322 const dim3 nthrds(1024, 1, 1);
323 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
324
326 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
328
329 }
330
335 void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm) {
336
337 const dim3 nthrds(1024, 1, 1);
338 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
339
341 nblcks, nthrds, 0, strm, (real *) a,
342 (real *) b, (real *) c, *n);
344 }
345
350 void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm) {
351
352 const dim3 nthrds(1024, 1, 1);
353 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
354
356 nblcks, nthrds, 0, strm, (real *) a,
357 (real *) b, (real *) c, (real *) d, *n);
359 }
360
366 void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm) {
367
368 const dim3 nthrds(1024, 1, 1);
369 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
370
372 nblcks, nthrds, 0, strm, (real *) a,
373 (real *) b, *c1, *n);
375 }
376
382 void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
383
384 const dim3 nthrds(1024, 1, 1);
385 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
386
388 nblcks, nthrds, 0, strm, (real *) a, (real *) b,
389 *c1, *n);
391 }
392
399 void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n,
401
402 const dim3 nthrds(1024, 1, 1);
403 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
404
406 nblcks, nthrds, 0, strm, (real *) x,
407 (const real **) p, (real *) alpha, *j, *n);
409
410 }
411
417 void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
418
419 const dim3 nthrds(1024, 1, 1);
420 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
421
423 nblcks, nthrds, 0, strm, (real *) a,
424 (real *) b, *c1, *n);
426 }
427
433 void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n,
435
436 const dim3 nthrds(1024, 1, 1);
437 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
438
440 nblcks, nthrds, 0, strm, (real *) a,
441 (real *) b, (real *) c, *c1, *c2, *n);
443 }
444
450 void hip_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2,
451 real *c3, int *n, hipStream_t strm) {
452
453 const dim3 nthrds(1024, 1, 1);
454 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
455
457 nblcks, nthrds, 0, strm, (real *) a,
458 (real *) b, (real *) c, (real *) d, *c1, *c2, *c3, *n);
460 }
461
467 void hip_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1,
468 real *c2, real *c3, real *c4, int *n, hipStream_t strm) {
469
470 const dim3 nthrds(1024, 1, 1);
471 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
472
474 nblcks, nthrds, 0, strm, (real *) a,
475 (real *) b, (real *) c, (real *) d, (real *) e,
476 *c1, *c2, *c3, *c4, *n);
478 }
479
484 void hip_invcol1(void *a, int *n, hipStream_t strm) {
485
486 const dim3 nthrds(1024, 1, 1);
487 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
488
490 nblcks, nthrds, 0, strm, (real *) a, *n);
492 }
493
498 void hip_invcol2(void *a, void *b, int *n, hipStream_t strm) {
499
500 const dim3 nthrds(1024, 1, 1);
501 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
502
504 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
506 }
507
512 void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
513
514 const dim3 nthrds(1024, 1, 1);
515 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
516
518 nblcks, nthrds, 0, strm, (real *) a,
519 (real *) b, (real *) c, *n);
521 }
522
527 void hip_col2(void *a, void *b, int *n, hipStream_t strm) {
528
529 const dim3 nthrds(1024, 1, 1);
530 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
531
533 nblcks, nthrds, 0, strm, (real *) a,
534 (real *) b, *n);
536 }
537
542 void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm) {
543
544 const dim3 nthrds(1024, 1, 1);
545 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
546
548 nblcks, nthrds, 0, strm, (real *) a,
549 (real *) b, (real *) c, *n);
551 }
552
557 void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
558
559 const dim3 nthrds(1024, 1, 1);
560 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
561
563 nblcks, nthrds, 0, strm, (real *) a,
564 (real *) b, (real *) c, *n);
566 }
567
572 void hip_sub2(void *a, void *b, int *n, hipStream_t strm) {
573
574 const dim3 nthrds(1024, 1, 1);
575 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
576
578 nblcks, nthrds, 0, strm, (real *) a,
579 (real *) b, *n);
581 }
582
587 void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm) {
588
589 const dim3 nthrds(1024, 1, 1);
590 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
591
593 nblcks, nthrds, 0, strm, (real *) a,
594 (real *) b, (real *) c, *n);
596 }
597
602 void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
603
604 const dim3 nthrds(1024, 1, 1);
605 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
606
608 nblcks, nthrds, 0, strm, (real *) a,
609 (real *) b, (real *) c, *n);
611 }
612
617 void hip_addcol4(void *a, void *b, void *c, void *d, int *n,
619
620 const dim3 nthrds(1024, 1, 1);
621 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
622
624 nblcks, nthrds, 0, strm, (real *) a,
625 (real *) b, (real *) c, (real *) d, *n);
627 }
628
633 void hip_addcol3s2(void *a, void *b, void *c, real *s, int *n,
635
636 const dim3 nthrds(1024, 1, 1);
637 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
638
640 nblcks, nthrds, 0, strm, (real *) a,
641 (real *) b, (real *) c, *s, *n);
643 }
644
649 void hip_vdot3(void *dot, void *u1, void *u2, void *u3,
650 void *v1, void *v2, void *v3, int *n,
652
653 const dim3 nthrds(1024, 1, 1);
654 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
655
657 nblcks, nthrds, 0, strm,
658 (real *) dot, (real *) u1, (real *) u2, (real *) u3,
659 (real *) v1, (real *) v2, (real *) v3, *n);
661 }
662
667 void hip_vcross(void *u1, void *u2, void *u3,
668 void *v1, void *v2, void *v3,
669 void *w1, void *w2, void *w3,
670 int *n, hipStream_t strm) {
671
672 const dim3 nthrds(1024, 1, 1);
673 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
674
676 nblcks, nthrds, 0, strm,
677 (real *) u1, (real *) u2, (real *) u3,
678 (real *) v1, (real *) v2, (real *) v3,
679 (real *) w1, (real *) w2, (real *) w3, *n);
681 }
682
683
684 /*
685 * Reduction buffer
686 */
687 int red_s = 0;
690
692 if ( nb >= red_s) {
693 red_s = nb+1;
694 if (bufred != NULL) {
697 }
700 }
701 }
702
707 const hipStream_t stream) {
708 #ifdef HAVE_RCCL
710 DEVICE_NCCL_SUM, stream);
712 hipMemcpyDeviceToHost, stream));
714 #elif HAVE_DEVICE_MPI
717 #else
719 hipMemcpyDeviceToHost, stream));
721 #endif
722 }
723
724
729 const hipStream_t stream) {
730 #ifdef HAVE_RCCL
732 DEVICE_NCCL_MAX, stream);
734 hipMemcpyDeviceToHost, stream));
736 #elif HAVE_DEVICE_MPI
739 #else
741 hipMemcpyDeviceToHost, stream));
743 #endif
744 }
745
750 const hipStream_t stream) {
751 #ifdef HAVE_RCCL
753 DEVICE_NCCL_MIN, stream);
755 hipMemcpyDeviceToHost, stream));
757 #elif HAVE_DEVICE_MPI
760 #else
762 hipMemcpyDeviceToHost, stream));
764 #endif
765 }
766
771 real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream) {
772
773 const dim3 nthrds(1024, 1, 1);
774 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
775 const int nb = ((*n) + 1024 - 1)/ 1024;
776
778
779
781 nblcks, nthrds, 0, stream,
782 (real *) u, (real *) v,
783 (real *) w, bufred_d, *n);
786 1, 1024, 0, stream, bufred_d, nb);
788
790 hipMemcpyDeviceToHost, stream));
792
793 return bufred[0];
794 }
795
800 real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream) {
801
802 const dim3 nthrds(1024, 1, 1);
803 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
804 const int nb = ((*n) + 1024 - 1)/ 1024;
805
807
808 if (*n > 0) {
810 nblcks, nthrds, 0, stream,
811 (real *) a, (real *) b,
812 (real *) c, bufred_d, *n);
815 1, 1024, 0, stream, bufred_d, nb);
817 }
818 else {
819 hip_rzero(bufred_d, &red_s, stream);
820 }
822
823 return bufred[0];
824 }
825
830 void hip_glsc3_many(real *h, void * w, void *v,void *mult, int *j, int *n,
831 hipStream_t stream){
832 int pow2 = 1;
833 while(pow2 < (*j)){
834 pow2 = 2*pow2;
835 }
836 const int nt = 1024/pow2;
837 const dim3 nthrds(pow2, nt, 1);
838 const dim3 nblcks(((*n)+nt - 1)/nt, 1, 1);
839 const dim3 nthrds_red(1024,1,1);
840 const dim3 nblcks_red( (*j),1,1);
841 const int nb = ((*n) + nt - 1)/nt;
842
844
845 if (*n > 0) {
847 nblcks, nthrds, 0, stream,
848 (const real *) w, (const real **) v,
849 (const real *)mult, bufred_d, *j, *n);
851
853 nblcks_red, nthrds_red, 0, stream,
854 bufred_d, nb, *j);
856 }
857 else {
858 hip_rzero(bufred_d, &red_s, stream);
859 }
860 hip_global_reduce_add(h, bufred_d, (*j), stream);
861 }
862
867 real hip_glsc2(void *a, void *b, int *n, hipStream_t stream) {
868
869 const dim3 nthrds(1024, 1, 1);
870 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
871 const int nb = ((*n) + 1024 - 1)/ 1024;
872
873
875
876 if( *n > 0) {
878 nblcks, nthrds, 0, stream,
879 (real *) a, (real *) b, bufred_d, *n);
882 1, 1024, 0, stream, bufred_d, nb);
884 }
885 else {
886 hip_rzero(bufred_d, &red_s, stream);
887 }
889
890 return bufred[0];
891 }
892
897 real hip_glsubnorm2(void* a, void* b, int* n, hipStream_t stream) {
898
899 const dim3 nthrds(1024, 1, 1);
900 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
901 const int nb = ((*n) + 1024 - 1) / 1024;
902
904
905 if (*n > 0) {
907 nblcks, nthrds, 0, stream,
908 (real*)a, (real*)b, bufred_d, *n);
911 1, 1024, 0, stream, bufred_d, nb);
913 }
914 else {
915 hip_rzero(bufred_d, &red_s, stream);
916 }
918
919 return bufred[0];
920 }
921
926 real hip_glsum(void *a, int *n, hipStream_t stream) {
927 const dim3 nthrds(1024, 1, 1);
928 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
929 const int nb = ((*n) + 1024 - 1)/ 1024;
930
932 if( *n > 0) {
934 nblcks, nthrds, 0, stream,
935 (real *) a, bufred_d, *n);
938 1, 1024, 0, stream, bufred_d, nb);
940 }
941 else {
942 hip_rzero(bufred_d, &red_s, stream);
943 }
944
946
947 return bufred[0];
948 }
949
950
955 real hip_glmax(void *a, real *ninf, int *n, hipStream_t stream) {
956 const dim3 nthrds(1024, 1, 1);
957 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
958 const int nb = ((*n) + 1024 - 1)/ 1024;
959
961 if( *n > 0) {
963 nblcks, nthrds, 0, stream,
964 (real *) a, *ninf, bufred_d, *n);
967 1, 1024, 0, stream, bufred_d, *ninf, nb);
969 }
970 else {
971 hip_rzero(bufred_d, &red_s, stream);
972 }
973
975
976 return bufred[0];
977 }
978
983 real hip_glmin(void *a, real *pinf, int *n, hipStream_t stream) {
984 const dim3 nthrds(1024, 1, 1);
985 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
986 const int nb = ((*n) + 1024 - 1)/ 1024;
987
989 if( *n > 0) {
991 nblcks, nthrds, 0, stream,
992 (real *) a, *pinf, bufred_d, *n);
995 1, 1024, 0, stream, bufred_d, *pinf, nb);
997 }
998 else {
999 hip_rzero(bufred_d, &red_s, stream);
1000 }
1001
1003
1004 return bufred[0];
1005 }
1006
1011 void hip_absval(void *a, int *n, hipStream_t stream) {
1012
1013 const dim3 nthrds(1024, 1, 1);
1014 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
1015
1017 nblcks, nthrds, 0, stream, (real *) a, *n);
1019
1020}
1021
1022 // ======================================================================== //
1023 // Point-wise operations.
1024
1029 void hip_pwmax_vec2(void* a, void* b, int* n, hipStream_t stream) {
1030
1031 const dim3 nthrds(1024, 1, 1);
1032 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1033
1035 nblcks, nthrds, 0, stream, (real*)a, (real*)b, *n);
1037}
1038
1043 void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
1044
1045 const dim3 nthrds(1024, 1, 1);
1046 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1047
1049 nblcks, nthrds, 0, stream,
1050 (real *)a, (real *)b, (real *)c, *n);
1052 }
1053
1058 void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream) {
1059
1060 const dim3 nthrds(1024, 1, 1);
1061 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1062
1064 nblcks, nthrds, 0, stream, (real *)a, *c, *n);
1066 }
1067
1072 void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
1073
1074 const dim3 nthrds(1024, 1, 1);
1075 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1076
1078 nblcks, nthrds, 0, stream,
1079 (real *)a, (real *)b, *c, *n);
1081 }
1082
1087 void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream) {
1088
1089 const dim3 nthrds(1024, 1, 1);
1090 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1091
1093 nblcks, nthrds, 0, stream,
1094 (real *)a, (real *)b, *n);
1096 }
1097
1102 void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
1103
1104 const dim3 nthrds(1024, 1, 1);
1105 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1106
1108 nblcks, nthrds, 0, stream,
1109 (real *)a, (real *)b, (real *)c, *n);
1111 }
1112
1117 void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream) {
1118
1119 const dim3 nthrds(1024, 1, 1);
1120 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1121
1123 nblcks, nthrds, 0, stream,
1124 (real *)a, *c, *n);
1126 }
1127
1132 void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
1133
1134 const dim3 nthrds(1024, 1, 1);
1135 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
1136
1138 nblcks, nthrds, 0, stream,
1139 (real *)a, (real *)b, *c, *n);
1141 }
1142
1143 // ======================================================================== //
1144
1148 void hip_iadd(void *a, int *c, int *n, hipStream_t stream) {
1149
1150 const dim3 nthrds(1024, 1, 1);
1151 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
1152
1154 nblcks, nthrds, 0, stream,
1155 (int *) a, *c, *n);
1157 }
1158
1159} /* extern "C" */
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
const int e
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
const int j
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
double real
#define DEVICE_MPI_MAX
#define DEVICE_MPI_MIN
#define DEVICE_MPI_SUM
void device_mpi_allreduce(void *buf_d, void *buf, int count, int nbytes, int op)
#define DEVICE_NCCL_MAX
#define DEVICE_NCCL_MIN
#define DEVICE_NCCL_SUM
void device_nccl_allreduce(void *sbuf_d, void *rbuf_d, int count, int nbytes, int op, void *stream)
#define HIP_CHECK(err)
Definition check.h:8
void hip_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n, hipStream_t strm)
Definition math.hip:649
void hip_global_reduce_add(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:706
void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:542
void hip_cwrap(void *a, real *min_val, real *max_val, int *n, hipStream_t strm)
Definition math.hip:288
void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:417
real hip_glmax(void *a, real *ninf, int *n, hipStream_t stream)
Definition math.hip:955
void hip_rzero(void *a, int *n, hipStream_t strm)
Definition math.hip:197
void hip_cfill_mask(void *a, real *c, int *size, void *mask, int *mask_size, hipStream_t strm)
Definition math.hip:181
void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:1072
void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:512
void hip_absval(void *a, int *n, hipStream_t stream)
Definition math.hip:1011
void hip_masked_gather_copy_aligned(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:94
void hip_vcross(void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, void *w1, void *w2, void *w3, int *n, hipStream_t strm)
Definition math.hip:667
real hip_glsubnorm2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:897
void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:602
void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n, hipStream_t strm)
Definition math.hip:433
void hip_masked_scatter_copy_aligned(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:147
void hip_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1, real *c2, real *c3, real *c4, int *n, hipStream_t strm)
Definition math.hip:467
void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:382
real hip_glsum(void *a, int *n, hipStream_t stream)
Definition math.hip:926
void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:350
void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:274
real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream)
Definition math.hip:771
void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:246
void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:1117
void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:218
real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:800
void hip_invcol1(void *a, int *n, hipStream_t strm)
Definition math.hip:484
void hip_invcol2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:498
void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:61
void hip_global_reduce_max(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:728
void hip_cdiv(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:232
void hip_sub2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:572
real hip_glsc2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:867
real * bufred
Definition math.hip:688
void hip_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2, real *c3, int *n, hipStream_t strm)
Definition math.hip:450
void hip_col2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:527
real hip_glmin(void *a, real *pinf, int *n, hipStream_t stream)
Definition math.hip:983
void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:1102
void hip_addcol3s2(void *a, void *b, void *c, real *s, int *n, hipStream_t strm)
Definition math.hip:633
void hip_masked_gather_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:78
void hip_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n, hipStream_t stream)
Definition math.hip:830
int red_s
Definition math.hip:687
void hip_cfill(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:303
void hip_masked_atomic_reduction(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:164
void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:335
void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:1043
void hip_add2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:320
void hip_copy(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:53
void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:557
void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:1058
void hip_iadd(void *a, int *c, int *n, hipStream_t stream)
Definition math.hip:1148
void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:1132
void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:366
void hip_radd(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:260
void hip_redbuf_check_alloc(int nb)
Definition math.hip:691
void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n, hipStream_t strm)
Definition math.hip:399
void hip_addcol4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:617
void hip_pwmax_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:1029
void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:1087
void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:587
void hip_face_masked_gather_copy(void *a, void *b, void *mask, void *facet, int *n1, int *n2, int *lx, int *ly, int *lz, int *m, hipStream_t strm)
Definition math.hip:110
void hip_masked_scatter_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:130
void hip_cmult(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:204
real * bufred_d
Definition math.hip:689
void hip_global_reduce_min(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:749
Object for handling masks in Neko.
Definition mask.f90:34