Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.hip
Go to the documentation of this file.
1/*
2 Copyright (c) 2021-2025, The Neko Authors
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
16
17 * Neither the name of the authors nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#include <hip/hip_runtime.h>
37#include <device/hip/check.h>
38#include "math_kernel.h"
39
40extern "C" {
41
44
45#ifdef HAVE_RCCL
48#endif
49
53 void hip_copy(void *a, void *b, int *n, hipStream_t strm) {
54 HIP_CHECK(hipMemcpyAsync(a, b, (*n) * sizeof(real),
56 }
57
61 void hip_masked_copy(void *a, void *b, void *mask,
62 int *n, int *m, hipStream_t strm) {
63
64 const dim3 nthrds(1024, 1, 1);
65 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
66
68 nblcks, nthrds, 0, strm, (real *) a,
69 (real *) b, (int *) mask, *n, *m);
70
72
73 }
74
78 void hip_masked_gather_copy(void *a, void *b, void *mask,
79 int *n, int *m, hipStream_t strm) {
80
81 const dim3 nthrds(1024, 1, 1);
82 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
83
85 nblcks, nthrds, 0, strm, (real *) a,
86 (real *) b, (int *) mask, *n, *m);
88
89 }
90
94 void hip_masked_scatter_copy(void *a, void *b, void *mask,
95 int *n, int *m, hipStream_t strm) {
96
97 const dim3 nthrds(1024, 1, 1);
98 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
99
101 nblcks, nthrds, 0, strm, (real *) a,
102 (real *) b, (int *) mask, *n, *m);
103
105
106 }
107
111 void hip_masked_atomic_reduction(void *a, void *b, void *mask,
112 int *n, int *m, hipStream_t strm) {
113
114 const dim3 nthrds(1024, 1, 1);
115 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
116
118 nblcks, nthrds, 0, strm, (real *) a,
119 (real *) b, (int *) mask, *n, *m);
120
122
123 }
124
128 void hip_cfill_mask(void* a, real* c, int* size, void* mask, int* mask_size,
130
131 const dim3 nthrds(1024, 1, 1);
132 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
133
135 nblcks, nthrds, 0, strm, (real*)a,
136 *c, *size, (int*)mask, *mask_size);
137
139 }
140
144 void hip_rzero(void *a, int *n, hipStream_t strm) {
145 HIP_CHECK(hipMemsetAsync(a, 0, (*n) * sizeof(real), strm));
146 }
147
151 void hip_cmult(void *a, real *c, int *n, hipStream_t strm) {
152
153 const dim3 nthrds(1024, 1, 1);
154 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
155
157 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
159
160 }
161
165 void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm) {
166
167 const dim3 nthrds(1024, 1, 1);
168 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
169
171 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
173
174 }
175
179 void hip_cdiv(void *a, real *c, int *n, hipStream_t strm) {
180
181 const dim3 nthrds(1024, 1, 1);
182 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
183
185 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
187
188 }
189
193 void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm) {
194
195 const dim3 nthrds(1024, 1, 1);
196 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
197
199 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
201
202 }
203
207 void hip_radd(void *a, real *c, int *n, hipStream_t strm) {
208
209 const dim3 nthrds(1024, 1, 1);
210 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
211
213 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
215 }
216
221 void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm) {
222
223 const dim3 nthrds(1024, 1, 1);
224 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
225
227 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *c, *n);
229 }
230
234 void hip_cfill(void *a, real *c, int *n, hipStream_t strm) {
235
236 const dim3 nthrds(1024, 1, 1);
237 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
238
239 if (*n > 0) {
241 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
243 }
244
245 }
246
251 void hip_add2(void *a, void *b, int *n, hipStream_t strm) {
252
253 const dim3 nthrds(1024, 1, 1);
254 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
255
257 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
259
260 }
261
266 void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm) {
267
268 const dim3 nthrds(1024, 1, 1);
269 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
270
272 nblcks, nthrds, 0, strm, (real *) a,
273 (real *) b, (real *) c, *n);
275 }
276
281 void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm) {
282
283 const dim3 nthrds(1024, 1, 1);
284 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
285
287 nblcks, nthrds, 0, strm, (real *) a,
288 (real *) b, (real *) c, (real *) d, *n);
290 }
291
297 void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm) {
298
299 const dim3 nthrds(1024, 1, 1);
300 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
301
303 nblcks, nthrds, 0, strm, (real *) a,
304 (real *) b, *c1, *n);
306 }
307
313 void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
314
315 const dim3 nthrds(1024, 1, 1);
316 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
317
319 nblcks, nthrds, 0, strm, (real *) a, (real *) b,
320 *c1, *n);
322 }
323
330 void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n,
332
333 const dim3 nthrds(1024, 1, 1);
334 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
335
337 nblcks, nthrds, 0, strm, (real *) x,
338 (const real **) p, (real *) alpha, *j, *n);
340
341 }
342
348 void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
349
350 const dim3 nthrds(1024, 1, 1);
351 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
352
354 nblcks, nthrds, 0, strm, (real *) a,
355 (real *) b, *c1, *n);
357 }
358
364 void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n,
366
367 const dim3 nthrds(1024, 1, 1);
368 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
369
371 nblcks, nthrds, 0, strm, (real *) a,
372 (real *) b, (real *) c, *c1, *c2, *n);
374 }
375
381 void hip_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2,
382 real *c3, int *n, hipStream_t strm) {
383
384 const dim3 nthrds(1024, 1, 1);
385 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
386
388 nblcks, nthrds, 0, strm, (real *) a,
389 (real *) b, (real *) c, (real *) d, *c1, *c2, *c3, *n);
391 }
392
398 void hip_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1,
399 real *c2, real *c3, real *c4, int *n, hipStream_t strm) {
400
401 const dim3 nthrds(1024, 1, 1);
402 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
403
405 nblcks, nthrds, 0, strm, (real *) a,
406 (real *) b, (real *) c, (real *) d, (real *) e,
407 *c1, *c2, *c3, *c4, *n);
409 }
410
415 void hip_invcol1(void *a, int *n, hipStream_t strm) {
416
417 const dim3 nthrds(1024, 1, 1);
418 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
419
421 nblcks, nthrds, 0, strm, (real *) a, *n);
423 }
424
429 void hip_invcol2(void *a, void *b, int *n, hipStream_t strm) {
430
431 const dim3 nthrds(1024, 1, 1);
432 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
433
435 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
437 }
438
443 void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
444
445 const dim3 nthrds(1024, 1, 1);
446 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
447
449 nblcks, nthrds, 0, strm, (real *) a,
450 (real *) b, (real *) c, *n);
452 }
453
458 void hip_col2(void *a, void *b, int *n, hipStream_t strm) {
459
460 const dim3 nthrds(1024, 1, 1);
461 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
462
464 nblcks, nthrds, 0, strm, (real *) a,
465 (real *) b, *n);
467 }
468
473 void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm) {
474
475 const dim3 nthrds(1024, 1, 1);
476 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
477
479 nblcks, nthrds, 0, strm, (real *) a,
480 (real *) b, (real *) c, *n);
482 }
483
488 void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
489
490 const dim3 nthrds(1024, 1, 1);
491 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
492
494 nblcks, nthrds, 0, strm, (real *) a,
495 (real *) b, (real *) c, *n);
497 }
498
503 void hip_sub2(void *a, void *b, int *n, hipStream_t strm) {
504
505 const dim3 nthrds(1024, 1, 1);
506 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
507
509 nblcks, nthrds, 0, strm, (real *) a,
510 (real *) b, *n);
512 }
513
518 void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm) {
519
520 const dim3 nthrds(1024, 1, 1);
521 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
522
524 nblcks, nthrds, 0, strm, (real *) a,
525 (real *) b, (real *) c, *n);
527 }
528
533 void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
534
535 const dim3 nthrds(1024, 1, 1);
536 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
537
539 nblcks, nthrds, 0, strm, (real *) a,
540 (real *) b, (real *) c, *n);
542 }
543
548 void hip_addcol4(void *a, void *b, void *c, void *d, int *n,
550
551 const dim3 nthrds(1024, 1, 1);
552 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
553
555 nblcks, nthrds, 0, strm, (real *) a,
556 (real *) b, (real *) c, (real *) d, *n);
558 }
559
564 void hip_addcol3s2(void *a, void *b, void *c, real *s, int *n,
566
567 const dim3 nthrds(1024, 1, 1);
568 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
569
571 nblcks, nthrds, 0, strm, (real *) a,
572 (real *) b, (real *) c, *s, *n);
574 }
575
580 void hip_vdot3(void *dot, void *u1, void *u2, void *u3,
581 void *v1, void *v2, void *v3, int *n,
583
584 const dim3 nthrds(1024, 1, 1);
585 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
586
588 nblcks, nthrds, 0, strm,
589 (real *) dot, (real *) u1, (real *) u2, (real *) u3,
590 (real *) v1, (real *) v2, (real *) v3, *n);
592 }
593
598 void hip_vcross(void *u1, void *u2, void *u3,
599 void *v1, void *v2, void *v3,
600 void *w1, void *w2, void *w3,
601 int *n, hipStream_t strm) {
602
603 const dim3 nthrds(1024, 1, 1);
604 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
605
607 nblcks, nthrds, 0, strm,
608 (real *) u1, (real *) u2, (real *) u3,
609 (real *) v1, (real *) v2, (real *) v3,
610 (real *) w1, (real *) w2, (real *) w3, *n);
612 }
613
614
615 /*
616 * Reduction buffer
617 */
618 int red_s = 0;
621
623 if ( nb >= red_s) {
624 red_s = nb+1;
625 if (bufred != NULL) {
628 }
631 }
632 }
633
638 const hipStream_t stream) {
639 #ifdef HAVE_RCCL
641 DEVICE_NCCL_SUM, stream);
643 hipMemcpyDeviceToHost, stream));
645 #elif HAVE_DEVICE_MPI
648 #else
650 hipMemcpyDeviceToHost, stream));
652 #endif
653 }
654
659 real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream) {
660
661 const dim3 nthrds(1024, 1, 1);
662 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
663 const int nb = ((*n) + 1024 - 1)/ 1024;
664
666
667
669 nblcks, nthrds, 0, stream,
670 (real *) u, (real *) v,
671 (real *) w, bufred_d, *n);
674 1, 1024, 0, stream, bufred_d, nb);
676
678 hipMemcpyDeviceToHost, stream));
680
681 return bufred[0];
682 }
683
688 real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream) {
689
690 const dim3 nthrds(1024, 1, 1);
691 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
692 const int nb = ((*n) + 1024 - 1)/ 1024;
693
695
696 if (*n > 0) {
698 nblcks, nthrds, 0, stream,
699 (real *) a, (real *) b,
700 (real *) c, bufred_d, *n);
703 1, 1024, 0, stream, bufred_d, nb);
705 }
706 else {
707 hip_rzero(bufred_d, &red_s, stream);
708 }
710
711 return bufred[0];
712 }
713
718 void hip_glsc3_many(real *h, void * w, void *v,void *mult, int *j, int *n,
719 hipStream_t stream){
720 int pow2 = 1;
721 while(pow2 < (*j)){
722 pow2 = 2*pow2;
723 }
724 const int nt = 1024/pow2;
725 const dim3 nthrds(pow2, nt, 1);
726 const dim3 nblcks(((*n)+nt - 1)/nt, 1, 1);
727 const dim3 nthrds_red(1024,1,1);
728 const dim3 nblcks_red( (*j),1,1);
729 const int nb = ((*n) + nt - 1)/nt;
730
732
733 if (*n > 0) {
735 nblcks, nthrds, 0, stream,
736 (const real *) w, (const real **) v,
737 (const real *)mult, bufred_d, *j, *n);
739
741 nblcks_red, nthrds_red, 0, stream,
742 bufred_d, nb, *j);
744 }
745 else {
746 hip_rzero(bufred_d, &red_s, stream);
747 }
748 hip_global_reduce_add(h, bufred_d, (*j), stream);
749 }
750
755 real hip_glsc2(void *a, void *b, int *n, hipStream_t stream) {
756
757 const dim3 nthrds(1024, 1, 1);
758 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
759 const int nb = ((*n) + 1024 - 1)/ 1024;
760
761
763
764 if( *n > 0) {
766 nblcks, nthrds, 0, stream,
767 (real *) a, (real *) b, bufred_d, *n);
770 1, 1024, 0, stream, bufred_d, nb);
772 }
773 else {
774 hip_rzero(bufred_d, &red_s, stream);
775 }
777
778 return bufred[0];
779 }
780
785 real hip_glsubnorm2(void* a, void* b, int* n, hipStream_t stream) {
786
787 const dim3 nthrds(1024, 1, 1);
788 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
789 const int nb = ((*n) + 1024 - 1) / 1024;
790
792
793 if (*n > 0) {
795 nblcks, nthrds, 0, stream,
796 (real*)a, (real*)b, bufred_d, *n);
799 1, 1024, 0, stream, bufred_d, nb);
801 }
802 else {
803 hip_rzero(bufred_d, &red_s, stream);
804 }
806
807 return bufred[0];
808 }
809
814 real hip_glsum(void *a, int *n, hipStream_t stream) {
815 const dim3 nthrds(1024, 1, 1);
816 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
817 const int nb = ((*n) + 1024 - 1)/ 1024;
818
820 if( *n > 0) {
822 nblcks, nthrds, 0, stream,
823 (real *) a, bufred_d, *n);
826 1, 1024, 0, stream, bufred_d, nb);
828 }
829 else {
830 hip_rzero(bufred_d, &red_s, stream);
831 }
832
834
835 return bufred[0];
836 }
837
842 void hip_absval(void *a, int *n, hipStream_t stream) {
843
844 const dim3 nthrds(1024, 1, 1);
845 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
846
848 nblcks, nthrds, 0, stream, (real *) a, *n);
850
851}
852
853 // ======================================================================== //
854 // Point-wise operations.
855
860 void hip_pwmax_vec2(void* a, void* b, int* n, hipStream_t stream) {
861
862 const dim3 nthrds(1024, 1, 1);
863 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
864
866 nblcks, nthrds, 0, stream, (real*)a, (real*)b, *n);
868}
869
874 void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
875
876 const dim3 nthrds(1024, 1, 1);
877 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
878
880 nblcks, nthrds, 0, stream,
881 (real *)a, (real *)b, (real *)c, *n);
883 }
884
889 void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream) {
890
891 const dim3 nthrds(1024, 1, 1);
892 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
893
895 nblcks, nthrds, 0, stream, (real *)a, *c, *n);
897 }
898
903 void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
904
905 const dim3 nthrds(1024, 1, 1);
906 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
907
909 nblcks, nthrds, 0, stream,
910 (real *)a, (real *)b, *c, *n);
912 }
913
918 void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream) {
919
920 const dim3 nthrds(1024, 1, 1);
921 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
922
924 nblcks, nthrds, 0, stream,
925 (real *)a, (real *)b, *n);
927 }
928
933 void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
934
935 const dim3 nthrds(1024, 1, 1);
936 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
937
939 nblcks, nthrds, 0, stream,
940 (real *)a, (real *)b, (real *)c, *n);
942 }
943
948 void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream) {
949
950 const dim3 nthrds(1024, 1, 1);
951 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
952
954 nblcks, nthrds, 0, stream,
955 (real *)a, *c, *n);
957 }
958
963 void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
964
965 const dim3 nthrds(1024, 1, 1);
966 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
967
969 nblcks, nthrds, 0, stream,
970 (real *)a, (real *)b, *c, *n);
972 }
973
974 // ======================================================================== //
975
979 void hip_iadd(void *a, int *c, int *n, hipStream_t stream) {
980
981 const dim3 nthrds(1024, 1, 1);
982 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
983
985 nblcks, nthrds, 0, stream,
986 (int *) a, *c, *n);
988 }
989
990} /* extern "C" */
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
const int e
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
const int j
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
double real
#define DEVICE_MPI_SUM
void device_mpi_allreduce(void *buf_d, void *buf, int count, int nbytes, int op)
#define DEVICE_NCCL_SUM
void device_nccl_allreduce(void *sbuf_d, void *rbuf_d, int count, int nbytes, int op, void *stream)
#define HIP_CHECK(err)
Definition check.h:8
void hip_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n, hipStream_t strm)
Definition math.hip:580
void hip_global_reduce_add(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:637
void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:473
void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:348
void hip_rzero(void *a, int *n, hipStream_t strm)
Definition math.hip:144
void hip_cfill_mask(void *a, real *c, int *size, void *mask, int *mask_size, hipStream_t strm)
Definition math.hip:128
void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:903
void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:443
void hip_absval(void *a, int *n, hipStream_t stream)
Definition math.hip:842
void hip_vcross(void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, void *w1, void *w2, void *w3, int *n, hipStream_t strm)
Definition math.hip:598
real hip_glsubnorm2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:785
void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:533
void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n, hipStream_t strm)
Definition math.hip:364
void hip_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1, real *c2, real *c3, real *c4, int *n, hipStream_t strm)
Definition math.hip:398
void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:313
real hip_glsum(void *a, int *n, hipStream_t stream)
Definition math.hip:814
void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:281
void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:221
real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream)
Definition math.hip:659
void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:193
void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:948
void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:165
real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:688
void hip_invcol1(void *a, int *n, hipStream_t strm)
Definition math.hip:415
void hip_invcol2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:429
void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:61
void hip_cdiv(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:179
void hip_sub2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:503
real hip_glsc2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:755
real * bufred
Definition math.hip:619
void hip_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2, real *c3, int *n, hipStream_t strm)
Definition math.hip:381
void hip_col2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:458
void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:933
void hip_addcol3s2(void *a, void *b, void *c, real *s, int *n, hipStream_t strm)
Definition math.hip:564
void hip_masked_gather_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:78
void hip_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n, hipStream_t stream)
Definition math.hip:718
int red_s
Definition math.hip:618
void hip_cfill(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:234
void hip_masked_atomic_reduction(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:111
void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:266
void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:874
void hip_add2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:251
void hip_copy(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:53
void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:488
void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:889
void hip_iadd(void *a, int *c, int *n, hipStream_t stream)
Definition math.hip:979
void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:963
void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:297
void hip_radd(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:207
void hip_redbuf_check_alloc(int nb)
Definition math.hip:622
void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n, hipStream_t strm)
Definition math.hip:330
void hip_addcol4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:548
void hip_pwmax_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:860
void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:918
void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:518
void hip_masked_scatter_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:94
void hip_cmult(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:151
real * bufred_d
Definition math.hip:620
Object for handling masks in Neko.
Definition mask.f90:34