Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.hip
Go to the documentation of this file.
1/*
2 Copyright (c) 2021-2025, The Neko Authors
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
16
17 * Neither the name of the authors nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#include <hip/hip_runtime.h>
37#include <device/hip/check.h>
38#include "math_kernel.h"
39
40extern "C" {
41
44
45#ifdef HAVE_RCCL
48#endif
49
53 void hip_copy(void *a, void *b, int *n, hipStream_t strm) {
54 HIP_CHECK(hipMemcpyAsync(a, b, (*n) * sizeof(real),
56 }
57
61 void hip_masked_copy(void *a, void *b, void *mask,
62 int *n, int *m, hipStream_t strm) {
63
64 const dim3 nthrds(1024, 1, 1);
65 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
66
68 nblcks, nthrds, 0, strm, (real *) a,
69 (real *) b, (int *) mask, *n, *m);
70
72
73 }
74
78 void hip_masked_gather_copy(void *a, void *b, void *mask,
79 int *n, int *m, hipStream_t strm) {
80
81 const dim3 nthrds(1024, 1, 1);
82 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
83
85 nblcks, nthrds, 0, strm, (real *) a,
86 (real *) b, (int *) mask, *n, *m);
88
89 }
90
94 void hip_masked_gather_copy_aligned(void *a, void *b, void *mask,
95 int *n, int *m, hipStream_t strm) {
96
97 const dim3 nthrds(1024, 1, 1);
98 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
99
101 nblcks, nthrds, 0, strm, (real *) a,
102 (real *) b, (int *) mask, *n, *m);
104
105 }
106
107
111 void hip_masked_scatter_copy(void *a, void *b, void *mask,
112 int *n, int *m, hipStream_t strm) {
113
114 const dim3 nthrds(1024, 1, 1);
115 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
116
118 nblcks, nthrds, 0, strm, (real *) a,
119 (real *) b, (int *) mask, *n, *m);
120
122
123 }
124
128 void hip_masked_atomic_reduction(void *a, void *b, void *mask,
129 int *n, int *m, hipStream_t strm) {
130
131 const dim3 nthrds(1024, 1, 1);
132 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
133
135 nblcks, nthrds, 0, strm, (real *) a,
136 (real *) b, (int *) mask, *n, *m);
137
139
140 }
141
145 void hip_cfill_mask(void* a, real* c, int* size, void* mask, int* mask_size,
147
148 const dim3 nthrds(1024, 1, 1);
149 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
150
152 nblcks, nthrds, 0, strm, (real*)a,
153 *c, *size, (int*)mask, *mask_size);
154
156 }
157
161 void hip_rzero(void *a, int *n, hipStream_t strm) {
162 HIP_CHECK(hipMemsetAsync(a, 0, (*n) * sizeof(real), strm));
163 }
164
168 void hip_cmult(void *a, real *c, int *n, hipStream_t strm) {
169
170 const dim3 nthrds(1024, 1, 1);
171 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
172
174 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
176
177 }
178
182 void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm) {
183
184 const dim3 nthrds(1024, 1, 1);
185 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
186
188 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
190
191 }
192
196 void hip_cdiv(void *a, real *c, int *n, hipStream_t strm) {
197
198 const dim3 nthrds(1024, 1, 1);
199 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
200
202 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
204
205 }
206
210 void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm) {
211
212 const dim3 nthrds(1024, 1, 1);
213 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
214
216 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
218
219 }
220
224 void hip_radd(void *a, real *c, int *n, hipStream_t strm) {
225
226 const dim3 nthrds(1024, 1, 1);
227 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
228
230 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
232 }
233
238 void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm) {
239
240 const dim3 nthrds(1024, 1, 1);
241 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
242
244 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *c, *n);
246 }
247
251 void hip_cfill(void *a, real *c, int *n, hipStream_t strm) {
252
253 const dim3 nthrds(1024, 1, 1);
254 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
255
256 if (*n > 0) {
258 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
260 }
261
262 }
263
268 void hip_add2(void *a, void *b, int *n, hipStream_t strm) {
269
270 const dim3 nthrds(1024, 1, 1);
271 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
272
274 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
276
277 }
278
283 void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm) {
284
285 const dim3 nthrds(1024, 1, 1);
286 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
287
289 nblcks, nthrds, 0, strm, (real *) a,
290 (real *) b, (real *) c, *n);
292 }
293
298 void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm) {
299
300 const dim3 nthrds(1024, 1, 1);
301 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
302
304 nblcks, nthrds, 0, strm, (real *) a,
305 (real *) b, (real *) c, (real *) d, *n);
307 }
308
314 void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm) {
315
316 const dim3 nthrds(1024, 1, 1);
317 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
318
320 nblcks, nthrds, 0, strm, (real *) a,
321 (real *) b, *c1, *n);
323 }
324
330 void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
331
332 const dim3 nthrds(1024, 1, 1);
333 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
334
336 nblcks, nthrds, 0, strm, (real *) a, (real *) b,
337 *c1, *n);
339 }
340
347 void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n,
349
350 const dim3 nthrds(1024, 1, 1);
351 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
352
354 nblcks, nthrds, 0, strm, (real *) x,
355 (const real **) p, (real *) alpha, *j, *n);
357
358 }
359
365 void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
366
367 const dim3 nthrds(1024, 1, 1);
368 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
369
371 nblcks, nthrds, 0, strm, (real *) a,
372 (real *) b, *c1, *n);
374 }
375
381 void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n,
383
384 const dim3 nthrds(1024, 1, 1);
385 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
386
388 nblcks, nthrds, 0, strm, (real *) a,
389 (real *) b, (real *) c, *c1, *c2, *n);
391 }
392
398 void hip_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2,
399 real *c3, int *n, hipStream_t strm) {
400
401 const dim3 nthrds(1024, 1, 1);
402 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
403
405 nblcks, nthrds, 0, strm, (real *) a,
406 (real *) b, (real *) c, (real *) d, *c1, *c2, *c3, *n);
408 }
409
415 void hip_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1,
416 real *c2, real *c3, real *c4, int *n, hipStream_t strm) {
417
418 const dim3 nthrds(1024, 1, 1);
419 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
420
422 nblcks, nthrds, 0, strm, (real *) a,
423 (real *) b, (real *) c, (real *) d, (real *) e,
424 *c1, *c2, *c3, *c4, *n);
426 }
427
432 void hip_invcol1(void *a, int *n, hipStream_t strm) {
433
434 const dim3 nthrds(1024, 1, 1);
435 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
436
438 nblcks, nthrds, 0, strm, (real *) a, *n);
440 }
441
446 void hip_invcol2(void *a, void *b, int *n, hipStream_t strm) {
447
448 const dim3 nthrds(1024, 1, 1);
449 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
450
452 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
454 }
455
460 void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
461
462 const dim3 nthrds(1024, 1, 1);
463 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
464
466 nblcks, nthrds, 0, strm, (real *) a,
467 (real *) b, (real *) c, *n);
469 }
470
475 void hip_col2(void *a, void *b, int *n, hipStream_t strm) {
476
477 const dim3 nthrds(1024, 1, 1);
478 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
479
481 nblcks, nthrds, 0, strm, (real *) a,
482 (real *) b, *n);
484 }
485
490 void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm) {
491
492 const dim3 nthrds(1024, 1, 1);
493 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
494
496 nblcks, nthrds, 0, strm, (real *) a,
497 (real *) b, (real *) c, *n);
499 }
500
505 void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
506
507 const dim3 nthrds(1024, 1, 1);
508 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
509
511 nblcks, nthrds, 0, strm, (real *) a,
512 (real *) b, (real *) c, *n);
514 }
515
520 void hip_sub2(void *a, void *b, int *n, hipStream_t strm) {
521
522 const dim3 nthrds(1024, 1, 1);
523 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
524
526 nblcks, nthrds, 0, strm, (real *) a,
527 (real *) b, *n);
529 }
530
535 void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm) {
536
537 const dim3 nthrds(1024, 1, 1);
538 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
539
541 nblcks, nthrds, 0, strm, (real *) a,
542 (real *) b, (real *) c, *n);
544 }
545
550 void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
551
552 const dim3 nthrds(1024, 1, 1);
553 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
554
556 nblcks, nthrds, 0, strm, (real *) a,
557 (real *) b, (real *) c, *n);
559 }
560
565 void hip_addcol4(void *a, void *b, void *c, void *d, int *n,
567
568 const dim3 nthrds(1024, 1, 1);
569 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
570
572 nblcks, nthrds, 0, strm, (real *) a,
573 (real *) b, (real *) c, (real *) d, *n);
575 }
576
581 void hip_addcol3s2(void *a, void *b, void *c, real *s, int *n,
583
584 const dim3 nthrds(1024, 1, 1);
585 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
586
588 nblcks, nthrds, 0, strm, (real *) a,
589 (real *) b, (real *) c, *s, *n);
591 }
592
597 void hip_vdot3(void *dot, void *u1, void *u2, void *u3,
598 void *v1, void *v2, void *v3, int *n,
600
601 const dim3 nthrds(1024, 1, 1);
602 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
603
605 nblcks, nthrds, 0, strm,
606 (real *) dot, (real *) u1, (real *) u2, (real *) u3,
607 (real *) v1, (real *) v2, (real *) v3, *n);
609 }
610
615 void hip_vcross(void *u1, void *u2, void *u3,
616 void *v1, void *v2, void *v3,
617 void *w1, void *w2, void *w3,
618 int *n, hipStream_t strm) {
619
620 const dim3 nthrds(1024, 1, 1);
621 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
622
624 nblcks, nthrds, 0, strm,
625 (real *) u1, (real *) u2, (real *) u3,
626 (real *) v1, (real *) v2, (real *) v3,
627 (real *) w1, (real *) w2, (real *) w3, *n);
629 }
630
631
632 /*
633 * Reduction buffer
634 */
635 int red_s = 0;
638
640 if ( nb >= red_s) {
641 red_s = nb+1;
642 if (bufred != NULL) {
645 }
648 }
649 }
650
655 const hipStream_t stream) {
656 #ifdef HAVE_RCCL
658 DEVICE_NCCL_SUM, stream);
660 hipMemcpyDeviceToHost, stream));
662 #elif HAVE_DEVICE_MPI
665 #else
667 hipMemcpyDeviceToHost, stream));
669 #endif
670 }
671
676 real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream) {
677
678 const dim3 nthrds(1024, 1, 1);
679 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
680 const int nb = ((*n) + 1024 - 1)/ 1024;
681
683
684
686 nblcks, nthrds, 0, stream,
687 (real *) u, (real *) v,
688 (real *) w, bufred_d, *n);
691 1, 1024, 0, stream, bufred_d, nb);
693
695 hipMemcpyDeviceToHost, stream));
697
698 return bufred[0];
699 }
700
705 real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream) {
706
707 const dim3 nthrds(1024, 1, 1);
708 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
709 const int nb = ((*n) + 1024 - 1)/ 1024;
710
712
713 if (*n > 0) {
715 nblcks, nthrds, 0, stream,
716 (real *) a, (real *) b,
717 (real *) c, bufred_d, *n);
720 1, 1024, 0, stream, bufred_d, nb);
722 }
723 else {
724 hip_rzero(bufred_d, &red_s, stream);
725 }
727
728 return bufred[0];
729 }
730
735 void hip_glsc3_many(real *h, void * w, void *v,void *mult, int *j, int *n,
736 hipStream_t stream){
737 int pow2 = 1;
738 while(pow2 < (*j)){
739 pow2 = 2*pow2;
740 }
741 const int nt = 1024/pow2;
742 const dim3 nthrds(pow2, nt, 1);
743 const dim3 nblcks(((*n)+nt - 1)/nt, 1, 1);
744 const dim3 nthrds_red(1024,1,1);
745 const dim3 nblcks_red( (*j),1,1);
746 const int nb = ((*n) + nt - 1)/nt;
747
749
750 if (*n > 0) {
752 nblcks, nthrds, 0, stream,
753 (const real *) w, (const real **) v,
754 (const real *)mult, bufred_d, *j, *n);
756
758 nblcks_red, nthrds_red, 0, stream,
759 bufred_d, nb, *j);
761 }
762 else {
763 hip_rzero(bufred_d, &red_s, stream);
764 }
765 hip_global_reduce_add(h, bufred_d, (*j), stream);
766 }
767
772 real hip_glsc2(void *a, void *b, int *n, hipStream_t stream) {
773
774 const dim3 nthrds(1024, 1, 1);
775 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
776 const int nb = ((*n) + 1024 - 1)/ 1024;
777
778
780
781 if( *n > 0) {
783 nblcks, nthrds, 0, stream,
784 (real *) a, (real *) b, bufred_d, *n);
787 1, 1024, 0, stream, bufred_d, nb);
789 }
790 else {
791 hip_rzero(bufred_d, &red_s, stream);
792 }
794
795 return bufred[0];
796 }
797
802 real hip_glsubnorm2(void* a, void* b, int* n, hipStream_t stream) {
803
804 const dim3 nthrds(1024, 1, 1);
805 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
806 const int nb = ((*n) + 1024 - 1) / 1024;
807
809
810 if (*n > 0) {
812 nblcks, nthrds, 0, stream,
813 (real*)a, (real*)b, bufred_d, *n);
816 1, 1024, 0, stream, bufred_d, nb);
818 }
819 else {
820 hip_rzero(bufred_d, &red_s, stream);
821 }
823
824 return bufred[0];
825 }
826
831 real hip_glsum(void *a, int *n, hipStream_t stream) {
832 const dim3 nthrds(1024, 1, 1);
833 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
834 const int nb = ((*n) + 1024 - 1)/ 1024;
835
837 if( *n > 0) {
839 nblcks, nthrds, 0, stream,
840 (real *) a, bufred_d, *n);
843 1, 1024, 0, stream, bufred_d, nb);
845 }
846 else {
847 hip_rzero(bufred_d, &red_s, stream);
848 }
849
851
852 return bufred[0];
853 }
854
859 void hip_absval(void *a, int *n, hipStream_t stream) {
860
861 const dim3 nthrds(1024, 1, 1);
862 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
863
865 nblcks, nthrds, 0, stream, (real *) a, *n);
867
868}
869
870 // ======================================================================== //
871 // Point-wise operations.
872
877 void hip_pwmax_vec2(void* a, void* b, int* n, hipStream_t stream) {
878
879 const dim3 nthrds(1024, 1, 1);
880 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
881
883 nblcks, nthrds, 0, stream, (real*)a, (real*)b, *n);
885}
886
891 void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
892
893 const dim3 nthrds(1024, 1, 1);
894 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
895
897 nblcks, nthrds, 0, stream,
898 (real *)a, (real *)b, (real *)c, *n);
900 }
901
906 void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream) {
907
908 const dim3 nthrds(1024, 1, 1);
909 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
910
912 nblcks, nthrds, 0, stream, (real *)a, *c, *n);
914 }
915
920 void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
921
922 const dim3 nthrds(1024, 1, 1);
923 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
924
926 nblcks, nthrds, 0, stream,
927 (real *)a, (real *)b, *c, *n);
929 }
930
935 void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream) {
936
937 const dim3 nthrds(1024, 1, 1);
938 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
939
941 nblcks, nthrds, 0, stream,
942 (real *)a, (real *)b, *n);
944 }
945
950 void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
951
952 const dim3 nthrds(1024, 1, 1);
953 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
954
956 nblcks, nthrds, 0, stream,
957 (real *)a, (real *)b, (real *)c, *n);
959 }
960
965 void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream) {
966
967 const dim3 nthrds(1024, 1, 1);
968 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
969
971 nblcks, nthrds, 0, stream,
972 (real *)a, *c, *n);
974 }
975
980 void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
981
982 const dim3 nthrds(1024, 1, 1);
983 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
984
986 nblcks, nthrds, 0, stream,
987 (real *)a, (real *)b, *c, *n);
989 }
990
991 // ======================================================================== //
992
996 void hip_iadd(void *a, int *c, int *n, hipStream_t stream) {
997
998 const dim3 nthrds(1024, 1, 1);
999 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
1000
1002 nblcks, nthrds, 0, stream,
1003 (int *) a, *c, *n);
1005 }
1006
1007} /* extern "C" */
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
const int e
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
const int j
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
double real
#define DEVICE_MPI_SUM
void device_mpi_allreduce(void *buf_d, void *buf, int count, int nbytes, int op)
#define DEVICE_NCCL_SUM
void device_nccl_allreduce(void *sbuf_d, void *rbuf_d, int count, int nbytes, int op, void *stream)
#define HIP_CHECK(err)
Definition check.h:8
void hip_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n, hipStream_t strm)
Definition math.hip:597
void hip_global_reduce_add(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:654
void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:490
void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:365
void hip_rzero(void *a, int *n, hipStream_t strm)
Definition math.hip:161
void hip_cfill_mask(void *a, real *c, int *size, void *mask, int *mask_size, hipStream_t strm)
Definition math.hip:145
void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:920
void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:460
void hip_absval(void *a, int *n, hipStream_t stream)
Definition math.hip:859
void hip_masked_gather_copy_aligned(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:94
void hip_vcross(void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, void *w1, void *w2, void *w3, int *n, hipStream_t strm)
Definition math.hip:615
real hip_glsubnorm2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:802
void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:550
void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n, hipStream_t strm)
Definition math.hip:381
void hip_add5s4(void *a, void *b, void *c, void *d, void *e, real *c1, real *c2, real *c3, real *c4, int *n, hipStream_t strm)
Definition math.hip:415
void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:330
real hip_glsum(void *a, int *n, hipStream_t stream)
Definition math.hip:831
void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:298
void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:238
real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream)
Definition math.hip:676
void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:210
void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:965
void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:182
real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:705
void hip_invcol1(void *a, int *n, hipStream_t strm)
Definition math.hip:432
void hip_invcol2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:446
void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:61
void hip_cdiv(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:196
void hip_sub2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:520
real hip_glsc2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:772
real * bufred
Definition math.hip:636
void hip_add4s3(void *a, void *b, void *c, void *d, real *c1, real *c2, real *c3, int *n, hipStream_t strm)
Definition math.hip:398
void hip_col2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:475
void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:950
void hip_addcol3s2(void *a, void *b, void *c, real *s, int *n, hipStream_t strm)
Definition math.hip:581
void hip_masked_gather_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:78
void hip_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n, hipStream_t stream)
Definition math.hip:735
int red_s
Definition math.hip:635
void hip_cfill(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:251
void hip_masked_atomic_reduction(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:128
void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:283
void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:891
void hip_add2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:268
void hip_copy(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:53
void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:505
void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:906
void hip_iadd(void *a, int *c, int *n, hipStream_t stream)
Definition math.hip:996
void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:980
void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:314
void hip_radd(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:224
void hip_redbuf_check_alloc(int nb)
Definition math.hip:639
void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n, hipStream_t strm)
Definition math.hip:347
void hip_addcol4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:565
void hip_pwmax_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:877
void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:935
void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:535
void hip_masked_scatter_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:111
void hip_cmult(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:168
real * bufred_d
Definition math.hip:637
Object for handling masks in Neko.
Definition mask.f90:34