Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.hip
Go to the documentation of this file.
1/*
2 Copyright (c) 2021-2025, The Neko Authors
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
16
17 * Neither the name of the authors nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#include <hip/hip_runtime.h>
37#include <device/hip/check.h>
38#include "math_kernel.h"
39
40extern "C" {
41
44
45#ifdef HAVE_RCCL
48#endif
49
53 void hip_copy(void *a, void *b, int *n, hipStream_t strm) {
54 HIP_CHECK(hipMemcpyAsync(a, b, (*n) * sizeof(real),
56 }
57
61 void hip_masked_copy(void *a, void *b, void *mask,
62 int *n, int *m, hipStream_t strm) {
63
64 const dim3 nthrds(1024, 1, 1);
65 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
66
68 nblcks, nthrds, 0, strm, (real *) a,
69 (real *) b, (int *) mask, *n, *m);
70
72
73 }
74
78 void hip_masked_gather_copy(void *a, void *b, void *mask,
79 int *n, int *m, hipStream_t strm) {
80
81 const dim3 nthrds(1024, 1, 1);
82 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
83
85 nblcks, nthrds, 0, strm, (real *) a,
86 (real *) b, (int *) mask, *n, *m);
88
89 }
90
94 void hip_masked_scatter_copy(void *a, void *b, void *mask,
95 int *n, int *m, hipStream_t strm) {
96
97 const dim3 nthrds(1024, 1, 1);
98 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
99
101 nblcks, nthrds, 0, strm, (real *) a,
102 (real *) b, (int *) mask, *n, *m);
103
105
106 }
107
111 void hip_masked_atomic_reduction(void *a, void *b, void *mask,
112 int *n, int *m, hipStream_t strm) {
113
114 const dim3 nthrds(1024, 1, 1);
115 const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
116
118 nblcks, nthrds, 0, strm, (real *) a,
119 (real *) b, (int *) mask, *n, *m);
120
122
123 }
124
128 void hip_cfill_mask(void* a, real* c, int* size, void* mask, int* mask_size,
130
131 const dim3 nthrds(1024, 1, 1);
132 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
133
135 nblcks, nthrds, 0, strm, (real*)a,
136 *c, *size, (int*)mask, *mask_size);
137
139 }
140
/**
 * Zero-fill a vector of *n values of type real.
 *
 * Issues hipMemsetAsync on stream strm, writing (*n) * sizeof(real) bytes
 * of value 0 starting at a, and hands the returned status to HIP_CHECK.
 * No stream synchronisation is performed in this function.
 *
 * @param a    Start of the buffer to clear (presumably device memory --
 *             TODO confirm against callers).
 * @param n    Pointer to the number of real entries to clear; passed by
 *             pointer, presumably for the Fortran interface -- confirm.
 * @param strm Stream passed through to hipMemsetAsync.
 *
 * NOTE(review): relies on an all-zero byte pattern being the zero value
 * of real (holds for IEEE-754 floating point types).
 */
144 void hip_rzero(void *a, int *n, hipStream_t strm) {
145 HIP_CHECK(hipMemsetAsync(a, 0, (*n) * sizeof(real), strm));
146 }
147
151 void hip_cmult(void *a, real *c, int *n, hipStream_t strm) {
152
153 const dim3 nthrds(1024, 1, 1);
154 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
155
157 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
159
160 }
161
165 void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm) {
166
167 const dim3 nthrds(1024, 1, 1);
168 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
169
171 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
173
174 }
175
179 void hip_cdiv(void *a, real *c, int *n, hipStream_t strm) {
180
181 const dim3 nthrds(1024, 1, 1);
182 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
183
185 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
187
188 }
189
193 void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm) {
194
195 const dim3 nthrds(1024, 1, 1);
196 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
197
199 nblcks, nthrds, 0, strm, (real *) a,(real *) b, *c, *n);
201
202 }
203
207 void hip_radd(void *a, real *c, int *n, hipStream_t strm) {
208
209 const dim3 nthrds(1024, 1, 1);
210 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
211
213 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
215 }
216
221 void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm) {
222
223 const dim3 nthrds(1024, 1, 1);
224 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
225
227 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *c, *n);
229 }
230
234 void hip_cfill(void *a, real *c, int *n, hipStream_t strm) {
235
236 const dim3 nthrds(1024, 1, 1);
237 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
238
239 if (*n > 0) {
241 nblcks, nthrds, 0, strm, (real *) a, *c, *n);
243 }
244
245 }
246
251 void hip_add2(void *a, void *b, int *n, hipStream_t strm) {
252
253 const dim3 nthrds(1024, 1, 1);
254 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
255
257 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
259
260 }
261
266 void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm) {
267
268 const dim3 nthrds(1024, 1, 1);
269 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
270
272 nblcks, nthrds, 0, strm, (real *) a,
273 (real *) b, (real *) c, *n);
275 }
276
281 void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm) {
282
283 const dim3 nthrds(1024, 1, 1);
284 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
285
287 nblcks, nthrds, 0, strm, (real *) a,
288 (real *) b, (real *) c, (real *) d, *n);
290 }
291
297 void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm) {
298
299 const dim3 nthrds(1024, 1, 1);
300 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
301
303 nblcks, nthrds, 0, strm, (real *) a,
304 (real *) b, *c1, *n);
306 }
307
313 void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
314
315 const dim3 nthrds(1024, 1, 1);
316 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
317
319 nblcks, nthrds, 0, strm, (real *) a, (real *) b,
320 *c1, *n);
322 }
323
330 void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n,
332
333 const dim3 nthrds(1024, 1, 1);
334 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
335
337 nblcks, nthrds, 0, strm, (real *) x,
338 (const real **) p, (real *) alpha, *j, *n);
340
341 }
342
348 void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm) {
349
350 const dim3 nthrds(1024, 1, 1);
351 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
352
354 nblcks, nthrds, 0, strm, (real *) a,
355 (real *) b, *c1, *n);
357 }
358
364 void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n,
366
367 const dim3 nthrds(1024, 1, 1);
368 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
369
371 nblcks, nthrds, 0, strm, (real *) a,
372 (real *) b, (real *) c, *c1, *c2, *n);
374 }
375
380 void hip_invcol1(void *a, int *n, hipStream_t strm) {
381
382 const dim3 nthrds(1024, 1, 1);
383 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
384
386 nblcks, nthrds, 0, strm, (real *) a, *n);
388 }
389
394 void hip_invcol2(void *a, void *b, int *n, hipStream_t strm) {
395
396 const dim3 nthrds(1024, 1, 1);
397 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
398
400 nblcks, nthrds, 0, strm, (real *) a, (real *) b, *n);
402 }
403
408 void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
409
410 const dim3 nthrds(1024, 1, 1);
411 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
412
414 nblcks, nthrds, 0, strm, (real *) a,
415 (real *) b, (real *) c, *n);
417 }
418
423 void hip_col2(void *a, void *b, int *n, hipStream_t strm) {
424
425 const dim3 nthrds(1024, 1, 1);
426 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
427
429 nblcks, nthrds, 0, strm, (real *) a,
430 (real *) b, *n);
432 }
433
438 void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm) {
439
440 const dim3 nthrds(1024, 1, 1);
441 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
442
444 nblcks, nthrds, 0, strm, (real *) a,
445 (real *) b, (real *) c, *n);
447 }
448
453 void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
454
455 const dim3 nthrds(1024, 1, 1);
456 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
457
459 nblcks, nthrds, 0, strm, (real *) a,
460 (real *) b, (real *) c, *n);
462 }
463
468 void hip_sub2(void *a, void *b, int *n, hipStream_t strm) {
469
470 const dim3 nthrds(1024, 1, 1);
471 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
472
474 nblcks, nthrds, 0, strm, (real *) a,
475 (real *) b, *n);
477 }
478
483 void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm) {
484
485 const dim3 nthrds(1024, 1, 1);
486 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
487
489 nblcks, nthrds, 0, strm, (real *) a,
490 (real *) b, (real *) c, *n);
492 }
493
498 void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm) {
499
500 const dim3 nthrds(1024, 1, 1);
501 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
502
504 nblcks, nthrds, 0, strm, (real *) a,
505 (real *) b, (real *) c, *n);
507 }
508
513 void hip_addcol4(void *a, void *b, void *c, void *d, int *n,
515
516 const dim3 nthrds(1024, 1, 1);
517 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
518
520 nblcks, nthrds, 0, strm, (real *) a,
521 (real *) b, (real *) c, (real *) d, *n);
523 }
524
529 void hip_vdot3(void *dot, void *u1, void *u2, void *u3,
530 void *v1, void *v2, void *v3, int *n,
532
533 const dim3 nthrds(1024, 1, 1);
534 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
535
537 nblcks, nthrds, 0, strm,
538 (real *) dot, (real *) u1, (real *) u2, (real *) u3,
539 (real *) v1, (real *) v2, (real *) v3, *n);
541 }
542
547 void hip_vcross(void *u1, void *u2, void *u3,
548 void *v1, void *v2, void *v3,
549 void *w1, void *w2, void *w3,
550 int *n, hipStream_t strm) {
551
552 const dim3 nthrds(1024, 1, 1);
553 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
554
556 nblcks, nthrds, 0, strm,
557 (real *) u1, (real *) u2, (real *) u3,
558 (real *) v1, (real *) v2, (real *) v3,
559 (real *) w1, (real *) w2, (real *) w3, *n);
561 }
562
563
564 /*
565 * Reduction buffer
566 */
567 int red_s = 0;
570
572 if ( nb >= red_s) {
573 red_s = nb+1;
574 if (bufred != NULL) {
577 }
580 }
581 }
582
587 const hipStream_t stream) {
588 #ifdef HAVE_RCCL
590 DEVICE_NCCL_SUM, stream);
592 hipMemcpyDeviceToHost, stream));
594 #elif HAVE_DEVICE_MPI
597 #else
599 hipMemcpyDeviceToHost, stream));
601 #endif
602 }
603
608 real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream) {
609
610 const dim3 nthrds(1024, 1, 1);
611 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
612 const int nb = ((*n) + 1024 - 1)/ 1024;
613
615
616
618 nblcks, nthrds, 0, stream,
619 (real *) u, (real *) v,
620 (real *) w, bufred_d, *n);
623 1, 1024, 0, stream, bufred_d, nb);
625
627 hipMemcpyDeviceToHost, stream));
629
630 return bufred[0];
631 }
632
637 real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream) {
638
639 const dim3 nthrds(1024, 1, 1);
640 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
641 const int nb = ((*n) + 1024 - 1)/ 1024;
642
644
645 if (*n > 0) {
647 nblcks, nthrds, 0, stream,
648 (real *) a, (real *) b,
649 (real *) c, bufred_d, *n);
652 1, 1024, 0, stream, bufred_d, nb);
654 }
655 else {
656 hip_rzero(bufred_d, &red_s, stream);
657 }
659
660 return bufred[0];
661 }
662
667 void hip_glsc3_many(real *h, void * w, void *v,void *mult, int *j, int *n,
668 hipStream_t stream){
669 int pow2 = 1;
670 while(pow2 < (*j)){
671 pow2 = 2*pow2;
672 }
673 const int nt = 1024/pow2;
674 const dim3 nthrds(pow2, nt, 1);
675 const dim3 nblcks(((*n)+nt - 1)/nt, 1, 1);
676 const dim3 nthrds_red(1024,1,1);
677 const dim3 nblcks_red( (*j),1,1);
678 const int nb = ((*n) + nt - 1)/nt;
679
681
682 if (*n > 0) {
684 nblcks, nthrds, 0, stream,
685 (const real *) w, (const real **) v,
686 (const real *)mult, bufred_d, *j, *n);
688
690 nblcks_red, nthrds_red, 0, stream,
691 bufred_d, nb, *j);
693 }
694 else {
695 hip_rzero(bufred_d, &red_s, stream);
696 }
697 hip_global_reduce_add(h, bufred_d, (*j), stream);
698 }
699
704 real hip_glsc2(void *a, void *b, int *n, hipStream_t stream) {
705
706 const dim3 nthrds(1024, 1, 1);
707 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
708 const int nb = ((*n) + 1024 - 1)/ 1024;
709
710
712
713 if( *n > 0) {
715 nblcks, nthrds, 0, stream,
716 (real *) a, (real *) b, bufred_d, *n);
719 1, 1024, 0, stream, bufred_d, nb);
721 }
722 else {
723 hip_rzero(bufred_d, &red_s, stream);
724 }
726
727 return bufred[0];
728 }
729
734 real hip_glsubnorm2(void* a, void* b, int* n, hipStream_t stream) {
735
736 const dim3 nthrds(1024, 1, 1);
737 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
738 const int nb = ((*n) + 1024 - 1) / 1024;
739
741
742 if (*n > 0) {
744 nblcks, nthrds, 0, stream,
745 (real*)a, (real*)b, bufred_d, *n);
748 1, 1024, 0, stream, bufred_d, nb);
750 }
751 else {
752 hip_rzero(bufred_d, &red_s, stream);
753 }
755
756 return bufred[0];
757 }
758
763 real hip_glsum(void *a, int *n, hipStream_t stream) {
764 const dim3 nthrds(1024, 1, 1);
765 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
766 const int nb = ((*n) + 1024 - 1)/ 1024;
767
769 if( *n > 0) {
771 nblcks, nthrds, 0, stream,
772 (real *) a, bufred_d, *n);
775 1, 1024, 0, stream, bufred_d, nb);
777 }
778 else {
779 hip_rzero(bufred_d, &red_s, stream);
780 }
781
783
784 return bufred[0];
785 }
786
791 void hip_absval(void *a, int *n, hipStream_t stream) {
792
793 const dim3 nthrds(1024, 1, 1);
794 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
795
797 nblcks, nthrds, 0, stream, (real *) a, *n);
799
800}
801
802 // ======================================================================== //
803 // Point-wise operations.
804
809 void hip_pwmax_vec2(void* a, void* b, int* n, hipStream_t stream) {
810
811 const dim3 nthrds(1024, 1, 1);
812 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
813
815 nblcks, nthrds, 0, stream, (real*)a, (real*)b, *n);
817}
818
823 void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
824
825 const dim3 nthrds(1024, 1, 1);
826 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
827
829 nblcks, nthrds, 0, stream,
830 (real *)a, (real *)b, (real *)c, *n);
832 }
833
838 void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream) {
839
840 const dim3 nthrds(1024, 1, 1);
841 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
842
844 nblcks, nthrds, 0, stream, (real *)a, *c, *n);
846 }
847
852 void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
853
854 const dim3 nthrds(1024, 1, 1);
855 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
856
858 nblcks, nthrds, 0, stream,
859 (real *)a, (real *)b, *c, *n);
861 }
862
867 void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream) {
868
869 const dim3 nthrds(1024, 1, 1);
870 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
871
873 nblcks, nthrds, 0, stream,
874 (real *)a, (real *)b, *n);
876 }
877
882 void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream) {
883
884 const dim3 nthrds(1024, 1, 1);
885 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
886
888 nblcks, nthrds, 0, stream,
889 (real *)a, (real *)b, (real *)c, *n);
891 }
892
897 void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream) {
898
899 const dim3 nthrds(1024, 1, 1);
900 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
901
903 nblcks, nthrds, 0, stream,
904 (real *)a, *c, *n);
906 }
907
912 void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream) {
913
914 const dim3 nthrds(1024, 1, 1);
915 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
916
918 nblcks, nthrds, 0, stream,
919 (real *)a, (real *)b, *c, *n);
921 }
922
923 // ======================================================================== //
924
928 void hip_iadd(void *a, int *c, int *n, hipStream_t stream) {
929
930 const dim3 nthrds(1024, 1, 1);
931 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
932
934 nblcks, nthrds, 0, stream,
935 (int *) a, *c, *n);
937 }
938
939} /* extern "C" */
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ u
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ v
const int j
__global__ void const T *__restrict__ x
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w3
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
double real
#define DEVICE_MPI_SUM
void device_mpi_allreduce(void *buf_d, void *buf, int count, int nbytes, int op)
#define DEVICE_NCCL_SUM
void device_nccl_allreduce(void *sbuf_d, void *rbuf_d, int count, int nbytes, int op, void *stream)
#define HIP_CHECK(err)
Definition check.h:8
void hip_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n, hipStream_t strm)
Definition math.hip:529
void hip_global_reduce_add(real *bufred, void *bufred_d, int n, const hipStream_t stream)
Definition math.hip:586
void hip_col3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:438
void hip_addsqr2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:348
void hip_rzero(void *a, int *n, hipStream_t strm)
Definition math.hip:144
void hip_cfill_mask(void *a, real *c, int *size, void *mask, int *mask_size, hipStream_t strm)
Definition math.hip:128
void hip_pwmax_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:852
void hip_invcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:408
void hip_absval(void *a, int *n, hipStream_t stream)
Definition math.hip:791
void hip_vcross(void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, void *w1, void *w2, void *w3, int *n, hipStream_t strm)
Definition math.hip:547
real hip_glsubnorm2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:734
void hip_addcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:498
void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n, hipStream_t strm)
Definition math.hip:364
void hip_add2s2(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:313
real hip_glsum(void *a, int *n, hipStream_t stream)
Definition math.hip:763
void hip_add4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:281
void hip_cadd2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:221
real hip_vlsc3(void *u, void *v, void *w, int *n, hipStream_t stream)
Definition math.hip:608
void hip_cdiv2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:193
void hip_pwmin_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:897
void hip_cmult2(void *a, void *b, real *c, int *n, hipStream_t strm)
Definition math.hip:165
real hip_glsc3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:637
void hip_invcol1(void *a, int *n, hipStream_t strm)
Definition math.hip:380
void hip_invcol2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:394
void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:61
void hip_cdiv(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:179
void hip_sub2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:468
real hip_glsc2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:704
real * bufred
Definition math.hip:568
void hip_col2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:423
void hip_pwmin_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:882
void hip_masked_gather_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:78
void hip_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n, hipStream_t stream)
Definition math.hip:667
int red_s
Definition math.hip:567
void hip_cfill(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:234
void hip_masked_atomic_reduction(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:111
void hip_add3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:266
void hip_pwmax_vec3(void *a, void *b, void *c, int *n, hipStream_t stream)
Definition math.hip:823
void hip_add2(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:251
void hip_copy(void *a, void *b, int *n, hipStream_t strm)
Definition math.hip:53
void hip_subcol3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:453
void hip_pwmax_sca2(void *a, real *c, int *n, hipStream_t stream)
Definition math.hip:838
void hip_iadd(void *a, int *c, int *n, hipStream_t stream)
Definition math.hip:928
void hip_pwmin_sca3(void *a, void *b, real *c, int *n, hipStream_t stream)
Definition math.hip:912
void hip_add2s1(void *a, void *b, real *c1, int *n, hipStream_t strm)
Definition math.hip:297
void hip_radd(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:207
void hip_redbuf_check_alloc(int nb)
Definition math.hip:571
void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n, hipStream_t strm)
Definition math.hip:330
void hip_addcol4(void *a, void *b, void *c, void *d, int *n, hipStream_t strm)
Definition math.hip:513
void hip_pwmax_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:809
void hip_pwmin_vec2(void *a, void *b, int *n, hipStream_t stream)
Definition math.hip:867
void hip_sub3(void *a, void *b, void *c, int *n, hipStream_t strm)
Definition math.hip:483
void hip_masked_scatter_copy(void *a, void *b, void *mask, int *n, int *m, hipStream_t strm)
Definition math.hip:94
void hip_cmult(void *a, real *c, int *n, hipStream_t strm)
Definition math.hip:151
real * bufred_d
Definition math.hip:569
Object for handling masks in Neko.
Definition mask.f90:34