Neko  0.8.99
A portable framework for high-order spectral element flow simulations
math.hip
Go to the documentation of this file.
1 /*
2  Copyright (c) 2021-2023, The Neko Authors
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions
7  are met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above
13  copyright notice, this list of conditions and the following
14  disclaimer in the documentation and/or other materials provided
15  with the distribution.
16 
17  * Neither the name of the authors nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include <hip/hip_runtime.h>
36 #include <device/device_config.h>
37 #include <device/hip/check.h>
38 #include "math_kernel.h"
39 
40 extern "C" {
41 
44 
48  void hip_copy(void *a, void *b, int *n) {
49  HIP_CHECK(hipMemcpyAsync(a, b, (*n) * sizeof(real),
50  hipMemcpyDeviceToDevice,
51  (hipStream_t) glb_cmd_queue));
52  }
53 
57  void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m) {
58 
59  const dim3 nthrds(1024, 1, 1);
60  const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
61 
62  hipLaunchKernelGGL(HIP_KERNEL_NAME(masked_copy_kernel<real>),
63  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
64  (real *) a, (real *) b, (int *) mask, *n, *m);
65 
66  HIP_CHECK(hipGetLastError());
67 
68  }
69 
73  void hip_cfill_mask(void* a, real* c, int* size, void* mask, int* mask_size) {
74 
75  const dim3 nthrds(1024, 1, 1);
76  const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
77 
78  hipLaunchKernelGGL(HIP_KERNEL_NAME(cfill_mask_kernel<real>),
79  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
80  (real*)a, *c, *size, (int*)mask, *mask_size);
81 
82  HIP_CHECK(hipGetLastError());
83  }
84 
88  void hip_rzero(void *a, int *n) {
89  HIP_CHECK(hipMemsetAsync(a, 0, (*n) * sizeof(real),
90  (hipStream_t) glb_cmd_queue));
91  }
92 
96  void hip_cmult(void *a, real *c, int *n) {
97 
98  const dim3 nthrds(1024, 1, 1);
99  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
100 
101  hipLaunchKernelGGL(HIP_KERNEL_NAME(cmult_kernel<real>),
102  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
103  (real *) a, *c, *n);
104  HIP_CHECK(hipGetLastError());
105 
106  }
107 
111  void hip_cmult2(void *a, void *b, real *c, int *n) {
112 
113  const dim3 nthrds(1024, 1, 1);
114  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
115 
116  hipLaunchKernelGGL(HIP_KERNEL_NAME(cmult2_kernel<real>),
117  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
118  (real *) a,(real *) b, *c, *n);
119  HIP_CHECK(hipGetLastError());
120 
121  }
125  void hip_cadd(void *a, real *c, int *n) {
126 
127  const dim3 nthrds(1024, 1, 1);
128  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
129 
130  hipLaunchKernelGGL(HIP_KERNEL_NAME(cadd_kernel<real>),
131  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
132  (real *) a, *c, *n);
133  HIP_CHECK(hipGetLastError());
134  }
135 
140  void hip_cadd2(void *a, void *b, real *c, int *n) {
141 
142  const dim3 nthrds(1024, 1, 1);
143  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
144 
145  hipLaunchKernelGGL(HIP_KERNEL_NAME(cadd2_kernel<real>),
146  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
147  (real *) a, (real *) b, *c, *n);
148  HIP_CHECK(hipGetLastError());
149  }
150 
154  void hip_cfill(void *a, real *c, int *n) {
155 
156  const dim3 nthrds(1024, 1, 1);
157  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
158 
159  hipLaunchKernelGGL(HIP_KERNEL_NAME(cfill_kernel<real>),
160  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
161  (real *) a, *c, *n);
162  HIP_CHECK(hipGetLastError());
163  }
164 
169  void hip_add2(void *a, void *b, int *n) {
170 
171  const dim3 nthrds(1024, 1, 1);
172  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
173 
174  hipLaunchKernelGGL(HIP_KERNEL_NAME(add2_kernel<real>),
175  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
176  (real *) a, (real *) b, *n);
177  HIP_CHECK(hipGetLastError());
178  }
179 
185  void hip_add2s1(void *a, void *b, real *c1, int *n) {
186 
187  const dim3 nthrds(1024, 1, 1);
188  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
189 
190  hipLaunchKernelGGL(HIP_KERNEL_NAME(add2s1_kernel<real>),
191  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
192  (real *) a, (real *) b,
193  *c1, *n);
194  HIP_CHECK(hipGetLastError());
195  }
196 
202  void hip_add2s2(void *a, void *b, real *c1, int *n) {
203 
204  const dim3 nthrds(1024, 1, 1);
205  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
206 
207  hipLaunchKernelGGL(HIP_KERNEL_NAME(add2s2_kernel<real>),
208  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
209  (real *) a, (real *) b,
210  *c1, *n);
211  HIP_CHECK(hipGetLastError());
212  }
213 
219  void hip_addsqr2s2(void *a, void *b, real *c1, int *n) {
220 
221  const dim3 nthrds(1024, 1, 1);
222  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
223 
224  hipLaunchKernelGGL(HIP_KERNEL_NAME(addsqr2s2_kernel<real>),
225  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
226  (real *) a, (real *) b,
227  *c1, *n);
228  HIP_CHECK(hipGetLastError());
229  }
230 
236  void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n) {
237 
238  const dim3 nthrds(1024, 1, 1);
239  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
240 
241  hipLaunchKernelGGL(HIP_KERNEL_NAME(add3s2_kernel<real>),
242  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
243  (real *) a, (real *) b, (real *) c,
244  *c1, *c2, *n);
245  HIP_CHECK(hipGetLastError());
246  }
247 
252  void hip_invcol1(void *a, int *n) {
253 
254  const dim3 nthrds(1024, 1, 1);
255  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
256 
257  hipLaunchKernelGGL(HIP_KERNEL_NAME(invcol1_kernel<real>),
258  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
259  (real *) a, *n);
260  HIP_CHECK(hipGetLastError());
261  }
262 
267  void hip_invcol2(void *a, void *b, int *n) {
268 
269  const dim3 nthrds(1024, 1, 1);
270  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
271 
272  hipLaunchKernelGGL(HIP_KERNEL_NAME(invcol2_kernel<real>),
273  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
274  (real *) a, (real *) b, *n);
275  HIP_CHECK(hipGetLastError());
276  }
277 
282  void hip_col2(void *a, void *b, int *n) {
283 
284  const dim3 nthrds(1024, 1, 1);
285  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
286 
287  hipLaunchKernelGGL(HIP_KERNEL_NAME(col2_kernel<real>),
288  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
289  (real *) a, (real *) b, *n);
290  HIP_CHECK(hipGetLastError());
291  }
292 
297  void hip_col3(void *a, void *b, void *c, int *n) {
298 
299  const dim3 nthrds(1024, 1, 1);
300  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
301 
302  hipLaunchKernelGGL(HIP_KERNEL_NAME(col3_kernel<real>),
303  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
304  (real *) a, (real *) b, (real *) c, *n);
305  HIP_CHECK(hipGetLastError());
306  }
307 
312  void hip_subcol3(void *a, void *b, void *c, int *n) {
313 
314  const dim3 nthrds(1024, 1, 1);
315  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
316 
317  hipLaunchKernelGGL(HIP_KERNEL_NAME(subcol3_kernel<real>),
318  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
319  (real *) a, (real *) b, (real *) c, *n);
320  HIP_CHECK(hipGetLastError());
321  }
322 
327  void hip_sub2(void *a, void *b, int *n) {
328 
329  const dim3 nthrds(1024, 1, 1);
330  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
331 
332  hipLaunchKernelGGL(HIP_KERNEL_NAME(sub2_kernel<real>),
333  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
334  (real *) a, (real *) b, *n);
335  HIP_CHECK(hipGetLastError());
336  }
337 
342  void hip_sub3(void *a, void *b, void *c, int *n) {
343 
344  const dim3 nthrds(1024, 1, 1);
345  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
346 
347  hipLaunchKernelGGL(HIP_KERNEL_NAME(sub3_kernel<real>),
348  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
349  (real *) a, (real *) b, (real *) c, *n);
350  HIP_CHECK(hipGetLastError());
351  }
352 
357  void hip_addcol3(void *a, void *b, void *c, int *n) {
358 
359  const dim3 nthrds(1024, 1, 1);
360  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
361 
362  hipLaunchKernelGGL(HIP_KERNEL_NAME(addcol3_kernel<real>),
363  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
364  (real *) a, (real *) b, (real *) c, *n);
365  HIP_CHECK(hipGetLastError());
366  }
367 
372  void hip_addcol4(void *a, void *b, void *c, void *d, int *n) {
373 
374  const dim3 nthrds(1024, 1, 1);
375  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
376 
377  hipLaunchKernelGGL(HIP_KERNEL_NAME(addcol4_kernel<real>),
378  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
379  (real *) a, (real *) b, (real *) c, (real *) d, *n);
380  HIP_CHECK(hipGetLastError());
381  }
382 
387  void hip_vdot3(void *dot, void *u1, void *u2, void *u3,
388  void *v1, void *v2, void *v3, int *n) {
389 
390  const dim3 nthrds(1024, 1, 1);
391  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
392 
393  hipLaunchKernelGGL(HIP_KERNEL_NAME(vdot3_kernel<real>),
394  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
395  (real *) dot, (real *) u1, (real *) u2, (real *) u3,
396  (real *) v1, (real *) v2, (real *) v3, *n);
397  HIP_CHECK(hipGetLastError());
398  }
399 
400  /*
401  * Reduction buffer
402  */
403  int red_s = 0;
404  real * bufred = NULL;
405  real * bufred_d = NULL;
406 
411  real hip_vlsc3(void *u, void *v, void *w, int *n) {
412 
413  const dim3 nthrds(1024, 1, 1);
414  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
415  const int nb = ((*n) + 1024 - 1)/ 1024;
416  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
417 
418  if ( nb > red_s){
419  red_s = nb;
420  if (bufred != NULL) {
421  HIP_CHECK(hipHostFree(bufred));
422  HIP_CHECK(hipFree(bufred_d));
423  }
424  HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
425  HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
426  }
427 
428  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_kernel<real>),
429  nblcks, nthrds, 0, stream,
430  (real *) u, (real *) v,
431  (real *) w, bufred_d, *n);
432  HIP_CHECK(hipGetLastError());
433  hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
434  1, 1024, 0, stream, bufred_d, nb);
435  HIP_CHECK(hipGetLastError());
436 
437  HIP_CHECK(hipMemcpyAsync(bufred, bufred_d, sizeof(real),
438  hipMemcpyDeviceToHost, stream));
439  HIP_CHECK(hipStreamSynchronize(stream));
440 
441  return bufred[0];
442  }
443 
444 
449  real hip_glsc3(void *a, void *b, void *c, int *n) {
450 
451  const dim3 nthrds(1024, 1, 1);
452  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
453  const int nb = ((*n) + 1024 - 1)/ 1024;
454  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
455 
456  if ( nb > red_s){
457  red_s = nb;
458  if (bufred != NULL) {
459  HIP_CHECK(hipHostFree(bufred));
460  HIP_CHECK(hipFree(bufred_d));
461  }
462  HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
463  HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
464  }
465 
466  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_kernel<real>),
467  nblcks, nthrds, 0, stream,
468  (real *) a, (real *) b,
469  (real *) c, bufred_d, *n);
470  HIP_CHECK(hipGetLastError());
471  hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
472  1, 1024, 0, stream, bufred_d, nb);
473  HIP_CHECK(hipGetLastError());
474 
475 #ifdef HAVE_DEVICE_MPI
476  HIP_CHECK(hipStreamSynchronize(stream));
478 #else
479  HIP_CHECK(hipMemcpyAsync(bufred, bufred_d, sizeof(real),
480  hipMemcpyDeviceToHost, stream));
481  HIP_CHECK(hipStreamSynchronize(stream));
482 #endif
483  return bufred[0];
484  }
485 
490  void hip_glsc3_many(real *h, void * w, void *v,void *mult, int *j, int *n){
491  int pow2 = 1;
492  while(pow2 < (*j)){
493  pow2 = 2*pow2;
494  }
495  const int nt = 1024/pow2;
496  const dim3 nthrds(pow2, nt, 1);
497  const dim3 nblcks(((*n)+nt - 1)/nt, 1, 1);
498  const dim3 nthrds_red(1024,1,1);
499  const dim3 nblcks_red( (*j),1,1);
500  const int nb = ((*n) + nt - 1)/nt;
501  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
502 
503  if((*j)*nb>red_s){
504  red_s = (*j)*nb;
505  if (bufred != NULL) {
506  HIP_CHECK(hipHostFree(bufred));
507  HIP_CHECK(hipFree(bufred_d));
508  }
509  HIP_CHECK(hipHostMalloc(&bufred,(*j)*nb*sizeof(real),hipHostMallocDefault));
510  HIP_CHECK(hipMalloc(&bufred_d, (*j)*nb*sizeof(real)));
511  }
512  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_many_kernel<real>),
513  nblcks, nthrds, 0, stream,
514  (const real *) w, (const real **) v,
515  (const real *)mult, bufred_d, *j, *n);
516  HIP_CHECK(hipGetLastError());
517 
518  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_reduce_kernel<real>),
519  nblcks_red, nthrds_red, 0, stream,
520  bufred_d, nb, *j);
521  HIP_CHECK(hipGetLastError());
522 
523 #ifdef HAVE_DEVICE_MPI
524  HIP_CHECK(hipStreamSynchronize(stream));
526 #else
527  HIP_CHECK(hipMemcpyAsync(h, bufred_d, (*j)* sizeof(real),
528  hipMemcpyDeviceToHost, stream));
529  HIP_CHECK(hipStreamSynchronize(stream));
530 #endif
531  }
532 
539  void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n) {
540 
541  const dim3 nthrds(1024, 1, 1);
542  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
543 
544  hipLaunchKernelGGL(HIP_KERNEL_NAME(add2s2_many_kernel<real>),
545  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
546  (real *) x, (const real **) p, (real *) alpha, *j, *n);
547  HIP_CHECK(hipGetLastError());
548 
549  }
550 
555  void hip_add3(void *a, void *b, void *c, int *n) {
556 
557  const dim3 nthrds(1024, 1, 1);
558  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
559 
560  hipLaunchKernelGGL(HIP_KERNEL_NAME(add3_kernel<real>),
561  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
562  (real *) a, (real *) b, (real *) c, *n);
563  HIP_CHECK(hipGetLastError());
564  }
565 
570  real hip_glsc2(void *a, void *b, int *n) {
571 
572  const dim3 nthrds(1024, 1, 1);
573  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
574  const int nb = ((*n) + 1024 - 1)/ 1024;
575  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
576 
577  if ( nb > red_s){
578  red_s = nb;
579  if (bufred != NULL) {
580  HIP_CHECK(hipHostFree(bufred));
581  HIP_CHECK(hipFree(bufred_d));
582  }
583  HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
584  HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
585  }
586 
587  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc2_kernel<real>),
588  nblcks, nthrds, 0, stream,
589  (real *) a, (real *) b, bufred_d, *n);
590  HIP_CHECK(hipGetLastError());
591  hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
592  1, 1024, 0, stream, bufred_d, nb);
593  HIP_CHECK(hipGetLastError());
594 
595 #ifdef HAVE_DEVICE_MPI
596  HIP_CHECK(hipStreamSynchronize(stream));
598 #else
599  HIP_CHECK(hipMemcpyAsync(bufred, bufred_d, sizeof(real),
600  hipMemcpyDeviceToHost, stream));
601  HIP_CHECK(hipStreamSynchronize(stream));
602 #endif
603  return bufred[0];
604  }
605 
610  real hip_glsum(void *a, int *n) {
611  const dim3 nthrds(1024, 1, 1);
612  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
613  const int nb = ((*n) + 1024 - 1)/ 1024;
614  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
615 
616  if ( nb > red_s){
617  red_s = nb;
618  if (bufred != NULL) {
619  HIP_CHECK(hipHostFree(bufred));
620  HIP_CHECK(hipFree(bufred_d));
621  }
622  HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
623  HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
624  }
625 
626  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsum_kernel<real>),
627  nblcks, nthrds, 0, stream,
628  (real *) a, bufred_d, *n);
629  HIP_CHECK(hipGetLastError());
630  hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
631  1, 1024, 0, stream, bufred_d, nb);
632  HIP_CHECK(hipGetLastError());
633 
634 #ifdef HAVE_DEVICE_MPI
635  HIP_CHECK(hipStreamSynchronize(stream));
637 #else
638  HIP_CHECK(hipMemcpyAsync(bufred, bufred_d,sizeof(real),
639  hipMemcpyDeviceToHost, stream));
640  HIP_CHECK(hipStreamSynchronize(stream));
641 #endif
642  return bufred[0];
643  }
644 }
__global__ void const T *__restrict__ x
Definition: cdtp_kernel.h:106
const int j
Definition: cdtp_kernel.h:127
__global__ void const T *__restrict__ u
Definition: conv1_kernel.h:132
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void const T *__restrict__ const T *__restrict__ v
double real
Definition: device_config.h:12
void * glb_cmd_queue
#define DEVICE_MPI_SUM
Definition: device_mpi_op.h:9
void device_mpi_allreduce(void *buf_d, void *buf, int count, int nbytes, int op)
#define HIP_CHECK(err)
Definition: check.h:8
void hip_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
Definition: math.hip:387
void hip_cmult2(void *a, void *b, real *c, int *n)
Definition: math.hip:111
real hip_glsc3(void *a, void *b, void *c, int *n)
Definition: math.hip:449
void hip_cfill_mask(void *a, real *c, int *size, void *mask, int *mask_size)
Definition: math.hip:73
void hip_invcol2(void *a, void *b, int *n)
Definition: math.hip:267
void hip_cadd2(void *a, void *b, real *c, int *n)
Definition: math.hip:140
void hip_invcol1(void *a, int *n)
Definition: math.hip:252
void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
Definition: math.hip:236
void hip_subcol3(void *a, void *b, void *c, int *n)
Definition: math.hip:312
void hip_col3(void *a, void *b, void *c, int *n)
Definition: math.hip:297
real hip_glsc2(void *a, void *b, int *n)
Definition: math.hip:570
void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m)
Definition: math.hip:57
void hip_copy(void *a, void *b, int *n)
Definition: math.hip:48
void hip_add2(void *a, void *b, int *n)
Definition: math.hip:169
real hip_vlsc3(void *u, void *v, void *w, int *n)
Definition: math.hip:411
void hip_add3(void *a, void *b, void *c, int *n)
Definition: math.hip:555
real * bufred
Definition: math.hip:404
void hip_addsqr2s2(void *a, void *b, real *c1, int *n)
Definition: math.hip:219
void hip_add2s2(void *a, void *b, real *c1, int *n)
Definition: math.hip:202
void hip_rzero(void *a, int *n)
Definition: math.hip:88
void hip_sub2(void *a, void *b, int *n)
Definition: math.hip:327
void hip_cadd(void *a, real *c, int *n)
Definition: math.hip:125
real hip_glsum(void *a, int *n)
Definition: math.hip:610
int red_s
Definition: math.hip:403
void hip_addcol3(void *a, void *b, void *c, int *n)
Definition: math.hip:357
void hip_cfill(void *a, real *c, int *n)
Definition: math.hip:154
void hip_col2(void *a, void *b, int *n)
Definition: math.hip:282
void hip_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
Definition: math.hip:490
void hip_sub3(void *a, void *b, void *c, int *n)
Definition: math.hip:342
void hip_cmult(void *a, real *c, int *n)
Definition: math.hip:96
void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n)
Definition: math.hip:539
void hip_addcol4(void *a, void *b, void *c, void *d, int *n)
Definition: math.hip:372
real * bufred_d
Definition: math.hip:405
void hip_add2s1(void *a, void *b, real *c1, int *n)
Definition: math.hip:185