Neko  0.8.1
A portable framework for high-order spectral element flow simulations
math.hip
Go to the documentation of this file.
1 /*
2  Copyright (c) 2021-2023, The Neko Authors
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions
7  are met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above
13  copyright notice, this list of conditions and the following
14  disclaimer in the documentation and/or other materials provided
15  with the distribution.
16 
17  * Neither the name of the authors nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include <hip/hip_runtime.h>
36 #include <device/device_config.h>
37 #include <device/hip/check.h>
38 #include "math_kernel.h"
39 
40 extern "C" {
41 
44 
48  void hip_copy(void *a, void *b, int *n) {
49  HIP_CHECK(hipMemcpyAsync(a, b, (*n) * sizeof(real),
50  hipMemcpyDeviceToDevice,
51  (hipStream_t) glb_cmd_queue));
52  }
53 
57  void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m) {
58 
59  const dim3 nthrds(1024, 1, 1);
60  const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
61 
62  hipLaunchKernelGGL(HIP_KERNEL_NAME(masked_copy_kernel<real>),
63  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
64  (real *) a, (real *) b, (int *) mask, *n, *m);
65 
66  HIP_CHECK(hipGetLastError());
67 
68  }
72  void hip_rzero(void *a, int *n) {
73  HIP_CHECK(hipMemsetAsync(a, 0, (*n) * sizeof(real),
74  (hipStream_t) glb_cmd_queue));
75  }
76 
80  void hip_cmult(void *a, real *c, int *n) {
81 
82  const dim3 nthrds(1024, 1, 1);
83  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
84 
85  hipLaunchKernelGGL(HIP_KERNEL_NAME(cmult_kernel<real>),
86  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
87  (real *) a, *c, *n);
88  HIP_CHECK(hipGetLastError());
89 
90  }
91 
95  void hip_cmult2(void *a, void *b, real *c, int *n) {
96 
97  const dim3 nthrds(1024, 1, 1);
98  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
99 
100  hipLaunchKernelGGL(HIP_KERNEL_NAME(cmult2_kernel<real>),
101  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
102  (real *) a,(real *) b, *c, *n);
103  HIP_CHECK(hipGetLastError());
104 
105  }
109  void hip_cadd(void *a, real *c, int *n) {
110 
111  const dim3 nthrds(1024, 1, 1);
112  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
113 
114  hipLaunchKernelGGL(HIP_KERNEL_NAME(cadd_kernel<real>),
115  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
116  (real *) a, *c, *n);
117  HIP_CHECK(hipGetLastError());
118  }
119 
123  void hip_cfill(void *a, real *c, int *n) {
124 
125  const dim3 nthrds(1024, 1, 1);
126  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
127 
128  hipLaunchKernelGGL(HIP_KERNEL_NAME(cfill_kernel<real>),
129  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
130  (real *) a, *c, *n);
131  HIP_CHECK(hipGetLastError());
132  }
133 
138  void hip_add2(void *a, void *b, int *n) {
139 
140  const dim3 nthrds(1024, 1, 1);
141  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
142 
143  hipLaunchKernelGGL(HIP_KERNEL_NAME(add2_kernel<real>),
144  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
145  (real *) a, (real *) b, *n);
146  HIP_CHECK(hipGetLastError());
147  }
148 
154  void hip_add2s1(void *a, void *b, real *c1, int *n) {
155 
156  const dim3 nthrds(1024, 1, 1);
157  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
158 
159  hipLaunchKernelGGL(HIP_KERNEL_NAME(add2s1_kernel<real>),
160  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
161  (real *) a, (real *) b,
162  *c1, *n);
163  HIP_CHECK(hipGetLastError());
164  }
165 
171  void hip_add2s2(void *a, void *b, real *c1, int *n) {
172 
173  const dim3 nthrds(1024, 1, 1);
174  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
175 
176  hipLaunchKernelGGL(HIP_KERNEL_NAME(add2s2_kernel<real>),
177  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
178  (real *) a, (real *) b,
179  *c1, *n);
180  HIP_CHECK(hipGetLastError());
181  }
182 
188  void hip_addsqr2s2(void *a, void *b, real *c1, int *n) {
189 
190  const dim3 nthrds(1024, 1, 1);
191  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
192 
193  hipLaunchKernelGGL(HIP_KERNEL_NAME(addsqr2s2_kernel<real>),
194  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
195  (real *) a, (real *) b,
196  *c1, *n);
197  HIP_CHECK(hipGetLastError());
198  }
199 
205  void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n) {
206 
207  const dim3 nthrds(1024, 1, 1);
208  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
209 
210  hipLaunchKernelGGL(HIP_KERNEL_NAME(add3s2_kernel<real>),
211  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
212  (real *) a, (real *) b, (real *) c,
213  *c1, *c2, *n);
214  HIP_CHECK(hipGetLastError());
215  }
216 
221  void hip_invcol1(void *a, int *n) {
222 
223  const dim3 nthrds(1024, 1, 1);
224  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
225 
226  hipLaunchKernelGGL(HIP_KERNEL_NAME(invcol1_kernel<real>),
227  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
228  (real *) a, *n);
229  HIP_CHECK(hipGetLastError());
230  }
231 
236  void hip_invcol2(void *a, void *b, int *n) {
237 
238  const dim3 nthrds(1024, 1, 1);
239  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
240 
241  hipLaunchKernelGGL(HIP_KERNEL_NAME(invcol2_kernel<real>),
242  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
243  (real *) a, (real *) b, *n);
244  HIP_CHECK(hipGetLastError());
245  }
246 
251  void hip_col2(void *a, void *b, int *n) {
252 
253  const dim3 nthrds(1024, 1, 1);
254  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
255 
256  hipLaunchKernelGGL(HIP_KERNEL_NAME(col2_kernel<real>),
257  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
258  (real *) a, (real *) b, *n);
259  HIP_CHECK(hipGetLastError());
260  }
261 
266  void hip_col3(void *a, void *b, void *c, int *n) {
267 
268  const dim3 nthrds(1024, 1, 1);
269  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
270 
271  hipLaunchKernelGGL(HIP_KERNEL_NAME(col3_kernel<real>),
272  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
273  (real *) a, (real *) b, (real *) c, *n);
274  HIP_CHECK(hipGetLastError());
275  }
276 
281  void hip_subcol3(void *a, void *b, void *c, int *n) {
282 
283  const dim3 nthrds(1024, 1, 1);
284  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
285 
286  hipLaunchKernelGGL(HIP_KERNEL_NAME(subcol3_kernel<real>),
287  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
288  (real *) a, (real *) b, (real *) c, *n);
289  HIP_CHECK(hipGetLastError());
290  }
291 
296  void hip_sub2(void *a, void *b, int *n) {
297 
298  const dim3 nthrds(1024, 1, 1);
299  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
300 
301  hipLaunchKernelGGL(HIP_KERNEL_NAME(sub2_kernel<real>),
302  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
303  (real *) a, (real *) b, *n);
304  HIP_CHECK(hipGetLastError());
305  }
306 
311  void hip_sub3(void *a, void *b, void *c, int *n) {
312 
313  const dim3 nthrds(1024, 1, 1);
314  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
315 
316  hipLaunchKernelGGL(HIP_KERNEL_NAME(sub3_kernel<real>),
317  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
318  (real *) a, (real *) b, (real *) c, *n);
319  HIP_CHECK(hipGetLastError());
320  }
321 
326  void hip_addcol3(void *a, void *b, void *c, int *n) {
327 
328  const dim3 nthrds(1024, 1, 1);
329  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
330 
331  hipLaunchKernelGGL(HIP_KERNEL_NAME(addcol3_kernel<real>),
332  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
333  (real *) a, (real *) b, (real *) c, *n);
334  HIP_CHECK(hipGetLastError());
335  }
336 
341  void hip_addcol4(void *a, void *b, void *c, void *d, int *n) {
342 
343  const dim3 nthrds(1024, 1, 1);
344  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
345 
346  hipLaunchKernelGGL(HIP_KERNEL_NAME(addcol4_kernel<real>),
347  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
348  (real *) a, (real *) b, (real *) c, (real *) d, *n);
349  HIP_CHECK(hipGetLastError());
350  }
351 
356  void hip_vdot3(void *dot, void *u1, void *u2, void *u3,
357  void *v1, void *v2, void *v3, int *n) {
358 
359  const dim3 nthrds(1024, 1, 1);
360  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
361 
362  hipLaunchKernelGGL(HIP_KERNEL_NAME(vdot3_kernel<real>),
363  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
364  (real *) dot, (real *) u1, (real *) u2, (real *) u3,
365  (real *) v1, (real *) v2, (real *) v3, *n);
366  HIP_CHECK(hipGetLastError());
367  }
368 
369  /*
370  * Reduction buffer
371  */
372  int red_s = 0;
373  real * bufred = NULL;
374  real * bufred_d = NULL;
375 
380  real hip_vlsc3(void *u, void *v, void *w, int *n) {
381 
382  const dim3 nthrds(1024, 1, 1);
383  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
384  const int nb = ((*n) + 1024 - 1)/ 1024;
385  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
386 
387  if ( nb > red_s){
388  red_s = nb;
389  if (bufred != NULL) {
390  HIP_CHECK(hipHostFree(bufred));
391  HIP_CHECK(hipFree(bufred_d));
392  }
393  HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
394  HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
395  }
396 
397  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_kernel<real>),
398  nblcks, nthrds, 0, stream,
399  (real *) u, (real *) v,
400  (real *) w, bufred_d, *n);
401  HIP_CHECK(hipGetLastError());
402  hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
403  1, 1024, 0, stream, bufred_d, nb);
404  HIP_CHECK(hipGetLastError());
405 
406  HIP_CHECK(hipMemcpyAsync(bufred, bufred_d, sizeof(real),
407  hipMemcpyDeviceToHost, stream));
408  hipStreamSynchronize(stream);
409 
410  return bufred[0];
411  }
412 
413 
418  real hip_glsc3(void *a, void *b, void *c, int *n) {
419 
420  const dim3 nthrds(1024, 1, 1);
421  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
422  const int nb = ((*n) + 1024 - 1)/ 1024;
423  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
424 
425  if ( nb > red_s){
426  red_s = nb;
427  if (bufred != NULL) {
428  HIP_CHECK(hipHostFree(bufred));
429  HIP_CHECK(hipFree(bufred_d));
430  }
431  HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
432  HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
433  }
434 
435  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_kernel<real>),
436  nblcks, nthrds, 0, stream,
437  (real *) a, (real *) b,
438  (real *) c, bufred_d, *n);
439  HIP_CHECK(hipGetLastError());
440  hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
441  1, 1024, 0, stream, bufred_d, nb);
442  HIP_CHECK(hipGetLastError());
443 
444 #ifdef HAVE_DEVICE_MPI
445  hipStreamSynchronize(stream);
447 #else
448  HIP_CHECK(hipMemcpyAsync(bufred, bufred_d, sizeof(real),
449  hipMemcpyDeviceToHost, stream));
450  hipStreamSynchronize(stream);
451 #endif
452  return bufred[0];
453  }
454 
459  void hip_glsc3_many(real *h, void * w, void *v,void *mult, int *j, int *n){
460  int pow2 = 1;
461  while(pow2 < (*j)){
462  pow2 = 2*pow2;
463  }
464  const int nt = 1024/pow2;
465  const dim3 nthrds(pow2, nt, 1);
466  const dim3 nblcks(((*n)+nt - 1)/nt, 1, 1);
467  const dim3 nthrds_red(1024,1,1);
468  const dim3 nblcks_red( (*j),1,1);
469  const int nb = ((*n) + nt - 1)/nt;
470  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
471 
472  if((*j)*nb>red_s){
473  red_s = (*j)*nb;
474  if (bufred != NULL) {
475  HIP_CHECK(hipHostFree(bufred));
476  HIP_CHECK(hipFree(bufred_d));
477  }
478  HIP_CHECK(hipHostMalloc(&bufred,(*j)*nb*sizeof(real),hipHostMallocDefault));
479  HIP_CHECK(hipMalloc(&bufred_d, (*j)*nb*sizeof(real)));
480  }
481  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_many_kernel<real>),
482  nblcks, nthrds, 0, stream,
483  (const real *) w, (const real **) v,
484  (const real *)mult, bufred_d, *j, *n);
485  HIP_CHECK(hipGetLastError());
486 
487  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_reduce_kernel<real>),
488  nblcks_red, nthrds_red, 0, stream,
489  bufred_d, nb, *j);
490  HIP_CHECK(hipGetLastError());
491 
492 #ifdef HAVE_DEVICE_MPI
493  hipStreamSynchronize(stream);
495 #else
496  HIP_CHECK(hipMemcpyAsync(h, bufred_d, (*j)* sizeof(real),
497  hipMemcpyDeviceToHost, stream));
498  hipStreamSynchronize(stream);
499 #endif
500  }
501 
508  void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n) {
509 
510  const dim3 nthrds(1024, 1, 1);
511  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
512 
513  hipLaunchKernelGGL(HIP_KERNEL_NAME(add2s2_many_kernel<real>),
514  nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
515  (real *) x, (const real **) p, (real *) alpha, *j, *n);
516  HIP_CHECK(hipGetLastError());
517 
518  }
519 
524  real hip_glsc2(void *a, void *b, int *n) {
525 
526  const dim3 nthrds(1024, 1, 1);
527  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
528  const int nb = ((*n) + 1024 - 1)/ 1024;
529  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
530 
531  if ( nb > red_s){
532  red_s = nb;
533  if (bufred != NULL) {
534  HIP_CHECK(hipHostFree(bufred));
535  HIP_CHECK(hipFree(bufred_d));
536  }
537  HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
538  HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
539  }
540 
541  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc2_kernel<real>),
542  nblcks, nthrds, 0, stream,
543  (real *) a, (real *) b, bufred_d, *n);
544  HIP_CHECK(hipGetLastError());
545  hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
546  1, 1024, 0, stream, bufred_d, nb);
547  HIP_CHECK(hipGetLastError());
548 
549 #ifdef HAVE_DEVICE_MPI
550  hipStreamSynchronize(stream);
552 #else
553  HIP_CHECK(hipMemcpyAsync(bufred, bufred_d, sizeof(real),
554  hipMemcpyDeviceToHost, stream));
555  hipStreamSynchronize(stream);
556 #endif
557  return bufred[0];
558  }
559 
564  real hip_glsum(void *a, int *n) {
565  const dim3 nthrds(1024, 1, 1);
566  const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
567  const int nb = ((*n) + 1024 - 1)/ 1024;
568  const hipStream_t stream = (hipStream_t) glb_cmd_queue;
569 
570  if ( nb > red_s){
571  red_s = nb;
572  if (bufred != NULL) {
573  HIP_CHECK(hipHostFree(bufred));
574  HIP_CHECK(hipFree(bufred_d));
575  }
576  HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
577  HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
578  }
579 
580  hipLaunchKernelGGL(HIP_KERNEL_NAME(glsum_kernel<real>),
581  nblcks, nthrds, 0, stream,
582  (real *) a, bufred_d, *n);
583  HIP_CHECK(hipGetLastError());
584  hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
585  1, 1024, 0, stream, bufred_d, nb);
586  HIP_CHECK(hipGetLastError());
587 
588 #ifdef HAVE_DEVICE_MPI
589  hipStreamSynchronize(stream);
591 #else
592  HIP_CHECK(hipMemcpyAsync(bufred, bufred_d,sizeof(real),
593  hipMemcpyDeviceToHost, stream));
594  hipStreamSynchronize(stream);
595 #endif
596  return bufred[0];
597  }
598 }
__global__ void const T *__restrict__ x
Definition: cdtp_kernel.h:109
const int j
Definition: cdtp_kernel.h:131
__global__ void const T *__restrict__ u
Definition: conv1_kernel.h:132
__global__ void const T *__restrict__ const T *__restrict__ const T *__restrict__ w
__global__ void const T *__restrict__ const T *__restrict__ v
double real
Definition: device_config.h:12
void * glb_cmd_queue
#define DEVICE_MPI_SUM
Definition: device_mpi_op.h:9
void device_mpi_allreduce(void *buf_d, void *buf, int count, int nbytes, int op)
#define HIP_CHECK(err)
Definition: check.h:8
void hip_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
Definition: math.hip:356
void hip_cmult2(void *a, void *b, real *c, int *n)
Definition: math.hip:95
real hip_glsc3(void *a, void *b, void *c, int *n)
Definition: math.hip:418
void hip_invcol2(void *a, void *b, int *n)
Definition: math.hip:236
void hip_invcol1(void *a, int *n)
Definition: math.hip:221
void hip_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
Definition: math.hip:205
void hip_subcol3(void *a, void *b, void *c, int *n)
Definition: math.hip:281
void hip_col3(void *a, void *b, void *c, int *n)
Definition: math.hip:266
real hip_glsc2(void *a, void *b, int *n)
Definition: math.hip:524
void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m)
Definition: math.hip:57
void hip_copy(void *a, void *b, int *n)
Definition: math.hip:48
void hip_add2(void *a, void *b, int *n)
Definition: math.hip:138
real hip_vlsc3(void *u, void *v, void *w, int *n)
Definition: math.hip:380
real * bufred
Definition: math.hip:373
void hip_addsqr2s2(void *a, void *b, real *c1, int *n)
Definition: math.hip:188
void hip_add2s2(void *a, void *b, real *c1, int *n)
Definition: math.hip:171
void hip_rzero(void *a, int *n)
Definition: math.hip:72
void hip_sub2(void *a, void *b, int *n)
Definition: math.hip:296
void hip_cadd(void *a, real *c, int *n)
Definition: math.hip:109
real hip_glsum(void *a, int *n)
Definition: math.hip:564
int red_s
Definition: math.hip:372
void hip_addcol3(void *a, void *b, void *c, int *n)
Definition: math.hip:326
void hip_cfill(void *a, real *c, int *n)
Definition: math.hip:123
void hip_col2(void *a, void *b, int *n)
Definition: math.hip:251
void hip_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
Definition: math.hip:459
void hip_sub3(void *a, void *b, void *c, int *n)
Definition: math.hip:311
void hip_cmult(void *a, real *c, int *n)
Definition: math.hip:80
void hip_add2s2_many(void *x, void **p, void *alpha, int *j, int *n)
Definition: math.hip:508
void hip_addcol4(void *a, void *b, void *c, void *d, int *n)
Definition: math.hip:341
real * bufred_d
Definition: math.hip:374
void hip_add2s1(void *a, void *b, real *c1, int *n)
Definition: math.hip:154