37 use,
intrinsic :: iso_c_binding
44 bind(c, name=
'hip_copy')
45 use,
intrinsic :: iso_c_binding
46 type(c_ptr),
value :: a_d, b_d
53 bind(c, name=
'hip_masked_copy')
54 use,
intrinsic :: iso_c_binding
55 type(c_ptr),
value :: a_d, b_d, mask_d
56 integer(c_int) :: n, m
62 bind(c, name=
'hip_cfill_mask')
63 use,
intrinsic :: iso_c_binding
65 type(c_ptr),
value :: a_d
67 integer(c_int) :: size
68 type(c_ptr),
value :: mask_d
69 integer(c_int) :: mask_size
75 bind(c, name=
'hip_cmult')
76 use,
intrinsic :: iso_c_binding
78 type(c_ptr),
value :: a_d
86 bind(c, name=
'hip_cmult2')
87 use,
intrinsic :: iso_c_binding
89 type(c_ptr),
value :: a_d, b_d
97 bind(c, name=
'hip_cadd')
98 use,
intrinsic :: iso_c_binding
100 type(c_ptr),
value :: a_d
108 bind(c, name=
'hip_cadd2')
109 use,
intrinsic :: iso_c_binding
111 type(c_ptr),
value :: a_d
112 type(c_ptr),
value :: b_d
120 bind(c, name=
'hip_cfill')
121 use,
intrinsic :: iso_c_binding
123 type(c_ptr),
value :: a_d
131 bind(c, name=
'hip_rzero')
132 use,
intrinsic :: iso_c_binding
133 type(c_ptr),
value :: a_d
140 bind(c, name=
'hip_add2')
141 use,
intrinsic :: iso_c_binding
144 type(c_ptr),
value :: a_d, b_d
151 bind(c, name=
'hip_add2s1')
152 use,
intrinsic :: iso_c_binding
155 type(c_ptr),
value :: a_d, b_d
163 bind(c, name=
'hip_add2s2')
164 use,
intrinsic :: iso_c_binding
167 type(c_ptr),
value :: a_d, b_d
175 bind(c, name=
'hip_add2s2_many')
176 use,
intrinsic :: iso_c_binding
179 type(c_ptr),
value :: y_d, x_d_d, a_d
180 integer(c_int) :: j, n
186 bind(c, name=
'hip_addsqr2s2')
187 use,
intrinsic :: iso_c_binding
190 type(c_ptr),
value :: a_d, b_d
198 bind(c, name=
'hip_add3s2')
199 use,
intrinsic :: iso_c_binding
202 type(c_ptr),
value :: a_d, b_d, c_d
210 bind(c, name=
'hip_invcol1')
211 use,
intrinsic :: iso_c_binding
213 type(c_ptr),
value :: a_d
220 bind(c, name=
'hip_invcol2')
221 use,
intrinsic :: iso_c_binding
223 type(c_ptr),
value :: a_d, b_d
230 bind(c, name=
'hip_col2')
231 use,
intrinsic :: iso_c_binding
233 type(c_ptr),
value :: a_d, b_d
240 bind(c, name=
'hip_col3')
241 use,
intrinsic :: iso_c_binding
243 type(c_ptr),
value :: a_d, b_d, c_d
250 bind(c, name=
'hip_subcol3')
251 use,
intrinsic :: iso_c_binding
253 type(c_ptr),
value :: a_d, b_d, c_d
260 bind(c, name=
'hip_sub2')
261 use,
intrinsic :: iso_c_binding
263 type(c_ptr),
value :: a_d, b_d
270 bind(c, name=
'hip_sub3')
271 use,
intrinsic :: iso_c_binding
273 type(c_ptr),
value :: a_d, b_d, c_d
280 bind(c, name=
'hip_add3')
281 use,
intrinsic :: iso_c_binding
283 type(c_ptr),
value :: a_d, b_d, c_d
290 bind(c, name=
'hip_addcol3')
291 use,
intrinsic :: iso_c_binding
293 type(c_ptr),
value :: a_d, b_d, c_d
300 bind(c, name=
'hip_addcol4')
301 use,
intrinsic :: iso_c_binding
303 type(c_ptr),
value :: a_d, b_d, c_d, d_d
309 subroutine hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
310 bind(c, name=
'hip_vdot3')
311 use,
intrinsic :: iso_c_binding
313 type(c_ptr),
value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
320 bind(c, name=
'hip_vlsc3')
321 use,
intrinsic :: iso_c_binding
324 type(c_ptr),
value :: u_d, v_d, w_d
331 bind(c, name=
'hip_glsc3')
332 use,
intrinsic :: iso_c_binding
335 type(c_ptr),
value :: a_d, b_d, c_d
342 bind(c, name=
'hip_glsc3_many')
343 use,
intrinsic :: iso_c_binding
346 type(c_ptr),
value :: w_d, v_d_d, mult_d
347 integer(c_int) :: j, n
354 bind(c, name=
'hip_glsc2')
355 use,
intrinsic :: iso_c_binding
358 type(c_ptr),
value :: a_d, b_d
365 bind(c, name=
'hip_glsum')
366 use,
intrinsic :: iso_c_binding
369 type(c_ptr),
value :: a_d
376 bind(c, name=
'cuda_copy')
377 use,
intrinsic :: iso_c_binding
378 type(c_ptr),
value :: a_d, b_d
385 bind(c, name=
'cuda_masked_copy')
386 use,
intrinsic :: iso_c_binding
387 type(c_ptr),
value :: a_d, b_d, mask_d
388 integer(c_int) :: n, m
394 bind(c, name=
'cuda_cfill_mask')
395 use,
intrinsic :: iso_c_binding
397 type(c_ptr),
value :: a_d
399 integer(c_int) :: size
400 type(c_ptr),
value :: mask_d
401 integer(c_int) :: mask_size
407 bind(c, name=
'cuda_cmult')
408 use,
intrinsic :: iso_c_binding
410 type(c_ptr),
value :: a_d
418 bind(c, name=
'cuda_cmult2')
419 use,
intrinsic :: iso_c_binding
421 type(c_ptr),
value :: a_d, b_d
429 bind(c, name=
'cuda_cadd')
430 use,
intrinsic :: iso_c_binding
432 type(c_ptr),
value :: a_d
440 bind(c, name=
'cuda_cadd2')
441 use,
intrinsic :: iso_c_binding
443 type(c_ptr),
value :: a_d
444 type(c_ptr),
value :: b_d
452 bind(c, name=
'cuda_cfill')
453 use,
intrinsic :: iso_c_binding
455 type(c_ptr),
value :: a_d
463 bind(c, name=
'cuda_rzero')
464 use,
intrinsic :: iso_c_binding
465 type(c_ptr),
value :: a_d
472 bind(c, name=
'cuda_add2')
473 use,
intrinsic :: iso_c_binding
476 type(c_ptr),
value :: a_d, b_d
483 bind(c, name=
'cuda_add2s1')
484 use,
intrinsic :: iso_c_binding
487 type(c_ptr),
value :: a_d, b_d
495 bind(c, name=
'cuda_add2s2')
496 use,
intrinsic :: iso_c_binding
499 type(c_ptr),
value :: a_d, b_d
507 bind(c, name=
'cuda_addsqr2s2')
508 use,
intrinsic :: iso_c_binding
511 type(c_ptr),
value :: a_d, b_d
519 bind(c, name=
'cuda_add3s2')
520 use,
intrinsic :: iso_c_binding
523 type(c_ptr),
value :: a_d, b_d, c_d
531 bind(c, name=
'cuda_invcol1')
532 use,
intrinsic :: iso_c_binding
534 type(c_ptr),
value :: a_d
541 bind(c, name=
'cuda_invcol2')
542 use,
intrinsic :: iso_c_binding
544 type(c_ptr),
value :: a_d, b_d
551 bind(c, name=
'cuda_col2')
552 use,
intrinsic :: iso_c_binding
554 type(c_ptr),
value :: a_d, b_d
561 bind(c, name=
'cuda_col3')
562 use,
intrinsic :: iso_c_binding
564 type(c_ptr),
value :: a_d, b_d, c_d
571 bind(c, name=
'cuda_subcol3')
572 use,
intrinsic :: iso_c_binding
574 type(c_ptr),
value :: a_d, b_d, c_d
581 bind(c, name=
'cuda_sub2')
582 use,
intrinsic :: iso_c_binding
584 type(c_ptr),
value :: a_d, b_d
591 bind(c, name=
'cuda_sub3')
592 use,
intrinsic :: iso_c_binding
594 type(c_ptr),
value :: a_d, b_d, c_d
601 bind(c, name=
'cuda_add3')
602 use,
intrinsic :: iso_c_binding
604 type(c_ptr),
value :: a_d, b_d, c_d
611 bind(c, name=
'cuda_addcol3')
612 use,
intrinsic :: iso_c_binding
614 type(c_ptr),
value :: a_d, b_d, c_d
621 bind(c, name=
'cuda_addcol4')
622 use,
intrinsic :: iso_c_binding
624 type(c_ptr),
value :: a_d, b_d, c_d, d_d
630 subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
631 bind(c, name=
'cuda_vdot3')
632 use,
intrinsic :: iso_c_binding
634 type(c_ptr),
value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
640 real(c_rp) function
cuda_vlsc3(u_d, v_d, w_d, n) &
641 bind(c, name=
'cuda_vlsc3')
642 use,
intrinsic :: iso_c_binding
645 type(c_ptr),
value :: u_d, v_d, w_d
652 bind(c, name=
'cuda_add2s2_many')
653 use,
intrinsic :: iso_c_binding
656 type(c_ptr),
value :: y_d, x_d_d, a_d
657 integer(c_int) :: j, n
662 real(c_rp) function
cuda_glsc3(a_d, b_d, c_d, n) &
663 bind(c, name=
'cuda_glsc3')
664 use,
intrinsic :: iso_c_binding
667 type(c_ptr),
value :: a_d, b_d, c_d
674 bind(c, name=
'cuda_glsc3_many')
675 use,
intrinsic :: iso_c_binding
678 type(c_ptr),
value :: w_d, v_d_d, mult_d
679 integer(c_int) :: j, n
686 bind(c, name=
'cuda_glsc2')
687 use,
intrinsic :: iso_c_binding
690 type(c_ptr),
value :: a_d, b_d
697 bind(c, name=
'cuda_glsum')
698 use,
intrinsic :: iso_c_binding
701 type(c_ptr),
value :: a_d
708 bind(c, name=
'opencl_copy')
709 use,
intrinsic :: iso_c_binding
710 type(c_ptr),
value :: a_d, b_d
717 bind(c, name=
'opencl_masked_copy')
718 use,
intrinsic :: iso_c_binding
719 type(c_ptr),
value :: a_d, b_d, mask_d
720 integer(c_int) :: n, m
726 bind(c, name=
'opencl_cfill_mask')
727 use,
intrinsic :: iso_c_binding
729 type(c_ptr),
value :: a_d
731 integer(c_int) :: size
732 type(c_ptr),
value :: mask_d
733 integer(c_int) :: mask_size
739 bind(c, name=
'opencl_cmult')
740 use,
intrinsic :: iso_c_binding
742 type(c_ptr),
value :: a_d
750 bind(c, name=
'opencl_cmult2')
751 use,
intrinsic :: iso_c_binding
753 type(c_ptr),
value :: a_d, b_d
761 bind(c, name=
'opencl_cadd')
762 use,
intrinsic :: iso_c_binding
764 type(c_ptr),
value :: a_d
772 bind(c, name=
'opencl_cadd2')
773 use,
intrinsic :: iso_c_binding
775 type(c_ptr),
value :: a_d
776 type(c_ptr),
value :: b_d
784 bind(c, name=
'opencl_cfill')
785 use,
intrinsic :: iso_c_binding
787 type(c_ptr),
value :: a_d
795 bind(c, name=
'opencl_rzero')
796 use,
intrinsic :: iso_c_binding
797 type(c_ptr),
value :: a_d
804 bind(c, name=
'opencl_rone')
805 use,
intrinsic :: iso_c_binding
806 type(c_ptr),
value :: a_d
813 bind(c, name=
'opencl_add2')
814 use,
intrinsic :: iso_c_binding
816 type(c_ptr),
value :: a_d, b_d
823 bind(c, name=
'opencl_add2s1')
824 use,
intrinsic :: iso_c_binding
827 type(c_ptr),
value :: a_d, b_d
835 bind(c, name=
'opencl_add2s2')
836 use,
intrinsic :: iso_c_binding
839 type(c_ptr),
value :: a_d, b_d
847 bind(c, name=
'opencl_add2s2_many')
848 use,
intrinsic :: iso_c_binding
851 type(c_ptr),
value :: y_d, x_d_d, a_d
852 integer(c_int) :: j, n
858 bind(c, name=
'opencl_addsqr2s2')
859 use,
intrinsic :: iso_c_binding
862 type(c_ptr),
value :: a_d, b_d
870 bind(c, name=
'opencl_add3s2')
871 use,
intrinsic :: iso_c_binding
874 type(c_ptr),
value :: a_d, b_d, c_d
882 bind(c, name=
'opencl_invcol1')
883 use,
intrinsic :: iso_c_binding
885 type(c_ptr),
value :: a_d
892 bind(c, name=
'opencl_invcol2')
893 use,
intrinsic :: iso_c_binding
895 type(c_ptr),
value :: a_d, b_d
902 bind(c, name=
'opencl_col2')
903 use,
intrinsic :: iso_c_binding
905 type(c_ptr),
value :: a_d, b_d
912 bind(c, name=
'opencl_col3')
913 use,
intrinsic :: iso_c_binding
915 type(c_ptr),
value :: a_d, b_d, c_d
922 bind(c, name=
'opencl_subcol3')
923 use,
intrinsic :: iso_c_binding
925 type(c_ptr),
value :: a_d, b_d, c_d
932 bind(c, name=
'opencl_sub2')
933 use,
intrinsic :: iso_c_binding
935 type(c_ptr),
value :: a_d, b_d
942 bind(c, name=
'opencl_sub3')
943 use,
intrinsic :: iso_c_binding
945 type(c_ptr),
value :: a_d, b_d, c_d
952 bind(c, name=
'opencl_add3')
953 use,
intrinsic :: iso_c_binding
955 type(c_ptr),
value :: a_d, b_d, c_d
962 bind(c, name=
'opencl_addcol3')
963 use,
intrinsic :: iso_c_binding
965 type(c_ptr),
value :: a_d, b_d, c_d
972 bind(c, name=
'opencl_addcol4')
973 use,
intrinsic :: iso_c_binding
975 type(c_ptr),
value :: a_d, b_d, c_d, d_d
981 subroutine opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
982 bind(c, name=
'opencl_vdot3')
983 use,
intrinsic :: iso_c_binding
985 type(c_ptr),
value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
992 bind(c, name=
'opencl_glsc3')
993 use,
intrinsic :: iso_c_binding
996 type(c_ptr),
value :: a_d, b_d, c_d
1003 bind(c, name=
'opencl_glsc3_many')
1004 use,
intrinsic :: iso_c_binding
1007 integer(c_int) :: j, n
1008 type(c_ptr),
value :: w_d, v_d_d, mult_d
1015 bind(c, name=
'opencl_glsc2')
1016 use,
intrinsic :: iso_c_binding
1019 type(c_ptr),
value :: a_d, b_d
1026 bind(c, name=
'opencl_glsum')
1027 use,
intrinsic :: iso_c_binding
1030 type(c_ptr),
value :: a_d
1048 type(c_ptr) :: a_d, b_d
1057 call neko_error(
'no device backend configured')
1063 type(c_ptr) :: a_d, b_d, mask_d
1072 call neko_error(
'no device backend configured')
1080 real(kind=rp),
intent(in) :: c
1082 type(c_ptr) :: mask_d
1083 integer :: mask_size
1091 call neko_error(
'No device backend configured')
1106 call neko_error(
'No device backend configured')
1114 real(kind=rp) :: one = 1.0_rp
1115 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
1120 call neko_error(
'No device backend configured')
1127 real(kind=rp),
intent(in) :: c
1136 call neko_error(
'No device backend configured')
1142 type(c_ptr) :: a_d, b_d
1143 real(kind=rp),
intent(in) :: c
1152 call neko_error(
'No device backend configured')
1159 real(kind=rp),
intent(in) :: c
1168 call neko_error(
'No device backend configured')
1176 real(kind=rp),
intent(in) :: c
1185 call neko_error(
'No device backend configured')
1192 real(kind=rp),
intent(in) :: c
1201 call neko_error(
'No device backend configured')
1207 type(c_ptr) :: a_d, b_d
1216 call neko_error(
'No device backend configured')
1221 type(c_ptr) :: a_d, b_d
1231 call neko_error(
'No device backend configured')
1238 type(c_ptr) :: a_d, b_d
1248 call neko_error(
'No device backend configured')
1254 type(c_ptr) :: a_d, b_d
1264 call neko_error(
'No device backend configured')
1270 type(c_ptr) :: a_d, b_d, c_d
1279 call neko_error(
'No device backend configured')
1285 type(c_ptr) :: a_d, b_d, c_d
1286 real(kind=rp) :: c1, c2
1295 call neko_error(
'No device backend configured')
1310 call neko_error(
'No device backend configured')
1316 type(c_ptr) :: a_d, b_d
1325 call neko_error(
'No device backend configured')
1331 type(c_ptr) :: a_d, b_d
1340 call neko_error(
'No device backend configured')
1346 type(c_ptr) :: a_d, b_d, c_d
1355 call neko_error(
'No device backend configured')
1361 type(c_ptr) :: a_d, b_d, c_d
1370 call neko_error(
'No device backend configured')
1376 type(c_ptr) :: a_d, b_d
1385 call neko_error(
'No device backend configured')
1391 type(c_ptr) :: a_d, b_d, c_d
1400 call neko_error(
'No device backend configured')
1406 type(c_ptr) :: a_d, b_d, c_d
1415 call neko_error(
'No device backend configured')
1421 type(c_ptr) :: a_d, b_d, c_d, d_d
1430 call neko_error(
'No device backend configured')
1437 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
1440 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1442 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1444 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1446 call neko_error(
'No device backend configured')
1452 type(c_ptr) :: u_d, v_d, w_d
1454 real(kind=rp) :: res
1464 call neko_error(
'No device backend configured')
1470 type(c_ptr) :: a_d, b_d, c_d
1472 real(kind=rp) :: res
1480 call neko_error(
'No device backend configured')
1483 #ifndef HAVE_DEVICE_MPI
1484 if (pe_size .gt. 1)
then
1485 call mpi_allreduce(mpi_in_place, res, 1, &
1486 mpi_real_precision, mpi_sum, neko_comm, ierr)
1492 type(c_ptr),
value :: w_d, v_d_d, mult_d
1493 integer(c_int) :: j, n
1503 call neko_error(
'No device backend configured')
1506 #ifndef HAVE_DEVICE_MPI
1507 if (pe_size .gt. 1)
then
1508 call mpi_allreduce(mpi_in_place, h, j, &
1509 mpi_real_precision, mpi_sum, neko_comm, ierr)
1515 type(c_ptr),
value :: y_d, x_d_d, a_d
1516 integer(c_int) :: j, n
1524 call neko_error(
'No device backend configured')
1530 type(c_ptr) :: a_d, b_d
1532 real(kind=rp) :: res
1540 call neko_error(
'No device backend configured')
1543 #ifndef HAVE_DEVICE_MPI
1544 if (pe_size .gt. 1)
then
1545 call mpi_allreduce(mpi_in_place, res, 1, &
1546 mpi_real_precision, mpi_sum, neko_comm, ierr)
1555 real(kind=rp) :: res
1563 call neko_error(
'No device backend configured')
1566 #ifndef HAVE_DEVICE_MPI
1567 if (pe_size .gt. 1)
then
1568 call mpi_allreduce(mpi_in_place, res, 1, &
1569 mpi_real_precision, mpi_sum, neko_comm, ierr)
void opencl_add3(void *a, void *b, void *c, int *n)
void opencl_addcol3(void *a, void *b, void *c, int *n)
void opencl_invcol1(void *a, int *n)
void opencl_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
void opencl_addsqr2s2(void *a, void *b, real *c1, int *n)
void opencl_cmult(void *a, real *c, int *n)
void opencl_sub3(void *a, void *b, void *c, int *n)
void opencl_rone(void *a, int *n)
void opencl_cadd(void *a, real *c, int *n)
void opencl_cmult2(void *a, void *b, real *c, int *n)
real opencl_glsc3(void *a, void *b, void *c, int *n)
void opencl_add2s2(void *a, void *b, real *c1, int *n)
void opencl_rzero(void *a, int *n)
void opencl_sub2(void *a, void *b, int *n)
void opencl_col2(void *a, void *b, int *n)
void opencl_addcol4(void *a, void *b, void *c, void *d, int *n)
void opencl_col3(void *a, void *b, void *c, int *n)
void opencl_subcol3(void *a, void *b, void *c, int *n)
void opencl_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
void opencl_add2s2_many(void *x, void *p, void *alpha, int *j, int *n)
void opencl_invcol2(void *a, void *b, int *n)
void opencl_cadd2(void *a, void *b, real *c, int *n)
void opencl_add2(void *a, void *b, int *n)
void opencl_masked_copy(void *a, void *b, void *mask, int *n, int *m)
void opencl_cfill_mask(void *a, void *c, int *size, void *mask, int *mask_size)
void opencl_cfill(void *a, real *c, int *n)
void opencl_add2s1(void *a, void *b, real *c1, int *n)
void opencl_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
real opencl_glsc2(void *a, void *b, int *n)
real opencl_glsum(void *a, int *n)
void opencl_copy(void *a, void *b, int *n)
void cuda_invcol1(void *a, int *n)
void cuda_add2s2_many(void *x, void **p, void *alpha, int *j, int *n)
void cuda_cadd2(void *a, void *b, real *c, int *n)
real cuda_vlsc3(void *u, void *v, void *w, int *n)
void cuda_add2s2(void *a, void *b, real *c1, int *n)
void cuda_masked_copy(void *a, void *b, void *mask, int *n, int *m)
void cuda_add3(void *a, void *b, void *c, int *n)
void cuda_col2(void *a, void *b, int *n)
void cuda_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
void cuda_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
void cuda_addcol3(void *a, void *b, void *c, int *n)
void cuda_subcol3(void *a, void *b, void *c, int *n)
void cuda_cmult(void *a, real *c, int *n)
void cuda_addsqr2s2(void *a, void *b, real *c1, int *n)
void cuda_add2s1(void *a, void *b, real *c1, int *n)
real cuda_glsum(void *a, int *n)
real cuda_glsc2(void *a, void *b, int *n)
void cuda_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
void cuda_rzero(void *a, int *n)
void cuda_addcol4(void *a, void *b, void *c, void *d, int *n)
void cuda_add2(void *a, void *b, int *n)
void cuda_copy(void *a, void *b, int *n)
void cuda_cfill_mask(void *a, real *c, int *size, int *mask, int *mask_size)
void cuda_invcol2(void *a, void *b, int *n)
void cuda_col3(void *a, void *b, void *c, int *n)
void cuda_cfill(void *a, real *c, int *n)
void cuda_cadd(void *a, real *c, int *n)
void cuda_sub2(void *a, void *b, int *n)
real cuda_glsc3(void *a, void *b, void *c, int *n)
void cuda_cmult2(void *a, void *b, real *c, int *n)
void cuda_sub3(void *a, void *b, void *c, int *n)
subroutine, public device_add2(a_d, b_d, n)
Vector addition $a = a + b$.
subroutine, public device_addcol3(a_d, b_d, c_d, n)
Returns $a = a + b \cdot c$.
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication $a = a \cdot b$.
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum $dot = u \cdot v \cdot w$.
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_invcol1(a_d, n)
Invert a vector $a = 1 / a$.
subroutine, public device_col3(a_d, b_d, c_d, n)
Vector multiplication with 3 vectors $a = b \cdot c$.
subroutine, public device_cadd(a_d, c, n)
Add a scalar to vector .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
Compute a dot product $dot = u \cdot v$ (3-d version) assuming vector components $u = (u_1, u_2, u_3)$ etc.
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c $a = c \cdot b$.
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c $a = c \cdot a$.
subroutine, public device_masked_copy(a_d, b_d, mask_d, n, m)
Copy a masked vector $a(mask) = b(mask)$.
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
subroutine, public device_cfill_mask(a_d, c, size, mask_d, mask_size)
Fill a constant to a masked vector: $a_i = c$ for $i$ in mask.
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product $a^T b$.
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction $a = b - c$.
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product $a^T b c$.
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition $a = b + c$.
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector $a = b$.
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
Returns $a = c_1 b + c_2 c$.
subroutine, public device_subcol3(a_d, b_d, c_d, n)
Returns $a = a - b \cdot c$.
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
subroutine, public device_sub2(a_d, b_d, n)
Vector subtraction $a = a - b$.
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c $a = c$.
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns $a = a + b \cdot c \cdot d$.
subroutine, public device_invcol2(a_d, b_d, n)
Vector division $a = a / b$.
subroutine, public device_addsqr2s2(a_d, b_d, c1, n)
Returns $a = a + c_1 (b \cdot b)$.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.