37 use,
intrinsic :: iso_c_binding
44 bind(c, name=
'hip_copy')
45 use,
intrinsic :: iso_c_binding
46 type(c_ptr),
value :: a_d, b_d
53 bind(c, name=
'hip_masked_copy')
54 use,
intrinsic :: iso_c_binding
55 type(c_ptr),
value :: a_d, b_d, mask_d
56 integer(c_int) :: n, m
62 bind(c, name=
'hip_cmult')
63 use,
intrinsic :: iso_c_binding
65 type(c_ptr),
value :: a_d
73 bind(c, name=
'hip_cmult2')
74 use,
intrinsic :: iso_c_binding
76 type(c_ptr),
value :: a_d, b_d
84 bind(c, name=
'hip_cadd')
85 use,
intrinsic :: iso_c_binding
87 type(c_ptr),
value :: a_d
95 bind(c, name=
'hip_cfill')
96 use,
intrinsic :: iso_c_binding
98 type(c_ptr),
value :: a_d
106 bind(c, name=
'hip_rzero')
107 use,
intrinsic :: iso_c_binding
108 type(c_ptr),
value :: a_d
115 bind(c, name=
'hip_add2')
116 use,
intrinsic :: iso_c_binding
119 type(c_ptr),
value :: a_d, b_d
126 bind(c, name=
'hip_add2s1')
127 use,
intrinsic :: iso_c_binding
130 type(c_ptr),
value :: a_d, b_d
138 bind(c, name=
'hip_add2s2')
139 use,
intrinsic :: iso_c_binding
142 type(c_ptr),
value :: a_d, b_d
150 bind(c, name=
'hip_add2s2_many')
151 use,
intrinsic :: iso_c_binding
154 type(c_ptr),
value :: y_d, x_d_d, a_d
155 integer(c_int) :: j, n
161 bind(c, name=
'hip_addsqr2s2')
162 use,
intrinsic :: iso_c_binding
165 type(c_ptr),
value :: a_d, b_d
173 bind(c, name=
'hip_add3s2')
174 use,
intrinsic :: iso_c_binding
177 type(c_ptr),
value :: a_d, b_d, c_d
185 bind(c, name=
'hip_invcol1')
186 use,
intrinsic :: iso_c_binding
188 type(c_ptr),
value :: a_d
195 bind(c, name=
'hip_invcol2')
196 use,
intrinsic :: iso_c_binding
198 type(c_ptr),
value :: a_d, b_d
205 bind(c, name=
'hip_col2')
206 use,
intrinsic :: iso_c_binding
208 type(c_ptr),
value :: a_d, b_d
215 bind(c, name=
'hip_col3')
216 use,
intrinsic :: iso_c_binding
218 type(c_ptr),
value :: a_d, b_d, c_d
225 bind(c, name=
'hip_subcol3')
226 use,
intrinsic :: iso_c_binding
228 type(c_ptr),
value :: a_d, b_d, c_d
235 bind(c, name=
'hip_sub2')
236 use,
intrinsic :: iso_c_binding
238 type(c_ptr),
value :: a_d, b_d
245 bind(c, name=
'hip_sub3')
246 use,
intrinsic :: iso_c_binding
248 type(c_ptr),
value :: a_d, b_d, c_d
255 bind(c, name=
'hip_addcol3')
256 use,
intrinsic :: iso_c_binding
258 type(c_ptr),
value :: a_d, b_d, c_d
265 bind(c, name=
'hip_addcol4')
266 use,
intrinsic :: iso_c_binding
268 type(c_ptr),
value :: a_d, b_d, c_d, d_d
274 subroutine hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
275 bind(c, name=
'hip_vdot3')
276 use,
intrinsic :: iso_c_binding
278 type(c_ptr),
value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
285 bind(c, name=
'hip_vlsc3')
286 use,
intrinsic :: iso_c_binding
289 type(c_ptr),
value :: u_d, v_d, w_d
296 bind(c, name=
'hip_glsc3')
297 use,
intrinsic :: iso_c_binding
300 type(c_ptr),
value :: a_d, b_d, c_d
307 bind(c, name=
'hip_glsc3_many')
308 use,
intrinsic :: iso_c_binding
311 type(c_ptr),
value :: w_d, v_d_d, mult_d
312 integer(c_int) :: j, n
319 bind(c, name=
'hip_glsc2')
320 use,
intrinsic :: iso_c_binding
323 type(c_ptr),
value :: a_d, b_d
330 bind(c, name=
'hip_glsum')
331 use,
intrinsic :: iso_c_binding
334 type(c_ptr),
value :: a_d
341 bind(c, name=
'cuda_copy')
342 use,
intrinsic :: iso_c_binding
343 type(c_ptr),
value :: a_d, b_d
349 bind(c, name=
'cuda_masked_copy')
350 use,
intrinsic :: iso_c_binding
351 type(c_ptr),
value :: a_d, b_d, mask_d
352 integer(c_int) :: n, m
357 bind(c, name=
'cuda_cmult')
358 use,
intrinsic :: iso_c_binding
360 type(c_ptr),
value :: a_d
368 bind(c, name=
'cuda_cmult2')
369 use,
intrinsic :: iso_c_binding
371 type(c_ptr),
value :: a_d, b_d
380 bind(c, name=
'cuda_cadd')
381 use,
intrinsic :: iso_c_binding
383 type(c_ptr),
value :: a_d
391 bind(c, name=
'cuda_cfill')
392 use,
intrinsic :: iso_c_binding
394 type(c_ptr),
value :: a_d
402 bind(c, name=
'cuda_rzero')
403 use,
intrinsic :: iso_c_binding
404 type(c_ptr),
value :: a_d
411 bind(c, name=
'cuda_add2')
412 use,
intrinsic :: iso_c_binding
415 type(c_ptr),
value :: a_d, b_d
422 bind(c, name=
'cuda_add2s1')
423 use,
intrinsic :: iso_c_binding
426 type(c_ptr),
value :: a_d, b_d
434 bind(c, name=
'cuda_add2s2')
435 use,
intrinsic :: iso_c_binding
438 type(c_ptr),
value :: a_d, b_d
446 bind(c, name=
'cuda_addsqr2s2')
447 use,
intrinsic :: iso_c_binding
450 type(c_ptr),
value :: a_d, b_d
458 bind(c, name=
'cuda_add3s2')
459 use,
intrinsic :: iso_c_binding
462 type(c_ptr),
value :: a_d, b_d, c_d
470 bind(c, name=
'cuda_invcol1')
471 use,
intrinsic :: iso_c_binding
473 type(c_ptr),
value :: a_d
480 bind(c, name=
'cuda_invcol2')
481 use,
intrinsic :: iso_c_binding
483 type(c_ptr),
value :: a_d, b_d
490 bind(c, name=
'cuda_col2')
491 use,
intrinsic :: iso_c_binding
493 type(c_ptr),
value :: a_d, b_d
500 bind(c, name=
'cuda_col3')
501 use,
intrinsic :: iso_c_binding
503 type(c_ptr),
value :: a_d, b_d, c_d
510 bind(c, name=
'cuda_subcol3')
511 use,
intrinsic :: iso_c_binding
513 type(c_ptr),
value :: a_d, b_d, c_d
520 bind(c, name=
'cuda_sub2')
521 use,
intrinsic :: iso_c_binding
523 type(c_ptr),
value :: a_d, b_d
530 bind(c, name=
'cuda_sub3')
531 use,
intrinsic :: iso_c_binding
533 type(c_ptr),
value :: a_d, b_d, c_d
540 bind(c, name=
'cuda_addcol3')
541 use,
intrinsic :: iso_c_binding
543 type(c_ptr),
value :: a_d, b_d, c_d
550 bind(c, name=
'cuda_addcol4')
551 use,
intrinsic :: iso_c_binding
553 type(c_ptr),
value :: a_d, b_d, c_d, d_d
559 subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
560 bind(c, name=
'cuda_vdot3')
561 use,
intrinsic :: iso_c_binding
563 type(c_ptr),
value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
569 real(c_rp) function
cuda_vlsc3(u_d, v_d, w_d, n) &
570 bind(c, name=
'cuda_vlsc3')
571 use,
intrinsic :: iso_c_binding
574 type(c_ptr),
value :: u_d, v_d, w_d
581 bind(c, name=
'cuda_add2s2_many')
582 use,
intrinsic :: iso_c_binding
585 type(c_ptr),
value :: y_d, x_d_d, a_d
586 integer(c_int) :: j, n
591 real(c_rp) function
cuda_glsc3(a_d, b_d, c_d, n) &
592 bind(c, name=
'cuda_glsc3')
593 use,
intrinsic :: iso_c_binding
596 type(c_ptr),
value :: a_d, b_d, c_d
602 bind(c, name=
'cuda_glsc3_many')
603 use,
intrinsic :: iso_c_binding
606 type(c_ptr),
value :: w_d, v_d_d, mult_d
607 integer(c_int) :: j, n
614 bind(c, name=
'cuda_glsc2')
615 use,
intrinsic :: iso_c_binding
618 type(c_ptr),
value :: a_d, b_d
625 bind(c, name=
'cuda_glsum')
626 use,
intrinsic :: iso_c_binding
629 type(c_ptr),
value :: a_d
636 bind(c, name=
'opencl_copy')
637 use,
intrinsic :: iso_c_binding
638 type(c_ptr),
value :: a_d, b_d
645 bind(c, name=
'opencl_masked_copy')
646 use,
intrinsic :: iso_c_binding
647 type(c_ptr),
value :: a_d, b_d, mask_d
648 integer(c_int) :: n, m
654 bind(c, name=
'opencl_cmult')
655 use,
intrinsic :: iso_c_binding
657 type(c_ptr),
value :: a_d
665 bind(c, name=
'opencl_cmult2')
666 use,
intrinsic :: iso_c_binding
668 type(c_ptr),
value :: a_d, b_d
675 bind(c, name=
'opencl_cadd')
676 use,
intrinsic :: iso_c_binding
678 type(c_ptr),
value :: a_d
686 bind(c, name=
'opencl_cfill')
687 use,
intrinsic :: iso_c_binding
689 type(c_ptr),
value :: a_d
697 bind(c, name=
'opencl_rzero')
698 use,
intrinsic :: iso_c_binding
699 type(c_ptr),
value :: a_d
706 bind(c, name=
'opencl_rone')
707 use,
intrinsic :: iso_c_binding
708 type(c_ptr),
value :: a_d
715 bind(c, name=
'opencl_add2')
716 use,
intrinsic :: iso_c_binding
718 type(c_ptr),
value :: a_d, b_d
725 bind(c, name=
'opencl_add2s1')
726 use,
intrinsic :: iso_c_binding
729 type(c_ptr),
value :: a_d, b_d
737 bind(c, name=
'opencl_add2s2')
738 use,
intrinsic :: iso_c_binding
741 type(c_ptr),
value :: a_d, b_d
749 bind(c, name=
'opencl_add2s2_many')
750 use,
intrinsic :: iso_c_binding
753 type(c_ptr),
value :: y_d, x_d_d, a_d
754 integer(c_int) :: j, n
760 bind(c, name=
'opencl_addsqr2s2')
761 use,
intrinsic :: iso_c_binding
764 type(c_ptr),
value :: a_d, b_d
772 bind(c, name=
'opencl_add3s2')
773 use,
intrinsic :: iso_c_binding
776 type(c_ptr),
value :: a_d, b_d, c_d
784 bind(c, name=
'opencl_invcol1')
785 use,
intrinsic :: iso_c_binding
787 type(c_ptr),
value :: a_d
794 bind(c, name=
'opencl_invcol2')
795 use,
intrinsic :: iso_c_binding
797 type(c_ptr),
value :: a_d, b_d
804 bind(c, name=
'opencl_col2')
805 use,
intrinsic :: iso_c_binding
807 type(c_ptr),
value :: a_d, b_d
814 bind(c, name=
'opencl_col3')
815 use,
intrinsic :: iso_c_binding
817 type(c_ptr),
value :: a_d, b_d, c_d
824 bind(c, name=
'opencl_subcol3')
825 use,
intrinsic :: iso_c_binding
827 type(c_ptr),
value :: a_d, b_d, c_d
834 bind(c, name=
'opencl_sub2')
835 use,
intrinsic :: iso_c_binding
837 type(c_ptr),
value :: a_d, b_d
844 bind(c, name=
'opencl_sub3')
845 use,
intrinsic :: iso_c_binding
847 type(c_ptr),
value :: a_d, b_d, c_d
854 bind(c, name=
'opencl_addcol3')
855 use,
intrinsic :: iso_c_binding
857 type(c_ptr),
value :: a_d, b_d, c_d
864 bind(c, name=
'opencl_addcol4')
865 use,
intrinsic :: iso_c_binding
867 type(c_ptr),
value :: a_d, b_d, c_d, d_d
873 subroutine opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
874 bind(c, name=
'opencl_vdot3')
875 use,
intrinsic :: iso_c_binding
877 type(c_ptr),
value :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
884 bind(c, name=
'opencl_glsc3')
885 use,
intrinsic :: iso_c_binding
888 type(c_ptr),
value :: a_d, b_d, c_d
895 bind(c, name=
'opencl_glsc3_many')
896 use,
intrinsic :: iso_c_binding
899 integer(c_int) :: j, n
900 type(c_ptr),
value :: w_d, v_d_d, mult_d
907 bind(c, name=
'opencl_glsc2')
908 use,
intrinsic :: iso_c_binding
911 type(c_ptr),
value :: a_d, b_d
918 bind(c, name=
'opencl_glsum')
919 use,
intrinsic :: iso_c_binding
922 type(c_ptr),
value :: a_d
939 type(c_ptr) :: a_d, b_d
948 call neko_error(
'no device backend configured')
953 type(c_ptr) :: a_d, b_d, mask_d
962 call neko_error(
'no device backend configured')
977 call neko_error(
'No device backend configured')
984 real(kind=rp) :: one = 1.0_rp
985 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
990 call neko_error(
'No device backend configured')
996 real(kind=rp),
intent(in) :: c
1005 call neko_error(
'No device backend configured')
1010 type(c_ptr) :: a_d, b_d
1011 real(kind=rp),
intent(in) :: c
1020 call neko_error(
'No device backend configured')
1027 real(kind=rp),
intent(in) :: c
1036 call neko_error(
'No device backend configured')
1042 real(kind=rp),
intent(in) :: c
1051 call neko_error(
'No device backend configured')
1056 type(c_ptr) :: a_d, b_d
1065 call neko_error(
'No device backend configured')
1070 type(c_ptr) :: a_d, b_d
1080 call neko_error(
'No device backend configured')
1085 type(c_ptr) :: a_d, b_d
1095 call neko_error(
'No device backend configured')
1100 type(c_ptr) :: a_d, b_d
1110 call neko_error(
'No device backend configured')
1115 type(c_ptr) :: a_d, b_d, c_d
1116 real(kind=rp) :: c1, c2
1125 call neko_error(
'No device backend configured')
1139 call neko_error(
'No device backend configured')
1144 type(c_ptr) :: a_d, b_d
1153 call neko_error(
'No device backend configured')
1158 type(c_ptr) :: a_d, b_d
1167 call neko_error(
'No device backend configured')
1172 type(c_ptr) :: a_d, b_d, c_d
1181 call neko_error(
'No device backend configured')
1186 type(c_ptr) :: a_d, b_d, c_d
1195 call neko_error(
'No device backend configured')
1200 type(c_ptr) :: a_d, b_d
1209 call neko_error(
'No device backend configured')
1214 type(c_ptr) :: a_d, b_d, c_d
1223 call neko_error(
'No device backend configured')
1228 type(c_ptr) :: a_d, b_d, c_d
1237 call neko_error(
'No device backend configured')
1242 type(c_ptr) :: a_d, b_d, c_d, d_d
1251 call neko_error(
'No device backend configured')
1256 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
1259 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1261 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1263 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
1265 call neko_error(
'No device backend configured')
1270 type(c_ptr) :: u_d, v_d, w_d
1272 real(kind=rp) :: res
1282 call neko_error(
'No device backend configured')
1287 type(c_ptr) :: a_d, b_d, c_d
1289 real(kind=rp) :: res
1297 call neko_error(
'No device backend configured')
1300 #ifndef HAVE_DEVICE_MPI
1301 if (pe_size .gt. 1)
then
1302 call mpi_allreduce(mpi_in_place, res, 1, &
1303 mpi_real_precision, mpi_sum, neko_comm, ierr)
1309 type(c_ptr),
value :: w_d, v_d_d, mult_d
1310 integer(c_int) :: j, n
1320 call neko_error(
'No device backend configured')
1323 #ifndef HAVE_DEVICE_MPI
1324 if (pe_size .gt. 1)
then
1325 call mpi_allreduce(mpi_in_place, h, j, &
1326 mpi_real_precision, mpi_sum, neko_comm, ierr)
1332 type(c_ptr),
value :: y_d, x_d_d, a_d
1333 integer(c_int) :: j, n
1341 call neko_error(
'No device backend configured')
1346 type(c_ptr) :: a_d, b_d
1348 real(kind=rp) :: res
1356 call neko_error(
'No device backend configured')
1359 #ifndef HAVE_DEVICE_MPI
1360 if (pe_size .gt. 1)
then
1361 call mpi_allreduce(mpi_in_place, res, 1, &
1362 mpi_real_precision, mpi_sum, neko_comm, ierr)
1370 real(kind=rp) :: res
1378 call neko_error(
'No device backend configured')
1381 #ifndef HAVE_DEVICE_MPI
1382 if (pe_size .gt. 1)
then
1383 call mpi_allreduce(mpi_in_place, res, 1, &
1384 mpi_real_precision, mpi_sum, neko_comm, ierr)
void opencl_addcol3(void *a, void *b, void *c, int *n)
void opencl_invcol1(void *a, int *n)
void opencl_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
void opencl_addsqr2s2(void *a, void *b, real *c1, int *n)
void opencl_cmult(void *a, real *c, int *n)
void opencl_sub3(void *a, void *b, void *c, int *n)
void opencl_rone(void *a, int *n)
void opencl_cadd(void *a, real *c, int *n)
void opencl_cmult2(void *a, void *b, real *c, int *n)
real opencl_glsc3(void *a, void *b, void *c, int *n)
void opencl_add2s2(void *a, void *b, real *c1, int *n)
void opencl_rzero(void *a, int *n)
void opencl_sub2(void *a, void *b, int *n)
void opencl_col2(void *a, void *b, int *n)
void opencl_addcol4(void *a, void *b, void *c, void *d, int *n)
void opencl_col3(void *a, void *b, void *c, int *n)
void opencl_subcol3(void *a, void *b, void *c, int *n)
void opencl_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
void opencl_add2s2_many(void *x, void *p, void *alpha, int *j, int *n)
void opencl_invcol2(void *a, void *b, int *n)
void opencl_add2(void *a, void *b, int *n)
void opencl_masked_copy(void *a, void *b, void *mask, int *n, int *m)
void opencl_cfill(void *a, real *c, int *n)
void opencl_add2s1(void *a, void *b, real *c1, int *n)
void opencl_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
real opencl_glsc2(void *a, void *b, int *n)
real opencl_glsum(void *a, int *n)
void opencl_copy(void *a, void *b, int *n)
void cuda_invcol1(void *a, int *n)
void cuda_add2s2_many(void *x, void **p, void *alpha, int *j, int *n)
real cuda_vlsc3(void *u, void *v, void *w, int *n)
void cuda_add2s2(void *a, void *b, real *c1, int *n)
void cuda_masked_copy(void *a, void *b, void *mask, int *n, int *m)
void cuda_col2(void *a, void *b, int *n)
void cuda_glsc3_many(real *h, void *w, void *v, void *mult, int *j, int *n)
void cuda_vdot3(void *dot, void *u1, void *u2, void *u3, void *v1, void *v2, void *v3, int *n)
void cuda_addcol3(void *a, void *b, void *c, int *n)
void cuda_subcol3(void *a, void *b, void *c, int *n)
void cuda_cmult(void *a, real *c, int *n)
void cuda_addsqr2s2(void *a, void *b, real *c1, int *n)
void cuda_add2s1(void *a, void *b, real *c1, int *n)
real cuda_glsum(void *a, int *n)
real cuda_glsc2(void *a, void *b, int *n)
void cuda_add3s2(void *a, void *b, void *c, real *c1, real *c2, int *n)
void cuda_rzero(void *a, int *n)
void cuda_addcol4(void *a, void *b, void *c, void *d, int *n)
void cuda_add2(void *a, void *b, int *n)
void cuda_copy(void *a, void *b, int *n)
void cuda_invcol2(void *a, void *b, int *n)
void cuda_col3(void *a, void *b, void *c, int *n)
void cuda_cfill(void *a, real *c, int *n)
void cuda_cadd(void *a, real *c, int *n)
void cuda_sub2(void *a, void *b, int *n)
real cuda_glsc3(void *a, void *b, void *c, int *n)
void cuda_cmult2(void *a, void *b, real *c, int *n)
void cuda_sub3(void *a, void *b, void *c, int *n)
subroutine, public device_add2(a_d, b_d, n)
subroutine, public device_addcol3(a_d, b_d, c_d, n)
subroutine, public device_col2(a_d, b_d, n)
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
subroutine, public device_rone(a_d, n)
subroutine, public device_add2s2(a_d, b_d, c1, n)
subroutine, public device_invcol1(a_d, n)
subroutine, public device_col3(a_d, b_d, c_d, n)
subroutine, public device_cadd(a_d, c, n)
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
subroutine, public device_cmult2(a_d, b_d, c, n)
subroutine, public device_cmult(a_d, c, n)
subroutine, public device_masked_copy(a_d, b_d, mask_d, n, m)
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
subroutine, public device_sub3(a_d, b_d, c_d, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
real(kind=rp) function, public device_glsum(a_d, n)
subroutine, public device_copy(a_d, b_d, n)
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
subroutine, public device_subcol3(a_d, b_d, c_d, n)
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
subroutine, public device_sub2(a_d, b_d, n)
subroutine, public device_cfill(a_d, c, n)
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
subroutine, public device_invcol2(a_d, b_d, n)
subroutine, public device_addsqr2s2(a_d, b_d, c1, n)
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.