Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
mxm_wrapper.F90
Go to the documentation of this file.
1
3 use num_types, only : rp, sp, dp, qp
4 use utils, only : neko_error
5#ifdef HAVE_LIBXSMM
6 use libxsmm, only : libxsmm_available, libxsmm_dispatch, &
7 libxsmm_dmmcall_abc, libxsmm_dmmfunction, libxsmm_smmcall_abc, &
8 libxsmm_smmfunction, libxsmm_prefetch
9#endif
10 implicit none
11 private
12
13 public :: mxm
14
!> Generic name covering the single-, double- and quad-precision
!! `mxm_blas_*` procedures; the specific one is selected from the real
!! kind of the array arguments.
interface mxm_blas
  module procedure mxm_blas_sp
  module procedure mxm_blas_dp
  module procedure mxm_blas_qp
end interface mxm_blas
18
21 end interface mxm_libxsmm
22
25
26contains
27
!> Matrix-matrix product entry point for contiguously packed matrices
!! in the working precision `rp`.
!!
!! The work is forwarded to one of two generic backends, chosen at
!! preprocessing time: `mxm_libxsmm` when `HAVE_LIBXSMM` is defined,
!! `mxm_blas` otherwise.
!!
!! @param a  Left operand, dimensioned (n1, n2).
!! @param n1 Number of rows of `a` and `c`.
!! @param b  Right operand, dimensioned (n2, n3).
!! @param n2 Number of columns of `a` and rows of `b`.
!! @param c  Result matrix, dimensioned (n1, n3).
!! @param n3 Number of columns of `b` and `c`.
subroutine mxm(a, n1, b, n2, c, n3)
  integer, intent(in) :: n1, n2, n3
  real(kind=rp), intent(in) :: a(n1, n2), b(n2, n3)
  real(kind=rp), intent(inout) :: c(n1, n3)

  ! Exactly one of the two calls below survives preprocessing.
#ifndef HAVE_LIBXSMM
  call mxm_blas(a, n1, b, n2, c, n3)
#else
  call mxm_libxsmm(a, n1, b, n2, c, n3)
#endif

end subroutine mxm
43
!> Single-precision matrix-matrix product C = A * B through BLAS `sgemm`.
!!
!! Neither operand is transposed, alpha is one and beta is zero, so the
!! previous contents of `c` do not contribute to the result.
!!
!! @param a  Left operand, dimensioned (n1, n2).
!! @param n1 Number of rows of `a` and `c`.
!! @param b  Right operand, dimensioned (n2, n3).
!! @param n2 Number of columns of `a` and rows of `b`.
!! @param c  Result matrix, dimensioned (n1, n3).
!! @param n3 Number of columns of `b` and `c`.
subroutine mxm_blas_sp(a, n1, b, n2, c, n3)
  integer, intent(in) :: n1, n2, n3
  real(kind=sp), intent(in) :: a(n1, n2)
  real(kind=sp), intent(in) :: b(n2, n3)
  real(kind=sp), intent(inout) :: c(n1, n3)
  ! sgemm is called through an implicit interface, so the compiler cannot
  ! convert the scalars: they must already have kind sp. Unsuffixed
  ! literals would silently change kind if the default real is promoted.
  real(kind=sp), parameter :: one = 1.0_sp
  real(kind=sp), parameter :: zero = 0.0_sp

  ! BLAS requires every leading dimension to be at least 1, also when the
  ! corresponding extent is zero; max(1, n) equals n for any n >= 1.
  call sgemm('N', 'N', n1, n3, n2, one, a, max(1, n1), b, max(1, n2), &
       zero, c, max(1, n1))

end subroutine mxm_blas_sp
53
!> Double-precision matrix-matrix product C = A * B through BLAS `dgemm`.
!!
!! Neither operand is transposed, alpha is one and beta is zero, so the
!! previous contents of `c` do not contribute to the result.
!!
!! @param a  Left operand, dimensioned (n1, n2).
!! @param n1 Number of rows of `a` and `c`.
!! @param b  Right operand, dimensioned (n2, n3).
!! @param n2 Number of columns of `a` and rows of `b`.
!! @param c  Result matrix, dimensioned (n1, n3).
!! @param n3 Number of columns of `b` and `c`.
subroutine mxm_blas_dp(a, n1, b, n2, c, n3)
  integer, intent(in) :: n1, n2, n3
  real(kind=dp), intent(in) :: a(n1, n2)
  real(kind=dp), intent(in) :: b(n2, n3)
  real(kind=dp), intent(inout) :: c(n1, n3)
  ! dgemm is called through an implicit interface, so the scalars are
  ! tied to the kind parameter dp instead of `double precision` literals.
  real(kind=dp), parameter :: one = 1.0_dp
  real(kind=dp), parameter :: zero = 0.0_dp

  ! BLAS requires every leading dimension to be at least 1, also when the
  ! corresponding extent is zero; max(1, n) equals n for any n >= 1.
  call dgemm('N', 'N', n1, n3, n2, one, a, max(1, n1), b, max(1, n2), &
       zero, c, max(1, n1))

end subroutine mxm_blas_dp
63
!> Quad-precision matrix-matrix product C = A * B.
!!
!! BLAS provides no quad-precision gemm, so the product is formed with
!! the `matmul` intrinsic instead of aborting with an error.
!!
!! @param a  Left operand, dimensioned (n1, n2).
!! @param n1 Number of rows of `a` and `c`.
!! @param b  Right operand, dimensioned (n2, n3).
!! @param n2 Number of columns of `a` and rows of `b`.
!! @param c  Result matrix, dimensioned (n1, n3); overwritten.
!! @param n3 Number of columns of `b` and `c`.
subroutine mxm_blas_qp(a, n1, b, n2, c, n3)
  integer, intent(in) :: n1, n2, n3
  real(kind=qp), intent(in) :: a(n1, n2)
  real(kind=qp), intent(in) :: b(n2, n3)
  real(kind=qp), intent(inout) :: c(n1, n3)

  ! matmul of (n1, n2) by (n2, n3) yields (n1, n3), the shape of c.
  c = matmul(a, b)

end subroutine mxm_blas_qp
73
!> Single-precision matrix-matrix product C = A * B, using LIBXSMM when
!! possible.
!!
!! When built with `HAVE_LIBXSMM`, a kernel for the (n1, n3, n2) problem
!! is requested with alpha = 1 and beta = 0 and used if
!! `libxsmm_available` accepts it. In every other case (no usable kernel,
!! or a build without LIBXSMM) the product is computed by `mxm_blas_sp`
!! so that `c` is never left untouched.
!!
!! @param a  Left operand, dimensioned (n1, n2).
!! @param n1 Number of rows of `a` and `c`.
!! @param b  Right operand, dimensioned (n2, n3).
!! @param n2 Number of columns of `a` and rows of `b`.
!! @param c  Result matrix, dimensioned (n1, n3).
!! @param n3 Number of columns of `b` and `c`.
subroutine mxm_libxsmm_sp(a, n1, b, n2, c, n3)
  integer, intent(in) :: n1, n2, n3
  real(kind=sp), intent(in) :: a(n1, n2)
  real(kind=sp), intent(in) :: b(n2, n3)
  real(kind=sp), intent(inout) :: c(n1, n3)
#ifdef HAVE_LIBXSMM
  type(libxsmm_smmfunction) :: xmm

  call libxsmm_dispatch(xmm, n1, n3, n2, &
       alpha = 1.0_sp, beta = 0.0_sp, prefetch = libxsmm_prefetch)
  if (libxsmm_available(xmm)) then
    call libxsmm_smmcall_abc(xmm, a, b, c)
    return
  end if
#endif

  ! Reached only when no LIBXSMM kernel was run.
  call mxm_blas_sp(a, n1, b, n2, c, n3)

end subroutine mxm_libxsmm_sp
90
!> Double-precision matrix-matrix product C = A * B, using LIBXSMM when
!! possible.
!!
!! When built with `HAVE_LIBXSMM`, a kernel for the (n1, n3, n2) problem
!! is requested with alpha = 1 and beta = 0 and used if
!! `libxsmm_available` accepts it. In every other case (no usable kernel,
!! or a build without LIBXSMM) the product is computed by `mxm_blas_dp`
!! so that `c` is never left untouched.
!!
!! @param a  Left operand, dimensioned (n1, n2).
!! @param n1 Number of rows of `a` and `c`.
!! @param b  Right operand, dimensioned (n2, n3).
!! @param n2 Number of columns of `a` and rows of `b`.
!! @param c  Result matrix, dimensioned (n1, n3).
!! @param n3 Number of columns of `b` and `c`.
subroutine mxm_libxsmm_dp(a, n1, b, n2, c, n3)
  integer, intent(in) :: n1, n2, n3
  real(kind=dp), intent(in) :: a(n1, n2)
  real(kind=dp), intent(in) :: b(n2, n3)
  real(kind=dp), intent(inout) :: c(n1, n3)
#ifdef HAVE_LIBXSMM
  type(libxsmm_dmmfunction) :: xmm

  call libxsmm_dispatch(xmm, n1, n3, n2, &
       alpha = 1.0_dp, beta = 0.0_dp, prefetch = libxsmm_prefetch)
  if (libxsmm_available(xmm)) then
    call libxsmm_dmmcall_abc(xmm, a, b, c)
    return
  end if
#endif

  ! Reached only when no LIBXSMM kernel was run.
  call mxm_blas_dp(a, n1, b, n2, c, n3)

end subroutine mxm_libxsmm_dp
107
!> Quad-precision matrix-matrix product C = A * B for the LIBXSMM
!! backend.
!!
!! The LIBXSMM names imported by this module cover single and double
!! precision only, so the product is formed with the `matmul` intrinsic
!! instead of aborting with an error.
!!
!! @param a  Left operand, dimensioned (n1, n2).
!! @param n1 Number of rows of `a` and `c`.
!! @param b  Right operand, dimensioned (n2, n3).
!! @param n2 Number of columns of `a` and rows of `b`.
!! @param c  Result matrix, dimensioned (n1, n3); overwritten.
!! @param n3 Number of columns of `b` and `c`.
subroutine mxm_libxsmm_qp(a, n1, b, n2, c, n3)
  integer, intent(in) :: n1, n2, n3
  real(kind=qp), intent(in) :: a(n1, n2)
  real(kind=qp), intent(in) :: b(n2, n3)
  real(kind=qp), intent(inout) :: c(n1, n3)

  ! matmul of (n1, n2) by (n2, n3) yields (n1, n3), the shape of c.
  c = matmul(a, b)

end subroutine mxm_libxsmm_qp
117
118end module mxm_wrapper
Wrapper for all matrix-matrix product implementations.
subroutine, private mxm_blas_sp(a, n1, b, n2, c, n3)
subroutine, private mxm_libxsmm_dp(a, n1, b, n2, c, n3)
subroutine, private mxm_blas_dp(a, n1, b, n2, c, n3)
subroutine, private mxm_libxsmm_sp(a, n1, b, n2, c, n3)
subroutine, private mxm_blas_qp(a, n1, b, n2, c, n3)
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A, B, and C.
subroutine, private mxm_libxsmm_qp(a, n1, b, n2, c, n3)
integer, parameter, public qp
Definition num_types.f90:10
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public sp
Definition num_types.f90:8
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35