Neko  0.9.99
A portable framework for high-order spectral element flow simulations
mxm_wrapper.F90
Go to the documentation of this file.
1 
2 module mxm_wrapper
3  use num_types
4  use utils, only : neko_error
5 #ifdef HAVE_LIBXSMM
6  use libxsmm
7 #endif
8  implicit none
9  private
10 
11  public :: mxm
12 
13  interface mxm_blas
14  module procedure mxm_blas_sp, mxm_blas_dp, mxm_blas_qp
15  end interface mxm_blas
16 
17  interface mxm_libxsmm
19  end interface mxm_libxsmm
20 
23 
24 contains
25 
28  subroutine mxm(a,n1,b,n2,c,n3)
29  integer, intent(in) :: n1, n2, n3
30  real(kind=rp), intent(in) :: a(n1, n2)
31  real(kind=rp), intent(in) :: b(n2, n3)
32  real(kind=rp), intent(inout) :: c(n1, n3)
33 
34 #ifdef HAVE_LIBXSMM
35  call mxm_libxsmm(a,n1,b,n2,c,n3)
36 #else
37  call mxm_blas(a,n1,b,n2,c,n3)
38 #endif
39 
40  end subroutine mxm
41 
42  subroutine mxm_blas_sp(a,n1,b,n2,c,n3)
43  integer, intent(in) :: n1, n2, n3
44  real(kind=sp), intent(in) :: a(n1, n2)
45  real(kind=sp), intent(in) :: b(n2, n3)
46  real(kind=sp), intent(inout) :: c(n1, n3)
47 
48  call sgemm('N','N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1)
49 
50  end subroutine mxm_blas_sp
51 
52  subroutine mxm_blas_dp(a,n1,b,n2,c,n3)
53  integer, intent(in) :: n1, n2, n3
54  real(kind=dp), intent(in) :: a(n1, n2)
55  real(kind=dp), intent(in) :: b(n2, n3)
56  real(kind=dp), intent(inout) :: c(n1, n3)
57 
58  call dgemm('N','N',n1,n3,n2,1d0,a,n1,b,n2,0d0,c,n1)
59 
60  end subroutine mxm_blas_dp
61 
62  subroutine mxm_blas_qp(a,n1,b,n2,c,n3)
63  integer, intent(in) :: n1, n2, n3
64  real(kind=qp), intent(in) :: a(n1, n2)
65  real(kind=qp), intent(in) :: b(n2, n3)
66  real(kind=qp), intent(inout) :: c(n1, n3)
67 
68  call neko_error('Not implemented yet!')
69 
70  end subroutine mxm_blas_qp
71 
72  subroutine mxm_libxsmm_sp(a,n1,b,n2,c,n3)
73  integer, intent(in) :: n1, n2, n3
74  real(kind=sp), intent(in) :: a(n1, n2)
75  real(kind=sp), intent(in) :: b(n2, n3)
76  real(kind=sp), intent(inout) :: c(n1, n3)
77 #ifdef HAVE_LIBXSMM
78  type(libxsmm_smmfunction) :: xmm
79 
80  call libxsmm_dispatch(xmm, n1, n3, n2, &
81  alpha=1.0, beta=0.0, prefetch=libxsmm_prefetch)
82  if (libxsmm_available(xmm)) then
83  call libxsmm_smmcall_abc(xmm, a, b, c)
84  return
85  end if
86 #endif
87  end subroutine mxm_libxsmm_sp
88 
89  subroutine mxm_libxsmm_dp(a,n1,b,n2,c,n3)
90  integer, intent(in) :: n1, n2, n3
91  real(kind=dp), intent(in) :: a(n1, n2)
92  real(kind=dp), intent(in) :: b(n2, n3)
93  real(kind=dp), intent(inout) :: c(n1, n3)
94 #ifdef HAVE_LIBXSMM
95  type(libxsmm_dmmfunction) :: xmm
96 
97  call libxsmm_dispatch(xmm, n1, n3, n2, &
98  alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch)
99  if (libxsmm_available(xmm)) then
100  call libxsmm_dmmcall_abc(xmm, a, b, c)
101  return
102  end if
103 #endif
104  end subroutine mxm_libxsmm_dp
105 
106  subroutine mxm_libxsmm_qp(a,n1,b,n2,c,n3)
107  integer, intent(in) :: n1, n2, n3
108  real(kind=qp), intent(in) :: a(n1, n2)
109  real(kind=qp), intent(in) :: b(n2, n3)
110  real(kind=qp), intent(inout) :: c(n1, n3)
111 
112  call neko_error('Not implemented yet!')
113 
114  end subroutine mxm_libxsmm_qp
115 
116 end module mxm_wrapper
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, private mxm_blas_sp(a, n1, b, n2, c, n3)
Definition: mxm_wrapper.F90:43
subroutine, private mxm_libxsmm_dp(a, n1, b, n2, c, n3)
Definition: mxm_wrapper.F90:90
subroutine, private mxm_blas_dp(a, n1, b, n2, c, n3)
Definition: mxm_wrapper.F90:53
subroutine, private mxm_libxsmm_sp(a, n1, b, n2, c, n3)
Definition: mxm_wrapper.F90:73
subroutine, private mxm_blas_qp(a, n1, b, n2, c, n3)
Definition: mxm_wrapper.F90:63
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
subroutine, private mxm_libxsmm_qp(a, n1, b, n2, c, n3)
integer, parameter, public qp
Definition: num_types.f90:10
integer, parameter, public dp
Definition: num_types.f90:9
integer, parameter, public sp
Definition: num_types.f90:8
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Utilities.
Definition: utils.f90:35