Neko  0.9.99
A portable framework for high-order spectral element flow simulations
tensor.f90
Go to the documentation of this file.
1 ! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2 !
3 ! The UChicago Argonne, LLC as Operator of Argonne National
4 ! Laboratory holds copyright in the Software. The copyright holder
5 ! reserves all rights except those expressly granted to licensees,
6 ! and U.S. Government license rights.
7 !
8 ! Redistribution and use in source and binary forms, with or without
9 ! modification, are permitted provided that the following conditions
10 ! are met:
11 !
12 ! 1. Redistributions of source code must retain the above copyright
13 ! notice, this list of conditions and the disclaimer below.
14 !
15 ! 2. Redistributions in binary form must reproduce the above copyright
16 ! notice, this list of conditions and the disclaimer (as noted below)
17 ! in the documentation and/or other materials provided with the
18 ! distribution.
19 !
20 ! 3. Neither the name of ANL nor the names of its contributors
21 ! may be used to endorse or promote products derived from this software
22 ! without specific prior written permission.
23 !
24 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28 ! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29 ! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 ! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31 ! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 ! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 ! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 ! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 !
37 ! Additional BSD Notice
38 ! ---------------------
39 ! 1. This notice is required to be provided under our contract with
40 ! the U.S. Department of Energy (DOE). This work was produced at
41 ! Argonne National Laboratory under Contract
42 ! No. DE-AC02-06CH11357 with the DOE.
43 !
44 ! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45 ! LLC nor any of their employees, makes any warranty,
46 ! express or implied, or assumes any liability or responsibility for the
47 ! accuracy, completeness, or usefulness of any information, apparatus,
48 ! product, or process disclosed, or represents that its use would not
49 ! infringe privately-owned rights.
50 !
51 ! 3. Also, reference herein to any specific commercial products, process,
52 ! or services by trade name, trademark, manufacturer or otherwise does
53 ! not necessarily constitute or imply its endorsement, recommendation,
54 ! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55 ! The views and opinions of authors expressed
56 ! herein do not necessarily state or reflect those of the United States
57 ! Government or UCHICAGO ARGONNE, LLC, and shall
58 ! not be used for advertising or product endorsement purposes.
59 !
61 module tensor
62  use tensor_xsmm
63  use tensor_cpu
64  use tensor_sx
65  use tensor_device
66  use num_types, only : rp
67  use mxm_wrapper
68  use neko_config
69  use device
70  use, intrinsic :: iso_c_binding, only : c_ptr
71  implicit none
72  private
73 
74  interface transpose
75  module procedure trsp, trsp1
76  end interface transpose
77 
80  end interface triple_tensor_product
81 
82  public :: tensr3, transpose, trsp, trsp1, &
85 
86 
87 contains
88 
90  subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w)
91  integer :: nv
92  integer :: nu
93  real(kind=rp), intent(inout) :: v(nv, nv, nv)
94  real(kind=rp), intent(inout) :: u(nu, nu, nu)
95  real(kind=rp), intent(inout) :: w(nu*nu*nv)
96  real(kind=rp), intent(inout) :: a(nv, nu)
97  real(kind=rp), intent(inout) :: bt(nu, nv)
98  real(kind=rp), intent(inout) :: ct(nu, nv)
99  integer :: j, k, l, nunu, nvnv, nunv
100 
101  nunu = nu**2
102  nvnv = nv**2
103  nunv = nu*nv
104 
106 
107  call mxm(a, nv, u, nu, v, nunu)
108  k = 1
109  l = 1
110  do j = 1, nu
111  call mxm(v(k, 1, 1), nv, bt, nu, w(l), nv)
112  k = k + nunv
113  l = l + nvnv
114  end do
115  call mxm(w, nvnv, ct, nu, v, nv)
116 
117  end subroutine tensr3
118 
120  subroutine trsp(a, lda, b, ldb)
121  integer, intent(in) :: lda
122  integer, intent(in) :: ldb
123  real(kind=rp), intent(inout) :: a(lda, ldb)
124  real(kind=rp), intent(in) :: b(ldb, lda)
125  integer :: i, j
126 
127  do j = 1, ldb
128  do i = 1, lda
129  a(i, j) = b(j, i)
130  end do
131  end do
132 
133  end subroutine trsp
134 
136  subroutine trsp1(a, n)
137  integer, intent(in) :: n
138  real(kind=rp), intent(inout) :: a(n, n)
139  real(kind=rp) :: tmp
140  integer :: i, j
141 
142  do j = 1, n
143  do i = j + 1, n
144  tmp = a(i, j)
145  a(i, j) = a(j, i)
146  a(j, i) = tmp
147  end do
148  end do
149 
150  end subroutine trsp1
151 
153  subroutine tnsr2d_el(v, nv, u, nu, A, Bt)
154  integer, intent(in) :: nv, nu
155  real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
156  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
157 
158  if (neko_bcknd_sx .eq. 1) then
159  call tnsr2d_el_sx(v, nv, u, nu, a, bt)
160  else if (neko_bcknd_xsmm .eq. 1) then
161  call tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
162  else
163  call tnsr2d_el_cpu(v, nv, u, nu, a, bt)
164  end if
165 
166  end subroutine tnsr2d_el
167 
170  subroutine tnsr3d_el(v, nv, u, nu, A, Bt, Ct)
171  integer, intent(in) :: nv, nu
172  real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
173  real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
174 
175  if (neko_bcknd_sx .eq. 1) then
176  call tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
177  else if (neko_bcknd_xsmm .eq. 1) then
178  call tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
179  else
180  call tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
181  end if
182 
183  end subroutine tnsr3d_el
184 
187  subroutine tnsr3d_el_list(v, nv, u, nu, A, Bt, Ct, el_list, n_pt)
188  integer, intent(in) :: nv, nu, n_pt, el_list(n_pt)
189  real(kind=rp), intent(inout) :: v(nv*nv*nv, n_pt), u(nu*nu*nu,1)
190  real(kind=rp), intent(inout) :: a(nv,nu,n_pt),bt(nu, nv,n_pt),ct(nu,nv,n_pt)
191  type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d, el_list_d
192  integer :: i
193 
194  if (neko_bcknd_sx .eq. 1) then
195  do i = 1, n_pt
196  call tnsr3d_el_sx(v(1,i), nv, u(1,el_list(i)), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
197  end do
198  else if (neko_bcknd_xsmm .eq. 1) then
199  do i = 1, n_pt
200  call tnsr3d_el_xsmm(v(1,i), nv, u(1,el_list(i)), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
201  end do
202  else if (neko_bcknd_device .eq. 1) then
203  v_d = device_get_ptr(v)
204  u_d = device_get_ptr(u)
205  a_d = device_get_ptr(a)
206  bt_d = device_get_ptr(bt)
207  ct_d = device_get_ptr(ct)
208  el_list_d = device_get_ptr(el_list)
209  call tnsr3d_el_list_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, el_list_d, n_pt)
210  else
211  do i = 1, n_pt
212  ! Note the use of el_list(i) + 1, because of the gslib C interface
213  call tnsr3d_el_cpu(v(1,i), nv, u(1,el_list(i)+1), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
214  end do
215  end if
216 
217  end subroutine tnsr3d_el_list
218 
219 
222  subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv)
223  integer, intent(inout) :: nv, nu, nelv
224  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
225  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
226  type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d
227 
228 
229  if (neko_bcknd_sx .eq. 1) then
230  call tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
231  else if (neko_bcknd_xsmm .eq. 1) then
232  call tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
233  else if (neko_bcknd_device .eq. 1) then
234  v_d = device_get_ptr(v)
235  u_d = device_get_ptr(u)
236  a_d = device_get_ptr(a)
237  bt_d = device_get_ptr(bt)
238  ct_d = device_get_ptr(ct)
239  call tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
240  else
241  call tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
242  end if
243 
244  end subroutine tnsr3d
245 
247  subroutine tnsr1_3d(v, nv, nu, A, Bt, Ct, nelv)
248  integer, intent(inout) :: nv, nu, nelv
249  real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
250  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
251 
252  if (neko_bcknd_sx .eq. 1) then
253  call tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
254  else if (neko_bcknd_xsmm .eq. 1) then
255  call tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
256  else
257  call tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
258  end if
259 
260  end subroutine tnsr1_3d
261 
264  subroutine addtnsr(s, h1, h2, h3, nx, ny, nz)
265 
266  integer, intent(in) :: nx, ny, nz
267  real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz)
268  real(kind=rp), intent(inout) :: s(nx, ny, nz)
269  real(kind=rp) :: hh
270  integer :: ix, iy, iz
271 
272  do iz = 1,nz
273  do iy = 1,ny
274  hh = h2(iy)*h3(iz)
275  do ix = 1,nx
276  s(ix,iy,iz) = s(ix,iy,iz)+hh*h1(ix)
277  end do
278  end do
279  end do
280 
281  end subroutine addtnsr
282 
297  subroutine triple_tensor_product_scalar(v, u, nu, Hr, Hs, Ht)
298  real(kind=rp), intent(inout) :: v
299  integer, intent(in) :: nu
300  real(kind=rp), intent(inout) :: u(nu,nu,nu)
301  real(kind=rp), intent(inout) :: hr(nu)
302  real(kind=rp), intent(inout) :: hs(nu)
303  real(kind=rp), intent(inout) :: ht(nu)
304 
305  ! Artificially reshape v into a 1-dimensional array
306  ! since this is what tnsr3d_el needs as input argument
307  real(kind=rp) :: vv(1)
308  ! vv(1) = v
309 
310  call tnsr3d_el(vv,1,u,nu,hr,hs,ht)
311 
312  v = vv(1)
313 
314  end subroutine triple_tensor_product_scalar
315 
333  subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, Hr, Hs, Ht)
334  real(kind=rp), intent(inout) :: v(3)
335  integer, intent(in) :: nu
336  real(kind=rp), intent(inout) :: u1(nu,nu,nu)
337  real(kind=rp), intent(inout) :: u2(nu,nu,nu)
338  real(kind=rp), intent(inout) :: u3(nu,nu,nu)
339  real(kind=rp), intent(inout) :: hr(nu)
340  real(kind=rp), intent(inout) :: hs(nu)
341  real(kind=rp), intent(inout) :: ht(nu)
342 
343  call triple_tensor_product_scalar(v(1), u1, nu, hr, hs, ht)
344  call triple_tensor_product_scalar(v(2), u2, nu, hr, hs, ht)
345  call triple_tensor_product_scalar(v(3), u3, nu, hr, hs, ht)
346 
347  end subroutine triple_tensor_product_vector
348 
349 end module tensor
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
Build configurations.
Definition: neko_config.f90:34
integer, parameter neko_bcknd_sx
Definition: neko_config.f90:39
integer, parameter neko_bcknd_device
Definition: neko_config.f90:44
integer, parameter neko_bcknd_xsmm
Definition: neko_config.f90:40
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
subroutine, public tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
Definition: tensor_cpu.f90:12
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
Definition: tensor_cpu.f90:23
subroutine, public tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
Definition: tensor_cpu.f90:878
subroutine, public tnsr3d_el_list_device(v_d, nv, u_d, nu, A_d, Bt_d, Ct_d, elements, n_points)
subroutine, public tnsr3d_device(v_d, nv, u_d, nu, A_d, Bt_d, Ct_d, nelv)
Tensor operations SX-Aurora backend.
Definition: tensor_sx.f90:2
subroutine, public tnsr2d_el_sx(v, nv, u, nu, A, Bt)
Definition: tensor_sx.f90:13
subroutine, public tnsr1_3d_sx(v, nv, nu, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:251
subroutine, public tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
Definition: tensor_sx.f90:24
subroutine, public tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:75
Tensor operations libxsmm backend.
Definition: tensor_xsmm.F90:61
subroutine, public tnsr3d_el_xsmm(v, nv, u, nu, A, Bt, Ct)
Definition: tensor_xsmm.F90:83
subroutine, public tnsr2d_el_xsmm(v, nv, u, nu, A, Bt)
Definition: tensor_xsmm.F90:72
subroutine, public tnsr1_3d_xsmm(v, nv, nu, A, Bt, Ct, nelv)
subroutine, public tnsr3d_xsmm(v, nv, u, nu, A, Bt, Ct, nelv)
Tensor operations.
Definition: tensor.f90:61
subroutine, public tensr3(v, nv, u, nu, A, Bt, Ct, w)
Tensor product .
Definition: tensor.f90:91
subroutine, public tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv)
Tensor product performed on nelv elements.
Definition: tensor.f90:223
subroutine, public tnsr1_3d(v, nv, nu, A, Bt, Ct, nelv)
In place tensor product .
Definition: tensor.f90:248
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition: tensor.f90:137
subroutine, public addtnsr(s, h1, h2, h3, nx, ny, nz)
Maps and adds to S a tensor product form of the three functions H1,H2,H3. This is a single element ro...
Definition: tensor.f90:265
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
Definition: tensor.f90:121
subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, Hr, Hs, Ht)
Computes the tensor product on a vector field . This operation is usually performed for spectral inte...
Definition: tensor.f90:334
subroutine, public tnsr3d_el_list(v, nv, u, nu, A, Bt, Ct, el_list, n_pt)
Tensor product performed on a subset of the elements.
Definition: tensor.f90:188
subroutine triple_tensor_product_scalar(v, u, nu, Hr, Hs, Ht)
Computes the tensor product . This operation is usually performed for spectral interpolation of a sca...
Definition: tensor.f90:298
subroutine, public tnsr2d_el(v, nv, u, nu, A, Bt)
Computes .
Definition: tensor.f90:154
subroutine, public tnsr3d_el(v, nv, u, nu, A, Bt, Ct)
Tensor product performed on a single element.
Definition: tensor.f90:171