Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module tensor
62 use tensor_xsmm
63 use tensor_cpu
64 use tensor_sx
66 use num_types, only : rp
67 use mxm_wrapper
68 use neko_config
69 use device
70 use, intrinsic :: iso_c_binding, only : c_ptr
71 implicit none
72 private
73
74 interface transpose
75 module procedure trsp, trsp1
76 end interface transpose
77
80 end interface triple_tensor_product
81
82 public :: tensr3, transpose, trsp, trsp1, &
85
86
87contains
88
90 subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w)
91 integer :: nv
92 integer :: nu
93 real(kind=rp), intent(inout) :: v(nv, nv, nv)
94 real(kind=rp), intent(inout) :: u(nu, nu, nu)
95 real(kind=rp), intent(inout) :: w(nu*nu*nv)
96 real(kind=rp), intent(inout) :: a(nv, nu)
97 real(kind=rp), intent(inout) :: bt(nu, nv)
98 real(kind=rp), intent(inout) :: ct(nu, nv)
99 integer :: j, k, l, nunu, nvnv, nunv
100
101 nunu = nu**2
102 nvnv = nv**2
103 nunv = nu*nv
104
106
107 call mxm(a, nv, u, nu, v, nunu)
108 k = 1
109 l = 1
110 do j = 1, nu
111 call mxm(v(k, 1, 1), nv, bt, nu, w(l), nv)
112 k = k + nunv
113 l = l + nvnv
114 end do
115 call mxm(w, nvnv, ct, nu, v, nv)
116
117 end subroutine tensr3
118
120 subroutine trsp(a, lda, b, ldb)
121 integer, intent(in) :: lda
122 integer, intent(in) :: ldb
123 real(kind=rp), intent(inout) :: a(lda, ldb)
124 real(kind=rp), intent(in) :: b(ldb, lda)
125 integer :: i, j
126
127 do j = 1, ldb
128 do i = 1, lda
129 a(i, j) = b(j, i)
130 end do
131 end do
132
133 end subroutine trsp
134
136 subroutine trsp1(a, n)
137 integer, intent(in) :: n
138 real(kind=rp), intent(inout) :: a(n, n)
139 real(kind=rp) :: tmp
140 integer :: i, j
141
142 do j = 1, n
143 do i = j + 1, n
144 tmp = a(i, j)
145 a(i, j) = a(j, i)
146 a(j, i) = tmp
147 end do
148 end do
149
150 end subroutine trsp1
151
153 subroutine tnsr2d_el(v, nv, u, nu, A, Bt)
154 integer, intent(in) :: nv, nu
155 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
156 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
157
158 if (neko_bcknd_sx .eq. 1) then
159 call tnsr2d_el_sx(v, nv, u, nu, a, bt)
160 else if (neko_bcknd_xsmm .eq. 1) then
161 call tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
162 else
163 call tnsr2d_el_cpu(v, nv, u, nu, a, bt)
164 end if
165
166 end subroutine tnsr2d_el
167
170 subroutine tnsr3d_el(v, nv, u, nu, A, Bt, Ct)
171 integer, intent(in) :: nv, nu
172 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
173 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
174
175 if (neko_bcknd_sx .eq. 1) then
176 call tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
177 else if (neko_bcknd_xsmm .eq. 1) then
178 call tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
179 else
180 call tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
181 end if
182
183 end subroutine tnsr3d_el
184
187 subroutine tnsr3d_el_list(v, nv, u, nu, A, Bt, Ct, el_list, n_pt)
188 integer, intent(in) :: nv, nu, n_pt, el_list(n_pt)
189 real(kind=rp), intent(inout) :: v(nv*nv*nv, n_pt), u(nu*nu*nu,1)
190 real(kind=rp), intent(inout) :: a(nv,nu,n_pt),bt(nu, nv,n_pt),ct(nu,nv,n_pt)
191 type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d, el_list_d
192 integer :: i
193
194 if (neko_bcknd_sx .eq. 1) then
195 do i = 1, n_pt
196 call tnsr3d_el_sx(v(1,i), nv, u(1,el_list(i)), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
197 end do
198 else if (neko_bcknd_xsmm .eq. 1) then
199 do i = 1, n_pt
200 call tnsr3d_el_xsmm(v(1,i), nv, u(1,el_list(i)), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
201 end do
202 else if (neko_bcknd_device .eq. 1) then
203 v_d = device_get_ptr(v)
204 u_d = device_get_ptr(u)
205 a_d = device_get_ptr(a)
206 bt_d = device_get_ptr(bt)
207 ct_d = device_get_ptr(ct)
208 el_list_d = device_get_ptr(el_list)
209 call tnsr3d_el_list_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, el_list_d, n_pt)
210 else
211 do i = 1, n_pt
212 ! Note the use of el_list(i) + 1, because of the gslib C interface
213 call tnsr3d_el_cpu(v(1,i), nv, u(1,el_list(i)+1), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
214 end do
215 end if
216
217 end subroutine tnsr3d_el_list
218
219
222 subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv)
223 integer, intent(in) :: nv, nu, nelv
224 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
225 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
226 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
227 type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d
228
229
230 if (neko_bcknd_sx .eq. 1) then
231 call tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
232 else if (neko_bcknd_xsmm .eq. 1) then
233 call tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
234 else if (neko_bcknd_device .eq. 1) then
235 v_d = device_get_ptr(v)
236 u_d = device_get_ptr(u)
237 a_d = device_get_ptr(a)
238 bt_d = device_get_ptr(bt)
239 ct_d = device_get_ptr(ct)
240 call tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
241 else
242 call tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
243 end if
244
245 end subroutine tnsr3d
246
248 subroutine tnsr1_3d(v, nv, nu, A, Bt, Ct, nelv)
249 integer, intent(inout) :: nv, nu, nelv
250 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
251 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
252
253 if (neko_bcknd_sx .eq. 1) then
254 call tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
255 else if (neko_bcknd_xsmm .eq. 1) then
256 call tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
257 else
258 call tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
259 end if
260
261 end subroutine tnsr1_3d
262
265 subroutine addtnsr(s, h1, h2, h3, nx, ny, nz)
266
267 integer, intent(in) :: nx, ny, nz
268 real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz)
269 real(kind=rp), intent(inout) :: s(nx, ny, nz)
270 real(kind=rp) :: hh
271 integer :: ix, iy, iz
272
273 do iz = 1,nz
274 do iy = 1,ny
275 hh = h2(iy)*h3(iz)
276 do ix = 1,nx
277 s(ix,iy,iz) = s(ix,iy,iz)+hh*h1(ix)
278 end do
279 end do
280 end do
281
282 end subroutine addtnsr
283
298 subroutine triple_tensor_product_scalar(v, u, nu, Hr, Hs, Ht)
299 real(kind=rp), intent(inout) :: v
300 integer, intent(in) :: nu
301 real(kind=rp), intent(inout) :: u(nu,nu,nu)
302 real(kind=rp), intent(inout) :: hr(nu)
303 real(kind=rp), intent(inout) :: hs(nu)
304 real(kind=rp), intent(inout) :: ht(nu)
305
306 ! Artificially reshape v into a 1-dimensional array
307 ! since this is what tnsr3d_el needs as input argument
308 real(kind=rp) :: vv(1)
309 ! vv(1) = v
310
311 call tnsr3d_el(vv,1,u,nu,hr,hs,ht)
312
313 v = vv(1)
314
315 end subroutine triple_tensor_product_scalar
316
334 subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, Hr, Hs, Ht)
335 real(kind=rp), intent(inout) :: v(3)
336 integer, intent(in) :: nu
337 real(kind=rp), intent(inout) :: u1(nu,nu,nu)
338 real(kind=rp), intent(inout) :: u2(nu,nu,nu)
339 real(kind=rp), intent(inout) :: u3(nu,nu,nu)
340 real(kind=rp), intent(inout) :: hr(nu)
341 real(kind=rp), intent(inout) :: hs(nu)
342 real(kind=rp), intent(inout) :: ht(nu)
343
344 call triple_tensor_product_scalar(v(1), u1, nu, hr, hs, ht)
345 call triple_tensor_product_scalar(v(2), u2, nu, hr, hs, ht)
346 call triple_tensor_product_scalar(v(3), u3, nu, hr, hs, ht)
347
348 end subroutine triple_tensor_product_vector
349
350end module tensor
Return the device pointer for an associated Fortran array.
Definition device.F90:81
Device abstraction, common interface for various accelerators.
Definition device.F90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_xsmm
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine, public tnsr3d_el_list_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, elements, n_points)
subroutine, public tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
Tensor operations SX-Aurora backend.
Definition tensor_sx.f90:2
subroutine, public tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:75
subroutine, public tnsr2d_el_sx(v, nv, u, nu, a, bt)
Definition tensor_sx.f90:13
subroutine, public tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
Definition tensor_sx.f90:24
Tensor operations libxsmm backend.
subroutine, public tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
subroutine, public tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
Tensor operations.
Definition tensor.f90:61
subroutine, public tnsr3d_el_list(v, nv, u, nu, a, bt, ct, el_list, n_pt)
Tensor product performed on a subset of the elements.
Definition tensor.f90:188
subroutine, public tnsr3d_el(v, nv, u, nu, a, bt, ct)
Tensor product performed on a single element.
Definition tensor.f90:171
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:137
subroutine, public addtnsr(s, h1, h2, h3, nx, ny, nz)
Maps and adds to S a tensor product form of the three functions H1,H2,H3. This is a single element ro...
Definition tensor.f90:266
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
Definition tensor.f90:121
subroutine triple_tensor_product_scalar(v, u, nu, hr, hs, ht)
Computes the tensor product . This operation is usually performed for spectral interpolation of a sca...
Definition tensor.f90:299
subroutine, public tnsr3d(v, nv, u, nu, a, bt, ct, nelv)
Tensor product performed on nelv elements.
Definition tensor.f90:223
subroutine, public tnsr1_3d(v, nv, nu, a, bt, ct, nelv)
In place tensor product .
Definition tensor.f90:249
subroutine, public tensr3(v, nv, u, nu, a, bt, ct, w)
Tensor product .
Definition tensor.f90:91
subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, hr, hs, ht)
Computes the tensor product on a vector field . This operation is usually performed for spectral inte...
Definition tensor.f90:335
subroutine, public tnsr2d_el(v, nv, u, nu, a, bt)
Computes .
Definition tensor.f90:154