Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module tensor
62 use tensor_xsmm
63 use tensor_cpu
64 use tensor_sx
66 use num_types, only : rp
67 use mxm_wrapper
68 use neko_config
69 use device
70 use, intrinsic :: iso_c_binding, only : c_ptr
71 implicit none
72 private
73
74 interface transpose
75 module procedure trsp, trsp1
76 end interface transpose
77
80 end interface triple_tensor_product
81
82 public :: tensr3, transpose, trsp, trsp1, &
85
86
87contains
88
90 subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w)
91 integer :: nv
92 integer :: nu
93 real(kind=rp), intent(inout) :: v(nv, nv, nv)
94 real(kind=rp), intent(inout) :: u(nu, nu, nu)
95 real(kind=rp), intent(inout) :: w(nu*nu*nv)
96 real(kind=rp), intent(inout) :: a(nv, nu)
97 real(kind=rp), intent(inout) :: bt(nu, nv)
98 real(kind=rp), intent(inout) :: ct(nu, nv)
99 integer :: j, k, l, nunu, nvnv, nunv
100
101 nunu = nu**2
102 nvnv = nv**2
103 nunv = nu*nv
104
106
107 call mxm(a, nv, u, nu, v, nunu)
108 k = 1
109 l = 1
110 do j = 1, nu
111 call mxm(v(k, 1, 1), nv, bt, nu, w(l), nv)
112 k = k + nunv
113 l = l + nvnv
114 end do
115 call mxm(w, nvnv, ct, nu, v, nv)
116
117 end subroutine tensr3
118
120 subroutine trsp(a, lda, b, ldb)
121 integer, intent(in) :: lda
122 integer, intent(in) :: ldb
123 real(kind=rp), intent(inout) :: a(lda, ldb)
124 real(kind=rp), intent(in) :: b(ldb, lda)
125 integer :: i, j
126
127 do j = 1, ldb
128 do i = 1, lda
129 a(i, j) = b(j, i)
130 end do
131 end do
132
133 end subroutine trsp
134
136 subroutine trsp1(a, n)
137 integer, intent(in) :: n
138 real(kind=rp), intent(inout) :: a(n, n)
139 real(kind=rp) :: tmp
140 integer :: i, j
141
142 do j = 1, n
143 do i = j + 1, n
144 tmp = a(i, j)
145 a(i, j) = a(j, i)
146 a(j, i) = tmp
147 end do
148 end do
149
150 end subroutine trsp1
151
153 subroutine tnsr2d_el(v, nv, u, nu, A, Bt)
154 integer, intent(in) :: nv, nu
155 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
156 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
157
158 if (neko_bcknd_sx .eq. 1) then
159 call tnsr2d_el_sx(v, nv, u, nu, a, bt)
160 else if (neko_bcknd_xsmm .eq. 1) then
161 call tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
162 else
163 call tnsr2d_el_cpu(v, nv, u, nu, a, bt)
164 end if
165
166 end subroutine tnsr2d_el
167
170 subroutine tnsr3d_el(v, nv, u, nu, A, Bt, Ct)
171 integer, intent(in) :: nv, nu
172 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
173 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
174
175 if (neko_bcknd_sx .eq. 1) then
176 call tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
177 else if (neko_bcknd_xsmm .eq. 1) then
178 call tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
179 else
180 call tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
181 end if
182
183 end subroutine tnsr3d_el
184
187 subroutine tnsr3d_el_list(v, nv, u, nu, A, Bt, Ct, el_list, n_pt)
188 integer, intent(in) :: nv, nu, n_pt, el_list(n_pt)
189 real(kind=rp), intent(inout) :: v(nv*nv*nv, n_pt), u(nu*nu*nu,1)
190 real(kind=rp), intent(inout) :: a(nv,nu,n_pt),bt(nu, nv,n_pt),ct(nu,nv,n_pt)
191 type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d, el_list_d
192 integer :: i
193
194 if (n_pt .eq. 0) return
195
196 if (neko_bcknd_sx .eq. 1) then
197 do i = 1, n_pt
198 call tnsr3d_el_sx(v(1,i), nv, u(1,el_list(i)), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
199 end do
200 else if (neko_bcknd_xsmm .eq. 1) then
201 do i = 1, n_pt
202 call tnsr3d_el_xsmm(v(1,i), nv, u(1,el_list(i)), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
203 end do
204 else if (neko_bcknd_device .eq. 1) then
205 v_d = device_get_ptr(v)
206 u_d = device_get_ptr(u)
207 a_d = device_get_ptr(a)
208 bt_d = device_get_ptr(bt)
209 ct_d = device_get_ptr(ct)
210 el_list_d = device_get_ptr(el_list)
211 call tnsr3d_el_list_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, el_list_d, n_pt)
212 else
213 do i = 1, n_pt
214 ! Note the use of el_list(i) + 1, because of the gslib C interface
215 call tnsr3d_el_cpu(v(1,i), nv, u(1,el_list(i)+1), nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
216 end do
217 end if
218
219 end subroutine tnsr3d_el_list
220
221
224 subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv)
225 integer, intent(in) :: nv, nu, nelv
226 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
227 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
228 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
229 type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d
230
231 if (nelv .eq. 0) return
232
233 if (neko_bcknd_sx .eq. 1) then
234 call tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
235 else if (neko_bcknd_xsmm .eq. 1) then
236 call tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
237 else if (neko_bcknd_device .eq. 1) then
238 v_d = device_get_ptr(v)
239 u_d = device_get_ptr(u)
240 a_d = device_get_ptr(a)
241 bt_d = device_get_ptr(bt)
242 ct_d = device_get_ptr(ct)
243 call tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
244 else
245 call tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
246 end if
247
248 end subroutine tnsr3d
249
251 subroutine tnsr1_3d(v, nv, nu, A, Bt, Ct, nelv)
252 integer, intent(inout) :: nv, nu, nelv
253 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
254 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
255
256 if (nelv .eq. 0) return
257
258 if (neko_bcknd_sx .eq. 1) then
259 call tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
260 else if (neko_bcknd_xsmm .eq. 1) then
261 call tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
262 else
263 call tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
264 end if
265
266 end subroutine tnsr1_3d
267
270 subroutine addtnsr(s, h1, h2, h3, nx, ny, nz)
271
272 integer, intent(in) :: nx, ny, nz
273 real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz)
274 real(kind=rp), intent(inout) :: s(nx, ny, nz)
275 real(kind=rp) :: hh
276 integer :: ix, iy, iz
277
278 do iz = 1,nz
279 do iy = 1,ny
280 hh = h2(iy)*h3(iz)
281 do ix = 1,nx
282 s(ix,iy,iz) = s(ix,iy,iz)+hh*h1(ix)
283 end do
284 end do
285 end do
286
287 end subroutine addtnsr
288
303 subroutine triple_tensor_product_scalar(v, u, nu, Hr, Hs, Ht)
304 real(kind=rp), intent(inout) :: v
305 integer, intent(in) :: nu
306 real(kind=rp), intent(inout) :: u(nu,nu,nu)
307 real(kind=rp), intent(inout) :: hr(nu)
308 real(kind=rp), intent(inout) :: hs(nu)
309 real(kind=rp), intent(inout) :: ht(nu)
310
311 ! Artificially reshape v into a 1-dimensional array
312 ! since this is what tnsr3d_el needs as input argument
313 real(kind=rp) :: vv(1)
314 ! vv(1) = v
315
316 call tnsr3d_el(vv,1,u,nu,hr,hs,ht)
317
318 v = vv(1)
319
320 end subroutine triple_tensor_product_scalar
321
339 subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, Hr, Hs, Ht)
340 real(kind=rp), intent(inout) :: v(3)
341 integer, intent(in) :: nu
342 real(kind=rp), intent(inout) :: u1(nu,nu,nu)
343 real(kind=rp), intent(inout) :: u2(nu,nu,nu)
344 real(kind=rp), intent(inout) :: u3(nu,nu,nu)
345 real(kind=rp), intent(inout) :: hr(nu)
346 real(kind=rp), intent(inout) :: hs(nu)
347 real(kind=rp), intent(inout) :: ht(nu)
348
349 call triple_tensor_product_scalar(v(1), u1, nu, hr, hs, ht)
350 call triple_tensor_product_scalar(v(2), u2, nu, hr, hs, ht)
351 call triple_tensor_product_scalar(v(3), u3, nu, hr, hs, ht)
352
353 end subroutine triple_tensor_product_vector
354
355end module tensor
Return the device pointer for an associated Fortran array.
Definition device.F90:96
Device abstraction, common interface for various accelerators.
Definition device.F90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_xsmm
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine, public tnsr3d_el_list_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, elements, n_points)
subroutine, public tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
Tensor operations SX-Aurora backend.
Definition tensor_sx.f90:2
subroutine, public tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:75
subroutine, public tnsr2d_el_sx(v, nv, u, nu, a, bt)
Definition tensor_sx.f90:13
subroutine, public tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
Definition tensor_sx.f90:24
Tensor operations libxsmm backend.
subroutine, public tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
subroutine, public tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
Tensor operations.
Definition tensor.f90:61
subroutine, public tnsr3d_el_list(v, nv, u, nu, a, bt, ct, el_list, n_pt)
Tensor product performed on a subset of the elements.
Definition tensor.f90:188
subroutine, public tnsr3d_el(v, nv, u, nu, a, bt, ct)
Tensor product performed on a single element.
Definition tensor.f90:171
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:137
subroutine, public addtnsr(s, h1, h2, h3, nx, ny, nz)
Maps and adds to S a tensor product form of the three functions H1,H2,H3. This is a single element ro...
Definition tensor.f90:271
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
Definition tensor.f90:121
subroutine triple_tensor_product_scalar(v, u, nu, hr, hs, ht)
Computes the tensor product . This operation is usually performed for spectral interpolation of a sca...
Definition tensor.f90:304
subroutine, public tnsr3d(v, nv, u, nu, a, bt, ct, nelv)
Tensor product performed on nelv elements.
Definition tensor.f90:225
subroutine, public tnsr1_3d(v, nv, nu, a, bt, ct, nelv)
In place tensor product .
Definition tensor.f90:252
subroutine, public tensr3(v, nv, u, nu, a, bt, ct, w)
Tensor product .
Definition tensor.f90:91
subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, hr, hs, ht)
Computes the tensor product on a vector field . This operation is usually performed for spectral inte...
Definition tensor.f90:340
subroutine, public tnsr2d_el(v, nv, u, nu, a, bt)
Computes .
Definition tensor.f90:154