Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module tensor
64 use tensor_cpu, only : tnsr3d_cpu, tnsr1_3d_cpu, &
66 use tensor_sx, only : tnsr3d_sx, tnsr1_3d_sx, &
69 use num_types, only : rp
70 use mxm_wrapper, only : mxm
72 use device, only : device_get_ptr
73 use, intrinsic :: iso_c_binding, only : c_ptr
74 implicit none
75 private
76
77 interface transpose
78 module procedure trsp, trsp1
79 end interface transpose
80
83 end interface triple_tensor_product
84
85 public :: tensr3, transpose, trsp, trsp1, &
88
89
90contains
91
93 subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w)
94 integer :: nv
95 integer :: nu
96 real(kind=rp), intent(inout) :: v(nv, nv, nv)
97 real(kind=rp), intent(inout) :: u(nu, nu, nu)
98 real(kind=rp), intent(inout) :: w(nu*nu*nv)
99 real(kind=rp), intent(inout) :: a(nv, nu)
100 real(kind=rp), intent(inout) :: bt(nu, nv)
101 real(kind=rp), intent(inout) :: ct(nu, nv)
102 integer :: j, k, l, nunu, nvnv, nunv
103
104 nunu = nu**2
105 nvnv = nv**2
106 nunv = nu*nv
107
109
110 call mxm(a, nv, u, nu, v, nunu)
111 k = 1
112 l = 1
113 do j = 1, nu
114 call mxm(v(k, 1, 1), nv, bt, nu, w(l), nv)
115 k = k + nunv
116 l = l + nvnv
117 end do
118 call mxm(w, nvnv, ct, nu, v, nv)
119
120 end subroutine tensr3
121
123 subroutine trsp(a, lda, b, ldb)
124 integer, intent(in) :: lda
125 integer, intent(in) :: ldb
126 real(kind=rp), intent(inout) :: a(lda, ldb)
127 real(kind=rp), intent(in) :: b(ldb, lda)
128 integer :: i, j
129
130 do j = 1, ldb
131 do i = 1, lda
132 a(i, j) = b(j, i)
133 end do
134 end do
135
136 end subroutine trsp
137
139 subroutine trsp1(a, n)
140 integer, intent(in) :: n
141 real(kind=rp), intent(inout) :: a(n, n)
142 real(kind=rp) :: tmp
143 integer :: i, j
144
145 do j = 1, n
146 do i = j + 1, n
147 tmp = a(i, j)
148 a(i, j) = a(j, i)
149 a(j, i) = tmp
150 end do
151 end do
152
153 end subroutine trsp1
154
156 subroutine tnsr2d_el(v, nv, u, nu, A, Bt)
157 integer, intent(in) :: nv, nu
158 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
159 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv)
160
161 if (neko_bcknd_sx .eq. 1) then
162 call tnsr2d_el_sx(v, nv, u, nu, a, bt)
163 else if (neko_bcknd_xsmm .eq. 1) then
164 call tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
165 else
166 call tnsr2d_el_cpu(v, nv, u, nu, a, bt)
167 end if
168
169 end subroutine tnsr2d_el
170
173 subroutine tnsr3d_el(v, nv, u, nu, A, Bt, Ct)
174 integer, intent(in) :: nv, nu
175 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
176 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
177
178 if (neko_bcknd_sx .eq. 1) then
179 call tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
180 else if (neko_bcknd_xsmm .eq. 1) then
181 call tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
182 else
183 call tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
184 end if
185
186 end subroutine tnsr3d_el
187
190 subroutine tnsr3d_el_list(v, nv, u, nu, A, Bt, Ct, el_list, n_pt, on_host)
191 integer, intent(in) :: nv, nu, n_pt, el_list(n_pt)
192 real(kind=rp), intent(inout) :: v(nv*nv*nv, n_pt), u(nu*nu*nu, 1)
193 real(kind=rp), intent(inout) :: a(nv, nu, n_pt)
194 real(kind=rp), intent(inout) :: bt(nu, nv, n_pt)
195 real(kind=rp), intent(inout) :: ct(nu, nv, n_pt)
196 logical, intent(in) :: on_host
197 type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d, el_list_d
198 integer :: i
199
200 if (n_pt .eq. 0) return
201
202 if (neko_bcknd_sx .eq. 1) then
203 do i = 1, n_pt
204 call tnsr3d_el_sx(v(1,i), nv, u(1,el_list(i)), &
205 nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
206 end do
207 else if (neko_bcknd_xsmm .eq. 1) then
208 do i = 1, n_pt
209 call tnsr3d_el_xsmm(v(1,i), nv, u(1,el_list(i)), &
210 nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
211 end do
212 else if (neko_bcknd_device .eq. 1 .and. .not. on_host) then
213 v_d = device_get_ptr(v)
214 u_d = device_get_ptr(u)
215 a_d = device_get_ptr(a)
216 bt_d = device_get_ptr(bt)
217 ct_d = device_get_ptr(ct)
218 el_list_d = device_get_ptr(el_list)
219 call tnsr3d_el_list_device(v_d, nv, u_d, nu, &
220 a_d, bt_d, ct_d, el_list_d, n_pt)
221 else
222 do i = 1, n_pt
223 ! Note the use of el_list(i) + 1, because of the gslib C interface
224 call tnsr3d_el_cpu(v(1,i), nv, u(1,el_list(i)+1), &
225 nu, a(1,1,i), bt(1,1,i), ct(1,1,i))
226 end do
227 end if
228
229 end subroutine tnsr3d_el_list
230
233 subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv)
234 integer, intent(in) :: nv, nu, nelv
235 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
236 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
237 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
238 type(c_ptr) :: v_d, u_d, a_d, bt_d, ct_d
239
240 if (nelv .eq. 0) return
241
242 if (neko_bcknd_sx .eq. 1) then
243 call tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
244 else if (neko_bcknd_xsmm .eq. 1) then
245 call tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
246 else if (neko_bcknd_device .eq. 1) then
247 v_d = device_get_ptr(v)
248 u_d = device_get_ptr(u)
249 a_d = device_get_ptr(a)
250 bt_d = device_get_ptr(bt)
251 ct_d = device_get_ptr(ct)
252 call tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
253 else
254 call tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
255 end if
256
257 end subroutine tnsr3d
258
260 subroutine tnsr1_3d(v, nv, nu, A, Bt, Ct, nelv)
261 integer, intent(inout) :: nv, nu, nelv
262 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
263 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
264
265 if (nelv .eq. 0) return
266
267 if (neko_bcknd_sx .eq. 1) then
268 call tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
269 else if (neko_bcknd_xsmm .eq. 1) then
270 call tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
271 else
272 call tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
273 end if
274
275 end subroutine tnsr1_3d
276
279 subroutine addtnsr(s, h1, h2, h3, nx, ny, nz)
280
281 integer, intent(in) :: nx, ny, nz
282 real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz)
283 real(kind=rp), intent(inout) :: s(nx, ny, nz)
284 real(kind=rp) :: hh
285 integer :: ix, iy, iz
286
287 do iz = 1,nz
288 do iy = 1,ny
289 hh = h2(iy)*h3(iz)
290 do ix = 1,nx
291 s(ix,iy,iz) = s(ix,iy,iz)+hh*h1(ix)
292 end do
293 end do
294 end do
295
296 end subroutine addtnsr
297
312 subroutine triple_tensor_product_scalar(v, u, nu, Hr, Hs, Ht)
313 real(kind=rp), intent(inout) :: v
314 integer, intent(in) :: nu
315 real(kind=rp), intent(inout) :: u(nu, nu, nu)
316 real(kind=rp), intent(inout) :: hr(nu)
317 real(kind=rp), intent(inout) :: hs(nu)
318 real(kind=rp), intent(inout) :: ht(nu)
319
320 ! Artificially reshape v into a 1-dimensional array
321 ! since this is what tnsr3d_el needs as input argument
322 real(kind=rp) :: vv(1)
323 ! vv(1) = v
324
325 call tnsr3d_el(vv, 1, u, nu, hr, hs, ht)
326
327 v = vv(1)
328
329 end subroutine triple_tensor_product_scalar
330
348 subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, Hr, Hs, Ht)
349 real(kind=rp), intent(inout) :: v(3)
350 integer, intent(in) :: nu
351 real(kind=rp), intent(inout) :: u1(nu, nu, nu)
352 real(kind=rp), intent(inout) :: u2(nu, nu, nu)
353 real(kind=rp), intent(inout) :: u3(nu, nu, nu)
354 real(kind=rp), intent(inout) :: hr(nu)
355 real(kind=rp), intent(inout) :: hs(nu)
356 real(kind=rp), intent(inout) :: ht(nu)
357
358 call triple_tensor_product_scalar(v(1), u1, nu, hr, hs, ht)
359 call triple_tensor_product_scalar(v(2), u2, nu, hr, hs, ht)
360 call triple_tensor_product_scalar(v(3), u3, nu, hr, hs, ht)
361
362 end subroutine triple_tensor_product_vector
363
364end module tensor
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Device abstraction, common interface for various accelerators.
Definition device.F90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_xsmm
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine, public tnsr3d_el_list_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, elements, n_points)
subroutine, public tnsr3d_device(v_d, nv, u_d, nu, a_d, bt_d, ct_d, nelv)
Tensor operations SX-Aurora backend.
Definition tensor_sx.f90:2
subroutine, public tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:75
subroutine, public tnsr2d_el_sx(v, nv, u, nu, a, bt)
Definition tensor_sx.f90:13
subroutine, public tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
Definition tensor_sx.f90:24
Tensor operations libxsmm backend.
subroutine, public tnsr3d_el_xsmm(v, nv, u, nu, a, bt, ct)
subroutine, public tnsr3d_xsmm(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr2d_el_xsmm(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_xsmm(v, nv, nu, a, bt, ct, nelv)
Tensor operations.
Definition tensor.f90:61
subroutine, public tnsr3d_el(v, nv, u, nu, a, bt, ct)
Tensor product performed on a single element.
Definition tensor.f90:174
subroutine, public trsp1(a, n)
In-place transpose of a square tensor.
Definition tensor.f90:140
subroutine, public addtnsr(s, h1, h2, h3, nx, ny, nz)
Maps and adds to S a tensor product form of the three functions H1,H2,H3. This is a single element ro...
Definition tensor.f90:280
subroutine, public tnsr3d_el_list(v, nv, u, nu, a, bt, ct, el_list, n_pt, on_host)
Tensor product performed on a subset of the elements.
Definition tensor.f90:191
subroutine, public trsp(a, lda, b, ldb)
Transpose of a rectangular tensor .
Definition tensor.f90:124
subroutine triple_tensor_product_scalar(v, u, nu, hr, hs, ht)
Computes the tensor product . This operation is usually performed for spectral interpolation of a sca...
Definition tensor.f90:313
subroutine, public tnsr3d(v, nv, u, nu, a, bt, ct, nelv)
Tensor product performed on nelv elements.
Definition tensor.f90:234
subroutine, public tnsr1_3d(v, nv, nu, a, bt, ct, nelv)
In place tensor product .
Definition tensor.f90:261
subroutine, public tensr3(v, nv, u, nu, a, bt, ct, w)
Tensor product .
Definition tensor.f90:94
subroutine triple_tensor_product_vector(v, u1, u2, u3, nu, hr, hs, ht)
Computes the tensor product on a vector field . This operation is usually performed for spectral inte...
Definition tensor.f90:349
subroutine, public tnsr2d_el(v, nv, u, nu, a, bt)
Computes .
Definition tensor.f90:157