Neko  0.8.99
A portable framework for high-order spectral element flow simulations
tensor_sx.f90
Go to the documentation of this file.
1 
2 module tensor_sx
3  use num_types
4  use mxm_wrapper
5  implicit none
6  private
7 
9 
10 contains
11 
12  subroutine tnsr2d_el_sx(v, nv, u, nu, A, Bt)
13  integer, intent(in) :: nv, nu
14  real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
15  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
16  real(kind=rp) :: work(0:nu**2*nv)
17 
18  call mxm(a, nv, u, nu, work, nu)
19  call mxm(work, nv, bt, nu, v, nv)
20 
21  end subroutine tnsr2d_el_sx
22 
23  subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
24  integer, intent(in) :: nv, nu
25  real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
26  real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
27  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
28  real(kind=rp) :: tmp
29  integer :: i, j, k, l, nunu, nvnu, nvnv
30  integer :: ii, jj
31  nvnu = nv * nu
32  nunu = nu * nu
33  nvnv = nv * nv
34 
35  do j = 1, nunu
36  do i = 1, nv
37  ii = i + nv * (j - 1)
38  tmp = 0.0_rp
39  do k = 1, nu
40  tmp = tmp + a(i,k) * u(k + nu * (j - 1))
41  end do
42  work(ii) = tmp
43  end do
44  end do
45 
46  do i = 1, nu
47  do j = 1, nv
48  do l = 1, nv
49  ii = l + nv * (j - 1) + nvnv * (i - 1)
50  tmp = 0.0_rp
51  do k = 1, nu
52  jj = l + nv * (k - 1) + nvnu * (i - 1)
53  tmp = tmp + work(jj) * bt(k,j)
54  end do
55  work2(ii) = tmp
56  end do
57  end do
58  end do
59 
60  do j = 1, nv
61  do i = 1, nvnv
62  jj = i + nvnv * (j - 1)
63  tmp = 0.0_rp
64  do k = 1, nu
65  ii = i + nvnv * (k - 1)
66  tmp = tmp + work2(ii) * ct(k, j)
67  end do
68  v(jj) = tmp
69  end do
70  end do
71 
72  end subroutine tnsr3d_el_sx
73 
74  subroutine tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv)
75  integer, intent(in) :: nv, nu, nelv
76  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
77  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
78 
79  if (nu .eq. 2 .and. nv .eq. 4) then
80  call tnsr3d_nu2nv4_sx(v, u, a, bt, ct, nelv)
81  else if (nu .eq. 4) then
82  call tnsr3d_nu4_sx(v, nv, u, a, bt, ct, nelv)
83  else
84  call tnsr3d_nvnu_sx(v, nv, u, nu, a, bt, ct, nelv)
85  end if
86 
87  end subroutine tnsr3d_sx
88 
89  subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv)
90  integer, intent(in) :: nv, nu, nelv
91  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
92  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
93  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
94  integer :: ie, i, j, k, l, ii, jj
95  integer :: nunu, nvnu, nvnv
96 
97  nvnu = nv * nu
98  nunu = nu * nu
99  nvnv = nv * nv
100 
101  do ie = 1, nelv
102  do j = 1, nunu
103  do i = 1, nv
104  ii = i + nv * (j - 1)
105  tmp = 0.0_rp
106  do k = 1, nu
107  tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
108  end do
109  work(ii) = tmp
110  end do
111  end do
112 
113  do i = 1, nu
114  do j = 1, nv
115  do l = 1, nv
116  ii = l + nv * (j - 1) + nvnv * (i - 1)
117  tmp = 0.0_rp
118  do k = 1, nu
119  jj = l + nv * (k - 1) + nvnu * (i - 1)
120  tmp = tmp + work(jj) * bt(k,j)
121  end do
122  work2(ii) = tmp
123  end do
124  end do
125  end do
126 
127  do j = 1, nv
128  do i = 1, nvnv
129  jj = i + nvnv * (j - 1)
130  tmp = 0.0_rp
131  do k = 1, nu
132  ii = i + nvnv * (k - 1)
133  tmp = tmp + work2(ii) * ct(k, j)
134  end do
135  v(jj, ie) = tmp
136  end do
137  end do
138  end do
139 
140  end subroutine tnsr3d_nvnu_sx
141 
142  subroutine tnsr3d_nu2nv4_sx(v, u, A, Bt, Ct, nelv)
143  integer, parameter :: nu = 2
144  integer, parameter :: nv = 4
145  integer, parameter :: nunu = 4
146  integer, parameter :: nvnu = 8
147  integer, parameter :: nvnv = 16
148  integer, intent(in) :: nelv
149  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
150  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
151  real(kind=rp) :: work(nu**2*nv,nelv), work2(nu*nv**2,nelv), tmp
152  integer :: ie, i, j, k, l, ii, jj
153 
154 
155  do j = 1, nunu
156  do i = 1, nv
157  do ie = 1, nelv
158  ii = i + nv * (j - 1)
159  work(ii, ie) = a(i,1) * u(1 + nu * (j - 1), ie) &
160  + a(i,2) * u(2 + nu * (j - 1), ie)
161  end do
162  end do
163  end do
164 
165  do i = 1, nu
166  do j = 1, nv
167  do l = 1, nv
168  do ie = 1, nelv
169  ii = l + nv * (j - 1) + nvnv * (i - 1)
170  tmp = 0.0_rp
171  !NEC$ unroll_completely
172  do k = 1, nu
173  jj = l + nv * (k - 1) + nvnu * (i - 1)
174  tmp = tmp + work(jj,ie) * bt(k,j)
175  end do
176  work2(ii, ie) = tmp
177  end do
178  end do
179  end do
180  end do
181 
182  do j = 1, nv
183  do i = 1, nvnv
184  do ie = 1, nelv
185  jj = i + nvnv * (j - 1)
186  v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
187  + work2(i + nvnv * (2 - 1),ie) * ct(2, j)
188  end do
189  end do
190  end do
191 
192  end subroutine tnsr3d_nu2nv4_sx
193 
194  subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv)
195  integer, parameter :: nu = 4
196  integer, parameter :: nunu = 16
197  integer, intent(in) :: nv, nelv
198  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
199  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
200  real(kind=rp) :: work(nu**2*nv, nelv), work2(nu*nv**2, nelv), tmp
201  integer :: ie, i, j, k, l, ii, jj
202  integer :: nvnu, nvnv
203 
204  nvnu = nv * nu
205  nvnv = nv * nv
206 
207  do j = 1, nunu
208  do i = 1, nv
209  do ie = 1, nelv
210  ii = i + nv * (j - 1)
211  work(ii, ie) = a(i,1) * u(1 + nu * (j - 1), ie) &
212  + a(i,2) * u(2 + nu * (j - 1), ie) &
213  + a(i,3) * u(3 + nu * (j - 1), ie) &
214  + a(i,4) * u(4 + nu * (j - 1), ie)
215  end do
216  end do
217  end do
218 
219  do i = 1, nu
220  do j = 1, nv
221  do l = 1, nv
222  do ie = 1, nelv
223  ii = l + nv * (j - 1) + nvnv * (i - 1)
224  tmp = 0.0_rp
225  !NEC$ unroll_completely
226  do k = 1, nu
227  jj = l + nv * (k - 1) + nvnu * (i - 1)
228  tmp = tmp + work(jj,ie) * bt(k,j)
229  end do
230  work2(ii, ie) = tmp
231  end do
232  end do
233  end do
234  end do
235 
236  do j = 1, nv
237  do i = 1, nvnv
238  do ie = 1, nelv
239  jj = i + nvnv * (j - 1)
240  v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
241  + work2(i + nvnv * (2 - 1),ie) * ct(2, j) &
242  + work2(i + nvnv * (3 - 1),ie) * ct(3, j) &
243  + work2(i + nvnv * (4 - 1),ie) * ct(4, j)
244  end do
245  end do
246  end do
247 
248  end subroutine tnsr3d_nu4_sx
249 
250  subroutine tnsr1_3d_sx(v, nv, nu, A, Bt, Ct, nelv)
251  integer, intent(in) :: nv, nu, nelv
252  real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
253  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
254 
255 
256  if (nu .eq. 4 .and. nv .eq. 2) then
257  call tnsr1_3d_nu4nv2_sx(v, a, bt, ct, nelv)
258  else
259  call tnsr1_3d_nvnu_sx(v, nv, nu, a, bt, ct, nelv)
260  end if
261 
262  end subroutine tnsr1_3d_sx
263 
264  subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
265  integer, intent(in) :: nv, nu, nelv
266  real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
267  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
268  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
269  integer :: e, e0, ee, es, iu, iv, nu3, nv3
270  integer :: i, j, k, l, ii, jj, kk
271  integer :: nunu, nvnu, nvnv
272  real(kind=rp) :: tmp
273 
274  nvnu = nv * nu
275  nunu = nu * nu
276  nvnv = nv * nv
277 
278  e0 = 1
279  es = 1
280  ee = nelv
281 
282  if (nv.gt.nu) then
283  e0 = nelv
284  es = -1
285  ee = 1
286  endif
287 
288  nu3 = nu**3
289  nv3 = nv**3
290 
291  do e = e0,ee,es
292  iu = (e-1)*nu3
293  iv = (e-1)*nv3
294 
295  do j = 1, nunu
296  do i = 1, nv
297  ii = i + nv * (j - 1)
298  tmp = 0.0_rp
299  do k = 1, nu
300  kk = k + nu * (j - 1) + iu
301  tmp = tmp + a(i,k) * v(kk)
302  end do
303  work(ii) = tmp
304  end do
305  end do
306 
307  do i = 1, nu
308  do j = 1, nv
309  do l = 1, nv
310  ii = l + nv * (j - 1) + nvnv * (i - 1)
311  tmp = 0.0_rp
312  do k = 1, nu
313  jj = l + nv * (k - 1) + nvnu * (i - 1)
314  tmp = tmp + work(jj) * bt(k,j)
315  end do
316  work2(ii) = tmp
317  end do
318  end do
319  end do
320 
321  do j = 1, nv
322  do i = 1, nvnv
323  jj = i + nvnv * (j - 1) + iv
324  tmp = 0.0_rp
325  do k = 1, nu
326  ii = i + nvnv * (k - 1)
327  tmp = tmp + work2(ii) * ct(k, j)
328  end do
329  v(jj) = tmp
330  end do
331  end do
332  end do
333 
334  end subroutine tnsr1_3d_nvnu_sx
335 
336  subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv)
337  integer, parameter :: nu = 4
338  integer, parameter :: nv = 2
339  integer, parameter :: nunu = 16
340  integer, parameter :: nvnu = 8
341  integer, parameter :: nvnv = 4
342  integer, parameter :: nununu = 64
343  integer, parameter :: nvnvnv = 8
344  integer, intent(in) :: nelv
345  real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
346  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
347  real(kind=rp) :: work(nu**2*nv,nelv), work2(nu*nv**2,nelv)
348  integer :: ie, iu, iv
349  integer :: i, j, k, l, ii, jj
350  real(kind=rp) :: tmp
351 
352  do j = 1, nunu
353  do i = 1, nv
354  do ie = 1, nelv
355  iu = (ie-1)*nununu
356  ii = i + nv * (j - 1)
357  work(ii, ie) = a(i,1) * v(1 + nu * (j - 1) + iu) &
358  + a(i,2) * v(2 + nu * (j - 1) + iu) &
359  + a(i,3) * v(3 + nu * (j - 1) + iu) &
360  + a(i,4) * v(4 + nu * (j - 1) + iu)
361  end do
362  end do
363  end do
364 
365  do i = 1, nu
366  do j = 1, nv
367  do l = 1, nv
368  do ie = 1, nelv
369  ii = l + nv * (j - 1) + nvnv * (i - 1)
370  tmp = 0.0_rp
371  !NEC$ unroll_completely
372  do k = 1, nu
373  jj = l + nv * (k - 1) + nvnu * (i - 1)
374  tmp = tmp + work(jj,ie) * bt(k,j)
375  end do
376  work2(ii,ie) = tmp
377  end do
378  end do
379  end do
380  end do
381 
382  do j = 1, nv
383  do i = 1, nvnv
384  do ie = 1, nelv
385  iv = (ie-1)*nvnvnv
386  jj = i + nvnv * (j - 1) + iv
387  v(jj) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
388  + work2(i + nvnv * (2 - 1),ie) * ct(2, j) &
389  + work2(i + nvnv * (3 - 1),ie) * ct(3, j) &
390  + work2(i + nvnv * (4 - 1),ie) * ct(4, j)
391  end do
392  end do
393  end do
394 
395  end subroutine tnsr1_3d_nu4nv2_sx
396 
397 end module tensor_sx
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Tensor operations SX-Aurora backend.
Definition: tensor_sx.f90:2
subroutine tnsr3d_nu2nv4_sx(v, u, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:143
subroutine, public tnsr2d_el_sx(v, nv, u, nu, A, Bt)
Definition: tensor_sx.f90:13
subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:90
subroutine, public tnsr1_3d_sx(v, nv, nu, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:251
subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:265
subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:337
subroutine, public tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
Definition: tensor_sx.f90:24
subroutine, public tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:75
subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv)
Definition: tensor_sx.f90:195