Neko  0.9.0
A portable framework for high-order spectral element flow simulations
tensor_cpu.f90
Go to the documentation of this file.
1 module tensor_cpu
2  use num_types, only : rp
3  use mxm_wrapper, only : mxm
4  implicit none
5  private
6 
8 
9 contains
10 
11  subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
12  integer, intent(in) :: nv, nu
13  real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
14  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
15  real(kind=rp) :: work(0:nu**2*nv)
16 
17  call mxm(a, nv, u, nu, work, nu)
18  call mxm(work, nv, bt, nu, v, nv)
19 
20  end subroutine tnsr2d_el_cpu
21 
22  subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
23  integer, intent(in) :: nv, nu
24  real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
25  real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
26 
27  if (nv .eq. nu) then
28  select case (nv)
29  case (14)
30  call tnsr3d_el_n14_cpu(v, u, a, bt, ct)
31  case (13)
32  call tnsr3d_el_n13_cpu(v, u, a, bt, ct)
33  case (12)
34  call tnsr3d_el_n12_cpu(v, u, a, bt, ct)
35  case (11)
36  call tnsr3d_el_n11_cpu(v, u, a, bt, ct)
37  case (10)
38  call tnsr3d_el_n10_cpu(v, u, a, bt, ct)
39  case (9)
40  call tnsr3d_el_n9_cpu(v, u, a, bt, ct)
41  case (8)
42  call tnsr3d_el_n8_cpu(v, u, a, bt, ct)
43  case (7)
44  call tnsr3d_el_n7_cpu(v, u, a, bt, ct)
45  case (6)
46  call tnsr3d_el_n6_cpu(v, u, a, bt, ct)
47  case (5)
48  call tnsr3d_el_n5_cpu(v, u, a, bt, ct)
49  case (4)
50  call tnsr3d_el_n4_cpu(v, u, a, bt, ct)
51  case (3)
52  call tnsr3d_el_n3_cpu(v, u, a, bt, ct)
53  case (2)
54  call tnsr3d_el_n2_cpu(v, u, a, bt, ct)
55  case default
56  call tnsr3d_el_n_cpu(v, u, a, bt, ct, nv)
57  end select
58  else
59  call tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
60  end if
61 
62  end subroutine tnsr3d_el_cpu
63 
64  subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
65  integer, intent(in) :: nv, nu
66  real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
67  real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
68  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
69  real(kind=rp) :: tmp
70  integer :: i, j, k, l, nunu, nvnu, nvnv
71  integer :: ii, jj
72  nvnu = nv * nu
73  nunu = nu * nu
74  nvnv = nv * nv
75 
76  do j = 1, nunu
77  do i = 1, nv
78  ii = i + nv * (j - 1)
79  tmp = 0.0_rp
80  do k = 1, nu
81  tmp = tmp + a(i,k) * u(k + nu * (j - 1))
82  end do
83  work(ii) = tmp
84  end do
85  end do
86 
87  do i = 1, nu
88  do j = 1, nv
89  do l = 1, nv
90  ii = l + nv * (j - 1) + nvnv * (i - 1)
91  tmp = 0.0_rp
92  do k = 1, nu
93  jj = l + nv * (k - 1) + nvnu * (i - 1)
94  tmp = tmp + work(jj) * bt(k,j)
95  end do
96  work2(ii) = tmp
97  end do
98  end do
99  end do
100 
101  do j = 1, nv
102  do i = 1, nvnv
103  jj = i + nvnv * (j - 1)
104  tmp = 0.0_rp
105  do k = 1, nu
106  ii = i + nvnv * (k - 1)
107  tmp = tmp + work2(ii) * ct(k, j)
108  end do
109  v(jj) = tmp
110  end do
111  end do
112 
113  end subroutine tnsr3d_el_nvnu_cpu
114 
115  subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
116  integer, intent(in) :: n
117  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
118  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
119  real(kind=rp) :: work(n**3), work2(n**3), tmp
120  integer :: i, j, l, k
121  integer :: ii, jj, nn
122 
123  nn = n**2
124 
125  do j = 1, nn
126  do i = 1, n
127  ii = i + n * (j - 1)
128  tmp = 0.0_rp
129  do k = 1, n
130  tmp = tmp + a(i,k) * u(k + n * (j - 1))
131  end do
132  work(ii) = tmp
133  end do
134  end do
135 
136  do i = 1, n
137  do j = 1, n
138  do l = 1, n
139  ii = l + n * (j - 1) + nn * (i - 1)
140  tmp = 0.0_rp
141  do k = 1, n
142  tmp = tmp + work(l + n * (k - 1) + nn * (i - 1)) * bt(k,j)
143  end do
144  work2(ii) = tmp
145  end do
146  end do
147  end do
148 
149  do j = 1, n
150  do i = 1, nn
151  jj = i + nn * (j - 1)
152  tmp = 0.0_rp
153  do k = 1, n
154  tmp = tmp + work2(i + nn * (k - 1)) * ct(k, j)
155  end do
156  v(jj) = tmp
157  end do
158  end do
159 
160  end subroutine tnsr3d_el_n_cpu
161 
162  subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
163  integer, parameter :: n = 14
164  integer, parameter :: nn = n**2
165  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
166  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
167  real(kind=rp) :: work(n**3), work2(n**3)
168  integer :: i, j, l
169  integer :: ii, jj
170 
171  do j = 1, nn
172  do i = 1, n
173  ii = i + n * (j - 1)
174  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
175  + a(i,2) * u(2 + n * (j - 1)) &
176  + a(i,3) * u(3 + n * (j - 1)) &
177  + a(i,4) * u(4 + n * (j - 1)) &
178  + a(i,5) * u(5 + n * (j - 1)) &
179  + a(i,6) * u(6 + n * (j - 1)) &
180  + a(i,7) * u(7 + n * (j - 1)) &
181  + a(i,8) * u(8 + n * (j - 1)) &
182  + a(i,9) * u(9 + n * (j - 1)) &
183  + a(i,10) * u(10 + n * (j - 1)) &
184  + a(i,11) * u(11 + n * (j - 1)) &
185  + a(i,12) * u(12 + n * (j - 1)) &
186  + a(i,13) * u(13 + n * (j - 1)) &
187  + a(i,14) * u(14 + n * (j - 1))
188  end do
189  end do
190 
191  do i = 1, n
192  do j = 1, n
193  do l = 1, n
194  ii = l + n * (j - 1) + nn * (i - 1)
195  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
196  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
197  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
198  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
199  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
200  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
201  + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
202  + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
203  + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
204  + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
205  + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
206  + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
207  + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j) &
208  + work(l + n * (14 - 1) + nn * (i - 1)) * bt(14,j)
209  end do
210  end do
211  end do
212 
213  do j = 1, n
214  do i = 1, nn
215  jj = i + nn * (j - 1)
216  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
217  + work2(i + nn * (2 - 1)) * ct(2, j) &
218  + work2(i + nn * (3 - 1)) * ct(3, j) &
219  + work2(i + nn * (4 - 1)) * ct(4, j) &
220  + work2(i + nn * (5 - 1)) * ct(5, j) &
221  + work2(i + nn * (6 - 1)) * ct(6, j) &
222  + work2(i + nn * (7 - 1)) * ct(7, j) &
223  + work2(i + nn * (8 - 1)) * ct(8, j) &
224  + work2(i + nn * (9 - 1)) * ct(9, j) &
225  + work2(i + nn * (10 - 1)) * ct(10, j) &
226  + work2(i + nn * (11 - 1)) * ct(11, j) &
227  + work2(i + nn * (12 - 1)) * ct(12, j) &
228  + work2(i + nn * (13 - 1)) * ct(13, j) &
229  + work2(i + nn * (14 - 1)) * ct(14, j)
230  end do
231  end do
232 
233  end subroutine tnsr3d_el_n14_cpu
234 
235  subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
236  integer, parameter :: n = 13
237  integer, parameter :: nn = n**2
238  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
239  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
240  real(kind=rp) :: work(n**3), work2(n**3)
241  integer :: i, j, l
242  integer :: ii, jj
243 
244  do j = 1, nn
245  do i = 1, n
246  ii = i + n * (j - 1)
247  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
248  + a(i,2) * u(2 + n * (j - 1)) &
249  + a(i,3) * u(3 + n * (j - 1)) &
250  + a(i,4) * u(4 + n * (j - 1)) &
251  + a(i,5) * u(5 + n * (j - 1)) &
252  + a(i,6) * u(6 + n * (j - 1)) &
253  + a(i,7) * u(7 + n * (j - 1)) &
254  + a(i,8) * u(8 + n * (j - 1)) &
255  + a(i,9) * u(9 + n * (j - 1)) &
256  + a(i,10) * u(10 + n * (j - 1)) &
257  + a(i,11) * u(11 + n * (j - 1)) &
258  + a(i,12) * u(12 + n * (j - 1)) &
259  + a(i,13) * u(13 + n * (j - 1))
260  end do
261  end do
262 
263  do i = 1, n
264  do j = 1, n
265  do l = 1, n
266  ii = l + n * (j - 1) + nn * (i - 1)
267  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
268  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
269  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
270  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
271  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
272  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
273  + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
274  + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
275  + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
276  + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
277  + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
278  + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
279  + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j)
280  end do
281  end do
282  end do
283 
284  do j = 1, n
285  do i = 1, nn
286  jj = i + nn * (j - 1)
287  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
288  + work2(i + nn * (2 - 1)) * ct(2, j) &
289  + work2(i + nn * (3 - 1)) * ct(3, j) &
290  + work2(i + nn * (4 - 1)) * ct(4, j) &
291  + work2(i + nn * (5 - 1)) * ct(5, j) &
292  + work2(i + nn * (6 - 1)) * ct(6, j) &
293  + work2(i + nn * (7 - 1)) * ct(7, j) &
294  + work2(i + nn * (8 - 1)) * ct(8, j) &
295  + work2(i + nn * (9 - 1)) * ct(9, j) &
296  + work2(i + nn * (10 - 1)) * ct(10, j) &
297  + work2(i + nn * (11 - 1)) * ct(11, j) &
298  + work2(i + nn * (12 - 1)) * ct(12, j) &
299  + work2(i + nn * (13 - 1)) * ct(13, j)
300  end do
301  end do
302 
303  end subroutine tnsr3d_el_n13_cpu
304 
305  subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
306  integer, parameter :: n = 12
307  integer, parameter :: nn = n**2
308  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
309  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
310  real(kind=rp) :: work(n**3), work2(n**3)
311  integer :: i, j, l
312  integer :: ii, jj
313 
314  do j = 1, nn
315  do i = 1, n
316  ii = i + n * (j - 1)
317  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
318  + a(i,2) * u(2 + n * (j - 1)) &
319  + a(i,3) * u(3 + n * (j - 1)) &
320  + a(i,4) * u(4 + n * (j - 1)) &
321  + a(i,5) * u(5 + n * (j - 1)) &
322  + a(i,6) * u(6 + n * (j - 1)) &
323  + a(i,7) * u(7 + n * (j - 1)) &
324  + a(i,8) * u(8 + n * (j - 1)) &
325  + a(i,9) * u(9 + n * (j - 1)) &
326  + a(i,10) * u(10 + n * (j - 1)) &
327  + a(i,11) * u(11 + n * (j - 1)) &
328  + a(i,12) * u(12 + n * (j - 1))
329  end do
330  end do
331 
332  do i = 1, n
333  do j = 1, n
334  do l = 1, n
335  ii = l + n * (j - 1) + nn * (i - 1)
336  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
337  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
338  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
339  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
340  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
341  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
342  + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
343  + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
344  + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
345  + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
346  + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
347  + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j)
348  end do
349  end do
350  end do
351 
352  do j = 1, n
353  do i = 1, nn
354  jj = i + nn * (j - 1)
355  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
356  + work2(i + nn * (2 - 1)) * ct(2, j) &
357  + work2(i + nn * (3 - 1)) * ct(3, j) &
358  + work2(i + nn * (4 - 1)) * ct(4, j) &
359  + work2(i + nn * (5 - 1)) * ct(5, j) &
360  + work2(i + nn * (6 - 1)) * ct(6, j) &
361  + work2(i + nn * (7 - 1)) * ct(7, j) &
362  + work2(i + nn * (8 - 1)) * ct(8, j) &
363  + work2(i + nn * (9 - 1)) * ct(9, j) &
364  + work2(i + nn * (10 - 1)) * ct(10, j) &
365  + work2(i + nn * (11 - 1)) * ct(11, j) &
366  + work2(i + nn * (12 - 1)) * ct(12, j)
367  end do
368  end do
369 
370  end subroutine tnsr3d_el_n12_cpu
371 
372  subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
373  integer, parameter :: n = 11
374  integer, parameter :: nn = n**2
375  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
376  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
377  real(kind=rp) :: work(n**3), work2(n**3)
378  integer :: i, j, l
379  integer :: ii, jj
380 
381  do j = 1, nn
382  do i = 1, n
383  ii = i + n * (j - 1)
384  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
385  + a(i,2) * u(2 + n * (j - 1)) &
386  + a(i,3) * u(3 + n * (j - 1)) &
387  + a(i,4) * u(4 + n * (j - 1)) &
388  + a(i,5) * u(5 + n * (j - 1)) &
389  + a(i,6) * u(6 + n * (j - 1)) &
390  + a(i,7) * u(7 + n * (j - 1)) &
391  + a(i,8) * u(8 + n * (j - 1)) &
392  + a(i,9) * u(9 + n * (j - 1)) &
393  + a(i,10) * u(10 + n * (j - 1)) &
394  + a(i,11) * u(11 + n * (j - 1))
395  end do
396  end do
397 
398  do i = 1, n
399  do j = 1, n
400  do l = 1, n
401  ii = l + n * (j - 1) + nn * (i - 1)
402  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
403  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
404  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
405  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
406  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
407  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
408  + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
409  + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
410  + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
411  + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
412  + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j)
413  end do
414  end do
415  end do
416 
417  do j = 1, n
418  do i = 1, nn
419  jj = i + nn * (j - 1)
420  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
421  + work2(i + nn * (2 - 1)) * ct(2, j) &
422  + work2(i + nn * (3 - 1)) * ct(3, j) &
423  + work2(i + nn * (4 - 1)) * ct(4, j) &
424  + work2(i + nn * (5 - 1)) * ct(5, j) &
425  + work2(i + nn * (6 - 1)) * ct(6, j) &
426  + work2(i + nn * (7 - 1)) * ct(7, j) &
427  + work2(i + nn * (8 - 1)) * ct(8, j) &
428  + work2(i + nn * (9 - 1)) * ct(9, j) &
429  + work2(i + nn * (10 - 1)) * ct(10, j) &
430  + work2(i + nn * (11 - 1)) * ct(11, j)
431  end do
432  end do
433 
434  end subroutine tnsr3d_el_n11_cpu
435 
436  subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
437  integer, parameter :: n = 10
438  integer, parameter :: nn = n**2
439  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
440  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
441  real(kind=rp) :: work(n**3), work2(n**3)
442  integer :: i, j, l
443  integer :: ii, jj
444 
445  do j = 1, nn
446  do i = 1, n
447  ii = i + n * (j - 1)
448  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
449  + a(i,2) * u(2 + n * (j - 1)) &
450  + a(i,3) * u(3 + n * (j - 1)) &
451  + a(i,4) * u(4 + n * (j - 1)) &
452  + a(i,5) * u(5 + n * (j - 1)) &
453  + a(i,6) * u(6 + n * (j - 1)) &
454  + a(i,7) * u(7 + n * (j - 1)) &
455  + a(i,8) * u(8 + n * (j - 1)) &
456  + a(i,9) * u(9 + n * (j - 1)) &
457  + a(i,10) * u(10 + n * (j - 1))
458  end do
459  end do
460 
461  do i = 1, n
462  do j = 1, n
463  do l = 1, n
464  ii = l + n * (j - 1) + nn * (i - 1)
465  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
466  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
467  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
468  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
469  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
470  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
471  + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
472  + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
473  + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
474  + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j)
475  end do
476  end do
477  end do
478 
479  do j = 1, n
480  do i = 1, nn
481  jj = i + nn * (j - 1)
482  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
483  + work2(i + nn * (2 - 1)) * ct(2, j) &
484  + work2(i + nn * (3 - 1)) * ct(3, j) &
485  + work2(i + nn * (4 - 1)) * ct(4, j) &
486  + work2(i + nn * (5 - 1)) * ct(5, j) &
487  + work2(i + nn * (6 - 1)) * ct(6, j) &
488  + work2(i + nn * (7 - 1)) * ct(7, j) &
489  + work2(i + nn * (8 - 1)) * ct(8, j) &
490  + work2(i + nn * (9 - 1)) * ct(9, j) &
491  + work2(i + nn * (10 - 1)) * ct(10, j)
492  end do
493  end do
494 
495  end subroutine tnsr3d_el_n10_cpu
496 
497  subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
498  integer, parameter :: n = 9
499  integer, parameter :: nn = n**2
500  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
501  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
502  real(kind=rp) :: work(n**3), work2(n**3)
503  integer :: i, j, l
504  integer :: ii, jj
505 
506  do j = 1, nn
507  do i = 1, n
508  ii = i + n * (j - 1)
509  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
510  + a(i,2) * u(2 + n * (j - 1)) &
511  + a(i,3) * u(3 + n * (j - 1)) &
512  + a(i,4) * u(4 + n * (j - 1)) &
513  + a(i,5) * u(5 + n * (j - 1)) &
514  + a(i,6) * u(6 + n * (j - 1)) &
515  + a(i,7) * u(7 + n * (j - 1)) &
516  + a(i,8) * u(8 + n * (j - 1)) &
517  + a(i,9) * u(9 + n * (j - 1))
518  end do
519  end do
520 
521  do i = 1, n
522  do j = 1, n
523  do l = 1, n
524  ii = l + n * (j - 1) + nn * (i - 1)
525  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
526  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
527  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
528  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
529  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
530  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
531  + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
532  + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
533  + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j)
534  end do
535  end do
536  end do
537 
538  do j = 1, n
539  do i = 1, nn
540  jj = i + nn * (j - 1)
541  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
542  + work2(i + nn * (2 - 1)) * ct(2, j) &
543  + work2(i + nn * (3 - 1)) * ct(3, j) &
544  + work2(i + nn * (4 - 1)) * ct(4, j) &
545  + work2(i + nn * (5 - 1)) * ct(5, j) &
546  + work2(i + nn * (6 - 1)) * ct(6, j) &
547  + work2(i + nn * (7 - 1)) * ct(7, j) &
548  + work2(i + nn * (8 - 1)) * ct(8, j) &
549  + work2(i + nn * (9 - 1)) * ct(9, j)
550  end do
551  end do
552 
553  end subroutine tnsr3d_el_n9_cpu
554 
555  subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
556  integer, parameter :: n = 8
557  integer, parameter :: nn = n**2
558  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
559  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
560  real(kind=rp) :: work(n**3), work2(n**3)
561  integer :: i, j, l
562  integer :: ii, jj
563 
564  do j = 1, nn
565  do i = 1, n
566  ii = i + n * (j - 1)
567  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
568  + a(i,2) * u(2 + n * (j - 1)) &
569  + a(i,3) * u(3 + n * (j - 1)) &
570  + a(i,4) * u(4 + n * (j - 1)) &
571  + a(i,5) * u(5 + n * (j - 1)) &
572  + a(i,6) * u(6 + n * (j - 1)) &
573  + a(i,7) * u(7 + n * (j - 1)) &
574  + a(i,8) * u(8 + n * (j - 1))
575  end do
576  end do
577 
578  do i = 1, n
579  do j = 1, n
580  do l = 1, n
581  ii = l + n * (j - 1) + nn * (i - 1)
582  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
583  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
584  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
585  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
586  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
587  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
588  + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
589  + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j)
590  end do
591  end do
592  end do
593 
594  do j = 1, n
595  do i = 1, nn
596  jj = i + nn * (j - 1)
597  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
598  + work2(i + nn * (2 - 1)) * ct(2, j) &
599  + work2(i + nn * (3 - 1)) * ct(3, j) &
600  + work2(i + nn * (4 - 1)) * ct(4, j) &
601  + work2(i + nn * (5 - 1)) * ct(5, j) &
602  + work2(i + nn * (6 - 1)) * ct(6, j) &
603  + work2(i + nn * (7 - 1)) * ct(7, j) &
604  + work2(i + nn * (8 - 1)) * ct(8, j)
605  end do
606  end do
607 
608  end subroutine tnsr3d_el_n8_cpu
609 
610  subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
611  integer, parameter :: n = 7
612  integer, parameter :: nn = n**2
613  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
614  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
615  real(kind=rp) :: work(n**3), work2(n**3)
616  integer :: i, j, l
617  integer :: ii, jj
618 
619  do j = 1, nn
620  do i = 1, n
621  ii = i + n * (j - 1)
622  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
623  + a(i,2) * u(2 + n * (j - 1)) &
624  + a(i,3) * u(3 + n * (j - 1)) &
625  + a(i,4) * u(4 + n * (j - 1)) &
626  + a(i,5) * u(5 + n * (j - 1)) &
627  + a(i,6) * u(6 + n * (j - 1)) &
628  + a(i,7) * u(7 + n * (j - 1))
629  end do
630  end do
631 
632  do i = 1, n
633  do j = 1, n
634  do l = 1, n
635  ii = l + n * (j - 1) + nn * (i - 1)
636  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
637  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
638  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
639  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
640  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
641  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
642  + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j)
643  end do
644  end do
645  end do
646 
647  do j = 1, n
648  do i = 1, nn
649  jj = i + nn * (j - 1)
650  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
651  + work2(i + nn * (2 - 1)) * ct(2, j) &
652  + work2(i + nn * (3 - 1)) * ct(3, j) &
653  + work2(i + nn * (4 - 1)) * ct(4, j) &
654  + work2(i + nn * (5 - 1)) * ct(5, j) &
655  + work2(i + nn * (6 - 1)) * ct(6, j) &
656  + work2(i + nn * (7 - 1)) * ct(7, j)
657  end do
658  end do
659 
660  end subroutine tnsr3d_el_n7_cpu
661 
662  subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
663  integer, parameter :: n = 6
664  integer, parameter :: nn = n**2
665  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
666  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
667  real(kind=rp) :: work(n**3), work2(n**3)
668  integer :: i, j, l
669  integer :: ii, jj
670 
671  do j = 1, nn
672  do i = 1, n
673  ii = i + n * (j - 1)
674  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
675  + a(i,2) * u(2 + n * (j - 1)) &
676  + a(i,3) * u(3 + n * (j - 1)) &
677  + a(i,4) * u(4 + n * (j - 1)) &
678  + a(i,5) * u(5 + n * (j - 1)) &
679  + a(i,6) * u(6 + n * (j - 1))
680  end do
681  end do
682 
683  do i = 1, n
684  do j = 1, n
685  do l = 1, n
686  ii = l + n * (j - 1) + nn * (i - 1)
687  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
688  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
689  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
690  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
691  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
692  + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j)
693  end do
694  end do
695  end do
696 
697  do j = 1, n
698  do i = 1, nn
699  jj = i + nn * (j - 1)
700  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
701  + work2(i + nn * (2 - 1)) * ct(2, j) &
702  + work2(i + nn * (3 - 1)) * ct(3, j) &
703  + work2(i + nn * (4 - 1)) * ct(4, j) &
704  + work2(i + nn * (5 - 1)) * ct(5, j) &
705  + work2(i + nn * (6 - 1)) * ct(6, j)
706  end do
707  end do
708 
709  end subroutine tnsr3d_el_n6_cpu
710 
711  subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
712  integer, parameter :: n = 5
713  integer, parameter :: nn = n**2
714  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
715  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
716  real(kind=rp) :: work(n**3), work2(n**3)
717  integer :: i, j, l
718  integer :: ii, jj
719 
720  do j = 1, nn
721  do i = 1, n
722  ii = i + n * (j - 1)
723  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
724  + a(i,2) * u(2 + n * (j - 1)) &
725  + a(i,3) * u(3 + n * (j - 1)) &
726  + a(i,4) * u(4 + n * (j - 1)) &
727  + a(i,5) * u(5 + n * (j - 1))
728  end do
729  end do
730 
731  do i = 1, n
732  do j = 1, n
733  do l = 1, n
734  ii = l + n * (j - 1) + nn * (i - 1)
735  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
736  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
737  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
738  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
739  + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j)
740  end do
741  end do
742  end do
743 
744  do j = 1, n
745  do i = 1, nn
746  jj = i + nn * (j - 1)
747  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
748  + work2(i + nn * (2 - 1)) * ct(2, j) &
749  + work2(i + nn * (3 - 1)) * ct(3, j) &
750  + work2(i + nn * (4 - 1)) * ct(4, j) &
751  + work2(i + nn * (5 - 1)) * ct(5, j)
752  end do
753  end do
754 
755  end subroutine tnsr3d_el_n5_cpu
756 
757  subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
758  integer, parameter :: n = 4
759  integer, parameter :: nn = n**2
760  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
761  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
762  real(kind=rp) :: work(n**3), work2(n**3)
763  integer :: i, j, l
764  integer :: ii, jj
765 
766  do j = 1, nn
767  do i = 1, n
768  ii = i + n * (j - 1)
769  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
770  + a(i,2) * u(2 + n * (j - 1)) &
771  + a(i,3) * u(3 + n * (j - 1)) &
772  + a(i,4) * u(4 + n * (j - 1))
773  end do
774  end do
775 
776  do i = 1, n
777  do j = 1, n
778  do l = 1, n
779  ii = l + n * (j - 1) + nn * (i - 1)
780  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
781  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
782  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
783  + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j)
784  end do
785  end do
786  end do
787 
788  do j = 1, n
789  do i = 1, nn
790  jj = i + nn * (j - 1)
791  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
792  + work2(i + nn * (2 - 1)) * ct(2, j) &
793  + work2(i + nn * (3 - 1)) * ct(3, j) &
794  + work2(i + nn * (4 - 1)) * ct(4, j)
795  end do
796  end do
797 
798  end subroutine tnsr3d_el_n4_cpu
799 
800  subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
801  integer, parameter :: n = 3
802  integer, parameter :: nn = n**2
803  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
804  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
805  real(kind=rp) :: work(n**3), work2(n**3)
806  integer :: i, j, l
807  integer :: ii, jj
808 
809  do j = 1, nn
810  do i = 1, n
811  ii = i + n * (j - 1)
812  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
813  + a(i,2) * u(2 + n * (j - 1)) &
814  + a(i,3) * u(3 + n * (j - 1))
815  end do
816  end do
817 
818  do i = 1, n
819  do j = 1, n
820  do l = 1, n
821  ii = l + n * (j - 1) + nn * (i - 1)
822  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
823  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
824  + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j)
825  end do
826  end do
827  end do
828 
829  do j = 1, n
830  do i = 1, nn
831  jj = i + nn * (j - 1)
832  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
833  + work2(i + nn * (2 - 1)) * ct(2, j) &
834  + work2(i + nn * (3 - 1)) * ct(3, j)
835  end do
836  end do
837 
838  end subroutine tnsr3d_el_n3_cpu
839 
840  subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
841  integer, parameter :: n = 2
842  integer, parameter :: nn = n**2
843  real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
844  real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
845  real(kind=rp) :: work(n**3), work2(n**3)
846  integer :: i, j, l
847  integer :: ii, jj
848 
849  do j = 1, nn
850  do i = 1, n
851  ii = i + n * (j - 1)
852  work(ii) = a(i,1) * u(1 + n * (j - 1)) &
853  + a(i,2) * u(2 + n * (j - 1))
854  end do
855  end do
856 
857  do i = 1, n
858  do j = 1, n
859  do l = 1, n
860  ii = l + n * (j - 1) + nn * (i - 1)
861  work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
862  + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j)
863  end do
864  end do
865  end do
866 
867  do j = 1, n
868  do i = 1, nn
869  jj = i + nn * (j - 1)
870  v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
871  + work2(i + nn * (2 - 1)) * ct(2, j)
872  end do
873  end do
874 
875  end subroutine tnsr3d_el_n2_cpu
876 
877  subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
878  integer, intent(in) :: nv, nu, nelv
879  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
880  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
881 
882  if (nu .eq. 2 .and. nv .eq. 4) then
883  call tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
884  else if (nu .eq. 4) then
885  call tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
886  else
887  call tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
888  end if
889 
890  end subroutine tnsr3d_cpu
891 
892  subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
893  integer, intent(in) :: nv, nu, nelv
894  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
895  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
896  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
897  integer :: ie, i, j, k, l, ii, jj
898  integer :: nunu, nvnu, nvnv
899 
900  nvnu = nv * nu
901  nunu = nu * nu
902  nvnv = nv * nv
903 
904  do ie = 1,nelv
905  do j = 1, nunu
906  do i = 1, nv
907  ii = i + nv * (j - 1)
908  tmp = 0.0_rp
909  do k = 1, nu
910  tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
911  end do
912  work(ii) = tmp
913  end do
914  end do
915 
916  do i = 1, nu
917  do j = 1, nv
918  do l = 1, nv
919  ii = l + nv * (j - 1) + nvnv * (i - 1)
920  tmp = 0.0_rp
921  do k = 1, nu
922  jj = l + nv * (k - 1) + nvnu * (i - 1)
923  tmp = tmp + work(jj) * bt(k,j)
924  end do
925  work2(ii) = tmp
926  end do
927  end do
928  end do
929 
930  do j = 1, nv
931  do i = 1, nvnv
932  jj = i + nvnv * (j - 1)
933  tmp = 0.0_rp
934  do k = 1, nu
935  ii = i + nvnv * (k - 1)
936  tmp = tmp + work2(ii) * ct(k, j)
937  end do
938  v(jj, ie) = tmp
939  end do
940  end do
941  end do
942 
943  end subroutine tnsr3d_nvnu_cpu
944 
945  subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
946  integer, parameter :: nu = 2
947  integer, parameter :: nv = 4
948  integer, parameter :: nunu = 4
949  integer, parameter :: nvnu = 8
950  integer, parameter :: nvnv = 16
951  integer, intent(in) :: nelv
952  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
953  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
954  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
955  integer :: ie, i, j, k, l, ii, jj
956 
957  do ie = 1,nelv
958  do j = 1, nunu
959  do i = 1, nv
960  ii = i + nv * (j - 1)
961  work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
962  + a(i,2) * u(2 + nu * (j - 1), ie)
963  end do
964  end do
965 
966  do i = 1, nu
967  do j = 1, nv
968  do l = 1, nv
969  ii = l + nv * (j - 1) + nvnv * (i - 1)
970  tmp = 0.0_rp
971  do k = 1, nu
972  jj = l + nv * (k - 1) + nvnu * (i - 1)
973  tmp = tmp + work(jj) * bt(k,j)
974  end do
975  work2(ii) = tmp
976  end do
977  end do
978  end do
979 
980  do j = 1, nv
981  do i = 1, nvnv
982  jj = i + nvnv * (j - 1)
983  v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
984  + work2(i + nvnv * (2 - 1)) * ct(2, j)
985  end do
986  end do
987  end do
988 
989  end subroutine tnsr3d_nu2nv4_cpu
990 
991  subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
992  integer, parameter :: nu = 4
993  integer, parameter :: nunu = 16
994  integer, intent(in) :: nv, nelv
995  real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
996  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
997  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
998  integer :: ie, i, j, k, l, ii, jj
999  integer :: nvnu, nvnv
1000 
1001  nvnu = nv * nu
1002  nvnv = nv * nv
1003 
1004  do ie = 1,nelv
1005  do j = 1, nunu
1006  do i = 1, nv
1007  ii = i + nv * (j - 1)
1008  work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1009  + a(i,2) * u(2 + nu * (j - 1), ie) &
1010  + a(i,3) * u(3 + nu * (j - 1), ie) &
1011  + a(i,4) * u(4 + nu * (j - 1), ie)
1012  end do
1013  end do
1014 
1015  do i = 1, nu
1016  do j = 1, nv
1017  do l = 1, nv
1018  ii = l + nv * (j - 1) + nvnv * (i - 1)
1019  tmp = 0.0_rp
1020  do k = 1, nu
1021  jj = l + nv * (k - 1) + nvnu * (i - 1)
1022  tmp = tmp + work(jj) * bt(k,j)
1023  end do
1024  work2(ii) = tmp
1025  end do
1026  end do
1027  end do
1028 
1029  do j = 1, nv
1030  do i = 1, nvnv
1031  jj = i + nvnv * (j - 1)
1032  v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1033  + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1034  + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1035  + work2(i + nvnv * (4 - 1)) * ct(4, j)
1036  end do
1037  end do
1038  end do
1039 
1040  end subroutine tnsr3d_nu4_cpu
1041 
1042  subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
1043  integer, intent(in) :: nv, nu, nelv
1044  real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1045  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1046 
1047  if (nu .eq. 4 .and. nv .eq. 2) then
1048  call tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
1049  else
1050  call tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
1051  end if
1052 
1053  end subroutine tnsr1_3d_cpu
1054 
1055  subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
1056  integer, intent(in) :: nv, nu, nelv
1057  real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1058  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1059  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1060  integer :: e, e0, ee, es, iu, iv, nu3, nv3
1061  integer :: i, j, k, l, ii, jj, kk
1062  integer :: nunu, nvnu, nvnv
1063  real(kind=rp) :: tmp
1064 
1065  nvnu = nv * nu
1066  nunu = nu * nu
1067  nvnv = nv * nv
1068 
1069  e0 = 1
1070  es = 1
1071  ee = nelv
1072 
1073  if (nv.gt.nu) then
1074  e0 = nelv
1075  es = -1
1076  ee = 1
1077  endif
1078 
1079  nu3 = nu**3
1080  nv3 = nv**3
1081 
1082  do e = e0,ee,es
1083  iu = (e-1)*nu3
1084  iv = (e-1)*nv3
1085 
1086  do j = 1, nunu
1087  do i = 1, nv
1088  ii = i + nv * (j - 1)
1089  tmp = 0.0_rp
1090  do k = 1, nu
1091  kk = k + nu * (j - 1) + iu
1092  tmp = tmp + a(i,k) * v(kk)
1093  end do
1094  work(ii) = tmp
1095  end do
1096  end do
1097 
1098  do i = 1, nu
1099  do j = 1, nv
1100  do l = 1, nv
1101  ii = l + nv * (j - 1) + nvnv * (i - 1)
1102  tmp = 0.0_rp
1103  do k = 1, nu
1104  jj = l + nv * (k - 1) + nvnu * (i - 1)
1105  tmp = tmp + work(jj) * bt(k,j)
1106  end do
1107  work2(ii) = tmp
1108  end do
1109  end do
1110  end do
1111 
1112  do j = 1, nv
1113  do i = 1, nvnv
1114  jj = i + nvnv * (j - 1) + iv
1115  tmp = 0.0_rp
1116  do k = 1, nu
1117  ii = i + nvnv * (k - 1)
1118  tmp = tmp + work2(ii) * ct(k, j)
1119  end do
1120  v(jj) = tmp
1121  end do
1122  end do
1123  end do
1124 
1125  end subroutine tnsr1_3d_nvnu_cpu
1126 
1127  subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
1128  integer, parameter :: nu = 4
1129  integer, parameter :: nv = 2
1130  integer, parameter :: nunu = 16
1131  integer, parameter :: nvnu = 8
1132  integer, parameter :: nvnv = 4
1133  integer, parameter :: nununu = 64
1134  integer, parameter :: nvnvnv = 8
1135  integer, intent(in) :: nelv
1136  real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1137  real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1138  real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1139  integer :: e, iu, iv
1140  integer :: i, j, k, l, ii, jj
1141  real(kind=rp) :: tmp
1142 
1143  do e = 1,nelv
1144  iu = (e-1)*nununu
1145  iv = (e-1)*nvnvnv
1146 
1147  do j = 1, nunu
1148  do i = 1, nv
1149  ii = i + nv * (j - 1)
1150  work(ii) = a(i,1) * v(1 + nu * (j - 1) + iu) &
1151  + a(i,2) * v(2 + nu * (j - 1) + iu) &
1152  + a(i,3) * v(3 + nu * (j - 1) + iu) &
1153  + a(i,4) * v(4 + nu * (j - 1) + iu)
1154  end do
1155  end do
1156 
1157  do i = 1, nu
1158  do j = 1, nv
1159  do l = 1, nv
1160  ii = l + nv * (j - 1) + nvnv * (i - 1)
1161  tmp = 0.0_rp
1162  do k = 1, nu
1163  jj = l + nv * (k - 1) + nvnu * (i - 1)
1164  tmp = tmp + work(jj) * bt(k,j)
1165  end do
1166  work2(ii) = tmp
1167  end do
1168  end do
1169  end do
1170 
1171  do j = 1, nv
1172  do i = 1, nvnv
1173  jj = i + nvnv * (j - 1) + iv
1174  v(jj) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1175  + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1176  + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1177  + work2(i + nvnv * (4 - 1)) * ct(4, j)
1178 
1179  end do
1180  end do
1181  end do
1182 
1183  end subroutine tnsr1_3d_nu4nv2_cpu
1184 
1185 end module tensor_cpu
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:758
subroutine, public tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
Definition: tensor_cpu.f90:12
subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:611
subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:663
subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:556
subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:163
subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
Definition: tensor_cpu.f90:65
subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:712
subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
Definition: tensor_cpu.f90:893
subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:437
subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:373
subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:306
subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:801
subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
Definition: tensor_cpu.f90:946
subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:841
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
Definition: tensor_cpu.f90:23
subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
Definition: tensor_cpu.f90:116
subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:498
subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
Definition: tensor_cpu.f90:992
subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
Definition: tensor_cpu.f90:236
subroutine, public tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
Definition: tensor_cpu.f90:878