Neko 0.9.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor_cpu.f90
Go to the documentation of this file.
2 use num_types, only : rp
3 use mxm_wrapper, only : mxm
4 implicit none
5 private
6
8
9contains
10
11 subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
12 integer, intent(in) :: nv, nu
13 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
14 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
15 real(kind=rp) :: work(0:nu**2*nv)
16
17 call mxm(a, nv, u, nu, work, nu)
18 call mxm(work, nv, bt, nu, v, nv)
19
20 end subroutine tnsr2d_el_cpu
21
22 subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
23 integer, intent(in) :: nv, nu
24 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
25 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
26
27 if (nv .eq. nu) then
28 select case (nv)
29 case (14)
30 call tnsr3d_el_n14_cpu(v, u, a, bt, ct)
31 case (13)
32 call tnsr3d_el_n13_cpu(v, u, a, bt, ct)
33 case (12)
34 call tnsr3d_el_n12_cpu(v, u, a, bt, ct)
35 case (11)
36 call tnsr3d_el_n11_cpu(v, u, a, bt, ct)
37 case (10)
38 call tnsr3d_el_n10_cpu(v, u, a, bt, ct)
39 case (9)
40 call tnsr3d_el_n9_cpu(v, u, a, bt, ct)
41 case (8)
42 call tnsr3d_el_n8_cpu(v, u, a, bt, ct)
43 case (7)
44 call tnsr3d_el_n7_cpu(v, u, a, bt, ct)
45 case (6)
46 call tnsr3d_el_n6_cpu(v, u, a, bt, ct)
47 case (5)
48 call tnsr3d_el_n5_cpu(v, u, a, bt, ct)
49 case (4)
50 call tnsr3d_el_n4_cpu(v, u, a, bt, ct)
51 case (3)
52 call tnsr3d_el_n3_cpu(v, u, a, bt, ct)
53 case (2)
54 call tnsr3d_el_n2_cpu(v, u, a, bt, ct)
55 case default
56 call tnsr3d_el_n_cpu(v, u, a, bt, ct, nv)
57 end select
58 else
59 call tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
60 end if
61
62 end subroutine tnsr3d_el_cpu
63
64 subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
65 integer, intent(in) :: nv, nu
66 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
67 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
68 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
69 real(kind=rp) :: tmp
70 integer :: i, j, k, l, nunu, nvnu, nvnv
71 integer :: ii, jj
72 nvnu = nv * nu
73 nunu = nu * nu
74 nvnv = nv * nv
75
76 do j = 1, nunu
77 do i = 1, nv
78 ii = i + nv * (j - 1)
79 tmp = 0.0_rp
80 do k = 1, nu
81 tmp = tmp + a(i,k) * u(k + nu * (j - 1))
82 end do
83 work(ii) = tmp
84 end do
85 end do
86
87 do i = 1, nu
88 do j = 1, nv
89 do l = 1, nv
90 ii = l + nv * (j - 1) + nvnv * (i - 1)
91 tmp = 0.0_rp
92 do k = 1, nu
93 jj = l + nv * (k - 1) + nvnu * (i - 1)
94 tmp = tmp + work(jj) * bt(k,j)
95 end do
96 work2(ii) = tmp
97 end do
98 end do
99 end do
100
101 do j = 1, nv
102 do i = 1, nvnv
103 jj = i + nvnv * (j - 1)
104 tmp = 0.0_rp
105 do k = 1, nu
106 ii = i + nvnv * (k - 1)
107 tmp = tmp + work2(ii) * ct(k, j)
108 end do
109 v(jj) = tmp
110 end do
111 end do
112
113 end subroutine tnsr3d_el_nvnu_cpu
114
115 subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
116 integer, intent(in) :: n
117 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
118 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
119 real(kind=rp) :: work(n**3), work2(n**3), tmp
120 integer :: i, j, l, k
121 integer :: ii, jj, nn
122
123 nn = n**2
124
125 do j = 1, nn
126 do i = 1, n
127 ii = i + n * (j - 1)
128 tmp = 0.0_rp
129 do k = 1, n
130 tmp = tmp + a(i,k) * u(k + n * (j - 1))
131 end do
132 work(ii) = tmp
133 end do
134 end do
135
136 do i = 1, n
137 do j = 1, n
138 do l = 1, n
139 ii = l + n * (j - 1) + nn * (i - 1)
140 tmp = 0.0_rp
141 do k = 1, n
142 tmp = tmp + work(l + n * (k - 1) + nn * (i - 1)) * bt(k,j)
143 end do
144 work2(ii) = tmp
145 end do
146 end do
147 end do
148
149 do j = 1, n
150 do i = 1, nn
151 jj = i + nn * (j - 1)
152 tmp = 0.0_rp
153 do k = 1, n
154 tmp = tmp + work2(i + nn * (k - 1)) * ct(k, j)
155 end do
156 v(jj) = tmp
157 end do
158 end do
159
160 end subroutine tnsr3d_el_n_cpu
161
162 subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
163 integer, parameter :: n = 14
164 integer, parameter :: nn = n**2
165 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
166 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
167 real(kind=rp) :: work(n**3), work2(n**3)
168 integer :: i, j, l
169 integer :: ii, jj
170
171 do j = 1, nn
172 do i = 1, n
173 ii = i + n * (j - 1)
174 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
175 + a(i,2) * u(2 + n * (j - 1)) &
176 + a(i,3) * u(3 + n * (j - 1)) &
177 + a(i,4) * u(4 + n * (j - 1)) &
178 + a(i,5) * u(5 + n * (j - 1)) &
179 + a(i,6) * u(6 + n * (j - 1)) &
180 + a(i,7) * u(7 + n * (j - 1)) &
181 + a(i,8) * u(8 + n * (j - 1)) &
182 + a(i,9) * u(9 + n * (j - 1)) &
183 + a(i,10) * u(10 + n * (j - 1)) &
184 + a(i,11) * u(11 + n * (j - 1)) &
185 + a(i,12) * u(12 + n * (j - 1)) &
186 + a(i,13) * u(13 + n * (j - 1)) &
187 + a(i,14) * u(14 + n * (j - 1))
188 end do
189 end do
190
191 do i = 1, n
192 do j = 1, n
193 do l = 1, n
194 ii = l + n * (j - 1) + nn * (i - 1)
195 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
196 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
197 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
198 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
199 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
200 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
201 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
202 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
203 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
204 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
205 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
206 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
207 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j) &
208 + work(l + n * (14 - 1) + nn * (i - 1)) * bt(14,j)
209 end do
210 end do
211 end do
212
213 do j = 1, n
214 do i = 1, nn
215 jj = i + nn * (j - 1)
216 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
217 + work2(i + nn * (2 - 1)) * ct(2, j) &
218 + work2(i + nn * (3 - 1)) * ct(3, j) &
219 + work2(i + nn * (4 - 1)) * ct(4, j) &
220 + work2(i + nn * (5 - 1)) * ct(5, j) &
221 + work2(i + nn * (6 - 1)) * ct(6, j) &
222 + work2(i + nn * (7 - 1)) * ct(7, j) &
223 + work2(i + nn * (8 - 1)) * ct(8, j) &
224 + work2(i + nn * (9 - 1)) * ct(9, j) &
225 + work2(i + nn * (10 - 1)) * ct(10, j) &
226 + work2(i + nn * (11 - 1)) * ct(11, j) &
227 + work2(i + nn * (12 - 1)) * ct(12, j) &
228 + work2(i + nn * (13 - 1)) * ct(13, j) &
229 + work2(i + nn * (14 - 1)) * ct(14, j)
230 end do
231 end do
232
233 end subroutine tnsr3d_el_n14_cpu
234
235 subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
236 integer, parameter :: n = 13
237 integer, parameter :: nn = n**2
238 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
239 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
240 real(kind=rp) :: work(n**3), work2(n**3)
241 integer :: i, j, l
242 integer :: ii, jj
243
244 do j = 1, nn
245 do i = 1, n
246 ii = i + n * (j - 1)
247 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
248 + a(i,2) * u(2 + n * (j - 1)) &
249 + a(i,3) * u(3 + n * (j - 1)) &
250 + a(i,4) * u(4 + n * (j - 1)) &
251 + a(i,5) * u(5 + n * (j - 1)) &
252 + a(i,6) * u(6 + n * (j - 1)) &
253 + a(i,7) * u(7 + n * (j - 1)) &
254 + a(i,8) * u(8 + n * (j - 1)) &
255 + a(i,9) * u(9 + n * (j - 1)) &
256 + a(i,10) * u(10 + n * (j - 1)) &
257 + a(i,11) * u(11 + n * (j - 1)) &
258 + a(i,12) * u(12 + n * (j - 1)) &
259 + a(i,13) * u(13 + n * (j - 1))
260 end do
261 end do
262
263 do i = 1, n
264 do j = 1, n
265 do l = 1, n
266 ii = l + n * (j - 1) + nn * (i - 1)
267 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
268 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
269 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
270 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
271 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
272 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
273 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
274 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
275 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
276 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
277 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
278 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
279 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j)
280 end do
281 end do
282 end do
283
284 do j = 1, n
285 do i = 1, nn
286 jj = i + nn * (j - 1)
287 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
288 + work2(i + nn * (2 - 1)) * ct(2, j) &
289 + work2(i + nn * (3 - 1)) * ct(3, j) &
290 + work2(i + nn * (4 - 1)) * ct(4, j) &
291 + work2(i + nn * (5 - 1)) * ct(5, j) &
292 + work2(i + nn * (6 - 1)) * ct(6, j) &
293 + work2(i + nn * (7 - 1)) * ct(7, j) &
294 + work2(i + nn * (8 - 1)) * ct(8, j) &
295 + work2(i + nn * (9 - 1)) * ct(9, j) &
296 + work2(i + nn * (10 - 1)) * ct(10, j) &
297 + work2(i + nn * (11 - 1)) * ct(11, j) &
298 + work2(i + nn * (12 - 1)) * ct(12, j) &
299 + work2(i + nn * (13 - 1)) * ct(13, j)
300 end do
301 end do
302
303 end subroutine tnsr3d_el_n13_cpu
304
305 subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
306 integer, parameter :: n = 12
307 integer, parameter :: nn = n**2
308 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
309 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
310 real(kind=rp) :: work(n**3), work2(n**3)
311 integer :: i, j, l
312 integer :: ii, jj
313
314 do j = 1, nn
315 do i = 1, n
316 ii = i + n * (j - 1)
317 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
318 + a(i,2) * u(2 + n * (j - 1)) &
319 + a(i,3) * u(3 + n * (j - 1)) &
320 + a(i,4) * u(4 + n * (j - 1)) &
321 + a(i,5) * u(5 + n * (j - 1)) &
322 + a(i,6) * u(6 + n * (j - 1)) &
323 + a(i,7) * u(7 + n * (j - 1)) &
324 + a(i,8) * u(8 + n * (j - 1)) &
325 + a(i,9) * u(9 + n * (j - 1)) &
326 + a(i,10) * u(10 + n * (j - 1)) &
327 + a(i,11) * u(11 + n * (j - 1)) &
328 + a(i,12) * u(12 + n * (j - 1))
329 end do
330 end do
331
332 do i = 1, n
333 do j = 1, n
334 do l = 1, n
335 ii = l + n * (j - 1) + nn * (i - 1)
336 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
337 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
338 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
339 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
340 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
341 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
342 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
343 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
344 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
345 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
346 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
347 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j)
348 end do
349 end do
350 end do
351
352 do j = 1, n
353 do i = 1, nn
354 jj = i + nn * (j - 1)
355 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
356 + work2(i + nn * (2 - 1)) * ct(2, j) &
357 + work2(i + nn * (3 - 1)) * ct(3, j) &
358 + work2(i + nn * (4 - 1)) * ct(4, j) &
359 + work2(i + nn * (5 - 1)) * ct(5, j) &
360 + work2(i + nn * (6 - 1)) * ct(6, j) &
361 + work2(i + nn * (7 - 1)) * ct(7, j) &
362 + work2(i + nn * (8 - 1)) * ct(8, j) &
363 + work2(i + nn * (9 - 1)) * ct(9, j) &
364 + work2(i + nn * (10 - 1)) * ct(10, j) &
365 + work2(i + nn * (11 - 1)) * ct(11, j) &
366 + work2(i + nn * (12 - 1)) * ct(12, j)
367 end do
368 end do
369
370 end subroutine tnsr3d_el_n12_cpu
371
372 subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
373 integer, parameter :: n = 11
374 integer, parameter :: nn = n**2
375 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
376 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
377 real(kind=rp) :: work(n**3), work2(n**3)
378 integer :: i, j, l
379 integer :: ii, jj
380
381 do j = 1, nn
382 do i = 1, n
383 ii = i + n * (j - 1)
384 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
385 + a(i,2) * u(2 + n * (j - 1)) &
386 + a(i,3) * u(3 + n * (j - 1)) &
387 + a(i,4) * u(4 + n * (j - 1)) &
388 + a(i,5) * u(5 + n * (j - 1)) &
389 + a(i,6) * u(6 + n * (j - 1)) &
390 + a(i,7) * u(7 + n * (j - 1)) &
391 + a(i,8) * u(8 + n * (j - 1)) &
392 + a(i,9) * u(9 + n * (j - 1)) &
393 + a(i,10) * u(10 + n * (j - 1)) &
394 + a(i,11) * u(11 + n * (j - 1))
395 end do
396 end do
397
398 do i = 1, n
399 do j = 1, n
400 do l = 1, n
401 ii = l + n * (j - 1) + nn * (i - 1)
402 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
403 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
404 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
405 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
406 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
407 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
408 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
409 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
410 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
411 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
412 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j)
413 end do
414 end do
415 end do
416
417 do j = 1, n
418 do i = 1, nn
419 jj = i + nn * (j - 1)
420 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
421 + work2(i + nn * (2 - 1)) * ct(2, j) &
422 + work2(i + nn * (3 - 1)) * ct(3, j) &
423 + work2(i + nn * (4 - 1)) * ct(4, j) &
424 + work2(i + nn * (5 - 1)) * ct(5, j) &
425 + work2(i + nn * (6 - 1)) * ct(6, j) &
426 + work2(i + nn * (7 - 1)) * ct(7, j) &
427 + work2(i + nn * (8 - 1)) * ct(8, j) &
428 + work2(i + nn * (9 - 1)) * ct(9, j) &
429 + work2(i + nn * (10 - 1)) * ct(10, j) &
430 + work2(i + nn * (11 - 1)) * ct(11, j)
431 end do
432 end do
433
434 end subroutine tnsr3d_el_n11_cpu
435
436 subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
437 integer, parameter :: n = 10
438 integer, parameter :: nn = n**2
439 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
440 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
441 real(kind=rp) :: work(n**3), work2(n**3)
442 integer :: i, j, l
443 integer :: ii, jj
444
445 do j = 1, nn
446 do i = 1, n
447 ii = i + n * (j - 1)
448 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
449 + a(i,2) * u(2 + n * (j - 1)) &
450 + a(i,3) * u(3 + n * (j - 1)) &
451 + a(i,4) * u(4 + n * (j - 1)) &
452 + a(i,5) * u(5 + n * (j - 1)) &
453 + a(i,6) * u(6 + n * (j - 1)) &
454 + a(i,7) * u(7 + n * (j - 1)) &
455 + a(i,8) * u(8 + n * (j - 1)) &
456 + a(i,9) * u(9 + n * (j - 1)) &
457 + a(i,10) * u(10 + n * (j - 1))
458 end do
459 end do
460
461 do i = 1, n
462 do j = 1, n
463 do l = 1, n
464 ii = l + n * (j - 1) + nn * (i - 1)
465 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
466 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
467 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
468 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
469 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
470 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
471 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
472 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
473 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
474 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j)
475 end do
476 end do
477 end do
478
479 do j = 1, n
480 do i = 1, nn
481 jj = i + nn * (j - 1)
482 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
483 + work2(i + nn * (2 - 1)) * ct(2, j) &
484 + work2(i + nn * (3 - 1)) * ct(3, j) &
485 + work2(i + nn * (4 - 1)) * ct(4, j) &
486 + work2(i + nn * (5 - 1)) * ct(5, j) &
487 + work2(i + nn * (6 - 1)) * ct(6, j) &
488 + work2(i + nn * (7 - 1)) * ct(7, j) &
489 + work2(i + nn * (8 - 1)) * ct(8, j) &
490 + work2(i + nn * (9 - 1)) * ct(9, j) &
491 + work2(i + nn * (10 - 1)) * ct(10, j)
492 end do
493 end do
494
495 end subroutine tnsr3d_el_n10_cpu
496
497 subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
498 integer, parameter :: n = 9
499 integer, parameter :: nn = n**2
500 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
501 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
502 real(kind=rp) :: work(n**3), work2(n**3)
503 integer :: i, j, l
504 integer :: ii, jj
505
506 do j = 1, nn
507 do i = 1, n
508 ii = i + n * (j - 1)
509 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
510 + a(i,2) * u(2 + n * (j - 1)) &
511 + a(i,3) * u(3 + n * (j - 1)) &
512 + a(i,4) * u(4 + n * (j - 1)) &
513 + a(i,5) * u(5 + n * (j - 1)) &
514 + a(i,6) * u(6 + n * (j - 1)) &
515 + a(i,7) * u(7 + n * (j - 1)) &
516 + a(i,8) * u(8 + n * (j - 1)) &
517 + a(i,9) * u(9 + n * (j - 1))
518 end do
519 end do
520
521 do i = 1, n
522 do j = 1, n
523 do l = 1, n
524 ii = l + n * (j - 1) + nn * (i - 1)
525 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
526 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
527 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
528 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
529 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
530 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
531 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
532 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
533 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j)
534 end do
535 end do
536 end do
537
538 do j = 1, n
539 do i = 1, nn
540 jj = i + nn * (j - 1)
541 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
542 + work2(i + nn * (2 - 1)) * ct(2, j) &
543 + work2(i + nn * (3 - 1)) * ct(3, j) &
544 + work2(i + nn * (4 - 1)) * ct(4, j) &
545 + work2(i + nn * (5 - 1)) * ct(5, j) &
546 + work2(i + nn * (6 - 1)) * ct(6, j) &
547 + work2(i + nn * (7 - 1)) * ct(7, j) &
548 + work2(i + nn * (8 - 1)) * ct(8, j) &
549 + work2(i + nn * (9 - 1)) * ct(9, j)
550 end do
551 end do
552
553 end subroutine tnsr3d_el_n9_cpu
554
555 subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
556 integer, parameter :: n = 8
557 integer, parameter :: nn = n**2
558 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
559 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
560 real(kind=rp) :: work(n**3), work2(n**3)
561 integer :: i, j, l
562 integer :: ii, jj
563
564 do j = 1, nn
565 do i = 1, n
566 ii = i + n * (j - 1)
567 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
568 + a(i,2) * u(2 + n * (j - 1)) &
569 + a(i,3) * u(3 + n * (j - 1)) &
570 + a(i,4) * u(4 + n * (j - 1)) &
571 + a(i,5) * u(5 + n * (j - 1)) &
572 + a(i,6) * u(6 + n * (j - 1)) &
573 + a(i,7) * u(7 + n * (j - 1)) &
574 + a(i,8) * u(8 + n * (j - 1))
575 end do
576 end do
577
578 do i = 1, n
579 do j = 1, n
580 do l = 1, n
581 ii = l + n * (j - 1) + nn * (i - 1)
582 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
583 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
584 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
585 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
586 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
587 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
588 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
589 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j)
590 end do
591 end do
592 end do
593
594 do j = 1, n
595 do i = 1, nn
596 jj = i + nn * (j - 1)
597 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
598 + work2(i + nn * (2 - 1)) * ct(2, j) &
599 + work2(i + nn * (3 - 1)) * ct(3, j) &
600 + work2(i + nn * (4 - 1)) * ct(4, j) &
601 + work2(i + nn * (5 - 1)) * ct(5, j) &
602 + work2(i + nn * (6 - 1)) * ct(6, j) &
603 + work2(i + nn * (7 - 1)) * ct(7, j) &
604 + work2(i + nn * (8 - 1)) * ct(8, j)
605 end do
606 end do
607
608 end subroutine tnsr3d_el_n8_cpu
609
610 subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
611 integer, parameter :: n = 7
612 integer, parameter :: nn = n**2
613 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
614 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
615 real(kind=rp) :: work(n**3), work2(n**3)
616 integer :: i, j, l
617 integer :: ii, jj
618
619 do j = 1, nn
620 do i = 1, n
621 ii = i + n * (j - 1)
622 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
623 + a(i,2) * u(2 + n * (j - 1)) &
624 + a(i,3) * u(3 + n * (j - 1)) &
625 + a(i,4) * u(4 + n * (j - 1)) &
626 + a(i,5) * u(5 + n * (j - 1)) &
627 + a(i,6) * u(6 + n * (j - 1)) &
628 + a(i,7) * u(7 + n * (j - 1))
629 end do
630 end do
631
632 do i = 1, n
633 do j = 1, n
634 do l = 1, n
635 ii = l + n * (j - 1) + nn * (i - 1)
636 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
637 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
638 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
639 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
640 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
641 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
642 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j)
643 end do
644 end do
645 end do
646
647 do j = 1, n
648 do i = 1, nn
649 jj = i + nn * (j - 1)
650 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
651 + work2(i + nn * (2 - 1)) * ct(2, j) &
652 + work2(i + nn * (3 - 1)) * ct(3, j) &
653 + work2(i + nn * (4 - 1)) * ct(4, j) &
654 + work2(i + nn * (5 - 1)) * ct(5, j) &
655 + work2(i + nn * (6 - 1)) * ct(6, j) &
656 + work2(i + nn * (7 - 1)) * ct(7, j)
657 end do
658 end do
659
660 end subroutine tnsr3d_el_n7_cpu
661
662 subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
663 integer, parameter :: n = 6
664 integer, parameter :: nn = n**2
665 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
666 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
667 real(kind=rp) :: work(n**3), work2(n**3)
668 integer :: i, j, l
669 integer :: ii, jj
670
671 do j = 1, nn
672 do i = 1, n
673 ii = i + n * (j - 1)
674 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
675 + a(i,2) * u(2 + n * (j - 1)) &
676 + a(i,3) * u(3 + n * (j - 1)) &
677 + a(i,4) * u(4 + n * (j - 1)) &
678 + a(i,5) * u(5 + n * (j - 1)) &
679 + a(i,6) * u(6 + n * (j - 1))
680 end do
681 end do
682
683 do i = 1, n
684 do j = 1, n
685 do l = 1, n
686 ii = l + n * (j - 1) + nn * (i - 1)
687 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
688 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
689 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
690 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
691 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
692 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j)
693 end do
694 end do
695 end do
696
697 do j = 1, n
698 do i = 1, nn
699 jj = i + nn * (j - 1)
700 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
701 + work2(i + nn * (2 - 1)) * ct(2, j) &
702 + work2(i + nn * (3 - 1)) * ct(3, j) &
703 + work2(i + nn * (4 - 1)) * ct(4, j) &
704 + work2(i + nn * (5 - 1)) * ct(5, j) &
705 + work2(i + nn * (6 - 1)) * ct(6, j)
706 end do
707 end do
708
709 end subroutine tnsr3d_el_n6_cpu
710
711 subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
712 integer, parameter :: n = 5
713 integer, parameter :: nn = n**2
714 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
715 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
716 real(kind=rp) :: work(n**3), work2(n**3)
717 integer :: i, j, l
718 integer :: ii, jj
719
720 do j = 1, nn
721 do i = 1, n
722 ii = i + n * (j - 1)
723 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
724 + a(i,2) * u(2 + n * (j - 1)) &
725 + a(i,3) * u(3 + n * (j - 1)) &
726 + a(i,4) * u(4 + n * (j - 1)) &
727 + a(i,5) * u(5 + n * (j - 1))
728 end do
729 end do
730
731 do i = 1, n
732 do j = 1, n
733 do l = 1, n
734 ii = l + n * (j - 1) + nn * (i - 1)
735 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
736 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
737 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
738 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
739 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j)
740 end do
741 end do
742 end do
743
744 do j = 1, n
745 do i = 1, nn
746 jj = i + nn * (j - 1)
747 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
748 + work2(i + nn * (2 - 1)) * ct(2, j) &
749 + work2(i + nn * (3 - 1)) * ct(3, j) &
750 + work2(i + nn * (4 - 1)) * ct(4, j) &
751 + work2(i + nn * (5 - 1)) * ct(5, j)
752 end do
753 end do
754
755 end subroutine tnsr3d_el_n5_cpu
756
757 subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
758 integer, parameter :: n = 4
759 integer, parameter :: nn = n**2
760 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
761 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
762 real(kind=rp) :: work(n**3), work2(n**3)
763 integer :: i, j, l
764 integer :: ii, jj
765
766 do j = 1, nn
767 do i = 1, n
768 ii = i + n * (j - 1)
769 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
770 + a(i,2) * u(2 + n * (j - 1)) &
771 + a(i,3) * u(3 + n * (j - 1)) &
772 + a(i,4) * u(4 + n * (j - 1))
773 end do
774 end do
775
776 do i = 1, n
777 do j = 1, n
778 do l = 1, n
779 ii = l + n * (j - 1) + nn * (i - 1)
780 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
781 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
782 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
783 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j)
784 end do
785 end do
786 end do
787
788 do j = 1, n
789 do i = 1, nn
790 jj = i + nn * (j - 1)
791 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
792 + work2(i + nn * (2 - 1)) * ct(2, j) &
793 + work2(i + nn * (3 - 1)) * ct(3, j) &
794 + work2(i + nn * (4 - 1)) * ct(4, j)
795 end do
796 end do
797
798 end subroutine tnsr3d_el_n4_cpu
799
800 subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
801 integer, parameter :: n = 3
802 integer, parameter :: nn = n**2
803 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
804 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
805 real(kind=rp) :: work(n**3), work2(n**3)
806 integer :: i, j, l
807 integer :: ii, jj
808
809 do j = 1, nn
810 do i = 1, n
811 ii = i + n * (j - 1)
812 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
813 + a(i,2) * u(2 + n * (j - 1)) &
814 + a(i,3) * u(3 + n * (j - 1))
815 end do
816 end do
817
818 do i = 1, n
819 do j = 1, n
820 do l = 1, n
821 ii = l + n * (j - 1) + nn * (i - 1)
822 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
823 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
824 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j)
825 end do
826 end do
827 end do
828
829 do j = 1, n
830 do i = 1, nn
831 jj = i + nn * (j - 1)
832 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
833 + work2(i + nn * (2 - 1)) * ct(2, j) &
834 + work2(i + nn * (3 - 1)) * ct(3, j)
835 end do
836 end do
837
838 end subroutine tnsr3d_el_n3_cpu
839
840 subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
841 integer, parameter :: n = 2
842 integer, parameter :: nn = n**2
843 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
844 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
845 real(kind=rp) :: work(n**3), work2(n**3)
846 integer :: i, j, l
847 integer :: ii, jj
848
849 do j = 1, nn
850 do i = 1, n
851 ii = i + n * (j - 1)
852 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
853 + a(i,2) * u(2 + n * (j - 1))
854 end do
855 end do
856
857 do i = 1, n
858 do j = 1, n
859 do l = 1, n
860 ii = l + n * (j - 1) + nn * (i - 1)
861 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
862 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j)
863 end do
864 end do
865 end do
866
867 do j = 1, n
868 do i = 1, nn
869 jj = i + nn * (j - 1)
870 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
871 + work2(i + nn * (2 - 1)) * ct(2, j)
872 end do
873 end do
874
875 end subroutine tnsr3d_el_n2_cpu
876
877 subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
878 integer, intent(in) :: nv, nu, nelv
879 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
880 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
881
882 if (nu .eq. 2 .and. nv .eq. 4) then
883 call tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
884 else if (nu .eq. 4) then
885 call tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
886 else
887 call tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
888 end if
889
890 end subroutine tnsr3d_cpu
891
892 subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
893 integer, intent(in) :: nv, nu, nelv
894 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
895 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
896 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
897 integer :: ie, i, j, k, l, ii, jj
898 integer :: nunu, nvnu, nvnv
899
900 nvnu = nv * nu
901 nunu = nu * nu
902 nvnv = nv * nv
903
904 do ie = 1,nelv
905 do j = 1, nunu
906 do i = 1, nv
907 ii = i + nv * (j - 1)
908 tmp = 0.0_rp
909 do k = 1, nu
910 tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
911 end do
912 work(ii) = tmp
913 end do
914 end do
915
916 do i = 1, nu
917 do j = 1, nv
918 do l = 1, nv
919 ii = l + nv * (j - 1) + nvnv * (i - 1)
920 tmp = 0.0_rp
921 do k = 1, nu
922 jj = l + nv * (k - 1) + nvnu * (i - 1)
923 tmp = tmp + work(jj) * bt(k,j)
924 end do
925 work2(ii) = tmp
926 end do
927 end do
928 end do
929
930 do j = 1, nv
931 do i = 1, nvnv
932 jj = i + nvnv * (j - 1)
933 tmp = 0.0_rp
934 do k = 1, nu
935 ii = i + nvnv * (k - 1)
936 tmp = tmp + work2(ii) * ct(k, j)
937 end do
938 v(jj, ie) = tmp
939 end do
940 end do
941 end do
942
943 end subroutine tnsr3d_nvnu_cpu
944
945 subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
946 integer, parameter :: nu = 2
947 integer, parameter :: nv = 4
948 integer, parameter :: nunu = 4
949 integer, parameter :: nvnu = 8
950 integer, parameter :: nvnv = 16
951 integer, intent(in) :: nelv
952 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
953 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
954 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
955 integer :: ie, i, j, k, l, ii, jj
956
957 do ie = 1,nelv
958 do j = 1, nunu
959 do i = 1, nv
960 ii = i + nv * (j - 1)
961 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
962 + a(i,2) * u(2 + nu * (j - 1), ie)
963 end do
964 end do
965
966 do i = 1, nu
967 do j = 1, nv
968 do l = 1, nv
969 ii = l + nv * (j - 1) + nvnv * (i - 1)
970 tmp = 0.0_rp
971 do k = 1, nu
972 jj = l + nv * (k - 1) + nvnu * (i - 1)
973 tmp = tmp + work(jj) * bt(k,j)
974 end do
975 work2(ii) = tmp
976 end do
977 end do
978 end do
979
980 do j = 1, nv
981 do i = 1, nvnv
982 jj = i + nvnv * (j - 1)
983 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
984 + work2(i + nvnv * (2 - 1)) * ct(2, j)
985 end do
986 end do
987 end do
988
989 end subroutine tnsr3d_nu2nv4_cpu
990
991 subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
992 integer, parameter :: nu = 4
993 integer, parameter :: nunu = 16
994 integer, intent(in) :: nv, nelv
995 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
996 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
997 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
998 integer :: ie, i, j, k, l, ii, jj
999 integer :: nvnu, nvnv
1000
1001 nvnu = nv * nu
1002 nvnv = nv * nv
1003
1004 do ie = 1,nelv
1005 do j = 1, nunu
1006 do i = 1, nv
1007 ii = i + nv * (j - 1)
1008 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1009 + a(i,2) * u(2 + nu * (j - 1), ie) &
1010 + a(i,3) * u(3 + nu * (j - 1), ie) &
1011 + a(i,4) * u(4 + nu * (j - 1), ie)
1012 end do
1013 end do
1014
1015 do i = 1, nu
1016 do j = 1, nv
1017 do l = 1, nv
1018 ii = l + nv * (j - 1) + nvnv * (i - 1)
1019 tmp = 0.0_rp
1020 do k = 1, nu
1021 jj = l + nv * (k - 1) + nvnu * (i - 1)
1022 tmp = tmp + work(jj) * bt(k,j)
1023 end do
1024 work2(ii) = tmp
1025 end do
1026 end do
1027 end do
1028
1029 do j = 1, nv
1030 do i = 1, nvnv
1031 jj = i + nvnv * (j - 1)
1032 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1033 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1034 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1035 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1036 end do
1037 end do
1038 end do
1039
1040 end subroutine tnsr3d_nu4_cpu
1041
1042 subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
1043 integer, intent(in) :: nv, nu, nelv
1044 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1045 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1046
1047 if (nu .eq. 4 .and. nv .eq. 2) then
1048 call tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
1049 else
1050 call tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
1051 end if
1052
1053 end subroutine tnsr1_3d_cpu
1054
1055 subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
1056 integer, intent(in) :: nv, nu, nelv
1057 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1058 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1059 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1060 integer :: e, e0, ee, es, iu, iv, nu3, nv3
1061 integer :: i, j, k, l, ii, jj, kk
1062 integer :: nunu, nvnu, nvnv
1063 real(kind=rp) :: tmp
1064
1065 nvnu = nv * nu
1066 nunu = nu * nu
1067 nvnv = nv * nv
1068
1069 e0 = 1
1070 es = 1
1071 ee = nelv
1072
1073 if (nv.gt.nu) then
1074 e0 = nelv
1075 es = -1
1076 ee = 1
1077 endif
1078
1079 nu3 = nu**3
1080 nv3 = nv**3
1081
1082 do e = e0,ee,es
1083 iu = (e-1)*nu3
1084 iv = (e-1)*nv3
1085
1086 do j = 1, nunu
1087 do i = 1, nv
1088 ii = i + nv * (j - 1)
1089 tmp = 0.0_rp
1090 do k = 1, nu
1091 kk = k + nu * (j - 1) + iu
1092 tmp = tmp + a(i,k) * v(kk)
1093 end do
1094 work(ii) = tmp
1095 end do
1096 end do
1097
1098 do i = 1, nu
1099 do j = 1, nv
1100 do l = 1, nv
1101 ii = l + nv * (j - 1) + nvnv * (i - 1)
1102 tmp = 0.0_rp
1103 do k = 1, nu
1104 jj = l + nv * (k - 1) + nvnu * (i - 1)
1105 tmp = tmp + work(jj) * bt(k,j)
1106 end do
1107 work2(ii) = tmp
1108 end do
1109 end do
1110 end do
1111
1112 do j = 1, nv
1113 do i = 1, nvnv
1114 jj = i + nvnv * (j - 1) + iv
1115 tmp = 0.0_rp
1116 do k = 1, nu
1117 ii = i + nvnv * (k - 1)
1118 tmp = tmp + work2(ii) * ct(k, j)
1119 end do
1120 v(jj) = tmp
1121 end do
1122 end do
1123 end do
1124
1125 end subroutine tnsr1_3d_nvnu_cpu
1126
1127 subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
1128 integer, parameter :: nu = 4
1129 integer, parameter :: nv = 2
1130 integer, parameter :: nunu = 16
1131 integer, parameter :: nvnu = 8
1132 integer, parameter :: nvnv = 4
1133 integer, parameter :: nununu = 64
1134 integer, parameter :: nvnvnv = 8
1135 integer, intent(in) :: nelv
1136 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1137 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1138 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1139 integer :: e, iu, iv
1140 integer :: i, j, k, l, ii, jj
1141 real(kind=rp) :: tmp
1142
1143 do e = 1,nelv
1144 iu = (e-1)*nununu
1145 iv = (e-1)*nvnvnv
1146
1147 do j = 1, nunu
1148 do i = 1, nv
1149 ii = i + nv * (j - 1)
1150 work(ii) = a(i,1) * v(1 + nu * (j - 1) + iu) &
1151 + a(i,2) * v(2 + nu * (j - 1) + iu) &
1152 + a(i,3) * v(3 + nu * (j - 1) + iu) &
1153 + a(i,4) * v(4 + nu * (j - 1) + iu)
1154 end do
1155 end do
1156
1157 do i = 1, nu
1158 do j = 1, nv
1159 do l = 1, nv
1160 ii = l + nv * (j - 1) + nvnv * (i - 1)
1161 tmp = 0.0_rp
1162 do k = 1, nu
1163 jj = l + nv * (k - 1) + nvnu * (i - 1)
1164 tmp = tmp + work(jj) * bt(k,j)
1165 end do
1166 work2(ii) = tmp
1167 end do
1168 end do
1169 end do
1170
1171 do j = 1, nv
1172 do i = 1, nvnv
1173 jj = i + nvnv * (j - 1) + iv
1174 v(jj) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1175 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1176 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1177 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1178
1179 end do
1180 end do
1181 end do
1182
1183 end subroutine tnsr1_3d_nu4nv2_cpu
1184
1185end module tensor_cpu
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine tnsr3d_el_n5_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n13_cpu(v, u, a, bt, ct)
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n3_cpu(v, u, a, bt, ct)
subroutine tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n7_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n11_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n9_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n6_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n2_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n4_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n_cpu(v, u, a, bt, ct, n)
subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n14_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n10_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n8_cpu(v, u, a, bt, ct)