Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor_cpu.f90
Go to the documentation of this file.
2 use num_types, only : rp
3 use mxm_wrapper, only : mxm
4 implicit none
5 private
6
8
9contains
10
11 subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
12 integer, intent(in) :: nv, nu
13 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
14 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
15 real(kind=rp) :: work(0:nu**2*nv)
16
17 call mxm(a, nv, u, nu, work, nu)
18 call mxm(work, nv, bt, nu, v, nv)
19
20 end subroutine tnsr2d_el_cpu
21
22 subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
23 integer, intent(in) :: nv, nu
24 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
25 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
26
27 if (nv .eq. nu) then
28 select case (nv)
29 case (14)
30 call tnsr3d_el_n14_cpu(v, u, a, bt, ct)
31 case (13)
32 call tnsr3d_el_n13_cpu(v, u, a, bt, ct)
33 case (12)
34 call tnsr3d_el_n12_cpu(v, u, a, bt, ct)
35 case (11)
36 call tnsr3d_el_n11_cpu(v, u, a, bt, ct)
37 case (10)
38 call tnsr3d_el_n10_cpu(v, u, a, bt, ct)
39 case (9)
40 call tnsr3d_el_n9_cpu(v, u, a, bt, ct)
41 case (8)
42 call tnsr3d_el_n8_cpu(v, u, a, bt, ct)
43 case (7)
44 call tnsr3d_el_n7_cpu(v, u, a, bt, ct)
45 case (6)
46 call tnsr3d_el_n6_cpu(v, u, a, bt, ct)
47 case (5)
48 call tnsr3d_el_n5_cpu(v, u, a, bt, ct)
49 case (4)
50 call tnsr3d_el_n4_cpu(v, u, a, bt, ct)
51 case (3)
52 call tnsr3d_el_n3_cpu(v, u, a, bt, ct)
53 case (2)
54 call tnsr3d_el_n2_cpu(v, u, a, bt, ct)
55 case default
56 call tnsr3d_el_n_cpu(v, u, a, bt, ct, nv)
57 end select
58 else if (nv .eq. 1) then
59 select case (nu)
60 case (4)
61 call tnsr3d_el_1_4_cpu(v, u, a, bt, ct)
62 case (6)
63 call tnsr3d_el_1_6_cpu(v, u, a, bt, ct)
64 case (8)
65 call tnsr3d_el_1_8_cpu(v, u, a, bt, ct)
66 case (10)
67 call tnsr3d_el_1_10_cpu(v, u, a, bt, ct)
68 case (12)
69 call tnsr3d_el_1_12_cpu(v, u, a, bt, ct)
70 case default
71 call tnsr3d_el_1_nu_cpu(v, u, nu, a, bt, ct)
72 end select
73 else
74 call tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
75 end if
76
77 end subroutine tnsr3d_el_cpu
78
79 subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
80 integer, intent(in) :: nv, nu
81 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
82 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
83 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
84 real(kind=rp) :: tmp
85 integer :: i, j, k, l, nunu, nvnu, nvnv
86 integer :: ii, jj
87 nvnu = nv * nu
88 nunu = nu * nu
89 nvnv = nv * nv
90
91 do j = 1, nunu
92 do i = 1, nv
93 ii = i + nv * (j - 1)
94 tmp = 0.0_rp
95 do k = 1, nu
96 tmp = tmp + a(i,k) * u(k + nu * (j - 1))
97 end do
98 work(ii) = tmp
99 end do
100 end do
101
102 do i = 1, nu
103 do j = 1, nv
104 do l = 1, nv
105 ii = l + nv * (j - 1) + nvnv * (i - 1)
106 tmp = 0.0_rp
107 do k = 1, nu
108 jj = l + nv * (k - 1) + nvnu * (i - 1)
109 tmp = tmp + work(jj) * bt(k,j)
110 end do
111 work2(ii) = tmp
112 end do
113 end do
114 end do
115
116 do j = 1, nv
117 do i = 1, nvnv
118 jj = i + nvnv * (j - 1)
119 tmp = 0.0_rp
120 do k = 1, nu
121 ii = i + nvnv * (k - 1)
122 tmp = tmp + work2(ii) * ct(k, j)
123 end do
124 v(jj) = tmp
125 end do
126 end do
127
128 end subroutine tnsr3d_el_nvnu_cpu
129
130 subroutine tnsr3d_el_1_nu_cpu(v, u, nu, A, Bt, Ct)
131 integer, intent(in) :: nu
132 real(kind=rp), intent(inout) :: v(1), u(nu*nu*nu)
133 real(kind=rp), intent(inout) :: a(1,nu),bt(nu, 1),ct(nu,1)
134 real(kind=rp) :: work(nu**2), work2(nu)
135 real(kind=rp) :: tmp
136 integer :: i, j, k, l, nunu
137 integer :: ii, jj
138 nunu = nu * nu
139
140 do j = 1, nunu
141 tmp = 0.0_rp
142 do k = 1, nu
143 tmp = tmp + a(1,k) * u(k + nu * (j - 1))
144 end do
145 work(j) = tmp
146 end do
147
148 do i = 1, nu
149 tmp = 0.0_rp
150 do k = 1, nu
151 jj = k + nu * (i - 1)
152 tmp = tmp + work(jj) * bt(k,1)
153 end do
154 work2(i) = tmp
155 end do
156
157 tmp = 0.0_rp
158 do k = 1, nu
159 tmp = tmp + work2(k) * ct(k, 1)
160 end do
161 v(1) = tmp
162
163 end subroutine tnsr3d_el_1_nu_cpu
164
165 subroutine tnsr3d_el_1_4_cpu(v, u, A, Bt, Ct)
166 integer, parameter :: n = 4
167 integer, parameter :: nn = n**2
168 real(kind=rp), intent(inout) :: v(1), u(n*n*n)
169 real(kind=rp), intent(inout) :: a(1,n), bt(n,1), ct(n,1)
170 real(kind=rp) :: work(n**2), work2(n)
171 integer :: i, j, l
172 integer :: ii, jj
173
174 do j = 1, nn
175 work(j) = a(1,1) * u(1 + n * (j - 1)) &
176 + a(1,2) * u(2 + n * (j - 1)) &
177 + a(1,3) * u(3 + n * (j - 1)) &
178 + a(1,4) * u(4 + n * (j - 1))
179 end do
180
181 do i = 1, n
182 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
183 + work(2 + n * (i - 1)) * bt(2,1) &
184 + work(3 + n * (i - 1)) * bt(3,1) &
185 + work(4 + n * (i - 1)) * bt(4,1)
186 end do
187
188 v(1) = work2(1) * ct(1, 1) &
189 + work2(2) * ct(2, 1) &
190 + work2(3) * ct(3, 1) &
191 + work2(4) * ct(4, 1)
192
193 end subroutine tnsr3d_el_1_4_cpu
194
195 subroutine tnsr3d_el_1_6_cpu(v, u, A, Bt, Ct)
196 integer, parameter :: n = 6
197 integer, parameter :: nn = n**2
198 real(kind=rp), intent(inout) :: v(1), u(n*n*n)
199 real(kind=rp), intent(inout) :: a(1,n), bt(n,1), ct(n,1)
200 real(kind=rp) :: work(n**2), work2(n)
201 integer :: i, j, l
202 integer :: ii, jj
203
204 do j = 1, nn
205 work(j) = a(1,1) * u(1 + n * (j - 1)) &
206 + a(1,2) * u(2 + n * (j - 1)) &
207 + a(1,3) * u(3 + n * (j - 1)) &
208 + a(1,4) * u(4 + n * (j - 1)) &
209 + a(1,5) * u(5 + n * (j - 1)) &
210 + a(1,6) * u(6 + n * (j - 1))
211 end do
212
213 do i = 1, n
214 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
215 + work(2 + n * (i - 1)) * bt(2,1) &
216 + work(3 + n * (i - 1)) * bt(3,1) &
217 + work(4 + n * (i - 1)) * bt(4,1) &
218 + work(5 + n * (i - 1)) * bt(5,1) &
219 + work(6 + n * (i - 1)) * bt(6,1)
220 end do
221
222 v(1) = work2(1) * ct(1, 1) &
223 + work2(2) * ct(2, 1) &
224 + work2(3) * ct(3, 1) &
225 + work2(4) * ct(4, 1) &
226 + work2(5) * ct(5, 1) &
227 + work2(6) * ct(6, 1)
228
229 end subroutine tnsr3d_el_1_6_cpu
230
231 subroutine tnsr3d_el_1_8_cpu(v, u, A, Bt, Ct)
232 integer, parameter :: n = 8
233 integer, parameter :: nn = n**2
234 real(kind=rp), intent(inout) :: v(1), u(n*n*n)
235 real(kind=rp), intent(inout) :: a(1,n), bt(n,1), ct(n,1)
236 real(kind=rp) :: work(n**2), work2(n)
237 integer :: i, j, l
238 integer :: ii, jj
239
240 do j = 1, nn
241 work(j) = a(1,1) * u(1 + n * (j - 1)) &
242 + a(1,2) * u(2 + n * (j - 1)) &
243 + a(1,3) * u(3 + n * (j - 1)) &
244 + a(1,4) * u(4 + n * (j - 1)) &
245 + a(1,5) * u(5 + n * (j - 1)) &
246 + a(1,6) * u(6 + n * (j - 1)) &
247 + a(1,7) * u(7 + n * (j - 1)) &
248 + a(1,8) * u(8 + n * (j - 1))
249 end do
250
251 do i = 1, n
252 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
253 + work(2 + n * (i - 1)) * bt(2,1) &
254 + work(3 + n * (i - 1)) * bt(3,1) &
255 + work(4 + n * (i - 1)) * bt(4,1) &
256 + work(5 + n * (i - 1)) * bt(5,1) &
257 + work(6 + n * (i - 1)) * bt(6,1) &
258 + work(7 + n * (i - 1)) * bt(7,1) &
259 + work(8 + n * (i - 1)) * bt(8,1)
260 end do
261
262 v(1) = work2(1) * ct(1, 1) &
263 + work2(2) * ct(2, 1) &
264 + work2(3) * ct(3, 1) &
265 + work2(4) * ct(4, 1) &
266 + work2(5) * ct(5, 1) &
267 + work2(6) * ct(6, 1) &
268 + work2(7) * ct(7, 1) &
269 + work2(8) * ct(8, 1)
270
271
272 end subroutine tnsr3d_el_1_8_cpu
273
274
275 subroutine tnsr3d_el_1_10_cpu(v, u, A, Bt, Ct)
276 integer, parameter :: n = 10
277 integer, parameter :: nn = n**2
278 real(kind=rp), intent(inout) :: v(1), u(n*n*n)
279 real(kind=rp), intent(inout) :: a(1,n), bt(n,1), ct(n,1)
280 real(kind=rp) :: work(n**2), work2(n)
281 integer :: i, j, l
282 integer :: ii, jj
283
284 do j = 1, nn
285 work(j) = a(1,1) * u(1 + n * (j - 1)) &
286 + a(1,2) * u(2 + n * (j - 1)) &
287 + a(1,3) * u(3 + n * (j - 1)) &
288 + a(1,4) * u(4 + n * (j - 1)) &
289 + a(1,5) * u(5 + n * (j - 1)) &
290 + a(1,6) * u(6 + n * (j - 1)) &
291 + a(1,7) * u(7 + n * (j - 1)) &
292 + a(1,8) * u(8 + n * (j - 1)) &
293 + a(1,9) * u(9 + n * (j - 1)) &
294 + a(1,10) * u(10 + n * (j - 1))
295 end do
296
297 do i = 1, n
298 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
299 + work(2 + n * (i - 1)) * bt(2,1) &
300 + work(3 + n * (i - 1)) * bt(3,1) &
301 + work(4 + n * (i - 1)) * bt(4,1) &
302 + work(5 + n * (i - 1)) * bt(5,1) &
303 + work(6 + n * (i - 1)) * bt(6,1) &
304 + work(7 + n * (i - 1)) * bt(7,1) &
305 + work(8 + n * (i - 1)) * bt(8,1) &
306 + work(9 + n * (i - 1)) * bt(9,1) &
307 + work(10 + n * (i - 1)) * bt(10,1)
308 end do
309
310 v(1) = work2(1) * ct(1, 1) &
311 + work2(2) * ct(2, 1) &
312 + work2(3) * ct(3, 1) &
313 + work2(4) * ct(4, 1) &
314 + work2(5) * ct(5, 1) &
315 + work2(6) * ct(6, 1) &
316 + work2(7) * ct(7, 1) &
317 + work2(8) * ct(8, 1) &
318 + work2(9) * ct(9, 1) &
319 + work2(10) * ct(10, 1)
320
321 end subroutine tnsr3d_el_1_10_cpu
322
323
324 subroutine tnsr3d_el_1_12_cpu(v, u, A, Bt, Ct)
325 integer, parameter :: n = 12
326 integer, parameter :: nn = n**2
327 real(kind=rp), intent(inout) :: v(1), u(n*n*n)
328 real(kind=rp), intent(inout) :: a(1,n), bt(n,1), ct(n,1)
329 real(kind=rp) :: work(n**2), work2(n)
330 integer :: i, j, l
331 integer :: ii, jj
332
333 do j = 1, nn
334 work(j) = a(1,1) * u(1 + n * (j - 1)) &
335 + a(1,2) * u(2 + n * (j - 1)) &
336 + a(1,3) * u(3 + n * (j - 1)) &
337 + a(1,4) * u(4 + n * (j - 1)) &
338 + a(1,5) * u(5 + n * (j - 1)) &
339 + a(1,6) * u(6 + n * (j - 1)) &
340 + a(1,7) * u(7 + n * (j - 1)) &
341 + a(1,8) * u(8 + n * (j - 1)) &
342 + a(1,9) * u(9 + n * (j - 1)) &
343 + a(1,10) * u(10 + n * (j - 1)) &
344 + a(1,11) * u(11 + n * (j - 1)) &
345 + a(1,12) * u(12 + n * (j - 1))
346 end do
347
348 do i = 1, n
349 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
350 + work(2 + n * (i - 1)) * bt(2,1) &
351 + work(3 + n * (i - 1)) * bt(3,1) &
352 + work(4 + n * (i - 1)) * bt(4,1) &
353 + work(5 + n * (i - 1)) * bt(5,1) &
354 + work(6 + n * (i - 1)) * bt(6,1) &
355 + work(7 + n * (i - 1)) * bt(7,1) &
356 + work(8 + n * (i - 1)) * bt(8,1) &
357 + work(9 + n * (i - 1)) * bt(9,1) &
358 + work(10 + n * (i - 1)) * bt(10,1) &
359 + work(11 + n * (i - 1)) * bt(11,1) &
360 + work(12 + n * (i - 1)) * bt(12,1)
361 end do
362
363 v(1) = work2(1) * ct(1, 1) &
364 + work2(2) * ct(2, 1) &
365 + work2(3) * ct(3, 1) &
366 + work2(4) * ct(4, 1) &
367 + work2(5) * ct(5, 1) &
368 + work2(6) * ct(6, 1) &
369 + work2(7) * ct(7, 1) &
370 + work2(8) * ct(8, 1) &
371 + work2(9) * ct(9, 1) &
372 + work2(10) * ct(10, 1) &
373 + work2(11) * ct(11, 1) &
374 + work2(12) * ct(12, 1)
375
376 end subroutine tnsr3d_el_1_12_cpu
377
378
379
380 subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
381 integer, intent(in) :: n
382 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
383 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
384 real(kind=rp) :: work(n**3), work2(n**3), tmp
385 integer :: i, j, l, k
386 integer :: ii, jj, nn
387
388 nn = n**2
389
390 do j = 1, nn
391 do i = 1, n
392 ii = i + n * (j - 1)
393 tmp = 0.0_rp
394 do k = 1, n
395 tmp = tmp + a(i,k) * u(k + n * (j - 1))
396 end do
397 work(ii) = tmp
398 end do
399 end do
400
401 do i = 1, n
402 do j = 1, n
403 do l = 1, n
404 ii = l + n * (j - 1) + nn * (i - 1)
405 tmp = 0.0_rp
406 do k = 1, n
407 tmp = tmp + work(l + n * (k - 1) + nn * (i - 1)) * bt(k,j)
408 end do
409 work2(ii) = tmp
410 end do
411 end do
412 end do
413
414 do j = 1, n
415 do i = 1, nn
416 jj = i + nn * (j - 1)
417 tmp = 0.0_rp
418 do k = 1, n
419 tmp = tmp + work2(i + nn * (k - 1)) * ct(k, j)
420 end do
421 v(jj) = tmp
422 end do
423 end do
424
425 end subroutine tnsr3d_el_n_cpu
426
427 subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
428 integer, parameter :: n = 14
429 integer, parameter :: nn = n**2
430 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
431 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
432 real(kind=rp) :: work(n**3), work2(n**3)
433 integer :: i, j, l
434 integer :: ii, jj
435
436 do j = 1, nn
437 do i = 1, n
438 ii = i + n * (j - 1)
439 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
440 + a(i,2) * u(2 + n * (j - 1)) &
441 + a(i,3) * u(3 + n * (j - 1)) &
442 + a(i,4) * u(4 + n * (j - 1)) &
443 + a(i,5) * u(5 + n * (j - 1)) &
444 + a(i,6) * u(6 + n * (j - 1)) &
445 + a(i,7) * u(7 + n * (j - 1)) &
446 + a(i,8) * u(8 + n * (j - 1)) &
447 + a(i,9) * u(9 + n * (j - 1)) &
448 + a(i,10) * u(10 + n * (j - 1)) &
449 + a(i,11) * u(11 + n * (j - 1)) &
450 + a(i,12) * u(12 + n * (j - 1)) &
451 + a(i,13) * u(13 + n * (j - 1)) &
452 + a(i,14) * u(14 + n * (j - 1))
453 end do
454 end do
455
456 do i = 1, n
457 do j = 1, n
458 do l = 1, n
459 ii = l + n * (j - 1) + nn * (i - 1)
460 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
461 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
462 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
463 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
464 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
465 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
466 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
467 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
468 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
469 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
470 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
471 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
472 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j) &
473 + work(l + n * (14 - 1) + nn * (i - 1)) * bt(14,j)
474 end do
475 end do
476 end do
477
478 do j = 1, n
479 do i = 1, nn
480 jj = i + nn * (j - 1)
481 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
482 + work2(i + nn * (2 - 1)) * ct(2, j) &
483 + work2(i + nn * (3 - 1)) * ct(3, j) &
484 + work2(i + nn * (4 - 1)) * ct(4, j) &
485 + work2(i + nn * (5 - 1)) * ct(5, j) &
486 + work2(i + nn * (6 - 1)) * ct(6, j) &
487 + work2(i + nn * (7 - 1)) * ct(7, j) &
488 + work2(i + nn * (8 - 1)) * ct(8, j) &
489 + work2(i + nn * (9 - 1)) * ct(9, j) &
490 + work2(i + nn * (10 - 1)) * ct(10, j) &
491 + work2(i + nn * (11 - 1)) * ct(11, j) &
492 + work2(i + nn * (12 - 1)) * ct(12, j) &
493 + work2(i + nn * (13 - 1)) * ct(13, j) &
494 + work2(i + nn * (14 - 1)) * ct(14, j)
495 end do
496 end do
497
498 end subroutine tnsr3d_el_n14_cpu
499
500 subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
501 integer, parameter :: n = 13
502 integer, parameter :: nn = n**2
503 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
504 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
505 real(kind=rp) :: work(n**3), work2(n**3)
506 integer :: i, j, l
507 integer :: ii, jj
508
509 do j = 1, nn
510 do i = 1, n
511 ii = i + n * (j - 1)
512 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
513 + a(i,2) * u(2 + n * (j - 1)) &
514 + a(i,3) * u(3 + n * (j - 1)) &
515 + a(i,4) * u(4 + n * (j - 1)) &
516 + a(i,5) * u(5 + n * (j - 1)) &
517 + a(i,6) * u(6 + n * (j - 1)) &
518 + a(i,7) * u(7 + n * (j - 1)) &
519 + a(i,8) * u(8 + n * (j - 1)) &
520 + a(i,9) * u(9 + n * (j - 1)) &
521 + a(i,10) * u(10 + n * (j - 1)) &
522 + a(i,11) * u(11 + n * (j - 1)) &
523 + a(i,12) * u(12 + n * (j - 1)) &
524 + a(i,13) * u(13 + n * (j - 1))
525 end do
526 end do
527
528 do i = 1, n
529 do j = 1, n
530 do l = 1, n
531 ii = l + n * (j - 1) + nn * (i - 1)
532 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
533 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
534 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
535 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
536 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
537 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
538 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
539 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
540 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
541 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
542 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
543 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
544 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j)
545 end do
546 end do
547 end do
548
549 do j = 1, n
550 do i = 1, nn
551 jj = i + nn * (j - 1)
552 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
553 + work2(i + nn * (2 - 1)) * ct(2, j) &
554 + work2(i + nn * (3 - 1)) * ct(3, j) &
555 + work2(i + nn * (4 - 1)) * ct(4, j) &
556 + work2(i + nn * (5 - 1)) * ct(5, j) &
557 + work2(i + nn * (6 - 1)) * ct(6, j) &
558 + work2(i + nn * (7 - 1)) * ct(7, j) &
559 + work2(i + nn * (8 - 1)) * ct(8, j) &
560 + work2(i + nn * (9 - 1)) * ct(9, j) &
561 + work2(i + nn * (10 - 1)) * ct(10, j) &
562 + work2(i + nn * (11 - 1)) * ct(11, j) &
563 + work2(i + nn * (12 - 1)) * ct(12, j) &
564 + work2(i + nn * (13 - 1)) * ct(13, j)
565 end do
566 end do
567
568 end subroutine tnsr3d_el_n13_cpu
569
570 subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
571 integer, parameter :: n = 12
572 integer, parameter :: nn = n**2
573 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
574 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
575 real(kind=rp) :: work(n**3), work2(n**3)
576 integer :: i, j, l
577 integer :: ii, jj
578
579 do j = 1, nn
580 do i = 1, n
581 ii = i + n * (j - 1)
582 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
583 + a(i,2) * u(2 + n * (j - 1)) &
584 + a(i,3) * u(3 + n * (j - 1)) &
585 + a(i,4) * u(4 + n * (j - 1)) &
586 + a(i,5) * u(5 + n * (j - 1)) &
587 + a(i,6) * u(6 + n * (j - 1)) &
588 + a(i,7) * u(7 + n * (j - 1)) &
589 + a(i,8) * u(8 + n * (j - 1)) &
590 + a(i,9) * u(9 + n * (j - 1)) &
591 + a(i,10) * u(10 + n * (j - 1)) &
592 + a(i,11) * u(11 + n * (j - 1)) &
593 + a(i,12) * u(12 + n * (j - 1))
594 end do
595 end do
596
597 do i = 1, n
598 do j = 1, n
599 do l = 1, n
600 ii = l + n * (j - 1) + nn * (i - 1)
601 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
602 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
603 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
604 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
605 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
606 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
607 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
608 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
609 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
610 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
611 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
612 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j)
613 end do
614 end do
615 end do
616
617 do j = 1, n
618 do i = 1, nn
619 jj = i + nn * (j - 1)
620 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
621 + work2(i + nn * (2 - 1)) * ct(2, j) &
622 + work2(i + nn * (3 - 1)) * ct(3, j) &
623 + work2(i + nn * (4 - 1)) * ct(4, j) &
624 + work2(i + nn * (5 - 1)) * ct(5, j) &
625 + work2(i + nn * (6 - 1)) * ct(6, j) &
626 + work2(i + nn * (7 - 1)) * ct(7, j) &
627 + work2(i + nn * (8 - 1)) * ct(8, j) &
628 + work2(i + nn * (9 - 1)) * ct(9, j) &
629 + work2(i + nn * (10 - 1)) * ct(10, j) &
630 + work2(i + nn * (11 - 1)) * ct(11, j) &
631 + work2(i + nn * (12 - 1)) * ct(12, j)
632 end do
633 end do
634
635 end subroutine tnsr3d_el_n12_cpu
636
637 subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
638 integer, parameter :: n = 11
639 integer, parameter :: nn = n**2
640 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
641 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
642 real(kind=rp) :: work(n**3), work2(n**3)
643 integer :: i, j, l
644 integer :: ii, jj
645
646 do j = 1, nn
647 do i = 1, n
648 ii = i + n * (j - 1)
649 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
650 + a(i,2) * u(2 + n * (j - 1)) &
651 + a(i,3) * u(3 + n * (j - 1)) &
652 + a(i,4) * u(4 + n * (j - 1)) &
653 + a(i,5) * u(5 + n * (j - 1)) &
654 + a(i,6) * u(6 + n * (j - 1)) &
655 + a(i,7) * u(7 + n * (j - 1)) &
656 + a(i,8) * u(8 + n * (j - 1)) &
657 + a(i,9) * u(9 + n * (j - 1)) &
658 + a(i,10) * u(10 + n * (j - 1)) &
659 + a(i,11) * u(11 + n * (j - 1))
660 end do
661 end do
662
663 do i = 1, n
664 do j = 1, n
665 do l = 1, n
666 ii = l + n * (j - 1) + nn * (i - 1)
667 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
668 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
669 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
670 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
671 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
672 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
673 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
674 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
675 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
676 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
677 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j)
678 end do
679 end do
680 end do
681
682 do j = 1, n
683 do i = 1, nn
684 jj = i + nn * (j - 1)
685 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
686 + work2(i + nn * (2 - 1)) * ct(2, j) &
687 + work2(i + nn * (3 - 1)) * ct(3, j) &
688 + work2(i + nn * (4 - 1)) * ct(4, j) &
689 + work2(i + nn * (5 - 1)) * ct(5, j) &
690 + work2(i + nn * (6 - 1)) * ct(6, j) &
691 + work2(i + nn * (7 - 1)) * ct(7, j) &
692 + work2(i + nn * (8 - 1)) * ct(8, j) &
693 + work2(i + nn * (9 - 1)) * ct(9, j) &
694 + work2(i + nn * (10 - 1)) * ct(10, j) &
695 + work2(i + nn * (11 - 1)) * ct(11, j)
696 end do
697 end do
698
699 end subroutine tnsr3d_el_n11_cpu
700
701 subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
702 integer, parameter :: n = 10
703 integer, parameter :: nn = n**2
704 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
705 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
706 real(kind=rp) :: work(n**3), work2(n**3)
707 integer :: i, j, l
708 integer :: ii, jj
709
710 do j = 1, nn
711 do i = 1, n
712 ii = i + n * (j - 1)
713 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
714 + a(i,2) * u(2 + n * (j - 1)) &
715 + a(i,3) * u(3 + n * (j - 1)) &
716 + a(i,4) * u(4 + n * (j - 1)) &
717 + a(i,5) * u(5 + n * (j - 1)) &
718 + a(i,6) * u(6 + n * (j - 1)) &
719 + a(i,7) * u(7 + n * (j - 1)) &
720 + a(i,8) * u(8 + n * (j - 1)) &
721 + a(i,9) * u(9 + n * (j - 1)) &
722 + a(i,10) * u(10 + n * (j - 1))
723 end do
724 end do
725
726 do i = 1, n
727 do j = 1, n
728 do l = 1, n
729 ii = l + n * (j - 1) + nn * (i - 1)
730 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
731 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
732 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
733 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
734 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
735 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
736 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
737 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
738 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
739 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j)
740 end do
741 end do
742 end do
743
744 do j = 1, n
745 do i = 1, nn
746 jj = i + nn * (j - 1)
747 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
748 + work2(i + nn * (2 - 1)) * ct(2, j) &
749 + work2(i + nn * (3 - 1)) * ct(3, j) &
750 + work2(i + nn * (4 - 1)) * ct(4, j) &
751 + work2(i + nn * (5 - 1)) * ct(5, j) &
752 + work2(i + nn * (6 - 1)) * ct(6, j) &
753 + work2(i + nn * (7 - 1)) * ct(7, j) &
754 + work2(i + nn * (8 - 1)) * ct(8, j) &
755 + work2(i + nn * (9 - 1)) * ct(9, j) &
756 + work2(i + nn * (10 - 1)) * ct(10, j)
757 end do
758 end do
759
760 end subroutine tnsr3d_el_n10_cpu
761
762 subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
763 integer, parameter :: n = 9
764 integer, parameter :: nn = n**2
765 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
766 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
767 real(kind=rp) :: work(n**3), work2(n**3)
768 integer :: i, j, l
769 integer :: ii, jj
770
771 do j = 1, nn
772 do i = 1, n
773 ii = i + n * (j - 1)
774 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
775 + a(i,2) * u(2 + n * (j - 1)) &
776 + a(i,3) * u(3 + n * (j - 1)) &
777 + a(i,4) * u(4 + n * (j - 1)) &
778 + a(i,5) * u(5 + n * (j - 1)) &
779 + a(i,6) * u(6 + n * (j - 1)) &
780 + a(i,7) * u(7 + n * (j - 1)) &
781 + a(i,8) * u(8 + n * (j - 1)) &
782 + a(i,9) * u(9 + n * (j - 1))
783 end do
784 end do
785
786 do i = 1, n
787 do j = 1, n
788 do l = 1, n
789 ii = l + n * (j - 1) + nn * (i - 1)
790 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
791 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
792 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
793 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
794 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
795 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
796 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
797 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
798 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j)
799 end do
800 end do
801 end do
802
803 do j = 1, n
804 do i = 1, nn
805 jj = i + nn * (j - 1)
806 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
807 + work2(i + nn * (2 - 1)) * ct(2, j) &
808 + work2(i + nn * (3 - 1)) * ct(3, j) &
809 + work2(i + nn * (4 - 1)) * ct(4, j) &
810 + work2(i + nn * (5 - 1)) * ct(5, j) &
811 + work2(i + nn * (6 - 1)) * ct(6, j) &
812 + work2(i + nn * (7 - 1)) * ct(7, j) &
813 + work2(i + nn * (8 - 1)) * ct(8, j) &
814 + work2(i + nn * (9 - 1)) * ct(9, j)
815 end do
816 end do
817
818 end subroutine tnsr3d_el_n9_cpu
819
820 subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
821 integer, parameter :: n = 8
822 integer, parameter :: nn = n**2
823 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
824 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
825 real(kind=rp) :: work(n**3), work2(n**3)
826 integer :: i, j, l
827 integer :: ii, jj
828
829 do j = 1, nn
830 do i = 1, n
831 ii = i + n * (j - 1)
832 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
833 + a(i,2) * u(2 + n * (j - 1)) &
834 + a(i,3) * u(3 + n * (j - 1)) &
835 + a(i,4) * u(4 + n * (j - 1)) &
836 + a(i,5) * u(5 + n * (j - 1)) &
837 + a(i,6) * u(6 + n * (j - 1)) &
838 + a(i,7) * u(7 + n * (j - 1)) &
839 + a(i,8) * u(8 + n * (j - 1))
840 end do
841 end do
842
843 do i = 1, n
844 do j = 1, n
845 do l = 1, n
846 ii = l + n * (j - 1) + nn * (i - 1)
847 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
848 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
849 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
850 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
851 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
852 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
853 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
854 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j)
855 end do
856 end do
857 end do
858
859 do j = 1, n
860 do i = 1, nn
861 jj = i + nn * (j - 1)
862 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
863 + work2(i + nn * (2 - 1)) * ct(2, j) &
864 + work2(i + nn * (3 - 1)) * ct(3, j) &
865 + work2(i + nn * (4 - 1)) * ct(4, j) &
866 + work2(i + nn * (5 - 1)) * ct(5, j) &
867 + work2(i + nn * (6 - 1)) * ct(6, j) &
868 + work2(i + nn * (7 - 1)) * ct(7, j) &
869 + work2(i + nn * (8 - 1)) * ct(8, j)
870 end do
871 end do
872
873 end subroutine tnsr3d_el_n8_cpu
874
875 subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
876 integer, parameter :: n = 7
877 integer, parameter :: nn = n**2
878 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
879 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
880 real(kind=rp) :: work(n**3), work2(n**3)
881 integer :: i, j, l
882 integer :: ii, jj
883
884 do j = 1, nn
885 do i = 1, n
886 ii = i + n * (j - 1)
887 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
888 + a(i,2) * u(2 + n * (j - 1)) &
889 + a(i,3) * u(3 + n * (j - 1)) &
890 + a(i,4) * u(4 + n * (j - 1)) &
891 + a(i,5) * u(5 + n * (j - 1)) &
892 + a(i,6) * u(6 + n * (j - 1)) &
893 + a(i,7) * u(7 + n * (j - 1))
894 end do
895 end do
896
897 do i = 1, n
898 do j = 1, n
899 do l = 1, n
900 ii = l + n * (j - 1) + nn * (i - 1)
901 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
902 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
903 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
904 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
905 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
906 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
907 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j)
908 end do
909 end do
910 end do
911
912 do j = 1, n
913 do i = 1, nn
914 jj = i + nn * (j - 1)
915 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
916 + work2(i + nn * (2 - 1)) * ct(2, j) &
917 + work2(i + nn * (3 - 1)) * ct(3, j) &
918 + work2(i + nn * (4 - 1)) * ct(4, j) &
919 + work2(i + nn * (5 - 1)) * ct(5, j) &
920 + work2(i + nn * (6 - 1)) * ct(6, j) &
921 + work2(i + nn * (7 - 1)) * ct(7, j)
922 end do
923 end do
924
925 end subroutine tnsr3d_el_n7_cpu
926
927 subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
928 integer, parameter :: n = 6
929 integer, parameter :: nn = n**2
930 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
931 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
932 real(kind=rp) :: work(n**3), work2(n**3)
933 integer :: i, j, l
934 integer :: ii, jj
935
936 do j = 1, nn
937 do i = 1, n
938 ii = i + n * (j - 1)
939 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
940 + a(i,2) * u(2 + n * (j - 1)) &
941 + a(i,3) * u(3 + n * (j - 1)) &
942 + a(i,4) * u(4 + n * (j - 1)) &
943 + a(i,5) * u(5 + n * (j - 1)) &
944 + a(i,6) * u(6 + n * (j - 1))
945 end do
946 end do
947
948 do i = 1, n
949 do j = 1, n
950 do l = 1, n
951 ii = l + n * (j - 1) + nn * (i - 1)
952 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
953 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
954 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
955 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
956 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
957 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j)
958 end do
959 end do
960 end do
961
962 do j = 1, n
963 do i = 1, nn
964 jj = i + nn * (j - 1)
965 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
966 + work2(i + nn * (2 - 1)) * ct(2, j) &
967 + work2(i + nn * (3 - 1)) * ct(3, j) &
968 + work2(i + nn * (4 - 1)) * ct(4, j) &
969 + work2(i + nn * (5 - 1)) * ct(5, j) &
970 + work2(i + nn * (6 - 1)) * ct(6, j)
971 end do
972 end do
973
974 end subroutine tnsr3d_el_n6_cpu
975
976 subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
977 integer, parameter :: n = 5
978 integer, parameter :: nn = n**2
979 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
980 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
981 real(kind=rp) :: work(n**3), work2(n**3)
982 integer :: i, j, l
983 integer :: ii, jj
984
985 do j = 1, nn
986 do i = 1, n
987 ii = i + n * (j - 1)
988 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
989 + a(i,2) * u(2 + n * (j - 1)) &
990 + a(i,3) * u(3 + n * (j - 1)) &
991 + a(i,4) * u(4 + n * (j - 1)) &
992 + a(i,5) * u(5 + n * (j - 1))
993 end do
994 end do
995
996 do i = 1, n
997 do j = 1, n
998 do l = 1, n
999 ii = l + n * (j - 1) + nn * (i - 1)
1000 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1001 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1002 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
1003 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
1004 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j)
1005 end do
1006 end do
1007 end do
1008
1009 do j = 1, n
1010 do i = 1, nn
1011 jj = i + nn * (j - 1)
1012 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1013 + work2(i + nn * (2 - 1)) * ct(2, j) &
1014 + work2(i + nn * (3 - 1)) * ct(3, j) &
1015 + work2(i + nn * (4 - 1)) * ct(4, j) &
1016 + work2(i + nn * (5 - 1)) * ct(5, j)
1017 end do
1018 end do
1019
1020 end subroutine tnsr3d_el_n5_cpu
1021
1022 subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
1023 integer, parameter :: n = 4
1024 integer, parameter :: nn = n**2
1025 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1026 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1027 real(kind=rp) :: work(n**3), work2(n**3)
1028 integer :: i, j, l
1029 integer :: ii, jj
1030
1031 do j = 1, nn
1032 do i = 1, n
1033 ii = i + n * (j - 1)
1034 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1035 + a(i,2) * u(2 + n * (j - 1)) &
1036 + a(i,3) * u(3 + n * (j - 1)) &
1037 + a(i,4) * u(4 + n * (j - 1))
1038 end do
1039 end do
1040
1041 do i = 1, n
1042 do j = 1, n
1043 do l = 1, n
1044 ii = l + n * (j - 1) + nn * (i - 1)
1045 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1046 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1047 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
1048 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j)
1049 end do
1050 end do
1051 end do
1052
1053 do j = 1, n
1054 do i = 1, nn
1055 jj = i + nn * (j - 1)
1056 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1057 + work2(i + nn * (2 - 1)) * ct(2, j) &
1058 + work2(i + nn * (3 - 1)) * ct(3, j) &
1059 + work2(i + nn * (4 - 1)) * ct(4, j)
1060 end do
1061 end do
1062
1063 end subroutine tnsr3d_el_n4_cpu
1064
1065 subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
1066 integer, parameter :: n = 3
1067 integer, parameter :: nn = n**2
1068 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1069 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1070 real(kind=rp) :: work(n**3), work2(n**3)
1071 integer :: i, j, l
1072 integer :: ii, jj
1073
1074 do j = 1, nn
1075 do i = 1, n
1076 ii = i + n * (j - 1)
1077 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1078 + a(i,2) * u(2 + n * (j - 1)) &
1079 + a(i,3) * u(3 + n * (j - 1))
1080 end do
1081 end do
1082
1083 do i = 1, n
1084 do j = 1, n
1085 do l = 1, n
1086 ii = l + n * (j - 1) + nn * (i - 1)
1087 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1088 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1089 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j)
1090 end do
1091 end do
1092 end do
1093
1094 do j = 1, n
1095 do i = 1, nn
1096 jj = i + nn * (j - 1)
1097 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1098 + work2(i + nn * (2 - 1)) * ct(2, j) &
1099 + work2(i + nn * (3 - 1)) * ct(3, j)
1100 end do
1101 end do
1102
1103 end subroutine tnsr3d_el_n3_cpu
1104
1105 subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
1106 integer, parameter :: n = 2
1107 integer, parameter :: nn = n**2
1108 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1109 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1110 real(kind=rp) :: work(n**3), work2(n**3)
1111 integer :: i, j, l
1112 integer :: ii, jj
1113
1114 do j = 1, nn
1115 do i = 1, n
1116 ii = i + n * (j - 1)
1117 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1118 + a(i,2) * u(2 + n * (j - 1))
1119 end do
1120 end do
1121
1122 do i = 1, n
1123 do j = 1, n
1124 do l = 1, n
1125 ii = l + n * (j - 1) + nn * (i - 1)
1126 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1127 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j)
1128 end do
1129 end do
1130 end do
1131
1132 do j = 1, n
1133 do i = 1, nn
1134 jj = i + nn * (j - 1)
1135 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1136 + work2(i + nn * (2 - 1)) * ct(2, j)
1137 end do
1138 end do
1139
1140 end subroutine tnsr3d_el_n2_cpu
1141
1142 subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
1143 integer, intent(in) :: nv, nu, nelv
1144 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
1145 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1146 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1147
1148 if (nu .eq. 2 .and. nv .eq. 4) then
1149 call tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
1150 else if (nu .eq. 4) then
1151 call tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
1152 else
1153 call tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
1154 end if
1155
1156 end subroutine tnsr3d_cpu
1157
1158 subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
1159 integer, intent(in) :: nv, nu, nelv
1160 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
1161 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1162 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1163 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1164 integer :: ie, i, j, k, l, ii, jj
1165 integer :: nunu, nvnu, nvnv
1166
1167 nvnu = nv * nu
1168 nunu = nu * nu
1169 nvnv = nv * nv
1170
1171 do ie = 1,nelv
1172 do j = 1, nunu
1173 do i = 1, nv
1174 ii = i + nv * (j - 1)
1175 tmp = 0.0_rp
1176 do k = 1, nu
1177 tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
1178 end do
1179 work(ii) = tmp
1180 end do
1181 end do
1182
1183 do i = 1, nu
1184 do j = 1, nv
1185 do l = 1, nv
1186 ii = l + nv * (j - 1) + nvnv * (i - 1)
1187 tmp = 0.0_rp
1188 do k = 1, nu
1189 jj = l + nv * (k - 1) + nvnu * (i - 1)
1190 tmp = tmp + work(jj) * bt(k,j)
1191 end do
1192 work2(ii) = tmp
1193 end do
1194 end do
1195 end do
1196
1197 do j = 1, nv
1198 do i = 1, nvnv
1199 jj = i + nvnv * (j - 1)
1200 tmp = 0.0_rp
1201 do k = 1, nu
1202 ii = i + nvnv * (k - 1)
1203 tmp = tmp + work2(ii) * ct(k, j)
1204 end do
1205 v(jj, ie) = tmp
1206 end do
1207 end do
1208 end do
1209
1210 end subroutine tnsr3d_nvnu_cpu
1211
1212 subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
1213 integer, parameter :: nu = 2
1214 integer, parameter :: nv = 4
1215 integer, parameter :: nunu = 4
1216 integer, parameter :: nvnu = 8
1217 integer, parameter :: nvnv = 16
1218 integer, intent(in) :: nelv
1219 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
1220 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1221 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1222 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1223 integer :: ie, i, j, k, l, ii, jj
1224
1225 do ie = 1,nelv
1226 do j = 1, nunu
1227 do i = 1, nv
1228 ii = i + nv * (j - 1)
1229 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1230 + a(i,2) * u(2 + nu * (j - 1), ie)
1231 end do
1232 end do
1233
1234 do i = 1, nu
1235 do j = 1, nv
1236 do l = 1, nv
1237 ii = l + nv * (j - 1) + nvnv * (i - 1)
1238 tmp = 0.0_rp
1239 do k = 1, nu
1240 jj = l + nv * (k - 1) + nvnu * (i - 1)
1241 tmp = tmp + work(jj) * bt(k,j)
1242 end do
1243 work2(ii) = tmp
1244 end do
1245 end do
1246 end do
1247
1248 do j = 1, nv
1249 do i = 1, nvnv
1250 jj = i + nvnv * (j - 1)
1251 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1252 + work2(i + nvnv * (2 - 1)) * ct(2, j)
1253 end do
1254 end do
1255 end do
1256
1257 end subroutine tnsr3d_nu2nv4_cpu
1258
1259 subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
1260 integer, parameter :: nu = 4
1261 integer, parameter :: nunu = 16
1262 integer, intent(in) :: nv, nelv
1263 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
1264 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1265 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1266 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1267 integer :: ie, i, j, k, l, ii, jj
1268 integer :: nvnu, nvnv
1269
1270 nvnu = nv * nu
1271 nvnv = nv * nv
1272
1273 do ie = 1,nelv
1274 do j = 1, nunu
1275 do i = 1, nv
1276 ii = i + nv * (j - 1)
1277 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1278 + a(i,2) * u(2 + nu * (j - 1), ie) &
1279 + a(i,3) * u(3 + nu * (j - 1), ie) &
1280 + a(i,4) * u(4 + nu * (j - 1), ie)
1281 end do
1282 end do
1283
1284 do i = 1, nu
1285 do j = 1, nv
1286 do l = 1, nv
1287 ii = l + nv * (j - 1) + nvnv * (i - 1)
1288 tmp = 0.0_rp
1289 do k = 1, nu
1290 jj = l + nv * (k - 1) + nvnu * (i - 1)
1291 tmp = tmp + work(jj) * bt(k,j)
1292 end do
1293 work2(ii) = tmp
1294 end do
1295 end do
1296 end do
1297
1298 do j = 1, nv
1299 do i = 1, nvnv
1300 jj = i + nvnv * (j - 1)
1301 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1302 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1303 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1304 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1305 end do
1306 end do
1307 end do
1308
1309 end subroutine tnsr3d_nu4_cpu
1310
1311 subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
1312 integer, intent(in) :: nv, nu, nelv
1313 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1314 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1315
1316 if (nu .eq. 4 .and. nv .eq. 2) then
1317 call tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
1318 else
1319 call tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
1320 end if
1321
1322 end subroutine tnsr1_3d_cpu
1323
1324 subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
1325 integer, intent(in) :: nv, nu, nelv
1326 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1327 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1328 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1329 integer :: e, e0, ee, es, iu, iv, nu3, nv3
1330 integer :: i, j, k, l, ii, jj, kk
1331 integer :: nunu, nvnu, nvnv
1332 real(kind=rp) :: tmp
1333
1334 nvnu = nv * nu
1335 nunu = nu * nu
1336 nvnv = nv * nv
1337
1338 e0 = 1
1339 es = 1
1340 ee = nelv
1341
1342 if (nv.gt.nu) then
1343 e0 = nelv
1344 es = -1
1345 ee = 1
1346 endif
1347
1348 nu3 = nu**3
1349 nv3 = nv**3
1350
1351 do e = e0,ee,es
1352 iu = (e-1)*nu3
1353 iv = (e-1)*nv3
1354
1355 do j = 1, nunu
1356 do i = 1, nv
1357 ii = i + nv * (j - 1)
1358 tmp = 0.0_rp
1359 do k = 1, nu
1360 kk = k + nu * (j - 1) + iu
1361 tmp = tmp + a(i,k) * v(kk)
1362 end do
1363 work(ii) = tmp
1364 end do
1365 end do
1366
1367 do i = 1, nu
1368 do j = 1, nv
1369 do l = 1, nv
1370 ii = l + nv * (j - 1) + nvnv * (i - 1)
1371 tmp = 0.0_rp
1372 do k = 1, nu
1373 jj = l + nv * (k - 1) + nvnu * (i - 1)
1374 tmp = tmp + work(jj) * bt(k,j)
1375 end do
1376 work2(ii) = tmp
1377 end do
1378 end do
1379 end do
1380
1381 do j = 1, nv
1382 do i = 1, nvnv
1383 jj = i + nvnv * (j - 1) + iv
1384 tmp = 0.0_rp
1385 do k = 1, nu
1386 ii = i + nvnv * (k - 1)
1387 tmp = tmp + work2(ii) * ct(k, j)
1388 end do
1389 v(jj) = tmp
1390 end do
1391 end do
1392 end do
1393
1394 end subroutine tnsr1_3d_nvnu_cpu
1395
1396 subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
1397 integer, parameter :: nu = 4
1398 integer, parameter :: nv = 2
1399 integer, parameter :: nunu = 16
1400 integer, parameter :: nvnu = 8
1401 integer, parameter :: nvnv = 4
1402 integer, parameter :: nununu = 64
1403 integer, parameter :: nvnvnv = 8
1404 integer, intent(in) :: nelv
1405 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1406 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1407 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1408 integer :: e, iu, iv
1409 integer :: i, j, k, l, ii, jj
1410 real(kind=rp) :: tmp
1411
1412 do e = 1,nelv
1413 iu = (e-1)*nununu
1414 iv = (e-1)*nvnvnv
1415
1416 do j = 1, nunu
1417 do i = 1, nv
1418 ii = i + nv * (j - 1)
1419 work(ii) = a(i,1) * v(1 + nu * (j - 1) + iu) &
1420 + a(i,2) * v(2 + nu * (j - 1) + iu) &
1421 + a(i,3) * v(3 + nu * (j - 1) + iu) &
1422 + a(i,4) * v(4 + nu * (j - 1) + iu)
1423 end do
1424 end do
1425
1426 do i = 1, nu
1427 do j = 1, nv
1428 do l = 1, nv
1429 ii = l + nv * (j - 1) + nvnv * (i - 1)
1430 tmp = 0.0_rp
1431 do k = 1, nu
1432 jj = l + nv * (k - 1) + nvnu * (i - 1)
1433 tmp = tmp + work(jj) * bt(k,j)
1434 end do
1435 work2(ii) = tmp
1436 end do
1437 end do
1438 end do
1439
1440 do j = 1, nv
1441 do i = 1, nvnv
1442 jj = i + nvnv * (j - 1) + iv
1443 v(jj) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1444 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1445 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1446 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1447
1448 end do
1449 end do
1450 end do
1451
1452 end subroutine tnsr1_3d_nu4nv2_cpu
1453
1454end module tensor_cpu
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine tnsr3d_el_1_12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n5_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_8_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_6_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n13_cpu(v, u, a, bt, ct)
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n3_cpu(v, u, a, bt, ct)
subroutine tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n7_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n11_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_nu_cpu(v, u, nu, a, bt, ct)
subroutine tnsr3d_el_n9_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n6_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n2_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_4_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n4_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_10_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n_cpu(v, u, a, bt, ct, n)
subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n14_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n10_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n8_cpu(v, u, a, bt, ct)