Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor_cpu.f90
Go to the documentation of this file.
2 use num_types, only : rp
3 use mxm_wrapper, only : mxm
4 implicit none
5 private
6
8
9contains
10
11 subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
12 integer, intent(in) :: nv, nu
13 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
14 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
15 real(kind=rp) :: work(0:nu**2*nv)
16
17 call mxm(a, nv, u, nu, work, nu)
18 call mxm(work, nv, bt, nu, v, nv)
19
20 end subroutine tnsr2d_el_cpu
21
22 subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
23 integer, intent(in) :: nv, nu
24 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
25 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
26
27 if (nv .eq. nu) then
28 select case (nv)
29 case (14)
30 call tnsr3d_el_n14_cpu(v, u, a, bt, ct)
31 case (13)
32 call tnsr3d_el_n13_cpu(v, u, a, bt, ct)
33 case (12)
34 call tnsr3d_el_n12_cpu(v, u, a, bt, ct)
35 case (11)
36 call tnsr3d_el_n11_cpu(v, u, a, bt, ct)
37 case (10)
38 call tnsr3d_el_n10_cpu(v, u, a, bt, ct)
39 case (9)
40 call tnsr3d_el_n9_cpu(v, u, a, bt, ct)
41 case (8)
42 call tnsr3d_el_n8_cpu(v, u, a, bt, ct)
43 case (7)
44 call tnsr3d_el_n7_cpu(v, u, a, bt, ct)
45 case (6)
46 call tnsr3d_el_n6_cpu(v, u, a, bt, ct)
47 case (5)
48 call tnsr3d_el_n5_cpu(v, u, a, bt, ct)
49 case (4)
50 call tnsr3d_el_n4_cpu(v, u, a, bt, ct)
51 case (3)
52 call tnsr3d_el_n3_cpu(v, u, a, bt, ct)
53 case (2)
54 call tnsr3d_el_n2_cpu(v, u, a, bt, ct)
55 case default
56 call tnsr3d_el_n_cpu(v, u, a, bt, ct, nv)
57 end select
58 else if (nv .eq. 1) then
59 select case (nu)
60 case (4)
61 call tnsr3d_el_1_4_cpu(v, u, a, bt, ct)
62 case (6)
63 call tnsr3d_el_1_6_cpu(v, u, a, bt, ct)
64 case (8)
65 call tnsr3d_el_1_8_cpu(v, u, a, bt, ct)
66 case (10)
67 call tnsr3d_el_1_10_cpu(v, u, a, bt, ct)
68 case (12)
69 call tnsr3d_el_1_12_cpu(v, u, a, bt, ct)
70 case default
71 call tnsr3d_el_1_nu_cpu(v, u, nu, a, bt, ct)
72 end select
73 else
74 call tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
75 end if
76
77 end subroutine tnsr3d_el_cpu
78
79 subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
80 integer, intent(in) :: nv, nu
81 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
82 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
83 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
84 real(kind=rp) :: tmp
85 integer :: i, j, k, l, nunu, nvnu, nvnv
86 integer :: ii, jj
87 nvnu = nv * nu
88 nunu = nu * nu
89 nvnv = nv * nv
90
91 do j = 1, nunu
92 do i = 1, nv
93 ii = i + nv * (j - 1)
94 tmp = 0.0_rp
95 do k = 1, nu
96 tmp = tmp + a(i,k) * u(k + nu * (j - 1))
97 end do
98 work(ii) = tmp
99 end do
100 end do
101
102 do i = 1, nu
103 do j = 1, nv
104 do l = 1, nv
105 ii = l + nv * (j - 1) + nvnv * (i - 1)
106 tmp = 0.0_rp
107 do k = 1, nu
108 jj = l + nv * (k - 1) + nvnu * (i - 1)
109 tmp = tmp + work(jj) * bt(k,j)
110 end do
111 work2(ii) = tmp
112 end do
113 end do
114 end do
115
116 do j = 1, nv
117 do i = 1, nvnv
118 jj = i + nvnv * (j - 1)
119 tmp = 0.0_rp
120 do k = 1, nu
121 ii = i + nvnv * (k - 1)
122 tmp = tmp + work2(ii) * ct(k, j)
123 end do
124 v(jj) = tmp
125 end do
126 end do
127
128 end subroutine tnsr3d_el_nvnu_cpu
129
130 subroutine tnsr3d_el_1_nu_cpu(v, u, nu, A, Bt, Ct)
131 integer, intent(in) :: nu
132 real(kind=rp), intent(inout) :: v(1)
133 real(kind=rp), intent(in) :: u(nu*nu*nu)
134 real(kind=rp), intent(in) :: a(1,nu),bt(nu, 1),ct(nu,1)
135 real(kind=rp) :: work(nu**2), work2(nu)
136 real(kind=rp) :: tmp
137 integer :: i, j, k, l, nunu
138 integer :: ii, jj
139 nunu = nu * nu
140
141 do j = 1, nunu
142 tmp = 0.0_rp
143 do k = 1, nu
144 tmp = tmp + a(1,k) * u(k + nu * (j - 1))
145 end do
146 work(j) = tmp
147 end do
148
149 do i = 1, nu
150 tmp = 0.0_rp
151 do k = 1, nu
152 jj = k + nu * (i - 1)
153 tmp = tmp + work(jj) * bt(k,1)
154 end do
155 work2(i) = tmp
156 end do
157
158 tmp = 0.0_rp
159 do k = 1, nu
160 tmp = tmp + work2(k) * ct(k, 1)
161 end do
162 v(1) = tmp
163
164 end subroutine tnsr3d_el_1_nu_cpu
165
166 subroutine tnsr3d_el_1_4_cpu(v, u, A, Bt, Ct)
167 integer, parameter :: n = 4
168 integer, parameter :: nn = n**2
169 real(kind=rp), intent(inout) :: v(1)
170 real(kind=rp), intent(in) :: u(n*n*n)
171 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
172 real(kind=rp) :: work(n**2), work2(n)
173 integer :: i, j, l
174 integer :: ii, jj
175
176 do j = 1, nn
177 work(j) = a(1,1) * u(1 + n * (j - 1)) &
178 + a(1,2) * u(2 + n * (j - 1)) &
179 + a(1,3) * u(3 + n * (j - 1)) &
180 + a(1,4) * u(4 + n * (j - 1))
181 end do
182
183 do i = 1, n
184 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
185 + work(2 + n * (i - 1)) * bt(2,1) &
186 + work(3 + n * (i - 1)) * bt(3,1) &
187 + work(4 + n * (i - 1)) * bt(4,1)
188 end do
189
190 v(1) = work2(1) * ct(1, 1) &
191 + work2(2) * ct(2, 1) &
192 + work2(3) * ct(3, 1) &
193 + work2(4) * ct(4, 1)
194
195 end subroutine tnsr3d_el_1_4_cpu
196
197 subroutine tnsr3d_el_1_6_cpu(v, u, A, Bt, Ct)
198 integer, parameter :: n = 6
199 integer, parameter :: nn = n**2
200 real(kind=rp), intent(inout) :: v(1)
201 real(kind=rp), intent(in) :: u(n*n*n)
202 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
203 real(kind=rp) :: work(n**2), work2(n)
204 integer :: i, j, l
205 integer :: ii, jj
206
207 do j = 1, nn
208 work(j) = a(1,1) * u(1 + n * (j - 1)) &
209 + a(1,2) * u(2 + n * (j - 1)) &
210 + a(1,3) * u(3 + n * (j - 1)) &
211 + a(1,4) * u(4 + n * (j - 1)) &
212 + a(1,5) * u(5 + n * (j - 1)) &
213 + a(1,6) * u(6 + n * (j - 1))
214 end do
215
216 do i = 1, n
217 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
218 + work(2 + n * (i - 1)) * bt(2,1) &
219 + work(3 + n * (i - 1)) * bt(3,1) &
220 + work(4 + n * (i - 1)) * bt(4,1) &
221 + work(5 + n * (i - 1)) * bt(5,1) &
222 + work(6 + n * (i - 1)) * bt(6,1)
223 end do
224
225 v(1) = work2(1) * ct(1, 1) &
226 + work2(2) * ct(2, 1) &
227 + work2(3) * ct(3, 1) &
228 + work2(4) * ct(4, 1) &
229 + work2(5) * ct(5, 1) &
230 + work2(6) * ct(6, 1)
231
232 end subroutine tnsr3d_el_1_6_cpu
233
234 subroutine tnsr3d_el_1_8_cpu(v, u, A, Bt, Ct)
235 integer, parameter :: n = 8
236 integer, parameter :: nn = n**2
237 real(kind=rp), intent(inout) :: v(1)
238 real(kind=rp), intent(in) :: u(n*n*n)
239 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
240 real(kind=rp) :: work(n**2), work2(n)
241 integer :: i, j, l
242 integer :: ii, jj
243
244 do j = 1, nn
245 work(j) = a(1,1) * u(1 + n * (j - 1)) &
246 + a(1,2) * u(2 + n * (j - 1)) &
247 + a(1,3) * u(3 + n * (j - 1)) &
248 + a(1,4) * u(4 + n * (j - 1)) &
249 + a(1,5) * u(5 + n * (j - 1)) &
250 + a(1,6) * u(6 + n * (j - 1)) &
251 + a(1,7) * u(7 + n * (j - 1)) &
252 + a(1,8) * u(8 + n * (j - 1))
253 end do
254
255 do i = 1, n
256 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
257 + work(2 + n * (i - 1)) * bt(2,1) &
258 + work(3 + n * (i - 1)) * bt(3,1) &
259 + work(4 + n * (i - 1)) * bt(4,1) &
260 + work(5 + n * (i - 1)) * bt(5,1) &
261 + work(6 + n * (i - 1)) * bt(6,1) &
262 + work(7 + n * (i - 1)) * bt(7,1) &
263 + work(8 + n * (i - 1)) * bt(8,1)
264 end do
265
266 v(1) = work2(1) * ct(1, 1) &
267 + work2(2) * ct(2, 1) &
268 + work2(3) * ct(3, 1) &
269 + work2(4) * ct(4, 1) &
270 + work2(5) * ct(5, 1) &
271 + work2(6) * ct(6, 1) &
272 + work2(7) * ct(7, 1) &
273 + work2(8) * ct(8, 1)
274
275
276 end subroutine tnsr3d_el_1_8_cpu
277
278
279 subroutine tnsr3d_el_1_10_cpu(v, u, A, Bt, Ct)
280 integer, parameter :: n = 10
281 integer, parameter :: nn = n**2
282 real(kind=rp), intent(inout) :: v(1)
283 real(kind=rp), intent(in) :: u(n*n*n)
284 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
285 real(kind=rp) :: work(n**2), work2(n)
286 integer :: i, j, l
287 integer :: ii, jj
288
289 do j = 1, nn
290 work(j) = a(1,1) * u(1 + n * (j - 1)) &
291 + a(1,2) * u(2 + n * (j - 1)) &
292 + a(1,3) * u(3 + n * (j - 1)) &
293 + a(1,4) * u(4 + n * (j - 1)) &
294 + a(1,5) * u(5 + n * (j - 1)) &
295 + a(1,6) * u(6 + n * (j - 1)) &
296 + a(1,7) * u(7 + n * (j - 1)) &
297 + a(1,8) * u(8 + n * (j - 1)) &
298 + a(1,9) * u(9 + n * (j - 1)) &
299 + a(1,10) * u(10 + n * (j - 1))
300 end do
301
302 do i = 1, n
303 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
304 + work(2 + n * (i - 1)) * bt(2,1) &
305 + work(3 + n * (i - 1)) * bt(3,1) &
306 + work(4 + n * (i - 1)) * bt(4,1) &
307 + work(5 + n * (i - 1)) * bt(5,1) &
308 + work(6 + n * (i - 1)) * bt(6,1) &
309 + work(7 + n * (i - 1)) * bt(7,1) &
310 + work(8 + n * (i - 1)) * bt(8,1) &
311 + work(9 + n * (i - 1)) * bt(9,1) &
312 + work(10 + n * (i - 1)) * bt(10,1)
313 end do
314
315 v(1) = work2(1) * ct(1, 1) &
316 + work2(2) * ct(2, 1) &
317 + work2(3) * ct(3, 1) &
318 + work2(4) * ct(4, 1) &
319 + work2(5) * ct(5, 1) &
320 + work2(6) * ct(6, 1) &
321 + work2(7) * ct(7, 1) &
322 + work2(8) * ct(8, 1) &
323 + work2(9) * ct(9, 1) &
324 + work2(10) * ct(10, 1)
325
326 end subroutine tnsr3d_el_1_10_cpu
327
328
329 subroutine tnsr3d_el_1_12_cpu(v, u, A, Bt, Ct)
330 integer, parameter :: n = 12
331 integer, parameter :: nn = n**2
332 real(kind=rp), intent(inout) :: v(1)
333 real(kind=rp), intent(in) :: u(n*n*n)
334 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
335 real(kind=rp) :: work(n**2), work2(n)
336 integer :: i, j, l
337 integer :: ii, jj
338
339 do j = 1, nn
340 work(j) = a(1,1) * u(1 + n * (j - 1)) &
341 + a(1,2) * u(2 + n * (j - 1)) &
342 + a(1,3) * u(3 + n * (j - 1)) &
343 + a(1,4) * u(4 + n * (j - 1)) &
344 + a(1,5) * u(5 + n * (j - 1)) &
345 + a(1,6) * u(6 + n * (j - 1)) &
346 + a(1,7) * u(7 + n * (j - 1)) &
347 + a(1,8) * u(8 + n * (j - 1)) &
348 + a(1,9) * u(9 + n * (j - 1)) &
349 + a(1,10) * u(10 + n * (j - 1)) &
350 + a(1,11) * u(11 + n * (j - 1)) &
351 + a(1,12) * u(12 + n * (j - 1))
352 end do
353
354 do i = 1, n
355 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
356 + work(2 + n * (i - 1)) * bt(2,1) &
357 + work(3 + n * (i - 1)) * bt(3,1) &
358 + work(4 + n * (i - 1)) * bt(4,1) &
359 + work(5 + n * (i - 1)) * bt(5,1) &
360 + work(6 + n * (i - 1)) * bt(6,1) &
361 + work(7 + n * (i - 1)) * bt(7,1) &
362 + work(8 + n * (i - 1)) * bt(8,1) &
363 + work(9 + n * (i - 1)) * bt(9,1) &
364 + work(10 + n * (i - 1)) * bt(10,1) &
365 + work(11 + n * (i - 1)) * bt(11,1) &
366 + work(12 + n * (i - 1)) * bt(12,1)
367 end do
368
369 v(1) = work2(1) * ct(1, 1) &
370 + work2(2) * ct(2, 1) &
371 + work2(3) * ct(3, 1) &
372 + work2(4) * ct(4, 1) &
373 + work2(5) * ct(5, 1) &
374 + work2(6) * ct(6, 1) &
375 + work2(7) * ct(7, 1) &
376 + work2(8) * ct(8, 1) &
377 + work2(9) * ct(9, 1) &
378 + work2(10) * ct(10, 1) &
379 + work2(11) * ct(11, 1) &
380 + work2(12) * ct(12, 1)
381
382 end subroutine tnsr3d_el_1_12_cpu
383
384
385
386 subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
387 integer, intent(in) :: n
388 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
389 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
390 real(kind=rp) :: work(n**3), work2(n**3), tmp
391 integer :: i, j, l, k
392 integer :: ii, jj, nn
393
394 nn = n**2
395
396 do j = 1, nn
397 do i = 1, n
398 ii = i + n * (j - 1)
399 tmp = 0.0_rp
400 do k = 1, n
401 tmp = tmp + a(i,k) * u(k + n * (j - 1))
402 end do
403 work(ii) = tmp
404 end do
405 end do
406
407 do i = 1, n
408 do j = 1, n
409 do l = 1, n
410 ii = l + n * (j - 1) + nn * (i - 1)
411 tmp = 0.0_rp
412 do k = 1, n
413 tmp = tmp + work(l + n * (k - 1) + nn * (i - 1)) * bt(k,j)
414 end do
415 work2(ii) = tmp
416 end do
417 end do
418 end do
419
420 do j = 1, n
421 do i = 1, nn
422 jj = i + nn * (j - 1)
423 tmp = 0.0_rp
424 do k = 1, n
425 tmp = tmp + work2(i + nn * (k - 1)) * ct(k, j)
426 end do
427 v(jj) = tmp
428 end do
429 end do
430
431 end subroutine tnsr3d_el_n_cpu
432
433 subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
434 integer, parameter :: n = 14
435 integer, parameter :: nn = n**2
436 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
437 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
438 real(kind=rp) :: work(n**3), work2(n**3)
439 integer :: i, j, l
440 integer :: ii, jj
441
442 do j = 1, nn
443 do i = 1, n
444 ii = i + n * (j - 1)
445 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
446 + a(i,2) * u(2 + n * (j - 1)) &
447 + a(i,3) * u(3 + n * (j - 1)) &
448 + a(i,4) * u(4 + n * (j - 1)) &
449 + a(i,5) * u(5 + n * (j - 1)) &
450 + a(i,6) * u(6 + n * (j - 1)) &
451 + a(i,7) * u(7 + n * (j - 1)) &
452 + a(i,8) * u(8 + n * (j - 1)) &
453 + a(i,9) * u(9 + n * (j - 1)) &
454 + a(i,10) * u(10 + n * (j - 1)) &
455 + a(i,11) * u(11 + n * (j - 1)) &
456 + a(i,12) * u(12 + n * (j - 1)) &
457 + a(i,13) * u(13 + n * (j - 1)) &
458 + a(i,14) * u(14 + n * (j - 1))
459 end do
460 end do
461
462 do i = 1, n
463 do j = 1, n
464 do l = 1, n
465 ii = l + n * (j - 1) + nn * (i - 1)
466 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
467 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
468 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
469 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
470 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
471 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
472 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
473 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
474 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
475 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
476 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
477 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
478 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j) &
479 + work(l + n * (14 - 1) + nn * (i - 1)) * bt(14,j)
480 end do
481 end do
482 end do
483
484 do j = 1, n
485 do i = 1, nn
486 jj = i + nn * (j - 1)
487 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
488 + work2(i + nn * (2 - 1)) * ct(2, j) &
489 + work2(i + nn * (3 - 1)) * ct(3, j) &
490 + work2(i + nn * (4 - 1)) * ct(4, j) &
491 + work2(i + nn * (5 - 1)) * ct(5, j) &
492 + work2(i + nn * (6 - 1)) * ct(6, j) &
493 + work2(i + nn * (7 - 1)) * ct(7, j) &
494 + work2(i + nn * (8 - 1)) * ct(8, j) &
495 + work2(i + nn * (9 - 1)) * ct(9, j) &
496 + work2(i + nn * (10 - 1)) * ct(10, j) &
497 + work2(i + nn * (11 - 1)) * ct(11, j) &
498 + work2(i + nn * (12 - 1)) * ct(12, j) &
499 + work2(i + nn * (13 - 1)) * ct(13, j) &
500 + work2(i + nn * (14 - 1)) * ct(14, j)
501 end do
502 end do
503
504 end subroutine tnsr3d_el_n14_cpu
505
506 subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
507 integer, parameter :: n = 13
508 integer, parameter :: nn = n**2
509 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
510 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
511 real(kind=rp) :: work(n**3), work2(n**3)
512 integer :: i, j, l
513 integer :: ii, jj
514
515 do j = 1, nn
516 do i = 1, n
517 ii = i + n * (j - 1)
518 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
519 + a(i,2) * u(2 + n * (j - 1)) &
520 + a(i,3) * u(3 + n * (j - 1)) &
521 + a(i,4) * u(4 + n * (j - 1)) &
522 + a(i,5) * u(5 + n * (j - 1)) &
523 + a(i,6) * u(6 + n * (j - 1)) &
524 + a(i,7) * u(7 + n * (j - 1)) &
525 + a(i,8) * u(8 + n * (j - 1)) &
526 + a(i,9) * u(9 + n * (j - 1)) &
527 + a(i,10) * u(10 + n * (j - 1)) &
528 + a(i,11) * u(11 + n * (j - 1)) &
529 + a(i,12) * u(12 + n * (j - 1)) &
530 + a(i,13) * u(13 + n * (j - 1))
531 end do
532 end do
533
534 do i = 1, n
535 do j = 1, n
536 do l = 1, n
537 ii = l + n * (j - 1) + nn * (i - 1)
538 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
539 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
540 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
541 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
542 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
543 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
544 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
545 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
546 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
547 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
548 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
549 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
550 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j)
551 end do
552 end do
553 end do
554
555 do j = 1, n
556 do i = 1, nn
557 jj = i + nn * (j - 1)
558 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
559 + work2(i + nn * (2 - 1)) * ct(2, j) &
560 + work2(i + nn * (3 - 1)) * ct(3, j) &
561 + work2(i + nn * (4 - 1)) * ct(4, j) &
562 + work2(i + nn * (5 - 1)) * ct(5, j) &
563 + work2(i + nn * (6 - 1)) * ct(6, j) &
564 + work2(i + nn * (7 - 1)) * ct(7, j) &
565 + work2(i + nn * (8 - 1)) * ct(8, j) &
566 + work2(i + nn * (9 - 1)) * ct(9, j) &
567 + work2(i + nn * (10 - 1)) * ct(10, j) &
568 + work2(i + nn * (11 - 1)) * ct(11, j) &
569 + work2(i + nn * (12 - 1)) * ct(12, j) &
570 + work2(i + nn * (13 - 1)) * ct(13, j)
571 end do
572 end do
573
574 end subroutine tnsr3d_el_n13_cpu
575
576 subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
577 integer, parameter :: n = 12
578 integer, parameter :: nn = n**2
579 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
580 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
581 real(kind=rp) :: work(n**3), work2(n**3)
582 integer :: i, j, l
583 integer :: ii, jj
584
585 do j = 1, nn
586 do i = 1, n
587 ii = i + n * (j - 1)
588 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
589 + a(i,2) * u(2 + n * (j - 1)) &
590 + a(i,3) * u(3 + n * (j - 1)) &
591 + a(i,4) * u(4 + n * (j - 1)) &
592 + a(i,5) * u(5 + n * (j - 1)) &
593 + a(i,6) * u(6 + n * (j - 1)) &
594 + a(i,7) * u(7 + n * (j - 1)) &
595 + a(i,8) * u(8 + n * (j - 1)) &
596 + a(i,9) * u(9 + n * (j - 1)) &
597 + a(i,10) * u(10 + n * (j - 1)) &
598 + a(i,11) * u(11 + n * (j - 1)) &
599 + a(i,12) * u(12 + n * (j - 1))
600 end do
601 end do
602
603 do i = 1, n
604 do j = 1, n
605 do l = 1, n
606 ii = l + n * (j - 1) + nn * (i - 1)
607 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
608 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
609 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
610 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
611 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
612 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
613 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
614 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
615 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
616 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
617 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
618 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j)
619 end do
620 end do
621 end do
622
623 do j = 1, n
624 do i = 1, nn
625 jj = i + nn * (j - 1)
626 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
627 + work2(i + nn * (2 - 1)) * ct(2, j) &
628 + work2(i + nn * (3 - 1)) * ct(3, j) &
629 + work2(i + nn * (4 - 1)) * ct(4, j) &
630 + work2(i + nn * (5 - 1)) * ct(5, j) &
631 + work2(i + nn * (6 - 1)) * ct(6, j) &
632 + work2(i + nn * (7 - 1)) * ct(7, j) &
633 + work2(i + nn * (8 - 1)) * ct(8, j) &
634 + work2(i + nn * (9 - 1)) * ct(9, j) &
635 + work2(i + nn * (10 - 1)) * ct(10, j) &
636 + work2(i + nn * (11 - 1)) * ct(11, j) &
637 + work2(i + nn * (12 - 1)) * ct(12, j)
638 end do
639 end do
640
641 end subroutine tnsr3d_el_n12_cpu
642
643 subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
644 integer, parameter :: n = 11
645 integer, parameter :: nn = n**2
646 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
647 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
648 real(kind=rp) :: work(n**3), work2(n**3)
649 integer :: i, j, l
650 integer :: ii, jj
651
652 do j = 1, nn
653 do i = 1, n
654 ii = i + n * (j - 1)
655 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
656 + a(i,2) * u(2 + n * (j - 1)) &
657 + a(i,3) * u(3 + n * (j - 1)) &
658 + a(i,4) * u(4 + n * (j - 1)) &
659 + a(i,5) * u(5 + n * (j - 1)) &
660 + a(i,6) * u(6 + n * (j - 1)) &
661 + a(i,7) * u(7 + n * (j - 1)) &
662 + a(i,8) * u(8 + n * (j - 1)) &
663 + a(i,9) * u(9 + n * (j - 1)) &
664 + a(i,10) * u(10 + n * (j - 1)) &
665 + a(i,11) * u(11 + n * (j - 1))
666 end do
667 end do
668
669 do i = 1, n
670 do j = 1, n
671 do l = 1, n
672 ii = l + n * (j - 1) + nn * (i - 1)
673 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
674 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
675 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
676 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
677 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
678 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
679 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
680 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
681 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
682 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
683 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j)
684 end do
685 end do
686 end do
687
688 do j = 1, n
689 do i = 1, nn
690 jj = i + nn * (j - 1)
691 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
692 + work2(i + nn * (2 - 1)) * ct(2, j) &
693 + work2(i + nn * (3 - 1)) * ct(3, j) &
694 + work2(i + nn * (4 - 1)) * ct(4, j) &
695 + work2(i + nn * (5 - 1)) * ct(5, j) &
696 + work2(i + nn * (6 - 1)) * ct(6, j) &
697 + work2(i + nn * (7 - 1)) * ct(7, j) &
698 + work2(i + nn * (8 - 1)) * ct(8, j) &
699 + work2(i + nn * (9 - 1)) * ct(9, j) &
700 + work2(i + nn * (10 - 1)) * ct(10, j) &
701 + work2(i + nn * (11 - 1)) * ct(11, j)
702 end do
703 end do
704
705 end subroutine tnsr3d_el_n11_cpu
706
707 subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
708 integer, parameter :: n = 10
709 integer, parameter :: nn = n**2
710 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
711 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
712 real(kind=rp) :: work(n**3), work2(n**3)
713 integer :: i, j, l
714 integer :: ii, jj
715
716 do j = 1, nn
717 do i = 1, n
718 ii = i + n * (j - 1)
719 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
720 + a(i,2) * u(2 + n * (j - 1)) &
721 + a(i,3) * u(3 + n * (j - 1)) &
722 + a(i,4) * u(4 + n * (j - 1)) &
723 + a(i,5) * u(5 + n * (j - 1)) &
724 + a(i,6) * u(6 + n * (j - 1)) &
725 + a(i,7) * u(7 + n * (j - 1)) &
726 + a(i,8) * u(8 + n * (j - 1)) &
727 + a(i,9) * u(9 + n * (j - 1)) &
728 + a(i,10) * u(10 + n * (j - 1))
729 end do
730 end do
731
732 do i = 1, n
733 do j = 1, n
734 do l = 1, n
735 ii = l + n * (j - 1) + nn * (i - 1)
736 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
737 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
738 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
739 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
740 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
741 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
742 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
743 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
744 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
745 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j)
746 end do
747 end do
748 end do
749
750 do j = 1, n
751 do i = 1, nn
752 jj = i + nn * (j - 1)
753 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
754 + work2(i + nn * (2 - 1)) * ct(2, j) &
755 + work2(i + nn * (3 - 1)) * ct(3, j) &
756 + work2(i + nn * (4 - 1)) * ct(4, j) &
757 + work2(i + nn * (5 - 1)) * ct(5, j) &
758 + work2(i + nn * (6 - 1)) * ct(6, j) &
759 + work2(i + nn * (7 - 1)) * ct(7, j) &
760 + work2(i + nn * (8 - 1)) * ct(8, j) &
761 + work2(i + nn * (9 - 1)) * ct(9, j) &
762 + work2(i + nn * (10 - 1)) * ct(10, j)
763 end do
764 end do
765
766 end subroutine tnsr3d_el_n10_cpu
767
768 subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
769 integer, parameter :: n = 9
770 integer, parameter :: nn = n**2
771 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
772 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
773 real(kind=rp) :: work(n**3), work2(n**3)
774 integer :: i, j, l
775 integer :: ii, jj
776
777 do j = 1, nn
778 do i = 1, n
779 ii = i + n * (j - 1)
780 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
781 + a(i,2) * u(2 + n * (j - 1)) &
782 + a(i,3) * u(3 + n * (j - 1)) &
783 + a(i,4) * u(4 + n * (j - 1)) &
784 + a(i,5) * u(5 + n * (j - 1)) &
785 + a(i,6) * u(6 + n * (j - 1)) &
786 + a(i,7) * u(7 + n * (j - 1)) &
787 + a(i,8) * u(8 + n * (j - 1)) &
788 + a(i,9) * u(9 + n * (j - 1))
789 end do
790 end do
791
792 do i = 1, n
793 do j = 1, n
794 do l = 1, n
795 ii = l + n * (j - 1) + nn * (i - 1)
796 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
797 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
798 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
799 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
800 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
801 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
802 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
803 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
804 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j)
805 end do
806 end do
807 end do
808
809 do j = 1, n
810 do i = 1, nn
811 jj = i + nn * (j - 1)
812 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
813 + work2(i + nn * (2 - 1)) * ct(2, j) &
814 + work2(i + nn * (3 - 1)) * ct(3, j) &
815 + work2(i + nn * (4 - 1)) * ct(4, j) &
816 + work2(i + nn * (5 - 1)) * ct(5, j) &
817 + work2(i + nn * (6 - 1)) * ct(6, j) &
818 + work2(i + nn * (7 - 1)) * ct(7, j) &
819 + work2(i + nn * (8 - 1)) * ct(8, j) &
820 + work2(i + nn * (9 - 1)) * ct(9, j)
821 end do
822 end do
823
824 end subroutine tnsr3d_el_n9_cpu
825
826 subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
827 integer, parameter :: n = 8
828 integer, parameter :: nn = n**2
829 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
830 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
831 real(kind=rp) :: work(n**3), work2(n**3)
832 integer :: i, j, l
833 integer :: ii, jj
834
835 do j = 1, nn
836 do i = 1, n
837 ii = i + n * (j - 1)
838 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
839 + a(i,2) * u(2 + n * (j - 1)) &
840 + a(i,3) * u(3 + n * (j - 1)) &
841 + a(i,4) * u(4 + n * (j - 1)) &
842 + a(i,5) * u(5 + n * (j - 1)) &
843 + a(i,6) * u(6 + n * (j - 1)) &
844 + a(i,7) * u(7 + n * (j - 1)) &
845 + a(i,8) * u(8 + n * (j - 1))
846 end do
847 end do
848
849 do i = 1, n
850 do j = 1, n
851 do l = 1, n
852 ii = l + n * (j - 1) + nn * (i - 1)
853 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
854 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
855 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
856 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
857 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
858 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
859 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
860 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j)
861 end do
862 end do
863 end do
864
865 do j = 1, n
866 do i = 1, nn
867 jj = i + nn * (j - 1)
868 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
869 + work2(i + nn * (2 - 1)) * ct(2, j) &
870 + work2(i + nn * (3 - 1)) * ct(3, j) &
871 + work2(i + nn * (4 - 1)) * ct(4, j) &
872 + work2(i + nn * (5 - 1)) * ct(5, j) &
873 + work2(i + nn * (6 - 1)) * ct(6, j) &
874 + work2(i + nn * (7 - 1)) * ct(7, j) &
875 + work2(i + nn * (8 - 1)) * ct(8, j)
876 end do
877 end do
878
879 end subroutine tnsr3d_el_n8_cpu
880
881 subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
882 integer, parameter :: n = 7
883 integer, parameter :: nn = n**2
884 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
885 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
886 real(kind=rp) :: work(n**3), work2(n**3)
887 integer :: i, j, l
888 integer :: ii, jj
889
890 do j = 1, nn
891 do i = 1, n
892 ii = i + n * (j - 1)
893 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
894 + a(i,2) * u(2 + n * (j - 1)) &
895 + a(i,3) * u(3 + n * (j - 1)) &
896 + a(i,4) * u(4 + n * (j - 1)) &
897 + a(i,5) * u(5 + n * (j - 1)) &
898 + a(i,6) * u(6 + n * (j - 1)) &
899 + a(i,7) * u(7 + n * (j - 1))
900 end do
901 end do
902
903 do i = 1, n
904 do j = 1, n
905 do l = 1, n
906 ii = l + n * (j - 1) + nn * (i - 1)
907 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
908 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
909 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
910 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
911 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
912 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
913 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j)
914 end do
915 end do
916 end do
917
918 do j = 1, n
919 do i = 1, nn
920 jj = i + nn * (j - 1)
921 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
922 + work2(i + nn * (2 - 1)) * ct(2, j) &
923 + work2(i + nn * (3 - 1)) * ct(3, j) &
924 + work2(i + nn * (4 - 1)) * ct(4, j) &
925 + work2(i + nn * (5 - 1)) * ct(5, j) &
926 + work2(i + nn * (6 - 1)) * ct(6, j) &
927 + work2(i + nn * (7 - 1)) * ct(7, j)
928 end do
929 end do
930
931 end subroutine tnsr3d_el_n7_cpu
932
933 subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
934 integer, parameter :: n = 6
935 integer, parameter :: nn = n**2
936 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
937 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
938 real(kind=rp) :: work(n**3), work2(n**3)
939 integer :: i, j, l
940 integer :: ii, jj
941
942 do j = 1, nn
943 do i = 1, n
944 ii = i + n * (j - 1)
945 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
946 + a(i,2) * u(2 + n * (j - 1)) &
947 + a(i,3) * u(3 + n * (j - 1)) &
948 + a(i,4) * u(4 + n * (j - 1)) &
949 + a(i,5) * u(5 + n * (j - 1)) &
950 + a(i,6) * u(6 + n * (j - 1))
951 end do
952 end do
953
954 do i = 1, n
955 do j = 1, n
956 do l = 1, n
957 ii = l + n * (j - 1) + nn * (i - 1)
958 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
959 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
960 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
961 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
962 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
963 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j)
964 end do
965 end do
966 end do
967
968 do j = 1, n
969 do i = 1, nn
970 jj = i + nn * (j - 1)
971 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
972 + work2(i + nn * (2 - 1)) * ct(2, j) &
973 + work2(i + nn * (3 - 1)) * ct(3, j) &
974 + work2(i + nn * (4 - 1)) * ct(4, j) &
975 + work2(i + nn * (5 - 1)) * ct(5, j) &
976 + work2(i + nn * (6 - 1)) * ct(6, j)
977 end do
978 end do
979
980 end subroutine tnsr3d_el_n6_cpu
981
982 subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
983 integer, parameter :: n = 5
984 integer, parameter :: nn = n**2
985 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
986 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
987 real(kind=rp) :: work(n**3), work2(n**3)
988 integer :: i, j, l
989 integer :: ii, jj
990
991 do j = 1, nn
992 do i = 1, n
993 ii = i + n * (j - 1)
994 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
995 + a(i,2) * u(2 + n * (j - 1)) &
996 + a(i,3) * u(3 + n * (j - 1)) &
997 + a(i,4) * u(4 + n * (j - 1)) &
998 + a(i,5) * u(5 + n * (j - 1))
999 end do
1000 end do
1001
1002 do i = 1, n
1003 do j = 1, n
1004 do l = 1, n
1005 ii = l + n * (j - 1) + nn * (i - 1)
1006 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1007 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1008 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
1009 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
1010 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j)
1011 end do
1012 end do
1013 end do
1014
1015 do j = 1, n
1016 do i = 1, nn
1017 jj = i + nn * (j - 1)
1018 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1019 + work2(i + nn * (2 - 1)) * ct(2, j) &
1020 + work2(i + nn * (3 - 1)) * ct(3, j) &
1021 + work2(i + nn * (4 - 1)) * ct(4, j) &
1022 + work2(i + nn * (5 - 1)) * ct(5, j)
1023 end do
1024 end do
1025
1026 end subroutine tnsr3d_el_n5_cpu
1027
1028 subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
1029 integer, parameter :: n = 4
1030 integer, parameter :: nn = n**2
1031 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1032 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1033 real(kind=rp) :: work(n**3), work2(n**3)
1034 integer :: i, j, l
1035 integer :: ii, jj
1036
1037 do j = 1, nn
1038 do i = 1, n
1039 ii = i + n * (j - 1)
1040 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1041 + a(i,2) * u(2 + n * (j - 1)) &
1042 + a(i,3) * u(3 + n * (j - 1)) &
1043 + a(i,4) * u(4 + n * (j - 1))
1044 end do
1045 end do
1046
1047 do i = 1, n
1048 do j = 1, n
1049 do l = 1, n
1050 ii = l + n * (j - 1) + nn * (i - 1)
1051 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1052 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1053 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
1054 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j)
1055 end do
1056 end do
1057 end do
1058
1059 do j = 1, n
1060 do i = 1, nn
1061 jj = i + nn * (j - 1)
1062 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1063 + work2(i + nn * (2 - 1)) * ct(2, j) &
1064 + work2(i + nn * (3 - 1)) * ct(3, j) &
1065 + work2(i + nn * (4 - 1)) * ct(4, j)
1066 end do
1067 end do
1068
1069 end subroutine tnsr3d_el_n4_cpu
1070
1071 subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
1072 integer, parameter :: n = 3
1073 integer, parameter :: nn = n**2
1074 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1075 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1076 real(kind=rp) :: work(n**3), work2(n**3)
1077 integer :: i, j, l
1078 integer :: ii, jj
1079
1080 do j = 1, nn
1081 do i = 1, n
1082 ii = i + n * (j - 1)
1083 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1084 + a(i,2) * u(2 + n * (j - 1)) &
1085 + a(i,3) * u(3 + n * (j - 1))
1086 end do
1087 end do
1088
1089 do i = 1, n
1090 do j = 1, n
1091 do l = 1, n
1092 ii = l + n * (j - 1) + nn * (i - 1)
1093 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1094 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1095 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j)
1096 end do
1097 end do
1098 end do
1099
1100 do j = 1, n
1101 do i = 1, nn
1102 jj = i + nn * (j - 1)
1103 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1104 + work2(i + nn * (2 - 1)) * ct(2, j) &
1105 + work2(i + nn * (3 - 1)) * ct(3, j)
1106 end do
1107 end do
1108
1109 end subroutine tnsr3d_el_n3_cpu
1110
1111 subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
1112 integer, parameter :: n = 2
1113 integer, parameter :: nn = n**2
1114 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1115 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1116 real(kind=rp) :: work(n**3), work2(n**3)
1117 integer :: i, j, l
1118 integer :: ii, jj
1119
1120 do j = 1, nn
1121 do i = 1, n
1122 ii = i + n * (j - 1)
1123 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1124 + a(i,2) * u(2 + n * (j - 1))
1125 end do
1126 end do
1127
1128 do i = 1, n
1129 do j = 1, n
1130 do l = 1, n
1131 ii = l + n * (j - 1) + nn * (i - 1)
1132 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1133 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j)
1134 end do
1135 end do
1136 end do
1137
1138 do j = 1, n
1139 do i = 1, nn
1140 jj = i + nn * (j - 1)
1141 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1142 + work2(i + nn * (2 - 1)) * ct(2, j)
1143 end do
1144 end do
1145
1146 end subroutine tnsr3d_el_n2_cpu
1147
1148 subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
1149 integer, intent(in) :: nv, nu, nelv
1150 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
1151 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1152 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1153
1154 if (nu .eq. 2 .and. nv .eq. 4) then
1155 call tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
1156 else if (nu .eq. 4) then
1157 call tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
1158 else
1159 call tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
1160 end if
1161
1162 end subroutine tnsr3d_cpu
1163
1164 subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
1165 integer, intent(in) :: nv, nu, nelv
1166 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
1167 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1168 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1169 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1170 integer :: ie, i, j, k, l, ii, jj
1171 integer :: nunu, nvnu, nvnv
1172
1173 nvnu = nv * nu
1174 nunu = nu * nu
1175 nvnv = nv * nv
1176
1177 do ie = 1,nelv
1178 do j = 1, nunu
1179 do i = 1, nv
1180 ii = i + nv * (j - 1)
1181 tmp = 0.0_rp
1182 do k = 1, nu
1183 tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
1184 end do
1185 work(ii) = tmp
1186 end do
1187 end do
1188
1189 do i = 1, nu
1190 do j = 1, nv
1191 do l = 1, nv
1192 ii = l + nv * (j - 1) + nvnv * (i - 1)
1193 tmp = 0.0_rp
1194 do k = 1, nu
1195 jj = l + nv * (k - 1) + nvnu * (i - 1)
1196 tmp = tmp + work(jj) * bt(k,j)
1197 end do
1198 work2(ii) = tmp
1199 end do
1200 end do
1201 end do
1202
1203 do j = 1, nv
1204 do i = 1, nvnv
1205 jj = i + nvnv * (j - 1)
1206 tmp = 0.0_rp
1207 do k = 1, nu
1208 ii = i + nvnv * (k - 1)
1209 tmp = tmp + work2(ii) * ct(k, j)
1210 end do
1211 v(jj, ie) = tmp
1212 end do
1213 end do
1214 end do
1215
1216 end subroutine tnsr3d_nvnu_cpu
1217
1218 subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
1219 integer, parameter :: nu = 2
1220 integer, parameter :: nv = 4
1221 integer, parameter :: nunu = 4
1222 integer, parameter :: nvnu = 8
1223 integer, parameter :: nvnv = 16
1224 integer, intent(in) :: nelv
1225 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
1226 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1227 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1228 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1229 integer :: ie, i, j, k, l, ii, jj
1230
1231 do ie = 1,nelv
1232 do j = 1, nunu
1233 do i = 1, nv
1234 ii = i + nv * (j - 1)
1235 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1236 + a(i,2) * u(2 + nu * (j - 1), ie)
1237 end do
1238 end do
1239
1240 do i = 1, nu
1241 do j = 1, nv
1242 do l = 1, nv
1243 ii = l + nv * (j - 1) + nvnv * (i - 1)
1244 tmp = 0.0_rp
1245 do k = 1, nu
1246 jj = l + nv * (k - 1) + nvnu * (i - 1)
1247 tmp = tmp + work(jj) * bt(k,j)
1248 end do
1249 work2(ii) = tmp
1250 end do
1251 end do
1252 end do
1253
1254 do j = 1, nv
1255 do i = 1, nvnv
1256 jj = i + nvnv * (j - 1)
1257 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1258 + work2(i + nvnv * (2 - 1)) * ct(2, j)
1259 end do
1260 end do
1261 end do
1262
1263 end subroutine tnsr3d_nu2nv4_cpu
1264
1265 subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
1266 integer, parameter :: nu = 4
1267 integer, parameter :: nunu = 16
1268 integer, intent(in) :: nv, nelv
1269 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
1270 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1271 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1272 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1273 integer :: ie, i, j, k, l, ii, jj
1274 integer :: nvnu, nvnv
1275
1276 nvnu = nv * nu
1277 nvnv = nv * nv
1278
1279 do ie = 1,nelv
1280 do j = 1, nunu
1281 do i = 1, nv
1282 ii = i + nv * (j - 1)
1283 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1284 + a(i,2) * u(2 + nu * (j - 1), ie) &
1285 + a(i,3) * u(3 + nu * (j - 1), ie) &
1286 + a(i,4) * u(4 + nu * (j - 1), ie)
1287 end do
1288 end do
1289
1290 do i = 1, nu
1291 do j = 1, nv
1292 do l = 1, nv
1293 ii = l + nv * (j - 1) + nvnv * (i - 1)
1294 tmp = 0.0_rp
1295 do k = 1, nu
1296 jj = l + nv * (k - 1) + nvnu * (i - 1)
1297 tmp = tmp + work(jj) * bt(k,j)
1298 end do
1299 work2(ii) = tmp
1300 end do
1301 end do
1302 end do
1303
1304 do j = 1, nv
1305 do i = 1, nvnv
1306 jj = i + nvnv * (j - 1)
1307 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1308 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1309 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1310 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1311 end do
1312 end do
1313 end do
1314
1315 end subroutine tnsr3d_nu4_cpu
1316
1317 subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
1318 integer, intent(in) :: nv, nu, nelv
1319 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1320 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1321
1322 if (nu .eq. 4 .and. nv .eq. 2) then
1323 call tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
1324 else
1325 call tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
1326 end if
1327
1328 end subroutine tnsr1_3d_cpu
1329
1330 subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
1331 integer, intent(in) :: nv, nu, nelv
1332 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1333 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1334 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1335 integer :: e, e0, ee, es, iu, iv, nu3, nv3
1336 integer :: i, j, k, l, ii, jj, kk
1337 integer :: nunu, nvnu, nvnv
1338 real(kind=rp) :: tmp
1339
1340 nvnu = nv * nu
1341 nunu = nu * nu
1342 nvnv = nv * nv
1343
1344 e0 = 1
1345 es = 1
1346 ee = nelv
1347
1348 if (nv.gt.nu) then
1349 e0 = nelv
1350 es = -1
1351 ee = 1
1352 endif
1353
1354 nu3 = nu**3
1355 nv3 = nv**3
1356
1357 do e = e0,ee,es
1358 iu = (e-1)*nu3
1359 iv = (e-1)*nv3
1360
1361 do j = 1, nunu
1362 do i = 1, nv
1363 ii = i + nv * (j - 1)
1364 tmp = 0.0_rp
1365 do k = 1, nu
1366 kk = k + nu * (j - 1) + iu
1367 tmp = tmp + a(i,k) * v(kk)
1368 end do
1369 work(ii) = tmp
1370 end do
1371 end do
1372
1373 do i = 1, nu
1374 do j = 1, nv
1375 do l = 1, nv
1376 ii = l + nv * (j - 1) + nvnv * (i - 1)
1377 tmp = 0.0_rp
1378 do k = 1, nu
1379 jj = l + nv * (k - 1) + nvnu * (i - 1)
1380 tmp = tmp + work(jj) * bt(k,j)
1381 end do
1382 work2(ii) = tmp
1383 end do
1384 end do
1385 end do
1386
1387 do j = 1, nv
1388 do i = 1, nvnv
1389 jj = i + nvnv * (j - 1) + iv
1390 tmp = 0.0_rp
1391 do k = 1, nu
1392 ii = i + nvnv * (k - 1)
1393 tmp = tmp + work2(ii) * ct(k, j)
1394 end do
1395 v(jj) = tmp
1396 end do
1397 end do
1398 end do
1399
1400 end subroutine tnsr1_3d_nvnu_cpu
1401
1402 subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
1403 integer, parameter :: nu = 4
1404 integer, parameter :: nv = 2
1405 integer, parameter :: nunu = 16
1406 integer, parameter :: nvnu = 8
1407 integer, parameter :: nvnv = 4
1408 integer, parameter :: nununu = 64
1409 integer, parameter :: nvnvnv = 8
1410 integer, intent(in) :: nelv
1411 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1412 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1413 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1414 integer :: e, iu, iv
1415 integer :: i, j, k, l, ii, jj
1416 real(kind=rp) :: tmp
1417
1418 do e = 1,nelv
1419 iu = (e-1)*nununu
1420 iv = (e-1)*nvnvnv
1421
1422 do j = 1, nunu
1423 do i = 1, nv
1424 ii = i + nv * (j - 1)
1425 work(ii) = a(i,1) * v(1 + nu * (j - 1) + iu) &
1426 + a(i,2) * v(2 + nu * (j - 1) + iu) &
1427 + a(i,3) * v(3 + nu * (j - 1) + iu) &
1428 + a(i,4) * v(4 + nu * (j - 1) + iu)
1429 end do
1430 end do
1431
1432 do i = 1, nu
1433 do j = 1, nv
1434 do l = 1, nv
1435 ii = l + nv * (j - 1) + nvnv * (i - 1)
1436 tmp = 0.0_rp
1437 do k = 1, nu
1438 jj = l + nv * (k - 1) + nvnu * (i - 1)
1439 tmp = tmp + work(jj) * bt(k,j)
1440 end do
1441 work2(ii) = tmp
1442 end do
1443 end do
1444 end do
1445
1446 do j = 1, nv
1447 do i = 1, nvnv
1448 jj = i + nvnv * (j - 1) + iv
1449 v(jj) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1450 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1451 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1452 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1453
1454 end do
1455 end do
1456 end do
1457
1458 end subroutine tnsr1_3d_nu4nv2_cpu
1459
1460end module tensor_cpu
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine tnsr3d_el_1_12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n5_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_8_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_6_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n13_cpu(v, u, a, bt, ct)
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n3_cpu(v, u, a, bt, ct)
subroutine tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n7_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n11_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_nu_cpu(v, u, nu, a, bt, ct)
subroutine tnsr3d_el_n9_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n6_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n2_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_4_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n4_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_10_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n_cpu(v, u, a, bt, ct, n)
subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n14_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n10_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n8_cpu(v, u, a, bt, ct)