Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor_cpu.f90
Go to the documentation of this file.
2 use num_types, only : rp
3 use mxm_wrapper, only : mxm
4 implicit none
5 private
6
8
9contains
10
11 subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
12 integer, intent(in) :: nv, nu
13 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
14 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv)
15 real(kind=rp) :: work(0:nu**2*nv)
16
17 call mxm(a, nv, u, nu, work, nu)
18 call mxm(work, nv, bt, nu, v, nv)
19
20 end subroutine tnsr2d_el_cpu
21
22 subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
23 integer, intent(in) :: nv, nu
24 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
25 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
26
27 if (nv .eq. nu) then
28 select case (nv)
29 case (14)
30 call tnsr3d_el_n14_cpu(v, u, a, bt, ct)
31 case (13)
32 call tnsr3d_el_n13_cpu(v, u, a, bt, ct)
33 case (12)
34 call tnsr3d_el_n12_cpu(v, u, a, bt, ct)
35 case (11)
36 call tnsr3d_el_n11_cpu(v, u, a, bt, ct)
37 case (10)
38 call tnsr3d_el_n10_cpu(v, u, a, bt, ct)
39 case (9)
40 call tnsr3d_el_n9_cpu(v, u, a, bt, ct)
41 case (8)
42 call tnsr3d_el_n8_cpu(v, u, a, bt, ct)
43 case (7)
44 call tnsr3d_el_n7_cpu(v, u, a, bt, ct)
45 case (6)
46 call tnsr3d_el_n6_cpu(v, u, a, bt, ct)
47 case (5)
48 call tnsr3d_el_n5_cpu(v, u, a, bt, ct)
49 case (4)
50 call tnsr3d_el_n4_cpu(v, u, a, bt, ct)
51 case (3)
52 call tnsr3d_el_n3_cpu(v, u, a, bt, ct)
53 case (2)
54 call tnsr3d_el_n2_cpu(v, u, a, bt, ct)
55 case default
56 call tnsr3d_el_n_cpu(v, u, a, bt, ct, nv)
57 end select
58 else if (nv .eq. 1) then
59 select case (nu)
60 case (4)
61 call tnsr3d_el_1_4_cpu(v, u, a, bt, ct)
62 case (6)
63 call tnsr3d_el_1_6_cpu(v, u, a, bt, ct)
64 case (8)
65 call tnsr3d_el_1_8_cpu(v, u, a, bt, ct)
66 case (10)
67 call tnsr3d_el_1_10_cpu(v, u, a, bt, ct)
68 case (12)
69 call tnsr3d_el_1_12_cpu(v, u, a, bt, ct)
70 case default
71 call tnsr3d_el_1_nu_cpu(v, u, nu, a, bt, ct)
72 end select
73 else
74 call tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
75 end if
76
77 end subroutine tnsr3d_el_cpu
78
79 subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
80 integer, intent(in) :: nv, nu
81 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
82 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
83 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
84 real(kind=rp) :: tmp
85 integer :: i, j, k, l, nunu, nvnu, nvnv
86 integer :: ii, jj
87 nvnu = nv * nu
88 nunu = nu * nu
89 nvnv = nv * nv
90
91 do j = 1, nunu
92 do i = 1, nv
93 ii = i + nv * (j - 1)
94 tmp = 0.0_rp
95 do k = 1, nu
96 tmp = tmp + a(i,k) * u(k + nu * (j - 1))
97 end do
98 work(ii) = tmp
99 end do
100 end do
101
102 do i = 1, nu
103 do j = 1, nv
104 do l = 1, nv
105 ii = l + nv * (j - 1) + nvnv * (i - 1)
106 tmp = 0.0_rp
107 do k = 1, nu
108 jj = l + nv * (k - 1) + nvnu * (i - 1)
109 tmp = tmp + work(jj) * bt(k,j)
110 end do
111 work2(ii) = tmp
112 end do
113 end do
114 end do
115
116 do j = 1, nv
117 do i = 1, nvnv
118 jj = i + nvnv * (j - 1)
119 tmp = 0.0_rp
120 do k = 1, nu
121 ii = i + nvnv * (k - 1)
122 tmp = tmp + work2(ii) * ct(k, j)
123 end do
124 v(jj) = tmp
125 end do
126 end do
127
128 end subroutine tnsr3d_el_nvnu_cpu
129
130 subroutine tnsr3d_el_1_nu_cpu(v, u, nu, A, Bt, Ct)
131 integer, intent(in) :: nu
132 real(kind=rp), intent(inout) :: v(1)
133 real(kind=rp), intent(in) :: u(nu*nu*nu)
134 real(kind=rp), intent(in) :: a(1, nu), bt(nu, 1), ct(nu, 1)
135 real(kind=rp) :: work(nu**2), work2(nu)
136 real(kind=rp) :: tmp
137 integer :: i, j, k, nunu
138 integer :: jj
139 nunu = nu * nu
140
141 do j = 1, nunu
142 tmp = 0.0_rp
143 do k = 1, nu
144 tmp = tmp + a(1,k) * u(k + nu * (j - 1))
145 end do
146 work(j) = tmp
147 end do
148
149 do i = 1, nu
150 tmp = 0.0_rp
151 do k = 1, nu
152 jj = k + nu * (i - 1)
153 tmp = tmp + work(jj) * bt(k,1)
154 end do
155 work2(i) = tmp
156 end do
157
158 tmp = 0.0_rp
159 do k = 1, nu
160 tmp = tmp + work2(k) * ct(k, 1)
161 end do
162 v(1) = tmp
163
164 end subroutine tnsr3d_el_1_nu_cpu
165
166 subroutine tnsr3d_el_1_4_cpu(v, u, A, Bt, Ct)
167 integer, parameter :: n = 4
168 integer, parameter :: nn = n**2
169 real(kind=rp), intent(inout) :: v(1)
170 real(kind=rp), intent(in) :: u(n*n*n)
171 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
172 real(kind=rp) :: work(n**2), work2(n)
173 integer :: i, j
174
175 do j = 1, nn
176 work(j) = a(1,1) * u(1 + n * (j - 1)) &
177 + a(1,2) * u(2 + n * (j - 1)) &
178 + a(1,3) * u(3 + n * (j - 1)) &
179 + a(1,4) * u(4 + n * (j - 1))
180 end do
181
182 do i = 1, n
183 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
184 + work(2 + n * (i - 1)) * bt(2,1) &
185 + work(3 + n * (i - 1)) * bt(3,1) &
186 + work(4 + n * (i - 1)) * bt(4,1)
187 end do
188
189 v(1) = work2(1) * ct(1, 1) &
190 + work2(2) * ct(2, 1) &
191 + work2(3) * ct(3, 1) &
192 + work2(4) * ct(4, 1)
193
194 end subroutine tnsr3d_el_1_4_cpu
195
196 subroutine tnsr3d_el_1_6_cpu(v, u, A, Bt, Ct)
197 integer, parameter :: n = 6
198 integer, parameter :: nn = n**2
199 real(kind=rp), intent(inout) :: v(1)
200 real(kind=rp), intent(in) :: u(n*n*n)
201 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
202 real(kind=rp) :: work(n**2), work2(n)
203 integer :: i, j
204
205 do j = 1, nn
206 work(j) = a(1,1) * u(1 + n * (j - 1)) &
207 + a(1,2) * u(2 + n * (j - 1)) &
208 + a(1,3) * u(3 + n * (j - 1)) &
209 + a(1,4) * u(4 + n * (j - 1)) &
210 + a(1,5) * u(5 + n * (j - 1)) &
211 + a(1,6) * u(6 + n * (j - 1))
212 end do
213
214 do i = 1, n
215 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
216 + work(2 + n * (i - 1)) * bt(2,1) &
217 + work(3 + n * (i - 1)) * bt(3,1) &
218 + work(4 + n * (i - 1)) * bt(4,1) &
219 + work(5 + n * (i - 1)) * bt(5,1) &
220 + work(6 + n * (i - 1)) * bt(6,1)
221 end do
222
223 v(1) = work2(1) * ct(1, 1) &
224 + work2(2) * ct(2, 1) &
225 + work2(3) * ct(3, 1) &
226 + work2(4) * ct(4, 1) &
227 + work2(5) * ct(5, 1) &
228 + work2(6) * ct(6, 1)
229
230 end subroutine tnsr3d_el_1_6_cpu
231
232 subroutine tnsr3d_el_1_8_cpu(v, u, A, Bt, Ct)
233 integer, parameter :: n = 8
234 integer, parameter :: nn = n**2
235 real(kind=rp), intent(inout) :: v(1)
236 real(kind=rp), intent(in) :: u(n*n*n)
237 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
238 real(kind=rp) :: work(n**2), work2(n)
239 integer :: i, j
240
241 do j = 1, nn
242 work(j) = a(1,1) * u(1 + n * (j - 1)) &
243 + a(1,2) * u(2 + n * (j - 1)) &
244 + a(1,3) * u(3 + n * (j - 1)) &
245 + a(1,4) * u(4 + n * (j - 1)) &
246 + a(1,5) * u(5 + n * (j - 1)) &
247 + a(1,6) * u(6 + n * (j - 1)) &
248 + a(1,7) * u(7 + n * (j - 1)) &
249 + a(1,8) * u(8 + n * (j - 1))
250 end do
251
252 do i = 1, n
253 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
254 + work(2 + n * (i - 1)) * bt(2,1) &
255 + work(3 + n * (i - 1)) * bt(3,1) &
256 + work(4 + n * (i - 1)) * bt(4,1) &
257 + work(5 + n * (i - 1)) * bt(5,1) &
258 + work(6 + n * (i - 1)) * bt(6,1) &
259 + work(7 + n * (i - 1)) * bt(7,1) &
260 + work(8 + n * (i - 1)) * bt(8,1)
261 end do
262
263 v(1) = work2(1) * ct(1, 1) &
264 + work2(2) * ct(2, 1) &
265 + work2(3) * ct(3, 1) &
266 + work2(4) * ct(4, 1) &
267 + work2(5) * ct(5, 1) &
268 + work2(6) * ct(6, 1) &
269 + work2(7) * ct(7, 1) &
270 + work2(8) * ct(8, 1)
271
272
273 end subroutine tnsr3d_el_1_8_cpu
274
275
276 subroutine tnsr3d_el_1_10_cpu(v, u, A, Bt, Ct)
277 integer, parameter :: n = 10
278 integer, parameter :: nn = n**2
279 real(kind=rp), intent(inout) :: v(1)
280 real(kind=rp), intent(in) :: u(n*n*n)
281 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
282 real(kind=rp) :: work(n**2), work2(n)
283 integer :: i, j
284
285 do j = 1, nn
286 work(j) = a(1,1) * u(1 + n * (j - 1)) &
287 + a(1,2) * u(2 + n * (j - 1)) &
288 + a(1,3) * u(3 + n * (j - 1)) &
289 + a(1,4) * u(4 + n * (j - 1)) &
290 + a(1,5) * u(5 + n * (j - 1)) &
291 + a(1,6) * u(6 + n * (j - 1)) &
292 + a(1,7) * u(7 + n * (j - 1)) &
293 + a(1,8) * u(8 + n * (j - 1)) &
294 + a(1,9) * u(9 + n * (j - 1)) &
295 + a(1,10) * u(10 + n * (j - 1))
296 end do
297
298 do i = 1, n
299 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
300 + work(2 + n * (i - 1)) * bt(2,1) &
301 + work(3 + n * (i - 1)) * bt(3,1) &
302 + work(4 + n * (i - 1)) * bt(4,1) &
303 + work(5 + n * (i - 1)) * bt(5,1) &
304 + work(6 + n * (i - 1)) * bt(6,1) &
305 + work(7 + n * (i - 1)) * bt(7,1) &
306 + work(8 + n * (i - 1)) * bt(8,1) &
307 + work(9 + n * (i - 1)) * bt(9,1) &
308 + work(10 + n * (i - 1)) * bt(10,1)
309 end do
310
311 v(1) = work2(1) * ct(1, 1) &
312 + work2(2) * ct(2, 1) &
313 + work2(3) * ct(3, 1) &
314 + work2(4) * ct(4, 1) &
315 + work2(5) * ct(5, 1) &
316 + work2(6) * ct(6, 1) &
317 + work2(7) * ct(7, 1) &
318 + work2(8) * ct(8, 1) &
319 + work2(9) * ct(9, 1) &
320 + work2(10) * ct(10, 1)
321
322 end subroutine tnsr3d_el_1_10_cpu
323
324
325 subroutine tnsr3d_el_1_12_cpu(v, u, A, Bt, Ct)
326 integer, parameter :: n = 12
327 integer, parameter :: nn = n**2
328 real(kind=rp), intent(inout) :: v(1)
329 real(kind=rp), intent(in) :: u(n*n*n)
330 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
331 real(kind=rp) :: work(n**2), work2(n)
332 integer :: i, j
333
334 do j = 1, nn
335 work(j) = a(1,1) * u(1 + n * (j - 1)) &
336 + a(1,2) * u(2 + n * (j - 1)) &
337 + a(1,3) * u(3 + n * (j - 1)) &
338 + a(1,4) * u(4 + n * (j - 1)) &
339 + a(1,5) * u(5 + n * (j - 1)) &
340 + a(1,6) * u(6 + n * (j - 1)) &
341 + a(1,7) * u(7 + n * (j - 1)) &
342 + a(1,8) * u(8 + n * (j - 1)) &
343 + a(1,9) * u(9 + n * (j - 1)) &
344 + a(1,10) * u(10 + n * (j - 1)) &
345 + a(1,11) * u(11 + n * (j - 1)) &
346 + a(1,12) * u(12 + n * (j - 1))
347 end do
348
349 do i = 1, n
350 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
351 + work(2 + n * (i - 1)) * bt(2,1) &
352 + work(3 + n * (i - 1)) * bt(3,1) &
353 + work(4 + n * (i - 1)) * bt(4,1) &
354 + work(5 + n * (i - 1)) * bt(5,1) &
355 + work(6 + n * (i - 1)) * bt(6,1) &
356 + work(7 + n * (i - 1)) * bt(7,1) &
357 + work(8 + n * (i - 1)) * bt(8,1) &
358 + work(9 + n * (i - 1)) * bt(9,1) &
359 + work(10 + n * (i - 1)) * bt(10,1) &
360 + work(11 + n * (i - 1)) * bt(11,1) &
361 + work(12 + n * (i - 1)) * bt(12,1)
362 end do
363
364 v(1) = work2(1) * ct(1, 1) &
365 + work2(2) * ct(2, 1) &
366 + work2(3) * ct(3, 1) &
367 + work2(4) * ct(4, 1) &
368 + work2(5) * ct(5, 1) &
369 + work2(6) * ct(6, 1) &
370 + work2(7) * ct(7, 1) &
371 + work2(8) * ct(8, 1) &
372 + work2(9) * ct(9, 1) &
373 + work2(10) * ct(10, 1) &
374 + work2(11) * ct(11, 1) &
375 + work2(12) * ct(12, 1)
376
377 end subroutine tnsr3d_el_1_12_cpu
378
379
380
381 subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
382 integer, intent(in) :: n
383 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
384 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
385 real(kind=rp) :: work(n**3), work2(n**3), tmp
386 integer :: i, j, l, k
387 integer :: ii, jj, nn
388
389 nn = n**2
390
391 do j = 1, nn
392 do i = 1, n
393 ii = i + n * (j - 1)
394 tmp = 0.0_rp
395 do k = 1, n
396 tmp = tmp + a(i,k) * u(k + n * (j - 1))
397 end do
398 work(ii) = tmp
399 end do
400 end do
401
402 do i = 1, n
403 do j = 1, n
404 do l = 1, n
405 ii = l + n * (j - 1) + nn * (i - 1)
406 tmp = 0.0_rp
407 do k = 1, n
408 tmp = tmp + work(l + n * (k - 1) + nn * (i - 1)) * bt(k,j)
409 end do
410 work2(ii) = tmp
411 end do
412 end do
413 end do
414
415 do j = 1, n
416 do i = 1, nn
417 jj = i + nn * (j - 1)
418 tmp = 0.0_rp
419 do k = 1, n
420 tmp = tmp + work2(i + nn * (k - 1)) * ct(k, j)
421 end do
422 v(jj) = tmp
423 end do
424 end do
425
426 end subroutine tnsr3d_el_n_cpu
427
428 subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
429 integer, parameter :: n = 14
430 integer, parameter :: nn = n**2
431 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
432 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
433 real(kind=rp) :: work(n**3), work2(n**3)
434 integer :: i, j, l
435 integer :: ii, jj
436
437 do j = 1, nn
438 do i = 1, n
439 ii = i + n * (j - 1)
440 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
441 + a(i,2) * u(2 + n * (j - 1)) &
442 + a(i,3) * u(3 + n * (j - 1)) &
443 + a(i,4) * u(4 + n * (j - 1)) &
444 + a(i,5) * u(5 + n * (j - 1)) &
445 + a(i,6) * u(6 + n * (j - 1)) &
446 + a(i,7) * u(7 + n * (j - 1)) &
447 + a(i,8) * u(8 + n * (j - 1)) &
448 + a(i,9) * u(9 + n * (j - 1)) &
449 + a(i,10) * u(10 + n * (j - 1)) &
450 + a(i,11) * u(11 + n * (j - 1)) &
451 + a(i,12) * u(12 + n * (j - 1)) &
452 + a(i,13) * u(13 + n * (j - 1)) &
453 + a(i,14) * u(14 + n * (j - 1))
454 end do
455 end do
456
457 do i = 1, n
458 do j = 1, n
459 do l = 1, n
460 ii = l + n * (j - 1) + nn * (i - 1)
461 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
462 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
463 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
464 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
465 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
466 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
467 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
468 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
469 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
470 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
471 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
472 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
473 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j) &
474 + work(l + n * (14 - 1) + nn * (i - 1)) * bt(14,j)
475 end do
476 end do
477 end do
478
479 do j = 1, n
480 do i = 1, nn
481 jj = i + nn * (j - 1)
482 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
483 + work2(i + nn * (2 - 1)) * ct(2, j) &
484 + work2(i + nn * (3 - 1)) * ct(3, j) &
485 + work2(i + nn * (4 - 1)) * ct(4, j) &
486 + work2(i + nn * (5 - 1)) * ct(5, j) &
487 + work2(i + nn * (6 - 1)) * ct(6, j) &
488 + work2(i + nn * (7 - 1)) * ct(7, j) &
489 + work2(i + nn * (8 - 1)) * ct(8, j) &
490 + work2(i + nn * (9 - 1)) * ct(9, j) &
491 + work2(i + nn * (10 - 1)) * ct(10, j) &
492 + work2(i + nn * (11 - 1)) * ct(11, j) &
493 + work2(i + nn * (12 - 1)) * ct(12, j) &
494 + work2(i + nn * (13 - 1)) * ct(13, j) &
495 + work2(i + nn * (14 - 1)) * ct(14, j)
496 end do
497 end do
498
499 end subroutine tnsr3d_el_n14_cpu
500
501 subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
502 integer, parameter :: n = 13
503 integer, parameter :: nn = n**2
504 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
505 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
506 real(kind=rp) :: work(n**3), work2(n**3)
507 integer :: i, j, l
508 integer :: ii, jj
509
510 do j = 1, nn
511 do i = 1, n
512 ii = i + n * (j - 1)
513 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
514 + a(i,2) * u(2 + n * (j - 1)) &
515 + a(i,3) * u(3 + n * (j - 1)) &
516 + a(i,4) * u(4 + n * (j - 1)) &
517 + a(i,5) * u(5 + n * (j - 1)) &
518 + a(i,6) * u(6 + n * (j - 1)) &
519 + a(i,7) * u(7 + n * (j - 1)) &
520 + a(i,8) * u(8 + n * (j - 1)) &
521 + a(i,9) * u(9 + n * (j - 1)) &
522 + a(i,10) * u(10 + n * (j - 1)) &
523 + a(i,11) * u(11 + n * (j - 1)) &
524 + a(i,12) * u(12 + n * (j - 1)) &
525 + a(i,13) * u(13 + n * (j - 1))
526 end do
527 end do
528
529 do i = 1, n
530 do j = 1, n
531 do l = 1, n
532 ii = l + n * (j - 1) + nn * (i - 1)
533 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
534 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
535 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
536 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
537 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
538 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
539 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
540 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
541 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
542 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
543 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
544 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
545 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j)
546 end do
547 end do
548 end do
549
550 do j = 1, n
551 do i = 1, nn
552 jj = i + nn * (j - 1)
553 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
554 + work2(i + nn * (2 - 1)) * ct(2, j) &
555 + work2(i + nn * (3 - 1)) * ct(3, j) &
556 + work2(i + nn * (4 - 1)) * ct(4, j) &
557 + work2(i + nn * (5 - 1)) * ct(5, j) &
558 + work2(i + nn * (6 - 1)) * ct(6, j) &
559 + work2(i + nn * (7 - 1)) * ct(7, j) &
560 + work2(i + nn * (8 - 1)) * ct(8, j) &
561 + work2(i + nn * (9 - 1)) * ct(9, j) &
562 + work2(i + nn * (10 - 1)) * ct(10, j) &
563 + work2(i + nn * (11 - 1)) * ct(11, j) &
564 + work2(i + nn * (12 - 1)) * ct(12, j) &
565 + work2(i + nn * (13 - 1)) * ct(13, j)
566 end do
567 end do
568
569 end subroutine tnsr3d_el_n13_cpu
570
571 subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
572 integer, parameter :: n = 12
573 integer, parameter :: nn = n**2
574 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
575 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
576 real(kind=rp) :: work(n**3), work2(n**3)
577 integer :: i, j, l
578 integer :: ii, jj
579
580 do j = 1, nn
581 do i = 1, n
582 ii = i + n * (j - 1)
583 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
584 + a(i,2) * u(2 + n * (j - 1)) &
585 + a(i,3) * u(3 + n * (j - 1)) &
586 + a(i,4) * u(4 + n * (j - 1)) &
587 + a(i,5) * u(5 + n * (j - 1)) &
588 + a(i,6) * u(6 + n * (j - 1)) &
589 + a(i,7) * u(7 + n * (j - 1)) &
590 + a(i,8) * u(8 + n * (j - 1)) &
591 + a(i,9) * u(9 + n * (j - 1)) &
592 + a(i,10) * u(10 + n * (j - 1)) &
593 + a(i,11) * u(11 + n * (j - 1)) &
594 + a(i,12) * u(12 + n * (j - 1))
595 end do
596 end do
597
598 do i = 1, n
599 do j = 1, n
600 do l = 1, n
601 ii = l + n * (j - 1) + nn * (i - 1)
602 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
603 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
604 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
605 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
606 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
607 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
608 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
609 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
610 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
611 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
612 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
613 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j)
614 end do
615 end do
616 end do
617
618 do j = 1, n
619 do i = 1, nn
620 jj = i + nn * (j - 1)
621 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
622 + work2(i + nn * (2 - 1)) * ct(2, j) &
623 + work2(i + nn * (3 - 1)) * ct(3, j) &
624 + work2(i + nn * (4 - 1)) * ct(4, j) &
625 + work2(i + nn * (5 - 1)) * ct(5, j) &
626 + work2(i + nn * (6 - 1)) * ct(6, j) &
627 + work2(i + nn * (7 - 1)) * ct(7, j) &
628 + work2(i + nn * (8 - 1)) * ct(8, j) &
629 + work2(i + nn * (9 - 1)) * ct(9, j) &
630 + work2(i + nn * (10 - 1)) * ct(10, j) &
631 + work2(i + nn * (11 - 1)) * ct(11, j) &
632 + work2(i + nn * (12 - 1)) * ct(12, j)
633 end do
634 end do
635
636 end subroutine tnsr3d_el_n12_cpu
637
638 subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
639 integer, parameter :: n = 11
640 integer, parameter :: nn = n**2
641 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
642 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
643 real(kind=rp) :: work(n**3), work2(n**3)
644 integer :: i, j, l
645 integer :: ii, jj
646
647 do j = 1, nn
648 do i = 1, n
649 ii = i + n * (j - 1)
650 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
651 + a(i,2) * u(2 + n * (j - 1)) &
652 + a(i,3) * u(3 + n * (j - 1)) &
653 + a(i,4) * u(4 + n * (j - 1)) &
654 + a(i,5) * u(5 + n * (j - 1)) &
655 + a(i,6) * u(6 + n * (j - 1)) &
656 + a(i,7) * u(7 + n * (j - 1)) &
657 + a(i,8) * u(8 + n * (j - 1)) &
658 + a(i,9) * u(9 + n * (j - 1)) &
659 + a(i,10) * u(10 + n * (j - 1)) &
660 + a(i,11) * u(11 + n * (j - 1))
661 end do
662 end do
663
664 do i = 1, n
665 do j = 1, n
666 do l = 1, n
667 ii = l + n * (j - 1) + nn * (i - 1)
668 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
669 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
670 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
671 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
672 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
673 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
674 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
675 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
676 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
677 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
678 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j)
679 end do
680 end do
681 end do
682
683 do j = 1, n
684 do i = 1, nn
685 jj = i + nn * (j - 1)
686 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
687 + work2(i + nn * (2 - 1)) * ct(2, j) &
688 + work2(i + nn * (3 - 1)) * ct(3, j) &
689 + work2(i + nn * (4 - 1)) * ct(4, j) &
690 + work2(i + nn * (5 - 1)) * ct(5, j) &
691 + work2(i + nn * (6 - 1)) * ct(6, j) &
692 + work2(i + nn * (7 - 1)) * ct(7, j) &
693 + work2(i + nn * (8 - 1)) * ct(8, j) &
694 + work2(i + nn * (9 - 1)) * ct(9, j) &
695 + work2(i + nn * (10 - 1)) * ct(10, j) &
696 + work2(i + nn * (11 - 1)) * ct(11, j)
697 end do
698 end do
699
700 end subroutine tnsr3d_el_n11_cpu
701
702 subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
703 integer, parameter :: n = 10
704 integer, parameter :: nn = n**2
705 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
706 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
707 real(kind=rp) :: work(n**3), work2(n**3)
708 integer :: i, j, l
709 integer :: ii, jj
710
711 do j = 1, nn
712 do i = 1, n
713 ii = i + n * (j - 1)
714 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
715 + a(i,2) * u(2 + n * (j - 1)) &
716 + a(i,3) * u(3 + n * (j - 1)) &
717 + a(i,4) * u(4 + n * (j - 1)) &
718 + a(i,5) * u(5 + n * (j - 1)) &
719 + a(i,6) * u(6 + n * (j - 1)) &
720 + a(i,7) * u(7 + n * (j - 1)) &
721 + a(i,8) * u(8 + n * (j - 1)) &
722 + a(i,9) * u(9 + n * (j - 1)) &
723 + a(i,10) * u(10 + n * (j - 1))
724 end do
725 end do
726
727 do i = 1, n
728 do j = 1, n
729 do l = 1, n
730 ii = l + n * (j - 1) + nn * (i - 1)
731 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
732 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
733 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
734 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
735 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
736 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
737 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
738 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
739 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
740 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j)
741 end do
742 end do
743 end do
744
745 do j = 1, n
746 do i = 1, nn
747 jj = i + nn * (j - 1)
748 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
749 + work2(i + nn * (2 - 1)) * ct(2, j) &
750 + work2(i + nn * (3 - 1)) * ct(3, j) &
751 + work2(i + nn * (4 - 1)) * ct(4, j) &
752 + work2(i + nn * (5 - 1)) * ct(5, j) &
753 + work2(i + nn * (6 - 1)) * ct(6, j) &
754 + work2(i + nn * (7 - 1)) * ct(7, j) &
755 + work2(i + nn * (8 - 1)) * ct(8, j) &
756 + work2(i + nn * (9 - 1)) * ct(9, j) &
757 + work2(i + nn * (10 - 1)) * ct(10, j)
758 end do
759 end do
760
761 end subroutine tnsr3d_el_n10_cpu
762
763 subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
764 integer, parameter :: n = 9
765 integer, parameter :: nn = n**2
766 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
767 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
768 real(kind=rp) :: work(n**3), work2(n**3)
769 integer :: i, j, l
770 integer :: ii, jj
771
772 do j = 1, nn
773 do i = 1, n
774 ii = i + n * (j - 1)
775 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
776 + a(i,2) * u(2 + n * (j - 1)) &
777 + a(i,3) * u(3 + n * (j - 1)) &
778 + a(i,4) * u(4 + n * (j - 1)) &
779 + a(i,5) * u(5 + n * (j - 1)) &
780 + a(i,6) * u(6 + n * (j - 1)) &
781 + a(i,7) * u(7 + n * (j - 1)) &
782 + a(i,8) * u(8 + n * (j - 1)) &
783 + a(i,9) * u(9 + n * (j - 1))
784 end do
785 end do
786
787 do i = 1, n
788 do j = 1, n
789 do l = 1, n
790 ii = l + n * (j - 1) + nn * (i - 1)
791 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
792 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
793 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
794 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
795 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
796 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
797 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
798 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
799 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j)
800 end do
801 end do
802 end do
803
804 do j = 1, n
805 do i = 1, nn
806 jj = i + nn * (j - 1)
807 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
808 + work2(i + nn * (2 - 1)) * ct(2, j) &
809 + work2(i + nn * (3 - 1)) * ct(3, j) &
810 + work2(i + nn * (4 - 1)) * ct(4, j) &
811 + work2(i + nn * (5 - 1)) * ct(5, j) &
812 + work2(i + nn * (6 - 1)) * ct(6, j) &
813 + work2(i + nn * (7 - 1)) * ct(7, j) &
814 + work2(i + nn * (8 - 1)) * ct(8, j) &
815 + work2(i + nn * (9 - 1)) * ct(9, j)
816 end do
817 end do
818
819 end subroutine tnsr3d_el_n9_cpu
820
821 subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
822 integer, parameter :: n = 8
823 integer, parameter :: nn = n**2
824 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
825 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
826 real(kind=rp) :: work(n**3), work2(n**3)
827 integer :: i, j, l
828 integer :: ii, jj
829
830 do j = 1, nn
831 do i = 1, n
832 ii = i + n * (j - 1)
833 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
834 + a(i,2) * u(2 + n * (j - 1)) &
835 + a(i,3) * u(3 + n * (j - 1)) &
836 + a(i,4) * u(4 + n * (j - 1)) &
837 + a(i,5) * u(5 + n * (j - 1)) &
838 + a(i,6) * u(6 + n * (j - 1)) &
839 + a(i,7) * u(7 + n * (j - 1)) &
840 + a(i,8) * u(8 + n * (j - 1))
841 end do
842 end do
843
844 do i = 1, n
845 do j = 1, n
846 do l = 1, n
847 ii = l + n * (j - 1) + nn * (i - 1)
848 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
849 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
850 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
851 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
852 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
853 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
854 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
855 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j)
856 end do
857 end do
858 end do
859
860 do j = 1, n
861 do i = 1, nn
862 jj = i + nn * (j - 1)
863 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
864 + work2(i + nn * (2 - 1)) * ct(2, j) &
865 + work2(i + nn * (3 - 1)) * ct(3, j) &
866 + work2(i + nn * (4 - 1)) * ct(4, j) &
867 + work2(i + nn * (5 - 1)) * ct(5, j) &
868 + work2(i + nn * (6 - 1)) * ct(6, j) &
869 + work2(i + nn * (7 - 1)) * ct(7, j) &
870 + work2(i + nn * (8 - 1)) * ct(8, j)
871 end do
872 end do
873
874 end subroutine tnsr3d_el_n8_cpu
875
876 subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
877 integer, parameter :: n = 7
878 integer, parameter :: nn = n**2
879 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
880 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
881 real(kind=rp) :: work(n**3), work2(n**3)
882 integer :: i, j, l
883 integer :: ii, jj
884
885 do j = 1, nn
886 do i = 1, n
887 ii = i + n * (j - 1)
888 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
889 + a(i,2) * u(2 + n * (j - 1)) &
890 + a(i,3) * u(3 + n * (j - 1)) &
891 + a(i,4) * u(4 + n * (j - 1)) &
892 + a(i,5) * u(5 + n * (j - 1)) &
893 + a(i,6) * u(6 + n * (j - 1)) &
894 + a(i,7) * u(7 + n * (j - 1))
895 end do
896 end do
897
898 do i = 1, n
899 do j = 1, n
900 do l = 1, n
901 ii = l + n * (j - 1) + nn * (i - 1)
902 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
903 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
904 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
905 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
906 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
907 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
908 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j)
909 end do
910 end do
911 end do
912
913 do j = 1, n
914 do i = 1, nn
915 jj = i + nn * (j - 1)
916 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
917 + work2(i + nn * (2 - 1)) * ct(2, j) &
918 + work2(i + nn * (3 - 1)) * ct(3, j) &
919 + work2(i + nn * (4 - 1)) * ct(4, j) &
920 + work2(i + nn * (5 - 1)) * ct(5, j) &
921 + work2(i + nn * (6 - 1)) * ct(6, j) &
922 + work2(i + nn * (7 - 1)) * ct(7, j)
923 end do
924 end do
925
926 end subroutine tnsr3d_el_n7_cpu
927
928 subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
929 integer, parameter :: n = 6
930 integer, parameter :: nn = n**2
931 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
932 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
933 real(kind=rp) :: work(n**3), work2(n**3)
934 integer :: i, j, l
935 integer :: ii, jj
936
937 do j = 1, nn
938 do i = 1, n
939 ii = i + n * (j - 1)
940 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
941 + a(i,2) * u(2 + n * (j - 1)) &
942 + a(i,3) * u(3 + n * (j - 1)) &
943 + a(i,4) * u(4 + n * (j - 1)) &
944 + a(i,5) * u(5 + n * (j - 1)) &
945 + a(i,6) * u(6 + n * (j - 1))
946 end do
947 end do
948
949 do i = 1, n
950 do j = 1, n
951 do l = 1, n
952 ii = l + n * (j - 1) + nn * (i - 1)
953 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
954 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
955 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
956 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
957 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
958 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j)
959 end do
960 end do
961 end do
962
963 do j = 1, n
964 do i = 1, nn
965 jj = i + nn * (j - 1)
966 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
967 + work2(i + nn * (2 - 1)) * ct(2, j) &
968 + work2(i + nn * (3 - 1)) * ct(3, j) &
969 + work2(i + nn * (4 - 1)) * ct(4, j) &
970 + work2(i + nn * (5 - 1)) * ct(5, j) &
971 + work2(i + nn * (6 - 1)) * ct(6, j)
972 end do
973 end do
974
975 end subroutine tnsr3d_el_n6_cpu
976
977 subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
978 integer, parameter :: n = 5
979 integer, parameter :: nn = n**2
980 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
981 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
982 real(kind=rp) :: work(n**3), work2(n**3)
983 integer :: i, j, l
984 integer :: ii, jj
985
986 do j = 1, nn
987 do i = 1, n
988 ii = i + n * (j - 1)
989 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
990 + a(i,2) * u(2 + n * (j - 1)) &
991 + a(i,3) * u(3 + n * (j - 1)) &
992 + a(i,4) * u(4 + n * (j - 1)) &
993 + a(i,5) * u(5 + n * (j - 1))
994 end do
995 end do
996
997 do i = 1, n
998 do j = 1, n
999 do l = 1, n
1000 ii = l + n * (j - 1) + nn * (i - 1)
1001 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1002 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1003 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
1004 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
1005 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j)
1006 end do
1007 end do
1008 end do
1009
1010 do j = 1, n
1011 do i = 1, nn
1012 jj = i + nn * (j - 1)
1013 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1014 + work2(i + nn * (2 - 1)) * ct(2, j) &
1015 + work2(i + nn * (3 - 1)) * ct(3, j) &
1016 + work2(i + nn * (4 - 1)) * ct(4, j) &
1017 + work2(i + nn * (5 - 1)) * ct(5, j)
1018 end do
1019 end do
1020
1021 end subroutine tnsr3d_el_n5_cpu
1022
1023 subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
1024 integer, parameter :: n = 4
1025 integer, parameter :: nn = n**2
1026 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1027 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1028 real(kind=rp) :: work(n**3), work2(n**3)
1029 integer :: i, j, l
1030 integer :: ii, jj
1031
1032 do j = 1, nn
1033 do i = 1, n
1034 ii = i + n * (j - 1)
1035 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1036 + a(i,2) * u(2 + n * (j - 1)) &
1037 + a(i,3) * u(3 + n * (j - 1)) &
1038 + a(i,4) * u(4 + n * (j - 1))
1039 end do
1040 end do
1041
1042 do i = 1, n
1043 do j = 1, n
1044 do l = 1, n
1045 ii = l + n * (j - 1) + nn * (i - 1)
1046 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1047 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1048 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
1049 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j)
1050 end do
1051 end do
1052 end do
1053
1054 do j = 1, n
1055 do i = 1, nn
1056 jj = i + nn * (j - 1)
1057 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1058 + work2(i + nn * (2 - 1)) * ct(2, j) &
1059 + work2(i + nn * (3 - 1)) * ct(3, j) &
1060 + work2(i + nn * (4 - 1)) * ct(4, j)
1061 end do
1062 end do
1063
1064 end subroutine tnsr3d_el_n4_cpu
1065
1066 subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
1067 integer, parameter :: n = 3
1068 integer, parameter :: nn = n**2
1069 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1070 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1071 real(kind=rp) :: work(n**3), work2(n**3)
1072 integer :: i, j, l
1073 integer :: ii, jj
1074
1075 do j = 1, nn
1076 do i = 1, n
1077 ii = i + n * (j - 1)
1078 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1079 + a(i,2) * u(2 + n * (j - 1)) &
1080 + a(i,3) * u(3 + n * (j - 1))
1081 end do
1082 end do
1083
1084 do i = 1, n
1085 do j = 1, n
1086 do l = 1, n
1087 ii = l + n * (j - 1) + nn * (i - 1)
1088 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1089 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1090 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j)
1091 end do
1092 end do
1093 end do
1094
1095 do j = 1, n
1096 do i = 1, nn
1097 jj = i + nn * (j - 1)
1098 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1099 + work2(i + nn * (2 - 1)) * ct(2, j) &
1100 + work2(i + nn * (3 - 1)) * ct(3, j)
1101 end do
1102 end do
1103
1104 end subroutine tnsr3d_el_n3_cpu
1105
1106 subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
1107 integer, parameter :: n = 2
1108 integer, parameter :: nn = n**2
1109 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1110 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1111 real(kind=rp) :: work(n**3), work2(n**3)
1112 integer :: i, j, l
1113 integer :: ii, jj
1114
1115 do j = 1, nn
1116 do i = 1, n
1117 ii = i + n * (j - 1)
1118 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1119 + a(i,2) * u(2 + n * (j - 1))
1120 end do
1121 end do
1122
1123 do i = 1, n
1124 do j = 1, n
1125 do l = 1, n
1126 ii = l + n * (j - 1) + nn * (i - 1)
1127 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1128 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j)
1129 end do
1130 end do
1131 end do
1132
1133 do j = 1, n
1134 do i = 1, nn
1135 jj = i + nn * (j - 1)
1136 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1137 + work2(i + nn * (2 - 1)) * ct(2, j)
1138 end do
1139 end do
1140
1141 end subroutine tnsr3d_el_n2_cpu
1142
1143 subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
1144 integer, intent(in) :: nv, nu, nelv
1145 real(kind=rp), intent(inout) :: v(nv*nv*nv, nelv)
1146 real(kind=rp), intent(in) :: u(nu*nu*nu, nelv)
1147 real(kind=rp), intent(in) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1148
1149 if (nu .eq. 2 .and. nv .eq. 4) then
1150 call tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
1151 else if (nu .eq. 4) then
1152 call tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
1153 else
1154 call tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
1155 end if
1156
1157 end subroutine tnsr3d_cpu
1158
1159 subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
1160 integer, intent(in) :: nv, nu, nelv
1161 real(kind=rp), intent(inout) :: v(nv*nv*nv, nelv)
1162 real(kind=rp), intent(in) :: u(nu*nu*nu, nelv)
1163 real(kind=rp), intent(in) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1164 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1165 integer :: ie, i, j, k, l, ii, jj
1166 integer :: nunu, nvnu, nvnv
1167
1168 nvnu = nv * nu
1169 nunu = nu * nu
1170 nvnv = nv * nv
1171
1172 do ie = 1, nelv
1173 do j = 1, nunu
1174 do i = 1, nv
1175 ii = i + nv * (j - 1)
1176 tmp = 0.0_rp
1177 do k = 1, nu
1178 tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
1179 end do
1180 work(ii) = tmp
1181 end do
1182 end do
1183
1184 do i = 1, nu
1185 do j = 1, nv
1186 do l = 1, nv
1187 ii = l + nv * (j - 1) + nvnv * (i - 1)
1188 tmp = 0.0_rp
1189 do k = 1, nu
1190 jj = l + nv * (k - 1) + nvnu * (i - 1)
1191 tmp = tmp + work(jj) * bt(k,j)
1192 end do
1193 work2(ii) = tmp
1194 end do
1195 end do
1196 end do
1197
1198 do j = 1, nv
1199 do i = 1, nvnv
1200 jj = i + nvnv * (j - 1)
1201 tmp = 0.0_rp
1202 do k = 1, nu
1203 ii = i + nvnv * (k - 1)
1204 tmp = tmp + work2(ii) * ct(k, j)
1205 end do
1206 v(jj, ie) = tmp
1207 end do
1208 end do
1209 end do
1210
1211 end subroutine tnsr3d_nvnu_cpu
1212
1213 subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
1214 integer, parameter :: nu = 2
1215 integer, parameter :: nv = 4
1216 integer, parameter :: nunu = 4
1217 integer, parameter :: nvnu = 8
1218 integer, parameter :: nvnv = 16
1219 integer, intent(in) :: nelv
1220 real(kind=rp), intent(inout) :: v(nv*nv*nv, nelv)
1221 real(kind=rp), intent(in) :: u(nu*nu*nu, nelv)
1222 real(kind=rp), intent(in) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1223 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1224 integer :: ie, i, j, k, l, ii, jj
1225
1226 do ie = 1, nelv
1227 do j = 1, nunu
1228 do i = 1, nv
1229 ii = i + nv * (j - 1)
1230 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1231 + a(i,2) * u(2 + nu * (j - 1), ie)
1232 end do
1233 end do
1234
1235 do i = 1, nu
1236 do j = 1, nv
1237 do l = 1, nv
1238 ii = l + nv * (j - 1) + nvnv * (i - 1)
1239 tmp = 0.0_rp
1240 do k = 1, nu
1241 jj = l + nv * (k - 1) + nvnu * (i - 1)
1242 tmp = tmp + work(jj) * bt(k,j)
1243 end do
1244 work2(ii) = tmp
1245 end do
1246 end do
1247 end do
1248
1249 do j = 1, nv
1250 do i = 1, nvnv
1251 jj = i + nvnv * (j - 1)
1252 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1253 + work2(i + nvnv * (2 - 1)) * ct(2, j)
1254 end do
1255 end do
1256 end do
1257
1258 end subroutine tnsr3d_nu2nv4_cpu
1259
1260 subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
1261 integer, parameter :: nu = 4
1262 integer, parameter :: nunu = 16
1263 integer, intent(in) :: nv, nelv
1264 real(kind=rp), intent(inout) :: v(nv*nv*nv, nelv)
1265 real(kind=rp), intent(in) :: u(nu*nu*nu, nelv)
1266 real(kind=rp), intent(in) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1267 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1268 integer :: ie, i, j, k, l, ii, jj
1269 integer :: nvnu, nvnv
1270
1271 nvnu = nv * nu
1272 nvnv = nv * nv
1273
1274 do ie = 1, nelv
1275 do j = 1, nunu
1276 do i = 1, nv
1277 ii = i + nv * (j - 1)
1278 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1279 + a(i,2) * u(2 + nu * (j - 1), ie) &
1280 + a(i,3) * u(3 + nu * (j - 1), ie) &
1281 + a(i,4) * u(4 + nu * (j - 1), ie)
1282 end do
1283 end do
1284
1285 do i = 1, nu
1286 do j = 1, nv
1287 do l = 1, nv
1288 ii = l + nv * (j - 1) + nvnv * (i - 1)
1289 tmp = 0.0_rp
1290 do k = 1, nu
1291 jj = l + nv * (k - 1) + nvnu * (i - 1)
1292 tmp = tmp + work(jj) * bt(k,j)
1293 end do
1294 work2(ii) = tmp
1295 end do
1296 end do
1297 end do
1298
1299 do j = 1, nv
1300 do i = 1, nvnv
1301 jj = i + nvnv * (j - 1)
1302 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1303 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1304 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1305 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1306 end do
1307 end do
1308 end do
1309
1310 end subroutine tnsr3d_nu4_cpu
1311
1312 subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
1313 integer, intent(in) :: nv, nu, nelv
1314 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1315 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1316
1317 if (nu .eq. 4 .and. nv .eq. 2) then
1318 call tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
1319 else
1320 call tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
1321 end if
1322
1323 end subroutine tnsr1_3d_cpu
1324
1325 subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
1326 integer, intent(in) :: nv, nu, nelv
1327 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1328 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1329 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1330 integer :: e, e0, ee, es, iu, iv, nu3, nv3
1331 integer :: i, j, k, l, ii, jj, kk
1332 integer :: nunu, nvnu, nvnv
1333 real(kind=rp) :: tmp
1334
1335 nvnu = nv * nu
1336 nunu = nu * nu
1337 nvnv = nv * nv
1338
1339 e0 = 1
1340 es = 1
1341 ee = nelv
1342
1343 if (nv .gt. nu) then
1344 e0 = nelv
1345 es = -1
1346 ee = 1
1347 end if
1348
1349 nu3 = nu**3
1350 nv3 = nv**3
1351
1352 do e = e0, ee, es
1353 iu = (e-1)*nu3
1354 iv = (e-1)*nv3
1355
1356 do j = 1, nunu
1357 do i = 1, nv
1358 ii = i + nv * (j - 1)
1359 tmp = 0.0_rp
1360 do k = 1, nu
1361 kk = k + nu * (j - 1) + iu
1362 tmp = tmp + a(i,k) * v(kk)
1363 end do
1364 work(ii) = tmp
1365 end do
1366 end do
1367
1368 do i = 1, nu
1369 do j = 1, nv
1370 do l = 1, nv
1371 ii = l + nv * (j - 1) + nvnv * (i - 1)
1372 tmp = 0.0_rp
1373 do k = 1, nu
1374 jj = l + nv * (k - 1) + nvnu * (i - 1)
1375 tmp = tmp + work(jj) * bt(k,j)
1376 end do
1377 work2(ii) = tmp
1378 end do
1379 end do
1380 end do
1381
1382 do j = 1, nv
1383 do i = 1, nvnv
1384 jj = i + nvnv * (j - 1) + iv
1385 tmp = 0.0_rp
1386 do k = 1, nu
1387 ii = i + nvnv * (k - 1)
1388 tmp = tmp + work2(ii) * ct(k, j)
1389 end do
1390 v(jj) = tmp
1391 end do
1392 end do
1393 end do
1394
1395 end subroutine tnsr1_3d_nvnu_cpu
1396
1397 subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
1398 integer, parameter :: nu = 4
1399 integer, parameter :: nv = 2
1400 integer, parameter :: nunu = 16
1401 integer, parameter :: nvnu = 8
1402 integer, parameter :: nvnv = 4
1403 integer, parameter :: nununu = 64
1404 integer, parameter :: nvnvnv = 8
1405 integer, intent(in) :: nelv
1406 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1407 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1408 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1409 integer :: e, iu, iv
1410 integer :: i, j, k, l, ii, jj
1411 real(kind=rp) :: tmp
1412
1413 do e = 1, nelv
1414 iu = (e-1)*nununu
1415 iv = (e-1)*nvnvnv
1416
1417 do j = 1, nunu
1418 do i = 1, nv
1419 ii = i + nv * (j - 1)
1420 work(ii) = a(i,1) * v(1 + nu * (j - 1) + iu) &
1421 + a(i,2) * v(2 + nu * (j - 1) + iu) &
1422 + a(i,3) * v(3 + nu * (j - 1) + iu) &
1423 + a(i,4) * v(4 + nu * (j - 1) + iu)
1424 end do
1425 end do
1426
1427 do i = 1, nu
1428 do j = 1, nv
1429 do l = 1, nv
1430 ii = l + nv * (j - 1) + nvnv * (i - 1)
1431 tmp = 0.0_rp
1432 do k = 1, nu
1433 jj = l + nv * (k - 1) + nvnu * (i - 1)
1434 tmp = tmp + work(jj) * bt(k,j)
1435 end do
1436 work2(ii) = tmp
1437 end do
1438 end do
1439 end do
1440
1441 do j = 1, nv
1442 do i = 1, nvnv
1443 jj = i + nvnv * (j - 1) + iv
1444 v(jj) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1445 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1446 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1447 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1448
1449 end do
1450 end do
1451 end do
1452
1453 end subroutine tnsr1_3d_nu4nv2_cpu
1454
1455end module tensor_cpu
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine tnsr3d_el_1_12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n5_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_8_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_6_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n13_cpu(v, u, a, bt, ct)
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n3_cpu(v, u, a, bt, ct)
subroutine tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n7_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n11_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_nu_cpu(v, u, nu, a, bt, ct)
subroutine tnsr3d_el_n9_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n6_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n2_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_4_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n4_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_10_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n_cpu(v, u, a, bt, ct, n)
subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n14_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n10_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n8_cpu(v, u, a, bt, ct)