Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor_cpu.f90
Go to the documentation of this file.
2 use num_types, only : rp
3 use mxm_wrapper, only : mxm
4 implicit none
5 private
6
8
9contains
10
11 subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
12 integer, intent(in) :: nv, nu
13 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
14 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
15 real(kind=rp) :: work(0:nu**2*nv)
16
17 call mxm(a, nv, u, nu, work, nu)
18 call mxm(work, nv, bt, nu, v, nv)
19
20 end subroutine tnsr2d_el_cpu
21
22 subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
23 integer, intent(in) :: nv, nu
24 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
25 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
26
27 if (nv .eq. nu) then
28 select case (nv)
29 case (14)
30 call tnsr3d_el_n14_cpu(v, u, a, bt, ct)
31 case (13)
32 call tnsr3d_el_n13_cpu(v, u, a, bt, ct)
33 case (12)
34 call tnsr3d_el_n12_cpu(v, u, a, bt, ct)
35 case (11)
36 call tnsr3d_el_n11_cpu(v, u, a, bt, ct)
37 case (10)
38 call tnsr3d_el_n10_cpu(v, u, a, bt, ct)
39 case (9)
40 call tnsr3d_el_n9_cpu(v, u, a, bt, ct)
41 case (8)
42 call tnsr3d_el_n8_cpu(v, u, a, bt, ct)
43 case (7)
44 call tnsr3d_el_n7_cpu(v, u, a, bt, ct)
45 case (6)
46 call tnsr3d_el_n6_cpu(v, u, a, bt, ct)
47 case (5)
48 call tnsr3d_el_n5_cpu(v, u, a, bt, ct)
49 case (4)
50 call tnsr3d_el_n4_cpu(v, u, a, bt, ct)
51 case (3)
52 call tnsr3d_el_n3_cpu(v, u, a, bt, ct)
53 case (2)
54 call tnsr3d_el_n2_cpu(v, u, a, bt, ct)
55 case default
56 call tnsr3d_el_n_cpu(v, u, a, bt, ct, nv)
57 end select
58 else
59 call tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
60 end if
61
62 end subroutine tnsr3d_el_cpu
63
64 subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
65 integer, intent(in) :: nv, nu
66 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
67 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
68 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
69 real(kind=rp) :: tmp
70 integer :: i, j, k, l, nunu, nvnu, nvnv
71 integer :: ii, jj
72 nvnu = nv * nu
73 nunu = nu * nu
74 nvnv = nv * nv
75
76 do j = 1, nunu
77 do i = 1, nv
78 ii = i + nv * (j - 1)
79 tmp = 0.0_rp
80 do k = 1, nu
81 tmp = tmp + a(i,k) * u(k + nu * (j - 1))
82 end do
83 work(ii) = tmp
84 end do
85 end do
86
87 do i = 1, nu
88 do j = 1, nv
89 do l = 1, nv
90 ii = l + nv * (j - 1) + nvnv * (i - 1)
91 tmp = 0.0_rp
92 do k = 1, nu
93 jj = l + nv * (k - 1) + nvnu * (i - 1)
94 tmp = tmp + work(jj) * bt(k,j)
95 end do
96 work2(ii) = tmp
97 end do
98 end do
99 end do
100
101 do j = 1, nv
102 do i = 1, nvnv
103 jj = i + nvnv * (j - 1)
104 tmp = 0.0_rp
105 do k = 1, nu
106 ii = i + nvnv * (k - 1)
107 tmp = tmp + work2(ii) * ct(k, j)
108 end do
109 v(jj) = tmp
110 end do
111 end do
112
113 end subroutine tnsr3d_el_nvnu_cpu
114
115 subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
116 integer, intent(in) :: n
117 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
118 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
119 real(kind=rp) :: work(n**3), work2(n**3), tmp
120 integer :: i, j, l, k
121 integer :: ii, jj, nn
122
123 nn = n**2
124
125 do j = 1, nn
126 do i = 1, n
127 ii = i + n * (j - 1)
128 tmp = 0.0_rp
129 do k = 1, n
130 tmp = tmp + a(i,k) * u(k + n * (j - 1))
131 end do
132 work(ii) = tmp
133 end do
134 end do
135
136 do i = 1, n
137 do j = 1, n
138 do l = 1, n
139 ii = l + n * (j - 1) + nn * (i - 1)
140 tmp = 0.0_rp
141 do k = 1, n
142 tmp = tmp + work(l + n * (k - 1) + nn * (i - 1)) * bt(k,j)
143 end do
144 work2(ii) = tmp
145 end do
146 end do
147 end do
148
149 do j = 1, n
150 do i = 1, nn
151 jj = i + nn * (j - 1)
152 tmp = 0.0_rp
153 do k = 1, n
154 tmp = tmp + work2(i + nn * (k - 1)) * ct(k, j)
155 end do
156 v(jj) = tmp
157 end do
158 end do
159
160 end subroutine tnsr3d_el_n_cpu
161
162 subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
163 integer, parameter :: n = 14
164 integer, parameter :: nn = n**2
165 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
166 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
167 real(kind=rp) :: work(n**3), work2(n**3)
168 integer :: i, j, l
169 integer :: ii, jj
170
171 do j = 1, nn
172 do i = 1, n
173 ii = i + n * (j - 1)
174 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
175 + a(i,2) * u(2 + n * (j - 1)) &
176 + a(i,3) * u(3 + n * (j - 1)) &
177 + a(i,4) * u(4 + n * (j - 1)) &
178 + a(i,5) * u(5 + n * (j - 1)) &
179 + a(i,6) * u(6 + n * (j - 1)) &
180 + a(i,7) * u(7 + n * (j - 1)) &
181 + a(i,8) * u(8 + n * (j - 1)) &
182 + a(i,9) * u(9 + n * (j - 1)) &
183 + a(i,10) * u(10 + n * (j - 1)) &
184 + a(i,11) * u(11 + n * (j - 1)) &
185 + a(i,12) * u(12 + n * (j - 1)) &
186 + a(i,13) * u(13 + n * (j - 1)) &
187 + a(i,14) * u(14 + n * (j - 1))
188 end do
189 end do
190
191 do i = 1, n
192 do j = 1, n
193 do l = 1, n
194 ii = l + n * (j - 1) + nn * (i - 1)
195 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
196 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
197 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
198 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
199 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
200 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
201 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
202 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
203 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
204 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
205 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
206 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
207 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j) &
208 + work(l + n * (14 - 1) + nn * (i - 1)) * bt(14,j)
209 end do
210 end do
211 end do
212
213 do j = 1, n
214 do i = 1, nn
215 jj = i + nn * (j - 1)
216 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
217 + work2(i + nn * (2 - 1)) * ct(2, j) &
218 + work2(i + nn * (3 - 1)) * ct(3, j) &
219 + work2(i + nn * (4 - 1)) * ct(4, j) &
220 + work2(i + nn * (5 - 1)) * ct(5, j) &
221 + work2(i + nn * (6 - 1)) * ct(6, j) &
222 + work2(i + nn * (7 - 1)) * ct(7, j) &
223 + work2(i + nn * (8 - 1)) * ct(8, j) &
224 + work2(i + nn * (9 - 1)) * ct(9, j) &
225 + work2(i + nn * (10 - 1)) * ct(10, j) &
226 + work2(i + nn * (11 - 1)) * ct(11, j) &
227 + work2(i + nn * (12 - 1)) * ct(12, j) &
228 + work2(i + nn * (13 - 1)) * ct(13, j) &
229 + work2(i + nn * (14 - 1)) * ct(14, j)
230 end do
231 end do
232
233 end subroutine tnsr3d_el_n14_cpu
234
235 subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
236 integer, parameter :: n = 13
237 integer, parameter :: nn = n**2
238 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
239 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
240 real(kind=rp) :: work(n**3), work2(n**3)
241 integer :: i, j, l
242 integer :: ii, jj
243
244 do j = 1, nn
245 do i = 1, n
246 ii = i + n * (j - 1)
247 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
248 + a(i,2) * u(2 + n * (j - 1)) &
249 + a(i,3) * u(3 + n * (j - 1)) &
250 + a(i,4) * u(4 + n * (j - 1)) &
251 + a(i,5) * u(5 + n * (j - 1)) &
252 + a(i,6) * u(6 + n * (j - 1)) &
253 + a(i,7) * u(7 + n * (j - 1)) &
254 + a(i,8) * u(8 + n * (j - 1)) &
255 + a(i,9) * u(9 + n * (j - 1)) &
256 + a(i,10) * u(10 + n * (j - 1)) &
257 + a(i,11) * u(11 + n * (j - 1)) &
258 + a(i,12) * u(12 + n * (j - 1)) &
259 + a(i,13) * u(13 + n * (j - 1))
260 end do
261 end do
262
263 do i = 1, n
264 do j = 1, n
265 do l = 1, n
266 ii = l + n * (j - 1) + nn * (i - 1)
267 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
268 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
269 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
270 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
271 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
272 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
273 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
274 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
275 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
276 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
277 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
278 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
279 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j)
280 end do
281 end do
282 end do
283
284 do j = 1, n
285 do i = 1, nn
286 jj = i + nn * (j - 1)
287 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
288 + work2(i + nn * (2 - 1)) * ct(2, j) &
289 + work2(i + nn * (3 - 1)) * ct(3, j) &
290 + work2(i + nn * (4 - 1)) * ct(4, j) &
291 + work2(i + nn * (5 - 1)) * ct(5, j) &
292 + work2(i + nn * (6 - 1)) * ct(6, j) &
293 + work2(i + nn * (7 - 1)) * ct(7, j) &
294 + work2(i + nn * (8 - 1)) * ct(8, j) &
295 + work2(i + nn * (9 - 1)) * ct(9, j) &
296 + work2(i + nn * (10 - 1)) * ct(10, j) &
297 + work2(i + nn * (11 - 1)) * ct(11, j) &
298 + work2(i + nn * (12 - 1)) * ct(12, j) &
299 + work2(i + nn * (13 - 1)) * ct(13, j)
300 end do
301 end do
302
303 end subroutine tnsr3d_el_n13_cpu
304
305 subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
306 integer, parameter :: n = 12
307 integer, parameter :: nn = n**2
308 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
309 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
310 real(kind=rp) :: work(n**3), work2(n**3)
311 integer :: i, j, l
312 integer :: ii, jj
313
314 do j = 1, nn
315 do i = 1, n
316 ii = i + n * (j - 1)
317 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
318 + a(i,2) * u(2 + n * (j - 1)) &
319 + a(i,3) * u(3 + n * (j - 1)) &
320 + a(i,4) * u(4 + n * (j - 1)) &
321 + a(i,5) * u(5 + n * (j - 1)) &
322 + a(i,6) * u(6 + n * (j - 1)) &
323 + a(i,7) * u(7 + n * (j - 1)) &
324 + a(i,8) * u(8 + n * (j - 1)) &
325 + a(i,9) * u(9 + n * (j - 1)) &
326 + a(i,10) * u(10 + n * (j - 1)) &
327 + a(i,11) * u(11 + n * (j - 1)) &
328 + a(i,12) * u(12 + n * (j - 1))
329 end do
330 end do
331
332 do i = 1, n
333 do j = 1, n
334 do l = 1, n
335 ii = l + n * (j - 1) + nn * (i - 1)
336 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
337 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
338 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
339 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
340 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
341 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
342 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
343 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
344 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
345 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
346 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
347 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j)
348 end do
349 end do
350 end do
351
352 do j = 1, n
353 do i = 1, nn
354 jj = i + nn * (j - 1)
355 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
356 + work2(i + nn * (2 - 1)) * ct(2, j) &
357 + work2(i + nn * (3 - 1)) * ct(3, j) &
358 + work2(i + nn * (4 - 1)) * ct(4, j) &
359 + work2(i + nn * (5 - 1)) * ct(5, j) &
360 + work2(i + nn * (6 - 1)) * ct(6, j) &
361 + work2(i + nn * (7 - 1)) * ct(7, j) &
362 + work2(i + nn * (8 - 1)) * ct(8, j) &
363 + work2(i + nn * (9 - 1)) * ct(9, j) &
364 + work2(i + nn * (10 - 1)) * ct(10, j) &
365 + work2(i + nn * (11 - 1)) * ct(11, j) &
366 + work2(i + nn * (12 - 1)) * ct(12, j)
367 end do
368 end do
369
370 end subroutine tnsr3d_el_n12_cpu
371
372 subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
373 integer, parameter :: n = 11
374 integer, parameter :: nn = n**2
375 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
376 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
377 real(kind=rp) :: work(n**3), work2(n**3)
378 integer :: i, j, l
379 integer :: ii, jj
380
381 do j = 1, nn
382 do i = 1, n
383 ii = i + n * (j - 1)
384 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
385 + a(i,2) * u(2 + n * (j - 1)) &
386 + a(i,3) * u(3 + n * (j - 1)) &
387 + a(i,4) * u(4 + n * (j - 1)) &
388 + a(i,5) * u(5 + n * (j - 1)) &
389 + a(i,6) * u(6 + n * (j - 1)) &
390 + a(i,7) * u(7 + n * (j - 1)) &
391 + a(i,8) * u(8 + n * (j - 1)) &
392 + a(i,9) * u(9 + n * (j - 1)) &
393 + a(i,10) * u(10 + n * (j - 1)) &
394 + a(i,11) * u(11 + n * (j - 1))
395 end do
396 end do
397
398 do i = 1, n
399 do j = 1, n
400 do l = 1, n
401 ii = l + n * (j - 1) + nn * (i - 1)
402 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
403 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
404 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
405 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
406 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
407 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
408 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
409 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
410 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
411 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
412 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j)
413 end do
414 end do
415 end do
416
417 do j = 1, n
418 do i = 1, nn
419 jj = i + nn * (j - 1)
420 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
421 + work2(i + nn * (2 - 1)) * ct(2, j) &
422 + work2(i + nn * (3 - 1)) * ct(3, j) &
423 + work2(i + nn * (4 - 1)) * ct(4, j) &
424 + work2(i + nn * (5 - 1)) * ct(5, j) &
425 + work2(i + nn * (6 - 1)) * ct(6, j) &
426 + work2(i + nn * (7 - 1)) * ct(7, j) &
427 + work2(i + nn * (8 - 1)) * ct(8, j) &
428 + work2(i + nn * (9 - 1)) * ct(9, j) &
429 + work2(i + nn * (10 - 1)) * ct(10, j) &
430 + work2(i + nn * (11 - 1)) * ct(11, j)
431 end do
432 end do
433
434 end subroutine tnsr3d_el_n11_cpu
435
436 subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
437 integer, parameter :: n = 10
438 integer, parameter :: nn = n**2
439 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
440 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
441 real(kind=rp) :: work(n**3), work2(n**3)
442 integer :: i, j, l
443 integer :: ii, jj
444
445 do j = 1, nn
446 do i = 1, n
447 ii = i + n * (j - 1)
448 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
449 + a(i,2) * u(2 + n * (j - 1)) &
450 + a(i,3) * u(3 + n * (j - 1)) &
451 + a(i,4) * u(4 + n * (j - 1)) &
452 + a(i,5) * u(5 + n * (j - 1)) &
453 + a(i,6) * u(6 + n * (j - 1)) &
454 + a(i,7) * u(7 + n * (j - 1)) &
455 + a(i,8) * u(8 + n * (j - 1)) &
456 + a(i,9) * u(9 + n * (j - 1)) &
457 + a(i,10) * u(10 + n * (j - 1))
458 end do
459 end do
460
461 do i = 1, n
462 do j = 1, n
463 do l = 1, n
464 ii = l + n * (j - 1) + nn * (i - 1)
465 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
466 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
467 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
468 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
469 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
470 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
471 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
472 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
473 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
474 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j)
475 end do
476 end do
477 end do
478
479 do j = 1, n
480 do i = 1, nn
481 jj = i + nn * (j - 1)
482 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
483 + work2(i + nn * (2 - 1)) * ct(2, j) &
484 + work2(i + nn * (3 - 1)) * ct(3, j) &
485 + work2(i + nn * (4 - 1)) * ct(4, j) &
486 + work2(i + nn * (5 - 1)) * ct(5, j) &
487 + work2(i + nn * (6 - 1)) * ct(6, j) &
488 + work2(i + nn * (7 - 1)) * ct(7, j) &
489 + work2(i + nn * (8 - 1)) * ct(8, j) &
490 + work2(i + nn * (9 - 1)) * ct(9, j) &
491 + work2(i + nn * (10 - 1)) * ct(10, j)
492 end do
493 end do
494
495 end subroutine tnsr3d_el_n10_cpu
496
497 subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
498 integer, parameter :: n = 9
499 integer, parameter :: nn = n**2
500 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
501 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
502 real(kind=rp) :: work(n**3), work2(n**3)
503 integer :: i, j, l
504 integer :: ii, jj
505
506 do j = 1, nn
507 do i = 1, n
508 ii = i + n * (j - 1)
509 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
510 + a(i,2) * u(2 + n * (j - 1)) &
511 + a(i,3) * u(3 + n * (j - 1)) &
512 + a(i,4) * u(4 + n * (j - 1)) &
513 + a(i,5) * u(5 + n * (j - 1)) &
514 + a(i,6) * u(6 + n * (j - 1)) &
515 + a(i,7) * u(7 + n * (j - 1)) &
516 + a(i,8) * u(8 + n * (j - 1)) &
517 + a(i,9) * u(9 + n * (j - 1))
518 end do
519 end do
520
521 do i = 1, n
522 do j = 1, n
523 do l = 1, n
524 ii = l + n * (j - 1) + nn * (i - 1)
525 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
526 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
527 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
528 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
529 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
530 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
531 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
532 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
533 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j)
534 end do
535 end do
536 end do
537
538 do j = 1, n
539 do i = 1, nn
540 jj = i + nn * (j - 1)
541 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
542 + work2(i + nn * (2 - 1)) * ct(2, j) &
543 + work2(i + nn * (3 - 1)) * ct(3, j) &
544 + work2(i + nn * (4 - 1)) * ct(4, j) &
545 + work2(i + nn * (5 - 1)) * ct(5, j) &
546 + work2(i + nn * (6 - 1)) * ct(6, j) &
547 + work2(i + nn * (7 - 1)) * ct(7, j) &
548 + work2(i + nn * (8 - 1)) * ct(8, j) &
549 + work2(i + nn * (9 - 1)) * ct(9, j)
550 end do
551 end do
552
553 end subroutine tnsr3d_el_n9_cpu
554
555 subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
556 integer, parameter :: n = 8
557 integer, parameter :: nn = n**2
558 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
559 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
560 real(kind=rp) :: work(n**3), work2(n**3)
561 integer :: i, j, l
562 integer :: ii, jj
563
564 do j = 1, nn
565 do i = 1, n
566 ii = i + n * (j - 1)
567 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
568 + a(i,2) * u(2 + n * (j - 1)) &
569 + a(i,3) * u(3 + n * (j - 1)) &
570 + a(i,4) * u(4 + n * (j - 1)) &
571 + a(i,5) * u(5 + n * (j - 1)) &
572 + a(i,6) * u(6 + n * (j - 1)) &
573 + a(i,7) * u(7 + n * (j - 1)) &
574 + a(i,8) * u(8 + n * (j - 1))
575 end do
576 end do
577
578 do i = 1, n
579 do j = 1, n
580 do l = 1, n
581 ii = l + n * (j - 1) + nn * (i - 1)
582 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
583 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
584 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
585 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
586 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
587 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
588 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
589 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j)
590 end do
591 end do
592 end do
593
594 do j = 1, n
595 do i = 1, nn
596 jj = i + nn * (j - 1)
597 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
598 + work2(i + nn * (2 - 1)) * ct(2, j) &
599 + work2(i + nn * (3 - 1)) * ct(3, j) &
600 + work2(i + nn * (4 - 1)) * ct(4, j) &
601 + work2(i + nn * (5 - 1)) * ct(5, j) &
602 + work2(i + nn * (6 - 1)) * ct(6, j) &
603 + work2(i + nn * (7 - 1)) * ct(7, j) &
604 + work2(i + nn * (8 - 1)) * ct(8, j)
605 end do
606 end do
607
608 end subroutine tnsr3d_el_n8_cpu
609
610 subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
611 integer, parameter :: n = 7
612 integer, parameter :: nn = n**2
613 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
614 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
615 real(kind=rp) :: work(n**3), work2(n**3)
616 integer :: i, j, l
617 integer :: ii, jj
618
619 do j = 1, nn
620 do i = 1, n
621 ii = i + n * (j - 1)
622 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
623 + a(i,2) * u(2 + n * (j - 1)) &
624 + a(i,3) * u(3 + n * (j - 1)) &
625 + a(i,4) * u(4 + n * (j - 1)) &
626 + a(i,5) * u(5 + n * (j - 1)) &
627 + a(i,6) * u(6 + n * (j - 1)) &
628 + a(i,7) * u(7 + n * (j - 1))
629 end do
630 end do
631
632 do i = 1, n
633 do j = 1, n
634 do l = 1, n
635 ii = l + n * (j - 1) + nn * (i - 1)
636 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
637 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
638 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
639 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
640 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
641 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
642 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j)
643 end do
644 end do
645 end do
646
647 do j = 1, n
648 do i = 1, nn
649 jj = i + nn * (j - 1)
650 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
651 + work2(i + nn * (2 - 1)) * ct(2, j) &
652 + work2(i + nn * (3 - 1)) * ct(3, j) &
653 + work2(i + nn * (4 - 1)) * ct(4, j) &
654 + work2(i + nn * (5 - 1)) * ct(5, j) &
655 + work2(i + nn * (6 - 1)) * ct(6, j) &
656 + work2(i + nn * (7 - 1)) * ct(7, j)
657 end do
658 end do
659
660 end subroutine tnsr3d_el_n7_cpu
661
662 subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
663 integer, parameter :: n = 6
664 integer, parameter :: nn = n**2
665 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
666 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
667 real(kind=rp) :: work(n**3), work2(n**3)
668 integer :: i, j, l
669 integer :: ii, jj
670
671 do j = 1, nn
672 do i = 1, n
673 ii = i + n * (j - 1)
674 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
675 + a(i,2) * u(2 + n * (j - 1)) &
676 + a(i,3) * u(3 + n * (j - 1)) &
677 + a(i,4) * u(4 + n * (j - 1)) &
678 + a(i,5) * u(5 + n * (j - 1)) &
679 + a(i,6) * u(6 + n * (j - 1))
680 end do
681 end do
682
683 do i = 1, n
684 do j = 1, n
685 do l = 1, n
686 ii = l + n * (j - 1) + nn * (i - 1)
687 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
688 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
689 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
690 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
691 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
692 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j)
693 end do
694 end do
695 end do
696
697 do j = 1, n
698 do i = 1, nn
699 jj = i + nn * (j - 1)
700 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
701 + work2(i + nn * (2 - 1)) * ct(2, j) &
702 + work2(i + nn * (3 - 1)) * ct(3, j) &
703 + work2(i + nn * (4 - 1)) * ct(4, j) &
704 + work2(i + nn * (5 - 1)) * ct(5, j) &
705 + work2(i + nn * (6 - 1)) * ct(6, j)
706 end do
707 end do
708
709 end subroutine tnsr3d_el_n6_cpu
710
711 subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
712 integer, parameter :: n = 5
713 integer, parameter :: nn = n**2
714 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
715 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
716 real(kind=rp) :: work(n**3), work2(n**3)
717 integer :: i, j, l
718 integer :: ii, jj
719
720 do j = 1, nn
721 do i = 1, n
722 ii = i + n * (j - 1)
723 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
724 + a(i,2) * u(2 + n * (j - 1)) &
725 + a(i,3) * u(3 + n * (j - 1)) &
726 + a(i,4) * u(4 + n * (j - 1)) &
727 + a(i,5) * u(5 + n * (j - 1))
728 end do
729 end do
730
731 do i = 1, n
732 do j = 1, n
733 do l = 1, n
734 ii = l + n * (j - 1) + nn * (i - 1)
735 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
736 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
737 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
738 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
739 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j)
740 end do
741 end do
742 end do
743
744 do j = 1, n
745 do i = 1, nn
746 jj = i + nn * (j - 1)
747 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
748 + work2(i + nn * (2 - 1)) * ct(2, j) &
749 + work2(i + nn * (3 - 1)) * ct(3, j) &
750 + work2(i + nn * (4 - 1)) * ct(4, j) &
751 + work2(i + nn * (5 - 1)) * ct(5, j)
752 end do
753 end do
754
755 end subroutine tnsr3d_el_n5_cpu
756
757 subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
758 integer, parameter :: n = 4
759 integer, parameter :: nn = n**2
760 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
761 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
762 real(kind=rp) :: work(n**3), work2(n**3)
763 integer :: i, j, l
764 integer :: ii, jj
765
766 do j = 1, nn
767 do i = 1, n
768 ii = i + n * (j - 1)
769 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
770 + a(i,2) * u(2 + n * (j - 1)) &
771 + a(i,3) * u(3 + n * (j - 1)) &
772 + a(i,4) * u(4 + n * (j - 1))
773 end do
774 end do
775
776 do i = 1, n
777 do j = 1, n
778 do l = 1, n
779 ii = l + n * (j - 1) + nn * (i - 1)
780 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
781 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
782 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
783 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j)
784 end do
785 end do
786 end do
787
788 do j = 1, n
789 do i = 1, nn
790 jj = i + nn * (j - 1)
791 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
792 + work2(i + nn * (2 - 1)) * ct(2, j) &
793 + work2(i + nn * (3 - 1)) * ct(3, j) &
794 + work2(i + nn * (4 - 1)) * ct(4, j)
795 end do
796 end do
797
798 end subroutine tnsr3d_el_n4_cpu
799
800 subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
801 integer, parameter :: n = 3
802 integer, parameter :: nn = n**2
803 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
804 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
805 real(kind=rp) :: work(n**3), work2(n**3)
806 integer :: i, j, l
807 integer :: ii, jj
808
809 do j = 1, nn
810 do i = 1, n
811 ii = i + n * (j - 1)
812 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
813 + a(i,2) * u(2 + n * (j - 1)) &
814 + a(i,3) * u(3 + n * (j - 1))
815 end do
816 end do
817
818 do i = 1, n
819 do j = 1, n
820 do l = 1, n
821 ii = l + n * (j - 1) + nn * (i - 1)
822 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
823 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
824 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j)
825 end do
826 end do
827 end do
828
829 do j = 1, n
830 do i = 1, nn
831 jj = i + nn * (j - 1)
832 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
833 + work2(i + nn * (2 - 1)) * ct(2, j) &
834 + work2(i + nn * (3 - 1)) * ct(3, j)
835 end do
836 end do
837
838 end subroutine tnsr3d_el_n3_cpu
839
840 subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
841 integer, parameter :: n = 2
842 integer, parameter :: nn = n**2
843 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
844 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
845 real(kind=rp) :: work(n**3), work2(n**3)
846 integer :: i, j, l
847 integer :: ii, jj
848
849 do j = 1, nn
850 do i = 1, n
851 ii = i + n * (j - 1)
852 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
853 + a(i,2) * u(2 + n * (j - 1))
854 end do
855 end do
856
857 do i = 1, n
858 do j = 1, n
859 do l = 1, n
860 ii = l + n * (j - 1) + nn * (i - 1)
861 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
862 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j)
863 end do
864 end do
865 end do
866
867 do j = 1, n
868 do i = 1, nn
869 jj = i + nn * (j - 1)
870 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
871 + work2(i + nn * (2 - 1)) * ct(2, j)
872 end do
873 end do
874
875 end subroutine tnsr3d_el_n2_cpu
876
877 subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
878 integer, intent(in) :: nv, nu, nelv
879 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
880 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
881 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
882
883 if (nu .eq. 2 .and. nv .eq. 4) then
884 call tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
885 else if (nu .eq. 4) then
886 call tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
887 else
888 call tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
889 end if
890
891 end subroutine tnsr3d_cpu
892
893 subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
894 integer, intent(in) :: nv, nu, nelv
895 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
896 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
897 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
898 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
899 integer :: ie, i, j, k, l, ii, jj
900 integer :: nunu, nvnu, nvnv
901
902 nvnu = nv * nu
903 nunu = nu * nu
904 nvnv = nv * nv
905
906 do ie = 1,nelv
907 do j = 1, nunu
908 do i = 1, nv
909 ii = i + nv * (j - 1)
910 tmp = 0.0_rp
911 do k = 1, nu
912 tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
913 end do
914 work(ii) = tmp
915 end do
916 end do
917
918 do i = 1, nu
919 do j = 1, nv
920 do l = 1, nv
921 ii = l + nv * (j - 1) + nvnv * (i - 1)
922 tmp = 0.0_rp
923 do k = 1, nu
924 jj = l + nv * (k - 1) + nvnu * (i - 1)
925 tmp = tmp + work(jj) * bt(k,j)
926 end do
927 work2(ii) = tmp
928 end do
929 end do
930 end do
931
932 do j = 1, nv
933 do i = 1, nvnv
934 jj = i + nvnv * (j - 1)
935 tmp = 0.0_rp
936 do k = 1, nu
937 ii = i + nvnv * (k - 1)
938 tmp = tmp + work2(ii) * ct(k, j)
939 end do
940 v(jj, ie) = tmp
941 end do
942 end do
943 end do
944
945 end subroutine tnsr3d_nvnu_cpu
946
947 subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
948 integer, parameter :: nu = 2
949 integer, parameter :: nv = 4
950 integer, parameter :: nunu = 4
951 integer, parameter :: nvnu = 8
952 integer, parameter :: nvnv = 16
953 integer, intent(in) :: nelv
954 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
955 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
956 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
957 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
958 integer :: ie, i, j, k, l, ii, jj
959
960 do ie = 1,nelv
961 do j = 1, nunu
962 do i = 1, nv
963 ii = i + nv * (j - 1)
964 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
965 + a(i,2) * u(2 + nu * (j - 1), ie)
966 end do
967 end do
968
969 do i = 1, nu
970 do j = 1, nv
971 do l = 1, nv
972 ii = l + nv * (j - 1) + nvnv * (i - 1)
973 tmp = 0.0_rp
974 do k = 1, nu
975 jj = l + nv * (k - 1) + nvnu * (i - 1)
976 tmp = tmp + work(jj) * bt(k,j)
977 end do
978 work2(ii) = tmp
979 end do
980 end do
981 end do
982
983 do j = 1, nv
984 do i = 1, nvnv
985 jj = i + nvnv * (j - 1)
986 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
987 + work2(i + nvnv * (2 - 1)) * ct(2, j)
988 end do
989 end do
990 end do
991
992 end subroutine tnsr3d_nu2nv4_cpu
993
994 subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
995 integer, parameter :: nu = 4
996 integer, parameter :: nunu = 16
997 integer, intent(in) :: nv, nelv
998 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
999 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
1000 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1001 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1002 integer :: ie, i, j, k, l, ii, jj
1003 integer :: nvnu, nvnv
1004
1005 nvnu = nv * nu
1006 nvnv = nv * nv
1007
1008 do ie = 1,nelv
1009 do j = 1, nunu
1010 do i = 1, nv
1011 ii = i + nv * (j - 1)
1012 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1013 + a(i,2) * u(2 + nu * (j - 1), ie) &
1014 + a(i,3) * u(3 + nu * (j - 1), ie) &
1015 + a(i,4) * u(4 + nu * (j - 1), ie)
1016 end do
1017 end do
1018
1019 do i = 1, nu
1020 do j = 1, nv
1021 do l = 1, nv
1022 ii = l + nv * (j - 1) + nvnv * (i - 1)
1023 tmp = 0.0_rp
1024 do k = 1, nu
1025 jj = l + nv * (k - 1) + nvnu * (i - 1)
1026 tmp = tmp + work(jj) * bt(k,j)
1027 end do
1028 work2(ii) = tmp
1029 end do
1030 end do
1031 end do
1032
1033 do j = 1, nv
1034 do i = 1, nvnv
1035 jj = i + nvnv * (j - 1)
1036 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1037 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1038 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1039 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1040 end do
1041 end do
1042 end do
1043
1044 end subroutine tnsr3d_nu4_cpu
1045
1046 subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
1047 integer, intent(in) :: nv, nu, nelv
1048 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1049 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1050
1051 if (nu .eq. 4 .and. nv .eq. 2) then
1052 call tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
1053 else
1054 call tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
1055 end if
1056
1057 end subroutine tnsr1_3d_cpu
1058
1059 subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
1060 integer, intent(in) :: nv, nu, nelv
1061 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1062 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1063 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1064 integer :: e, e0, ee, es, iu, iv, nu3, nv3
1065 integer :: i, j, k, l, ii, jj, kk
1066 integer :: nunu, nvnu, nvnv
1067 real(kind=rp) :: tmp
1068
1069 nvnu = nv * nu
1070 nunu = nu * nu
1071 nvnv = nv * nv
1072
1073 e0 = 1
1074 es = 1
1075 ee = nelv
1076
1077 if (nv.gt.nu) then
1078 e0 = nelv
1079 es = -1
1080 ee = 1
1081 endif
1082
1083 nu3 = nu**3
1084 nv3 = nv**3
1085
1086 do e = e0,ee,es
1087 iu = (e-1)*nu3
1088 iv = (e-1)*nv3
1089
1090 do j = 1, nunu
1091 do i = 1, nv
1092 ii = i + nv * (j - 1)
1093 tmp = 0.0_rp
1094 do k = 1, nu
1095 kk = k + nu * (j - 1) + iu
1096 tmp = tmp + a(i,k) * v(kk)
1097 end do
1098 work(ii) = tmp
1099 end do
1100 end do
1101
1102 do i = 1, nu
1103 do j = 1, nv
1104 do l = 1, nv
1105 ii = l + nv * (j - 1) + nvnv * (i - 1)
1106 tmp = 0.0_rp
1107 do k = 1, nu
1108 jj = l + nv * (k - 1) + nvnu * (i - 1)
1109 tmp = tmp + work(jj) * bt(k,j)
1110 end do
1111 work2(ii) = tmp
1112 end do
1113 end do
1114 end do
1115
1116 do j = 1, nv
1117 do i = 1, nvnv
1118 jj = i + nvnv * (j - 1) + iv
1119 tmp = 0.0_rp
1120 do k = 1, nu
1121 ii = i + nvnv * (k - 1)
1122 tmp = tmp + work2(ii) * ct(k, j)
1123 end do
1124 v(jj) = tmp
1125 end do
1126 end do
1127 end do
1128
1129 end subroutine tnsr1_3d_nvnu_cpu
1130
1131 subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
1132 integer, parameter :: nu = 4
1133 integer, parameter :: nv = 2
1134 integer, parameter :: nunu = 16
1135 integer, parameter :: nvnu = 8
1136 integer, parameter :: nvnv = 4
1137 integer, parameter :: nununu = 64
1138 integer, parameter :: nvnvnv = 8
1139 integer, intent(in) :: nelv
1140 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1141 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
1142 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1143 integer :: e, iu, iv
1144 integer :: i, j, k, l, ii, jj
1145 real(kind=rp) :: tmp
1146
1147 do e = 1,nelv
1148 iu = (e-1)*nununu
1149 iv = (e-1)*nvnvnv
1150
1151 do j = 1, nunu
1152 do i = 1, nv
1153 ii = i + nv * (j - 1)
1154 work(ii) = a(i,1) * v(1 + nu * (j - 1) + iu) &
1155 + a(i,2) * v(2 + nu * (j - 1) + iu) &
1156 + a(i,3) * v(3 + nu * (j - 1) + iu) &
1157 + a(i,4) * v(4 + nu * (j - 1) + iu)
1158 end do
1159 end do
1160
1161 do i = 1, nu
1162 do j = 1, nv
1163 do l = 1, nv
1164 ii = l + nv * (j - 1) + nvnv * (i - 1)
1165 tmp = 0.0_rp
1166 do k = 1, nu
1167 jj = l + nv * (k - 1) + nvnu * (i - 1)
1168 tmp = tmp + work(jj) * bt(k,j)
1169 end do
1170 work2(ii) = tmp
1171 end do
1172 end do
1173 end do
1174
1175 do j = 1, nv
1176 do i = 1, nvnv
1177 jj = i + nvnv * (j - 1) + iv
1178 v(jj) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1179 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1180 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1181 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1182
1183 end do
1184 end do
1185 end do
1186
1187 end subroutine tnsr1_3d_nu4nv2_cpu
1188
1189end module tensor_cpu
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine tnsr3d_el_n5_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n13_cpu(v, u, a, bt, ct)
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n3_cpu(v, u, a, bt, ct)
subroutine tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n7_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n11_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n9_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n6_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n2_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n4_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n_cpu(v, u, a, bt, ct, n)
subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n14_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n10_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n8_cpu(v, u, a, bt, ct)