Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor_cpu.f90
Go to the documentation of this file.
2 use num_types, only : rp
3 use mxm_wrapper, only : mxm
4 implicit none
5 private
6
8
9contains
10
11 subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
12 integer, intent(in) :: nv, nu
13 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
14 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv)
15 real(kind=rp) :: work(0:nu**2*nv)
16
17 call mxm(a, nv, u, nu, work, nu)
18 call mxm(work, nv, bt, nu, v, nv)
19
20 end subroutine tnsr2d_el_cpu
21
22 subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
23 integer, intent(in) :: nv, nu
24 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
25 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
26
27 if (nv .eq. nu) then
28 select case (nv)
29 case (14)
30 call tnsr3d_el_n14_cpu(v, u, a, bt, ct)
31 case (13)
32 call tnsr3d_el_n13_cpu(v, u, a, bt, ct)
33 case (12)
34 call tnsr3d_el_n12_cpu(v, u, a, bt, ct)
35 case (11)
36 call tnsr3d_el_n11_cpu(v, u, a, bt, ct)
37 case (10)
38 call tnsr3d_el_n10_cpu(v, u, a, bt, ct)
39 case (9)
40 call tnsr3d_el_n9_cpu(v, u, a, bt, ct)
41 case (8)
42 call tnsr3d_el_n8_cpu(v, u, a, bt, ct)
43 case (7)
44 call tnsr3d_el_n7_cpu(v, u, a, bt, ct)
45 case (6)
46 call tnsr3d_el_n6_cpu(v, u, a, bt, ct)
47 case (5)
48 call tnsr3d_el_n5_cpu(v, u, a, bt, ct)
49 case (4)
50 call tnsr3d_el_n4_cpu(v, u, a, bt, ct)
51 case (3)
52 call tnsr3d_el_n3_cpu(v, u, a, bt, ct)
53 case (2)
54 call tnsr3d_el_n2_cpu(v, u, a, bt, ct)
55 case default
56 call tnsr3d_el_n_cpu(v, u, a, bt, ct, nv)
57 end select
58 else if (nv .eq. 1) then
59 select case (nu)
60 case (4)
61 call tnsr3d_el_1_4_cpu(v, u, a, bt, ct)
62 case (6)
63 call tnsr3d_el_1_6_cpu(v, u, a, bt, ct)
64 case (8)
65 call tnsr3d_el_1_8_cpu(v, u, a, bt, ct)
66 case (10)
67 call tnsr3d_el_1_10_cpu(v, u, a, bt, ct)
68 case (12)
69 call tnsr3d_el_1_12_cpu(v, u, a, bt, ct)
70 case default
71 call tnsr3d_el_1_nu_cpu(v, u, nu, a, bt, ct)
72 end select
73 else
74 call tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
75 end if
76
77 end subroutine tnsr3d_el_cpu
78
79 !OCL SERIAL
80 subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
81 integer, intent(in) :: nv, nu
82 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
83 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
84 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
85 real(kind=rp) :: tmp
86 integer :: i, j, k, l, nunu, nvnu, nvnv
87 integer :: ii, jj
88 nvnu = nv * nu
89 nunu = nu * nu
90 nvnv = nv * nv
91
92 do j = 1, nunu
93 do i = 1, nv
94 ii = i + nv * (j - 1)
95 tmp = 0.0_rp
96 do k = 1, nu
97 tmp = tmp + a(i,k) * u(k + nu * (j - 1))
98 end do
99 work(ii) = tmp
100 end do
101 end do
102
103 do i = 1, nu
104 do j = 1, nv
105 do l = 1, nv
106 ii = l + nv * (j - 1) + nvnv * (i - 1)
107 tmp = 0.0_rp
108 do k = 1, nu
109 jj = l + nv * (k - 1) + nvnu * (i - 1)
110 tmp = tmp + work(jj) * bt(k,j)
111 end do
112 work2(ii) = tmp
113 end do
114 end do
115 end do
116
117 do j = 1, nv
118 do i = 1, nvnv
119 jj = i + nvnv * (j - 1)
120 tmp = 0.0_rp
121 do k = 1, nu
122 ii = i + nvnv * (k - 1)
123 tmp = tmp + work2(ii) * ct(k, j)
124 end do
125 v(jj) = tmp
126 end do
127 end do
128
129 end subroutine tnsr3d_el_nvnu_cpu
130
131 !OCL SERIAL
132 subroutine tnsr3d_el_1_nu_cpu(v, u, nu, A, Bt, Ct)
133 integer, intent(in) :: nu
134 real(kind=rp), intent(inout) :: v(1)
135 real(kind=rp), intent(in) :: u(nu*nu*nu)
136 real(kind=rp), intent(in) :: a(1, nu), bt(nu, 1), ct(nu, 1)
137 real(kind=rp) :: work(nu**2), work2(nu)
138 real(kind=rp) :: tmp
139 integer :: i, j, k, nunu
140 integer :: jj
141 nunu = nu * nu
142
143 do j = 1, nunu
144 tmp = 0.0_rp
145 do k = 1, nu
146 tmp = tmp + a(1,k) * u(k + nu * (j - 1))
147 end do
148 work(j) = tmp
149 end do
150
151 do i = 1, nu
152 tmp = 0.0_rp
153 do k = 1, nu
154 jj = k + nu * (i - 1)
155 tmp = tmp + work(jj) * bt(k,1)
156 end do
157 work2(i) = tmp
158 end do
159
160 tmp = 0.0_rp
161 do k = 1, nu
162 tmp = tmp + work2(k) * ct(k, 1)
163 end do
164 v(1) = tmp
165
166 end subroutine tnsr3d_el_1_nu_cpu
167
168 !OCL SERIAL
169 subroutine tnsr3d_el_1_4_cpu(v, u, A, Bt, Ct)
170 integer, parameter :: n = 4
171 integer, parameter :: nn = n**2
172 real(kind=rp), intent(inout) :: v(1)
173 real(kind=rp), intent(in) :: u(n*n*n)
174 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
175 real(kind=rp) :: work(n**2), work2(n)
176 integer :: i, j
177
178 do j = 1, nn
179 work(j) = a(1,1) * u(1 + n * (j - 1)) &
180 + a(1,2) * u(2 + n * (j - 1)) &
181 + a(1,3) * u(3 + n * (j - 1)) &
182 + a(1,4) * u(4 + n * (j - 1))
183 end do
184
185 do i = 1, n
186 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
187 + work(2 + n * (i - 1)) * bt(2,1) &
188 + work(3 + n * (i - 1)) * bt(3,1) &
189 + work(4 + n * (i - 1)) * bt(4,1)
190 end do
191
192 v(1) = work2(1) * ct(1, 1) &
193 + work2(2) * ct(2, 1) &
194 + work2(3) * ct(3, 1) &
195 + work2(4) * ct(4, 1)
196
197 end subroutine tnsr3d_el_1_4_cpu
198
199 !OCL SERIAL
200 subroutine tnsr3d_el_1_6_cpu(v, u, A, Bt, Ct)
201 integer, parameter :: n = 6
202 integer, parameter :: nn = n**2
203 real(kind=rp), intent(inout) :: v(1)
204 real(kind=rp), intent(in) :: u(n*n*n)
205 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
206 real(kind=rp) :: work(n**2), work2(n)
207 integer :: i, j
208
209 do j = 1, nn
210 work(j) = a(1,1) * u(1 + n * (j - 1)) &
211 + a(1,2) * u(2 + n * (j - 1)) &
212 + a(1,3) * u(3 + n * (j - 1)) &
213 + a(1,4) * u(4 + n * (j - 1)) &
214 + a(1,5) * u(5 + n * (j - 1)) &
215 + a(1,6) * u(6 + n * (j - 1))
216 end do
217
218 do i = 1, n
219 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
220 + work(2 + n * (i - 1)) * bt(2,1) &
221 + work(3 + n * (i - 1)) * bt(3,1) &
222 + work(4 + n * (i - 1)) * bt(4,1) &
223 + work(5 + n * (i - 1)) * bt(5,1) &
224 + work(6 + n * (i - 1)) * bt(6,1)
225 end do
226
227 v(1) = work2(1) * ct(1, 1) &
228 + work2(2) * ct(2, 1) &
229 + work2(3) * ct(3, 1) &
230 + work2(4) * ct(4, 1) &
231 + work2(5) * ct(5, 1) &
232 + work2(6) * ct(6, 1)
233
234 end subroutine tnsr3d_el_1_6_cpu
235
236 !OCL SERIAL
237 subroutine tnsr3d_el_1_8_cpu(v, u, A, Bt, Ct)
238 integer, parameter :: n = 8
239 integer, parameter :: nn = n**2
240 real(kind=rp), intent(inout) :: v(1)
241 real(kind=rp), intent(in) :: u(n*n*n)
242 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
243 real(kind=rp) :: work(n**2), work2(n)
244 integer :: i, j
245
246 do j = 1, nn
247 work(j) = a(1,1) * u(1 + n * (j - 1)) &
248 + a(1,2) * u(2 + n * (j - 1)) &
249 + a(1,3) * u(3 + n * (j - 1)) &
250 + a(1,4) * u(4 + n * (j - 1)) &
251 + a(1,5) * u(5 + n * (j - 1)) &
252 + a(1,6) * u(6 + n * (j - 1)) &
253 + a(1,7) * u(7 + n * (j - 1)) &
254 + a(1,8) * u(8 + n * (j - 1))
255 end do
256
257 do i = 1, n
258 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
259 + work(2 + n * (i - 1)) * bt(2,1) &
260 + work(3 + n * (i - 1)) * bt(3,1) &
261 + work(4 + n * (i - 1)) * bt(4,1) &
262 + work(5 + n * (i - 1)) * bt(5,1) &
263 + work(6 + n * (i - 1)) * bt(6,1) &
264 + work(7 + n * (i - 1)) * bt(7,1) &
265 + work(8 + n * (i - 1)) * bt(8,1)
266 end do
267
268 v(1) = work2(1) * ct(1, 1) &
269 + work2(2) * ct(2, 1) &
270 + work2(3) * ct(3, 1) &
271 + work2(4) * ct(4, 1) &
272 + work2(5) * ct(5, 1) &
273 + work2(6) * ct(6, 1) &
274 + work2(7) * ct(7, 1) &
275 + work2(8) * ct(8, 1)
276
277
278 end subroutine tnsr3d_el_1_8_cpu
279
280 !OCL SERIAL
281 subroutine tnsr3d_el_1_10_cpu(v, u, A, Bt, Ct)
282 integer, parameter :: n = 10
283 integer, parameter :: nn = n**2
284 real(kind=rp), intent(inout) :: v(1)
285 real(kind=rp), intent(in) :: u(n*n*n)
286 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
287 real(kind=rp) :: work(n**2), work2(n)
288 integer :: i, j
289
290 do j = 1, nn
291 work(j) = a(1,1) * u(1 + n * (j - 1)) &
292 + a(1,2) * u(2 + n * (j - 1)) &
293 + a(1,3) * u(3 + n * (j - 1)) &
294 + a(1,4) * u(4 + n * (j - 1)) &
295 + a(1,5) * u(5 + n * (j - 1)) &
296 + a(1,6) * u(6 + n * (j - 1)) &
297 + a(1,7) * u(7 + n * (j - 1)) &
298 + a(1,8) * u(8 + n * (j - 1)) &
299 + a(1,9) * u(9 + n * (j - 1)) &
300 + a(1,10) * u(10 + n * (j - 1))
301 end do
302
303 do i = 1, n
304 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
305 + work(2 + n * (i - 1)) * bt(2,1) &
306 + work(3 + n * (i - 1)) * bt(3,1) &
307 + work(4 + n * (i - 1)) * bt(4,1) &
308 + work(5 + n * (i - 1)) * bt(5,1) &
309 + work(6 + n * (i - 1)) * bt(6,1) &
310 + work(7 + n * (i - 1)) * bt(7,1) &
311 + work(8 + n * (i - 1)) * bt(8,1) &
312 + work(9 + n * (i - 1)) * bt(9,1) &
313 + work(10 + n * (i - 1)) * bt(10,1)
314 end do
315
316 v(1) = work2(1) * ct(1, 1) &
317 + work2(2) * ct(2, 1) &
318 + work2(3) * ct(3, 1) &
319 + work2(4) * ct(4, 1) &
320 + work2(5) * ct(5, 1) &
321 + work2(6) * ct(6, 1) &
322 + work2(7) * ct(7, 1) &
323 + work2(8) * ct(8, 1) &
324 + work2(9) * ct(9, 1) &
325 + work2(10) * ct(10, 1)
326
327 end subroutine tnsr3d_el_1_10_cpu
328
329 !OCL SERIAL
330 subroutine tnsr3d_el_1_12_cpu(v, u, A, Bt, Ct)
331 integer, parameter :: n = 12
332 integer, parameter :: nn = n**2
333 real(kind=rp), intent(inout) :: v(1)
334 real(kind=rp), intent(in) :: u(n*n*n)
335 real(kind=rp), intent(in) :: a(1,n), bt(n,1), ct(n,1)
336 real(kind=rp) :: work(n**2), work2(n)
337 integer :: i, j
338
339 do j = 1, nn
340 work(j) = a(1,1) * u(1 + n * (j - 1)) &
341 + a(1,2) * u(2 + n * (j - 1)) &
342 + a(1,3) * u(3 + n * (j - 1)) &
343 + a(1,4) * u(4 + n * (j - 1)) &
344 + a(1,5) * u(5 + n * (j - 1)) &
345 + a(1,6) * u(6 + n * (j - 1)) &
346 + a(1,7) * u(7 + n * (j - 1)) &
347 + a(1,8) * u(8 + n * (j - 1)) &
348 + a(1,9) * u(9 + n * (j - 1)) &
349 + a(1,10) * u(10 + n * (j - 1)) &
350 + a(1,11) * u(11 + n * (j - 1)) &
351 + a(1,12) * u(12 + n * (j - 1))
352 end do
353
354 do i = 1, n
355 work2(i) = work(1 + n * (i - 1)) * bt(1,1) &
356 + work(2 + n * (i - 1)) * bt(2,1) &
357 + work(3 + n * (i - 1)) * bt(3,1) &
358 + work(4 + n * (i - 1)) * bt(4,1) &
359 + work(5 + n * (i - 1)) * bt(5,1) &
360 + work(6 + n * (i - 1)) * bt(6,1) &
361 + work(7 + n * (i - 1)) * bt(7,1) &
362 + work(8 + n * (i - 1)) * bt(8,1) &
363 + work(9 + n * (i - 1)) * bt(9,1) &
364 + work(10 + n * (i - 1)) * bt(10,1) &
365 + work(11 + n * (i - 1)) * bt(11,1) &
366 + work(12 + n * (i - 1)) * bt(12,1)
367 end do
368
369 v(1) = work2(1) * ct(1, 1) &
370 + work2(2) * ct(2, 1) &
371 + work2(3) * ct(3, 1) &
372 + work2(4) * ct(4, 1) &
373 + work2(5) * ct(5, 1) &
374 + work2(6) * ct(6, 1) &
375 + work2(7) * ct(7, 1) &
376 + work2(8) * ct(8, 1) &
377 + work2(9) * ct(9, 1) &
378 + work2(10) * ct(10, 1) &
379 + work2(11) * ct(11, 1) &
380 + work2(12) * ct(12, 1)
381
382 end subroutine tnsr3d_el_1_12_cpu
383
384 !OCL SERIAL
385 subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
386 integer, intent(in) :: n
387 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
388 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
389 real(kind=rp) :: work(n**3), work2(n**3), tmp
390 integer :: i, j, l, k
391 integer :: ii, jj, nn
392
393 nn = n**2
394
395 do j = 1, nn
396 do i = 1, n
397 ii = i + n * (j - 1)
398 tmp = 0.0_rp
399 do k = 1, n
400 tmp = tmp + a(i,k) * u(k + n * (j - 1))
401 end do
402 work(ii) = tmp
403 end do
404 end do
405
406 do i = 1, n
407 do j = 1, n
408 do l = 1, n
409 ii = l + n * (j - 1) + nn * (i - 1)
410 tmp = 0.0_rp
411 do k = 1, n
412 tmp = tmp + work(l + n * (k - 1) + nn * (i - 1)) * bt(k,j)
413 end do
414 work2(ii) = tmp
415 end do
416 end do
417 end do
418
419 do j = 1, n
420 do i = 1, nn
421 jj = i + nn * (j - 1)
422 tmp = 0.0_rp
423 do k = 1, n
424 tmp = tmp + work2(i + nn * (k - 1)) * ct(k, j)
425 end do
426 v(jj) = tmp
427 end do
428 end do
429
430 end subroutine tnsr3d_el_n_cpu
431
432 !OCL SERIAL
433 subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
434 integer, parameter :: n = 14
435 integer, parameter :: nn = n**2
436 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
437 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
438 real(kind=rp) :: work(n**3), work2(n**3)
439 integer :: i, j, l
440 integer :: ii, jj
441
442 do j = 1, nn
443 do i = 1, n
444 ii = i + n * (j - 1)
445 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
446 + a(i,2) * u(2 + n * (j - 1)) &
447 + a(i,3) * u(3 + n * (j - 1)) &
448 + a(i,4) * u(4 + n * (j - 1)) &
449 + a(i,5) * u(5 + n * (j - 1)) &
450 + a(i,6) * u(6 + n * (j - 1)) &
451 + a(i,7) * u(7 + n * (j - 1)) &
452 + a(i,8) * u(8 + n * (j - 1)) &
453 + a(i,9) * u(9 + n * (j - 1)) &
454 + a(i,10) * u(10 + n * (j - 1)) &
455 + a(i,11) * u(11 + n * (j - 1)) &
456 + a(i,12) * u(12 + n * (j - 1)) &
457 + a(i,13) * u(13 + n * (j - 1)) &
458 + a(i,14) * u(14 + n * (j - 1))
459 end do
460 end do
461
462 do i = 1, n
463 do j = 1, n
464 do l = 1, n
465 ii = l + n * (j - 1) + nn * (i - 1)
466 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
467 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
468 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
469 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
470 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
471 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
472 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
473 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
474 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
475 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
476 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
477 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
478 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j) &
479 + work(l + n * (14 - 1) + nn * (i - 1)) * bt(14,j)
480 end do
481 end do
482 end do
483
484 do j = 1, n
485 do i = 1, nn
486 jj = i + nn * (j - 1)
487 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
488 + work2(i + nn * (2 - 1)) * ct(2, j) &
489 + work2(i + nn * (3 - 1)) * ct(3, j) &
490 + work2(i + nn * (4 - 1)) * ct(4, j) &
491 + work2(i + nn * (5 - 1)) * ct(5, j) &
492 + work2(i + nn * (6 - 1)) * ct(6, j) &
493 + work2(i + nn * (7 - 1)) * ct(7, j) &
494 + work2(i + nn * (8 - 1)) * ct(8, j) &
495 + work2(i + nn * (9 - 1)) * ct(9, j) &
496 + work2(i + nn * (10 - 1)) * ct(10, j) &
497 + work2(i + nn * (11 - 1)) * ct(11, j) &
498 + work2(i + nn * (12 - 1)) * ct(12, j) &
499 + work2(i + nn * (13 - 1)) * ct(13, j) &
500 + work2(i + nn * (14 - 1)) * ct(14, j)
501 end do
502 end do
503
504 end subroutine tnsr3d_el_n14_cpu
505
506 !OCL SERIAL
507 subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
508 integer, parameter :: n = 13
509 integer, parameter :: nn = n**2
510 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
511 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
512 real(kind=rp) :: work(n**3), work2(n**3)
513 integer :: i, j, l
514 integer :: ii, jj
515
516 do j = 1, nn
517 do i = 1, n
518 ii = i + n * (j - 1)
519 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
520 + a(i,2) * u(2 + n * (j - 1)) &
521 + a(i,3) * u(3 + n * (j - 1)) &
522 + a(i,4) * u(4 + n * (j - 1)) &
523 + a(i,5) * u(5 + n * (j - 1)) &
524 + a(i,6) * u(6 + n * (j - 1)) &
525 + a(i,7) * u(7 + n * (j - 1)) &
526 + a(i,8) * u(8 + n * (j - 1)) &
527 + a(i,9) * u(9 + n * (j - 1)) &
528 + a(i,10) * u(10 + n * (j - 1)) &
529 + a(i,11) * u(11 + n * (j - 1)) &
530 + a(i,12) * u(12 + n * (j - 1)) &
531 + a(i,13) * u(13 + n * (j - 1))
532 end do
533 end do
534
535 do i = 1, n
536 do j = 1, n
537 do l = 1, n
538 ii = l + n * (j - 1) + nn * (i - 1)
539 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
540 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
541 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
542 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
543 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
544 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
545 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
546 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
547 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
548 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
549 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
550 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j) &
551 + work(l + n * (13 - 1) + nn * (i - 1)) * bt(13,j)
552 end do
553 end do
554 end do
555
556 do j = 1, n
557 do i = 1, nn
558 jj = i + nn * (j - 1)
559 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
560 + work2(i + nn * (2 - 1)) * ct(2, j) &
561 + work2(i + nn * (3 - 1)) * ct(3, j) &
562 + work2(i + nn * (4 - 1)) * ct(4, j) &
563 + work2(i + nn * (5 - 1)) * ct(5, j) &
564 + work2(i + nn * (6 - 1)) * ct(6, j) &
565 + work2(i + nn * (7 - 1)) * ct(7, j) &
566 + work2(i + nn * (8 - 1)) * ct(8, j) &
567 + work2(i + nn * (9 - 1)) * ct(9, j) &
568 + work2(i + nn * (10 - 1)) * ct(10, j) &
569 + work2(i + nn * (11 - 1)) * ct(11, j) &
570 + work2(i + nn * (12 - 1)) * ct(12, j) &
571 + work2(i + nn * (13 - 1)) * ct(13, j)
572 end do
573 end do
574
575 end subroutine tnsr3d_el_n13_cpu
576
577 !OCL SERIAL
578 subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
579 integer, parameter :: n = 12
580 integer, parameter :: nn = n**2
581 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
582 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
583 real(kind=rp) :: work(n**3), work2(n**3)
584 integer :: i, j, l
585 integer :: ii, jj
586
587 do j = 1, nn
588 do i = 1, n
589 ii = i + n * (j - 1)
590 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
591 + a(i,2) * u(2 + n * (j - 1)) &
592 + a(i,3) * u(3 + n * (j - 1)) &
593 + a(i,4) * u(4 + n * (j - 1)) &
594 + a(i,5) * u(5 + n * (j - 1)) &
595 + a(i,6) * u(6 + n * (j - 1)) &
596 + a(i,7) * u(7 + n * (j - 1)) &
597 + a(i,8) * u(8 + n * (j - 1)) &
598 + a(i,9) * u(9 + n * (j - 1)) &
599 + a(i,10) * u(10 + n * (j - 1)) &
600 + a(i,11) * u(11 + n * (j - 1)) &
601 + a(i,12) * u(12 + n * (j - 1))
602 end do
603 end do
604
605 do i = 1, n
606 do j = 1, n
607 do l = 1, n
608 ii = l + n * (j - 1) + nn * (i - 1)
609 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
610 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
611 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
612 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
613 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
614 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
615 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
616 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
617 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
618 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
619 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j) &
620 + work(l + n * (12 - 1) + nn * (i - 1)) * bt(12,j)
621 end do
622 end do
623 end do
624
625 do j = 1, n
626 do i = 1, nn
627 jj = i + nn * (j - 1)
628 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
629 + work2(i + nn * (2 - 1)) * ct(2, j) &
630 + work2(i + nn * (3 - 1)) * ct(3, j) &
631 + work2(i + nn * (4 - 1)) * ct(4, j) &
632 + work2(i + nn * (5 - 1)) * ct(5, j) &
633 + work2(i + nn * (6 - 1)) * ct(6, j) &
634 + work2(i + nn * (7 - 1)) * ct(7, j) &
635 + work2(i + nn * (8 - 1)) * ct(8, j) &
636 + work2(i + nn * (9 - 1)) * ct(9, j) &
637 + work2(i + nn * (10 - 1)) * ct(10, j) &
638 + work2(i + nn * (11 - 1)) * ct(11, j) &
639 + work2(i + nn * (12 - 1)) * ct(12, j)
640 end do
641 end do
642
643 end subroutine tnsr3d_el_n12_cpu
644
645 !OCL SERIAL
646 subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
647 integer, parameter :: n = 11
648 integer, parameter :: nn = n**2
649 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
650 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
651 real(kind=rp) :: work(n**3), work2(n**3)
652 integer :: i, j, l
653 integer :: ii, jj
654
655 do j = 1, nn
656 do i = 1, n
657 ii = i + n * (j - 1)
658 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
659 + a(i,2) * u(2 + n * (j - 1)) &
660 + a(i,3) * u(3 + n * (j - 1)) &
661 + a(i,4) * u(4 + n * (j - 1)) &
662 + a(i,5) * u(5 + n * (j - 1)) &
663 + a(i,6) * u(6 + n * (j - 1)) &
664 + a(i,7) * u(7 + n * (j - 1)) &
665 + a(i,8) * u(8 + n * (j - 1)) &
666 + a(i,9) * u(9 + n * (j - 1)) &
667 + a(i,10) * u(10 + n * (j - 1)) &
668 + a(i,11) * u(11 + n * (j - 1))
669 end do
670 end do
671
672 do i = 1, n
673 do j = 1, n
674 do l = 1, n
675 ii = l + n * (j - 1) + nn * (i - 1)
676 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
677 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
678 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
679 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
680 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
681 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
682 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
683 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
684 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
685 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j) &
686 + work(l + n * (11 - 1) + nn * (i - 1)) * bt(11,j)
687 end do
688 end do
689 end do
690
691 do j = 1, n
692 do i = 1, nn
693 jj = i + nn * (j - 1)
694 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
695 + work2(i + nn * (2 - 1)) * ct(2, j) &
696 + work2(i + nn * (3 - 1)) * ct(3, j) &
697 + work2(i + nn * (4 - 1)) * ct(4, j) &
698 + work2(i + nn * (5 - 1)) * ct(5, j) &
699 + work2(i + nn * (6 - 1)) * ct(6, j) &
700 + work2(i + nn * (7 - 1)) * ct(7, j) &
701 + work2(i + nn * (8 - 1)) * ct(8, j) &
702 + work2(i + nn * (9 - 1)) * ct(9, j) &
703 + work2(i + nn * (10 - 1)) * ct(10, j) &
704 + work2(i + nn * (11 - 1)) * ct(11, j)
705 end do
706 end do
707
708 end subroutine tnsr3d_el_n11_cpu
709
710 !OCL SERIAL
711 subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
712 integer, parameter :: n = 10
713 integer, parameter :: nn = n**2
714 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
715 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
716 real(kind=rp) :: work(n**3), work2(n**3)
717 integer :: i, j, l
718 integer :: ii, jj
719
720 do j = 1, nn
721 do i = 1, n
722 ii = i + n * (j - 1)
723 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
724 + a(i,2) * u(2 + n * (j - 1)) &
725 + a(i,3) * u(3 + n * (j - 1)) &
726 + a(i,4) * u(4 + n * (j - 1)) &
727 + a(i,5) * u(5 + n * (j - 1)) &
728 + a(i,6) * u(6 + n * (j - 1)) &
729 + a(i,7) * u(7 + n * (j - 1)) &
730 + a(i,8) * u(8 + n * (j - 1)) &
731 + a(i,9) * u(9 + n * (j - 1)) &
732 + a(i,10) * u(10 + n * (j - 1))
733 end do
734 end do
735
736 do i = 1, n
737 do j = 1, n
738 do l = 1, n
739 ii = l + n * (j - 1) + nn * (i - 1)
740 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
741 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
742 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
743 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
744 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
745 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
746 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
747 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
748 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j) &
749 + work(l + n * (10 - 1) + nn * (i - 1)) * bt(10,j)
750 end do
751 end do
752 end do
753
754 do j = 1, n
755 do i = 1, nn
756 jj = i + nn * (j - 1)
757 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
758 + work2(i + nn * (2 - 1)) * ct(2, j) &
759 + work2(i + nn * (3 - 1)) * ct(3, j) &
760 + work2(i + nn * (4 - 1)) * ct(4, j) &
761 + work2(i + nn * (5 - 1)) * ct(5, j) &
762 + work2(i + nn * (6 - 1)) * ct(6, j) &
763 + work2(i + nn * (7 - 1)) * ct(7, j) &
764 + work2(i + nn * (8 - 1)) * ct(8, j) &
765 + work2(i + nn * (9 - 1)) * ct(9, j) &
766 + work2(i + nn * (10 - 1)) * ct(10, j)
767 end do
768 end do
769
770 end subroutine tnsr3d_el_n10_cpu
771
772 !OCL SERIAL
773 subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
774 integer, parameter :: n = 9
775 integer, parameter :: nn = n**2
776 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
777 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
778 real(kind=rp) :: work(n**3), work2(n**3)
779 integer :: i, j, l
780 integer :: ii, jj
781
782 do j = 1, nn
783 do i = 1, n
784 ii = i + n * (j - 1)
785 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
786 + a(i,2) * u(2 + n * (j - 1)) &
787 + a(i,3) * u(3 + n * (j - 1)) &
788 + a(i,4) * u(4 + n * (j - 1)) &
789 + a(i,5) * u(5 + n * (j - 1)) &
790 + a(i,6) * u(6 + n * (j - 1)) &
791 + a(i,7) * u(7 + n * (j - 1)) &
792 + a(i,8) * u(8 + n * (j - 1)) &
793 + a(i,9) * u(9 + n * (j - 1))
794 end do
795 end do
796
797 do i = 1, n
798 do j = 1, n
799 do l = 1, n
800 ii = l + n * (j - 1) + nn * (i - 1)
801 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
802 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
803 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
804 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
805 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
806 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
807 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
808 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j) &
809 + work(l + n * (9 - 1) + nn * (i - 1)) * bt(9,j)
810 end do
811 end do
812 end do
813
814 do j = 1, n
815 do i = 1, nn
816 jj = i + nn * (j - 1)
817 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
818 + work2(i + nn * (2 - 1)) * ct(2, j) &
819 + work2(i + nn * (3 - 1)) * ct(3, j) &
820 + work2(i + nn * (4 - 1)) * ct(4, j) &
821 + work2(i + nn * (5 - 1)) * ct(5, j) &
822 + work2(i + nn * (6 - 1)) * ct(6, j) &
823 + work2(i + nn * (7 - 1)) * ct(7, j) &
824 + work2(i + nn * (8 - 1)) * ct(8, j) &
825 + work2(i + nn * (9 - 1)) * ct(9, j)
826 end do
827 end do
828
829 end subroutine tnsr3d_el_n9_cpu
830
831 !OCL SERIAL
832 subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
833 integer, parameter :: n = 8
834 integer, parameter :: nn = n**2
835 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
836 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
837 real(kind=rp) :: work(n**3), work2(n**3)
838 integer :: i, j, l
839 integer :: ii, jj
840
841 do j = 1, nn
842 do i = 1, n
843 ii = i + n * (j - 1)
844 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
845 + a(i,2) * u(2 + n * (j - 1)) &
846 + a(i,3) * u(3 + n * (j - 1)) &
847 + a(i,4) * u(4 + n * (j - 1)) &
848 + a(i,5) * u(5 + n * (j - 1)) &
849 + a(i,6) * u(6 + n * (j - 1)) &
850 + a(i,7) * u(7 + n * (j - 1)) &
851 + a(i,8) * u(8 + n * (j - 1))
852 end do
853 end do
854
855 do i = 1, n
856 do j = 1, n
857 do l = 1, n
858 ii = l + n * (j - 1) + nn * (i - 1)
859 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
860 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
861 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
862 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
863 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
864 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
865 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j) &
866 + work(l + n * (8 - 1) + nn * (i - 1)) * bt(8,j)
867 end do
868 end do
869 end do
870
871 do j = 1, n
872 do i = 1, nn
873 jj = i + nn * (j - 1)
874 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
875 + work2(i + nn * (2 - 1)) * ct(2, j) &
876 + work2(i + nn * (3 - 1)) * ct(3, j) &
877 + work2(i + nn * (4 - 1)) * ct(4, j) &
878 + work2(i + nn * (5 - 1)) * ct(5, j) &
879 + work2(i + nn * (6 - 1)) * ct(6, j) &
880 + work2(i + nn * (7 - 1)) * ct(7, j) &
881 + work2(i + nn * (8 - 1)) * ct(8, j)
882 end do
883 end do
884
885 end subroutine tnsr3d_el_n8_cpu
886
887 !OCL SERIAL
888 subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
889 integer, parameter :: n = 7
890 integer, parameter :: nn = n**2
891 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
892 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
893 real(kind=rp) :: work(n**3), work2(n**3)
894 integer :: i, j, l
895 integer :: ii, jj
896
897 do j = 1, nn
898 do i = 1, n
899 ii = i + n * (j - 1)
900 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
901 + a(i,2) * u(2 + n * (j - 1)) &
902 + a(i,3) * u(3 + n * (j - 1)) &
903 + a(i,4) * u(4 + n * (j - 1)) &
904 + a(i,5) * u(5 + n * (j - 1)) &
905 + a(i,6) * u(6 + n * (j - 1)) &
906 + a(i,7) * u(7 + n * (j - 1))
907 end do
908 end do
909
910 do i = 1, n
911 do j = 1, n
912 do l = 1, n
913 ii = l + n * (j - 1) + nn * (i - 1)
914 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
915 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
916 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
917 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
918 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
919 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j) &
920 + work(l + n * (7 - 1) + nn * (i - 1)) * bt(7,j)
921 end do
922 end do
923 end do
924
925 do j = 1, n
926 do i = 1, nn
927 jj = i + nn * (j - 1)
928 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
929 + work2(i + nn * (2 - 1)) * ct(2, j) &
930 + work2(i + nn * (3 - 1)) * ct(3, j) &
931 + work2(i + nn * (4 - 1)) * ct(4, j) &
932 + work2(i + nn * (5 - 1)) * ct(5, j) &
933 + work2(i + nn * (6 - 1)) * ct(6, j) &
934 + work2(i + nn * (7 - 1)) * ct(7, j)
935 end do
936 end do
937
938 end subroutine tnsr3d_el_n7_cpu
939
940 !OCL SERIAL
941 subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
942 integer, parameter :: n = 6
943 integer, parameter :: nn = n**2
944 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
945 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
946 real(kind=rp) :: work(n**3), work2(n**3)
947 integer :: i, j, l
948 integer :: ii, jj
949
950 do j = 1, nn
951 do i = 1, n
952 ii = i + n * (j - 1)
953 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
954 + a(i,2) * u(2 + n * (j - 1)) &
955 + a(i,3) * u(3 + n * (j - 1)) &
956 + a(i,4) * u(4 + n * (j - 1)) &
957 + a(i,5) * u(5 + n * (j - 1)) &
958 + a(i,6) * u(6 + n * (j - 1))
959 end do
960 end do
961
962 do i = 1, n
963 do j = 1, n
964 do l = 1, n
965 ii = l + n * (j - 1) + nn * (i - 1)
966 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
967 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
968 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
969 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
970 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j) &
971 + work(l + n * (6 - 1) + nn * (i - 1)) * bt(6,j)
972 end do
973 end do
974 end do
975
976 do j = 1, n
977 do i = 1, nn
978 jj = i + nn * (j - 1)
979 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
980 + work2(i + nn * (2 - 1)) * ct(2, j) &
981 + work2(i + nn * (3 - 1)) * ct(3, j) &
982 + work2(i + nn * (4 - 1)) * ct(4, j) &
983 + work2(i + nn * (5 - 1)) * ct(5, j) &
984 + work2(i + nn * (6 - 1)) * ct(6, j)
985 end do
986 end do
987
988 end subroutine tnsr3d_el_n6_cpu
989
990 !OCL SERIAL
991 subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
992 integer, parameter :: n = 5
993 integer, parameter :: nn = n**2
994 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
995 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
996 real(kind=rp) :: work(n**3), work2(n**3)
997 integer :: i, j, l
998 integer :: ii, jj
999
1000 do j = 1, nn
1001 do i = 1, n
1002 ii = i + n * (j - 1)
1003 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1004 + a(i,2) * u(2 + n * (j - 1)) &
1005 + a(i,3) * u(3 + n * (j - 1)) &
1006 + a(i,4) * u(4 + n * (j - 1)) &
1007 + a(i,5) * u(5 + n * (j - 1))
1008 end do
1009 end do
1010
1011 do i = 1, n
1012 do j = 1, n
1013 do l = 1, n
1014 ii = l + n * (j - 1) + nn * (i - 1)
1015 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1016 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1017 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
1018 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j) &
1019 + work(l + n * (5 - 1) + nn * (i - 1)) * bt(5,j)
1020 end do
1021 end do
1022 end do
1023
1024 do j = 1, n
1025 do i = 1, nn
1026 jj = i + nn * (j - 1)
1027 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1028 + work2(i + nn * (2 - 1)) * ct(2, j) &
1029 + work2(i + nn * (3 - 1)) * ct(3, j) &
1030 + work2(i + nn * (4 - 1)) * ct(4, j) &
1031 + work2(i + nn * (5 - 1)) * ct(5, j)
1032 end do
1033 end do
1034
1035 end subroutine tnsr3d_el_n5_cpu
1036
1037 !OCL SERIAL
1038 subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
1039 integer, parameter :: n = 4
1040 integer, parameter :: nn = n**2
1041 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1042 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1043 real(kind=rp) :: work(n**3), work2(n**3)
1044 integer :: i, j, l
1045 integer :: ii, jj
1046
1047 do j = 1, nn
1048 do i = 1, n
1049 ii = i + n * (j - 1)
1050 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1051 + a(i,2) * u(2 + n * (j - 1)) &
1052 + a(i,3) * u(3 + n * (j - 1)) &
1053 + a(i,4) * u(4 + n * (j - 1))
1054 end do
1055 end do
1056
1057 do i = 1, n
1058 do j = 1, n
1059 do l = 1, n
1060 ii = l + n * (j - 1) + nn * (i - 1)
1061 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1062 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1063 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j) &
1064 + work(l + n * (4 - 1) + nn * (i - 1)) * bt(4,j)
1065 end do
1066 end do
1067 end do
1068
1069 do j = 1, n
1070 do i = 1, nn
1071 jj = i + nn * (j - 1)
1072 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1073 + work2(i + nn * (2 - 1)) * ct(2, j) &
1074 + work2(i + nn * (3 - 1)) * ct(3, j) &
1075 + work2(i + nn * (4 - 1)) * ct(4, j)
1076 end do
1077 end do
1078
1079 end subroutine tnsr3d_el_n4_cpu
1080
1081 !OCL SERIAL
1082 subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
1083 integer, parameter :: n = 3
1084 integer, parameter :: nn = n**2
1085 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1086 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1087 real(kind=rp) :: work(n**3), work2(n**3)
1088 integer :: i, j, l
1089 integer :: ii, jj
1090
1091 do j = 1, nn
1092 do i = 1, n
1093 ii = i + n * (j - 1)
1094 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1095 + a(i,2) * u(2 + n * (j - 1)) &
1096 + a(i,3) * u(3 + n * (j - 1))
1097 end do
1098 end do
1099
1100 do i = 1, n
1101 do j = 1, n
1102 do l = 1, n
1103 ii = l + n * (j - 1) + nn * (i - 1)
1104 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1105 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j) &
1106 + work(l + n * (3 - 1) + nn * (i - 1)) * bt(3,j)
1107 end do
1108 end do
1109 end do
1110
1111 do j = 1, n
1112 do i = 1, nn
1113 jj = i + nn * (j - 1)
1114 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1115 + work2(i + nn * (2 - 1)) * ct(2, j) &
1116 + work2(i + nn * (3 - 1)) * ct(3, j)
1117 end do
1118 end do
1119
1120 end subroutine tnsr3d_el_n3_cpu
1121
1122 !OCL SERIAL
1123 subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
1124 integer, parameter :: n = 2
1125 integer, parameter :: nn = n**2
1126 real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
1127 real(kind=rp), intent(inout) :: a(n,n), bt(n,n), ct(n,n)
1128 real(kind=rp) :: work(n**3), work2(n**3)
1129 integer :: i, j, l
1130 integer :: ii, jj
1131
1132 do j = 1, nn
1133 do i = 1, n
1134 ii = i + n * (j - 1)
1135 work(ii) = a(i,1) * u(1 + n * (j - 1)) &
1136 + a(i,2) * u(2 + n * (j - 1))
1137 end do
1138 end do
1139
1140 do i = 1, n
1141 do j = 1, n
1142 do l = 1, n
1143 ii = l + n * (j - 1) + nn * (i - 1)
1144 work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * bt(1,j) &
1145 + work(l + n * (2 - 1) + nn * (i - 1)) * bt(2,j)
1146 end do
1147 end do
1148 end do
1149
1150 do j = 1, n
1151 do i = 1, nn
1152 jj = i + nn * (j - 1)
1153 v(jj) = work2(i + nn * (1 - 1)) * ct(1, j) &
1154 + work2(i + nn * (2 - 1)) * ct(2, j)
1155 end do
1156 end do
1157
1158 end subroutine tnsr3d_el_n2_cpu
1159
1160 subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
1161 integer, intent(in) :: nv, nu, nelv
1162 real(kind=rp), intent(inout) :: v(nv*nv*nv, nelv)
1163 real(kind=rp), intent(in) :: u(nu*nu*nu, nelv)
1164 real(kind=rp), intent(in) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1165
1166 if (nu .eq. 2 .and. nv .eq. 4) then
1167 call tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
1168 else if (nu .eq. 4) then
1169 call tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
1170 else
1171 call tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
1172 end if
1173
1174 end subroutine tnsr3d_cpu
1175
1176 subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
1177 integer, intent(in) :: nv, nu, nelv
1178 real(kind=rp), intent(inout) :: v(nv*nv*nv, nelv)
1179 real(kind=rp), intent(in) :: u(nu*nu*nu, nelv)
1180 real(kind=rp), intent(in) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1181 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1182 integer :: ie, i, j, k, l, ii, jj
1183 integer :: nunu, nvnu, nvnv
1184
1185 nvnu = nv * nu
1186 nunu = nu * nu
1187 nvnv = nv * nv
1188
1189 !$omp parallel do private(ie, i, j, k, l, ii, jj, tmp, work, work2)
1190 do ie = 1, nelv
1191 do j = 1, nunu
1192 do i = 1, nv
1193 ii = i + nv * (j - 1)
1194 tmp = 0.0_rp
1195 do k = 1, nu
1196 tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
1197 end do
1198 work(ii) = tmp
1199 end do
1200 end do
1201
1202 do i = 1, nu
1203 do j = 1, nv
1204 do l = 1, nv
1205 ii = l + nv * (j - 1) + nvnv * (i - 1)
1206 tmp = 0.0_rp
1207 do k = 1, nu
1208 jj = l + nv * (k - 1) + nvnu * (i - 1)
1209 tmp = tmp + work(jj) * bt(k,j)
1210 end do
1211 work2(ii) = tmp
1212 end do
1213 end do
1214 end do
1215
1216 do j = 1, nv
1217 do i = 1, nvnv
1218 jj = i + nvnv * (j - 1)
1219 tmp = 0.0_rp
1220 do k = 1, nu
1221 ii = i + nvnv * (k - 1)
1222 tmp = tmp + work2(ii) * ct(k, j)
1223 end do
1224 v(jj, ie) = tmp
1225 end do
1226 end do
1227 end do
1228 !$omp end parallel do
1229
1230 end subroutine tnsr3d_nvnu_cpu
1231
1232 subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
1233 integer, parameter :: nu = 2
1234 integer, parameter :: nv = 4
1235 integer, parameter :: nunu = 4
1236 integer, parameter :: nvnu = 8
1237 integer, parameter :: nvnv = 16
1238 integer, intent(in) :: nelv
1239 real(kind=rp), intent(inout) :: v(nv*nv*nv, nelv)
1240 real(kind=rp), intent(in) :: u(nu*nu*nu, nelv)
1241 real(kind=rp), intent(in) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1242 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1243 integer :: ie, i, j, k, l, ii, jj
1244
1245 !$omp parallel do private(ie, i, j, k, l, ii, jj, tmp, work, work2)
1246 do ie = 1, nelv
1247 do j = 1, nunu
1248 do i = 1, nv
1249 ii = i + nv * (j - 1)
1250 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1251 + a(i,2) * u(2 + nu * (j - 1), ie)
1252 end do
1253 end do
1254
1255 do i = 1, nu
1256 do j = 1, nv
1257 do l = 1, nv
1258 ii = l + nv * (j - 1) + nvnv * (i - 1)
1259 tmp = 0.0_rp
1260 do k = 1, nu
1261 jj = l + nv * (k - 1) + nvnu * (i - 1)
1262 tmp = tmp + work(jj) * bt(k,j)
1263 end do
1264 work2(ii) = tmp
1265 end do
1266 end do
1267 end do
1268
1269 do j = 1, nv
1270 do i = 1, nvnv
1271 jj = i + nvnv * (j - 1)
1272 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1273 + work2(i + nvnv * (2 - 1)) * ct(2, j)
1274 end do
1275 end do
1276 end do
1277 !$omp end parallel do
1278
1279 end subroutine tnsr3d_nu2nv4_cpu
1280
1281 subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
1282 integer, parameter :: nu = 4
1283 integer, parameter :: nunu = 16
1284 integer, intent(in) :: nv, nelv
1285 real(kind=rp), intent(inout) :: v(nv*nv*nv, nelv)
1286 real(kind=rp), intent(in) :: u(nu*nu*nu, nelv)
1287 real(kind=rp), intent(in) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1288 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
1289 integer :: ie, i, j, k, l, ii, jj
1290 integer :: nvnu, nvnv
1291
1292 nvnu = nv * nu
1293 nvnv = nv * nv
1294
1295 !$omp parallel do private(ie, i, j, k, l, ii, tmp, work, work2)
1296 do ie = 1, nelv
1297 do j = 1, nunu
1298 do i = 1, nv
1299 ii = i + nv * (j - 1)
1300 work(ii) = a(i,1) * u(1 + nu * (j - 1), ie) &
1301 + a(i,2) * u(2 + nu * (j - 1), ie) &
1302 + a(i,3) * u(3 + nu * (j - 1), ie) &
1303 + a(i,4) * u(4 + nu * (j - 1), ie)
1304 end do
1305 end do
1306
1307 do i = 1, nu
1308 do j = 1, nv
1309 do l = 1, nv
1310 ii = l + nv * (j - 1) + nvnv * (i - 1)
1311 tmp = 0.0_rp
1312 do k = 1, nu
1313 jj = l + nv * (k - 1) + nvnu * (i - 1)
1314 tmp = tmp + work(jj) * bt(k,j)
1315 end do
1316 work2(ii) = tmp
1317 end do
1318 end do
1319 end do
1320
1321 do j = 1, nv
1322 do i = 1, nvnv
1323 jj = i + nvnv * (j - 1)
1324 v(jj, ie) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1325 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1326 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1327 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1328 end do
1329 end do
1330 end do
1331 !$omp end parallel do
1332
1333 end subroutine tnsr3d_nu4_cpu
1334
1335 subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
1336 integer, intent(in) :: nv, nu, nelv
1337 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1338 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1339
1340 if (nu .eq. 4 .and. nv .eq. 2) then
1341 call tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
1342 else
1343 call tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
1344 end if
1345
1346 end subroutine tnsr1_3d_cpu
1347
1348 subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
1349 integer, intent(in) :: nv, nu, nelv
1350 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1351 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1352 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1353 integer :: e, e0, ee, es, iu, iv, nu3, nv3
1354 integer :: i, j, k, l, ii, jj, kk
1355 integer :: nunu, nvnu, nvnv
1356 real(kind=rp) :: tmp
1357
1358 nvnu = nv * nu
1359 nunu = nu * nu
1360 nvnv = nv * nv
1361
1362 e0 = 1
1363 es = 1
1364 ee = nelv
1365
1366 if (nv .gt. nu) then
1367 e0 = nelv
1368 es = -1
1369 ee = 1
1370 end if
1371
1372 nu3 = nu**3
1373 nv3 = nv**3
1374
1375 !$omp parallel do private(e,iu,iv,i,j,k,l,ii,jj,kk,work,work2,tmp)
1376 do e = e0, ee, es
1377 iu = (e-1)*nu3
1378 iv = (e-1)*nv3
1379
1380 do j = 1, nunu
1381 do i = 1, nv
1382 ii = i + nv * (j - 1)
1383 tmp = 0.0_rp
1384 do k = 1, nu
1385 kk = k + nu * (j - 1) + iu
1386 tmp = tmp + a(i,k) * v(kk)
1387 end do
1388 work(ii) = tmp
1389 end do
1390 end do
1391
1392 do i = 1, nu
1393 do j = 1, nv
1394 do l = 1, nv
1395 ii = l + nv * (j - 1) + nvnv * (i - 1)
1396 tmp = 0.0_rp
1397 do k = 1, nu
1398 jj = l + nv * (k - 1) + nvnu * (i - 1)
1399 tmp = tmp + work(jj) * bt(k,j)
1400 end do
1401 work2(ii) = tmp
1402 end do
1403 end do
1404 end do
1405
1406 do j = 1, nv
1407 do i = 1, nvnv
1408 jj = i + nvnv * (j - 1) + iv
1409 tmp = 0.0_rp
1410 do k = 1, nu
1411 ii = i + nvnv * (k - 1)
1412 tmp = tmp + work2(ii) * ct(k, j)
1413 end do
1414 v(jj) = tmp
1415 end do
1416 end do
1417 end do
1418 !$omp end parallel do
1419 end subroutine tnsr1_3d_nvnu_cpu
1420
1421 subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
1422 integer, parameter :: nu = 4
1423 integer, parameter :: nv = 2
1424 integer, parameter :: nunu = 16
1425 integer, parameter :: nvnu = 8
1426 integer, parameter :: nvnv = 4
1427 integer, parameter :: nununu = 64
1428 integer, parameter :: nvnvnv = 8
1429 integer, intent(in) :: nelv
1430 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
1431 real(kind=rp), intent(inout) :: a(nv, nu), bt(nu, nv), ct(nu, nv)
1432 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
1433 integer :: e, iu, iv
1434 integer :: i, j, k, l, ii, jj
1435 real(kind=rp) :: tmp
1436
1437 !$omp parallel do private(e,iu,iv,i,j,k,l,ii,jj, work, work2, tmp)
1438 do e = 1, nelv
1439 iu = (e-1)*nununu
1440 iv = (e-1)*nvnvnv
1441
1442 do j = 1, nunu
1443 do i = 1, nv
1444 ii = i + nv * (j - 1)
1445 work(ii) = a(i,1) * v(1 + nu * (j - 1) + iu) &
1446 + a(i,2) * v(2 + nu * (j - 1) + iu) &
1447 + a(i,3) * v(3 + nu * (j - 1) + iu) &
1448 + a(i,4) * v(4 + nu * (j - 1) + iu)
1449 end do
1450 end do
1451
1452 do i = 1, nu
1453 do j = 1, nv
1454 do l = 1, nv
1455 ii = l + nv * (j - 1) + nvnv * (i - 1)
1456 tmp = 0.0_rp
1457 do k = 1, nu
1458 jj = l + nv * (k - 1) + nvnu * (i - 1)
1459 tmp = tmp + work(jj) * bt(k,j)
1460 end do
1461 work2(ii) = tmp
1462 end do
1463 end do
1464 end do
1465
1466 do j = 1, nv
1467 do i = 1, nvnv
1468 jj = i + nvnv * (j - 1) + iv
1469 v(jj) = work2(i + nvnv * (1 - 1)) * ct(1, j) &
1470 + work2(i + nvnv * (2 - 1)) * ct(2, j) &
1471 + work2(i + nvnv * (3 - 1)) * ct(3, j) &
1472 + work2(i + nvnv * (4 - 1)) * ct(4, j)
1473
1474 end do
1475 end do
1476 end do
1477 !$omp end parallel do
1478 end subroutine tnsr1_3d_nu4nv2_cpu
1479
1480end module tensor_cpu
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
subroutine tnsr3d_el_1_12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n5_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_8_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_6_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n13_cpu(v, u, a, bt, ct)
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n3_cpu(v, u, a, bt, ct)
subroutine tnsr3d_nu2nv4_cpu(v, u, a, bt, ct, nelv)
subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine tnsr3d_el_n7_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n11_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_nu_cpu(v, u, nu, a, bt, ct)
subroutine tnsr3d_el_n9_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n6_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n2_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n12_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_4_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n4_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_1_10_cpu(v, u, a, bt, ct)
subroutine tnsr1_3d_nu4nv2_cpu(v, a, bt, ct, nelv)
subroutine, public tnsr2d_el_cpu(v, nv, u, nu, a, bt)
subroutine, public tnsr1_3d_cpu(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_nu4_cpu(v, nv, u, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n_cpu(v, u, a, bt, ct, n)
subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, a, bt, ct)
subroutine tnsr3d_el_n14_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n10_cpu(v, u, a, bt, ct)
subroutine tnsr3d_el_n8_cpu(v, u, a, bt, ct)