Neko 0.9.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor_sx.f90
Go to the documentation of this file.
1
3 use num_types
5 implicit none
6 private
7
9
10contains
11
12 subroutine tnsr2d_el_sx(v, nv, u, nu, A, Bt)
13 integer, intent(in) :: nv, nu
14 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
15 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
16 real(kind=rp) :: work(0:nu**2*nv)
17
18 call mxm(a, nv, u, nu, work, nu)
19 call mxm(work, nv, bt, nu, v, nv)
20
21 end subroutine tnsr2d_el_sx
22
23 subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
24 integer, intent(in) :: nv, nu
25 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
26 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
27 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
28 real(kind=rp) :: tmp
29 integer :: i, j, k, l, nunu, nvnu, nvnv
30 integer :: ii, jj
31 nvnu = nv * nu
32 nunu = nu * nu
33 nvnv = nv * nv
34
35 do j = 1, nunu
36 do i = 1, nv
37 ii = i + nv * (j - 1)
38 tmp = 0.0_rp
39 do k = 1, nu
40 tmp = tmp + a(i,k) * u(k + nu * (j - 1))
41 end do
42 work(ii) = tmp
43 end do
44 end do
45
46 do i = 1, nu
47 do j = 1, nv
48 do l = 1, nv
49 ii = l + nv * (j - 1) + nvnv * (i - 1)
50 tmp = 0.0_rp
51 do k = 1, nu
52 jj = l + nv * (k - 1) + nvnu * (i - 1)
53 tmp = tmp + work(jj) * bt(k,j)
54 end do
55 work2(ii) = tmp
56 end do
57 end do
58 end do
59
60 do j = 1, nv
61 do i = 1, nvnv
62 jj = i + nvnv * (j - 1)
63 tmp = 0.0_rp
64 do k = 1, nu
65 ii = i + nvnv * (k - 1)
66 tmp = tmp + work2(ii) * ct(k, j)
67 end do
68 v(jj) = tmp
69 end do
70 end do
71
72 end subroutine tnsr3d_el_sx
73
74 subroutine tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv)
75 integer, intent(in) :: nv, nu, nelv
76 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
77 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
78
79 if (nu .eq. 2 .and. nv .eq. 4) then
80 call tnsr3d_nu2nv4_sx(v, u, a, bt, ct, nelv)
81 else if (nu .eq. 4) then
82 call tnsr3d_nu4_sx(v, nv, u, a, bt, ct, nelv)
83 else
84 call tnsr3d_nvnu_sx(v, nv, u, nu, a, bt, ct, nelv)
85 end if
86
87 end subroutine tnsr3d_sx
88
89 subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv)
90 integer, intent(in) :: nv, nu, nelv
91 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
92 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
93 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
94 integer :: ie, i, j, k, l, ii, jj
95 integer :: nunu, nvnu, nvnv
96
97 nvnu = nv * nu
98 nunu = nu * nu
99 nvnv = nv * nv
100
101 do ie = 1, nelv
102 do j = 1, nunu
103 do i = 1, nv
104 ii = i + nv * (j - 1)
105 tmp = 0.0_rp
106 do k = 1, nu
107 tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
108 end do
109 work(ii) = tmp
110 end do
111 end do
112
113 do i = 1, nu
114 do j = 1, nv
115 do l = 1, nv
116 ii = l + nv * (j - 1) + nvnv * (i - 1)
117 tmp = 0.0_rp
118 do k = 1, nu
119 jj = l + nv * (k - 1) + nvnu * (i - 1)
120 tmp = tmp + work(jj) * bt(k,j)
121 end do
122 work2(ii) = tmp
123 end do
124 end do
125 end do
126
127 do j = 1, nv
128 do i = 1, nvnv
129 jj = i + nvnv * (j - 1)
130 tmp = 0.0_rp
131 do k = 1, nu
132 ii = i + nvnv * (k - 1)
133 tmp = tmp + work2(ii) * ct(k, j)
134 end do
135 v(jj, ie) = tmp
136 end do
137 end do
138 end do
139
140 end subroutine tnsr3d_nvnu_sx
141
142 subroutine tnsr3d_nu2nv4_sx(v, u, A, Bt, Ct, nelv)
143 integer, parameter :: nu = 2
144 integer, parameter :: nv = 4
145 integer, parameter :: nunu = 4
146 integer, parameter :: nvnu = 8
147 integer, parameter :: nvnv = 16
148 integer, intent(in) :: nelv
149 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
150 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
151 real(kind=rp) :: work(nu**2*nv,nelv), work2(nu*nv**2,nelv), tmp
152 integer :: ie, i, j, k, l, ii, jj
153
154
155 do j = 1, nunu
156 do i = 1, nv
157 do ie = 1, nelv
158 ii = i + nv * (j - 1)
159 work(ii, ie) = a(i,1) * u(1 + nu * (j - 1), ie) &
160 + a(i,2) * u(2 + nu * (j - 1), ie)
161 end do
162 end do
163 end do
164
165 do i = 1, nu
166 do j = 1, nv
167 do l = 1, nv
168 do ie = 1, nelv
169 ii = l + nv * (j - 1) + nvnv * (i - 1)
170 tmp = 0.0_rp
171 !NEC$ unroll_completely
172 do k = 1, nu
173 jj = l + nv * (k - 1) + nvnu * (i - 1)
174 tmp = tmp + work(jj,ie) * bt(k,j)
175 end do
176 work2(ii, ie) = tmp
177 end do
178 end do
179 end do
180 end do
181
182 do j = 1, nv
183 do i = 1, nvnv
184 do ie = 1, nelv
185 jj = i + nvnv * (j - 1)
186 v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
187 + work2(i + nvnv * (2 - 1),ie) * ct(2, j)
188 end do
189 end do
190 end do
191
192 end subroutine tnsr3d_nu2nv4_sx
193
194 subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv)
195 integer, parameter :: nu = 4
196 integer, parameter :: nunu = 16
197 integer, intent(in) :: nv, nelv
198 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
199 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
200 real(kind=rp) :: work(nu**2*nv, nelv), work2(nu*nv**2, nelv), tmp
201 integer :: ie, i, j, k, l, ii, jj
202 integer :: nvnu, nvnv
203
204 nvnu = nv * nu
205 nvnv = nv * nv
206
207 do j = 1, nunu
208 do i = 1, nv
209 do ie = 1, nelv
210 ii = i + nv * (j - 1)
211 work(ii, ie) = a(i,1) * u(1 + nu * (j - 1), ie) &
212 + a(i,2) * u(2 + nu * (j - 1), ie) &
213 + a(i,3) * u(3 + nu * (j - 1), ie) &
214 + a(i,4) * u(4 + nu * (j - 1), ie)
215 end do
216 end do
217 end do
218
219 do i = 1, nu
220 do j = 1, nv
221 do l = 1, nv
222 do ie = 1, nelv
223 ii = l + nv * (j - 1) + nvnv * (i - 1)
224 tmp = 0.0_rp
225 !NEC$ unroll_completely
226 do k = 1, nu
227 jj = l + nv * (k - 1) + nvnu * (i - 1)
228 tmp = tmp + work(jj,ie) * bt(k,j)
229 end do
230 work2(ii, ie) = tmp
231 end do
232 end do
233 end do
234 end do
235
236 do j = 1, nv
237 do i = 1, nvnv
238 do ie = 1, nelv
239 jj = i + nvnv * (j - 1)
240 v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
241 + work2(i + nvnv * (2 - 1),ie) * ct(2, j) &
242 + work2(i + nvnv * (3 - 1),ie) * ct(3, j) &
243 + work2(i + nvnv * (4 - 1),ie) * ct(4, j)
244 end do
245 end do
246 end do
247
248 end subroutine tnsr3d_nu4_sx
249
250 subroutine tnsr1_3d_sx(v, nv, nu, A, Bt, Ct, nelv)
251 integer, intent(in) :: nv, nu, nelv
252 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
253 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
254
255
256 if (nu .eq. 4 .and. nv .eq. 2) then
257 call tnsr1_3d_nu4nv2_sx(v, a, bt, ct, nelv)
258 else
259 call tnsr1_3d_nvnu_sx(v, nv, nu, a, bt, ct, nelv)
260 end if
261
262 end subroutine tnsr1_3d_sx
263
264 subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
265 integer, intent(in) :: nv, nu, nelv
266 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
267 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
268 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
269 integer :: e, e0, ee, es, iu, iv, nu3, nv3
270 integer :: i, j, k, l, ii, jj, kk
271 integer :: nunu, nvnu, nvnv
272 real(kind=rp) :: tmp
273
274 nvnu = nv * nu
275 nunu = nu * nu
276 nvnv = nv * nv
277
278 e0 = 1
279 es = 1
280 ee = nelv
281
282 if (nv.gt.nu) then
283 e0 = nelv
284 es = -1
285 ee = 1
286 endif
287
288 nu3 = nu**3
289 nv3 = nv**3
290
291 do e = e0,ee,es
292 iu = (e-1)*nu3
293 iv = (e-1)*nv3
294
295 do j = 1, nunu
296 do i = 1, nv
297 ii = i + nv * (j - 1)
298 tmp = 0.0_rp
299 do k = 1, nu
300 kk = k + nu * (j - 1) + iu
301 tmp = tmp + a(i,k) * v(kk)
302 end do
303 work(ii) = tmp
304 end do
305 end do
306
307 do i = 1, nu
308 do j = 1, nv
309 do l = 1, nv
310 ii = l + nv * (j - 1) + nvnv * (i - 1)
311 tmp = 0.0_rp
312 do k = 1, nu
313 jj = l + nv * (k - 1) + nvnu * (i - 1)
314 tmp = tmp + work(jj) * bt(k,j)
315 end do
316 work2(ii) = tmp
317 end do
318 end do
319 end do
320
321 do j = 1, nv
322 do i = 1, nvnv
323 jj = i + nvnv * (j - 1) + iv
324 tmp = 0.0_rp
325 do k = 1, nu
326 ii = i + nvnv * (k - 1)
327 tmp = tmp + work2(ii) * ct(k, j)
328 end do
329 v(jj) = tmp
330 end do
331 end do
332 end do
333
334 end subroutine tnsr1_3d_nvnu_sx
335
336 subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv)
337 integer, parameter :: nu = 4
338 integer, parameter :: nv = 2
339 integer, parameter :: nunu = 16
340 integer, parameter :: nvnu = 8
341 integer, parameter :: nvnv = 4
342 integer, parameter :: nununu = 64
343 integer, parameter :: nvnvnv = 8
344 integer, intent(in) :: nelv
345 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
346 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
347 real(kind=rp) :: work(nu**2*nv,nelv), work2(nu*nv**2,nelv)
348 integer :: ie, iu, iv
349 integer :: i, j, k, l, ii, jj
350 real(kind=rp) :: tmp
351
352 do j = 1, nunu
353 do i = 1, nv
354 do ie = 1, nelv
355 iu = (ie-1)*nununu
356 ii = i + nv * (j - 1)
357 work(ii, ie) = a(i,1) * v(1 + nu * (j - 1) + iu) &
358 + a(i,2) * v(2 + nu * (j - 1) + iu) &
359 + a(i,3) * v(3 + nu * (j - 1) + iu) &
360 + a(i,4) * v(4 + nu * (j - 1) + iu)
361 end do
362 end do
363 end do
364
365 do i = 1, nu
366 do j = 1, nv
367 do l = 1, nv
368 do ie = 1, nelv
369 ii = l + nv * (j - 1) + nvnv * (i - 1)
370 tmp = 0.0_rp
371 !NEC$ unroll_completely
372 do k = 1, nu
373 jj = l + nv * (k - 1) + nvnu * (i - 1)
374 tmp = tmp + work(jj,ie) * bt(k,j)
375 end do
376 work2(ii,ie) = tmp
377 end do
378 end do
379 end do
380 end do
381
382 do j = 1, nv
383 do i = 1, nvnv
384 do ie = 1, nelv
385 iv = (ie-1)*nvnvnv
386 jj = i + nvnv * (j - 1) + iv
387 v(jj) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
388 + work2(i + nvnv * (2 - 1),ie) * ct(2, j) &
389 + work2(i + nvnv * (3 - 1),ie) * ct(3, j) &
390 + work2(i + nvnv * (4 - 1),ie) * ct(4, j)
391 end do
392 end do
393 end do
394
395 end subroutine tnsr1_3d_nu4nv2_sx
396
397end module tensor_sx
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Tensor operations SX-Aurora backend.
Definition tensor_sx.f90:2
subroutine, public tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_nvnu_sx(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:90
subroutine, public tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:75
subroutine tnsr3d_nu2nv4_sx(v, u, a, bt, ct, nelv)
subroutine tnsr1_3d_nu4nv2_sx(v, a, bt, ct, nelv)
subroutine, public tnsr2d_el_sx(v, nv, u, nu, a, bt)
Definition tensor_sx.f90:13
subroutine tnsr3d_nu4_sx(v, nv, u, a, bt, ct, nelv)
subroutine tnsr1_3d_nvnu_sx(v, nv, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
Definition tensor_sx.f90:24