Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tensor_sx.f90
Go to the documentation of this file.
1
3 use num_types
5 implicit none
6 private
7
9
10contains
11
12 subroutine tnsr2d_el_sx(v, nv, u, nu, A, Bt)
13 integer, intent(in) :: nv, nu
14 real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
15 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu,nv)
16 real(kind=rp) :: work(0:nu**2*nv)
17
18 call mxm(a, nv, u, nu, work, nu)
19 call mxm(work, nv, bt, nu, v, nv)
20
21 end subroutine tnsr2d_el_sx
22
23 subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
24 integer, intent(in) :: nv, nu
25 real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
26 real(kind=rp), intent(inout) :: a(nv,nu),bt(nu, nv),ct(nu,nv)
27 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
28 real(kind=rp) :: tmp
29 integer :: i, j, k, l, nunu, nvnu, nvnv
30 integer :: ii, jj
31 nvnu = nv * nu
32 nunu = nu * nu
33 nvnv = nv * nv
34
35 do j = 1, nunu
36 do i = 1, nv
37 ii = i + nv * (j - 1)
38 tmp = 0.0_rp
39 do k = 1, nu
40 tmp = tmp + a(i,k) * u(k + nu * (j - 1))
41 end do
42 work(ii) = tmp
43 end do
44 end do
45
46 do i = 1, nu
47 do j = 1, nv
48 do l = 1, nv
49 ii = l + nv * (j - 1) + nvnv * (i - 1)
50 tmp = 0.0_rp
51 do k = 1, nu
52 jj = l + nv * (k - 1) + nvnu * (i - 1)
53 tmp = tmp + work(jj) * bt(k,j)
54 end do
55 work2(ii) = tmp
56 end do
57 end do
58 end do
59
60 do j = 1, nv
61 do i = 1, nvnv
62 jj = i + nvnv * (j - 1)
63 tmp = 0.0_rp
64 do k = 1, nu
65 ii = i + nvnv * (k - 1)
66 tmp = tmp + work2(ii) * ct(k, j)
67 end do
68 v(jj) = tmp
69 end do
70 end do
71
72 end subroutine tnsr3d_el_sx
73
74 subroutine tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv)
75 integer, intent(in) :: nv, nu, nelv
76 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
77 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
78 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
79
80 if (nu .eq. 2 .and. nv .eq. 4) then
81 call tnsr3d_nu2nv4_sx(v, u, a, bt, ct, nelv)
82 else if (nu .eq. 4) then
83 call tnsr3d_nu4_sx(v, nv, u, a, bt, ct, nelv)
84 else
85 call tnsr3d_nvnu_sx(v, nv, u, nu, a, bt, ct, nelv)
86 end if
87
88 end subroutine tnsr3d_sx
89
90 subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv)
91 integer, intent(in) :: nv, nu, nelv
92 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
93 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
94 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
95 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
96 integer :: ie, i, j, k, l, ii, jj
97 integer :: nunu, nvnu, nvnv
98
99 nvnu = nv * nu
100 nunu = nu * nu
101 nvnv = nv * nv
102
103 do ie = 1, nelv
104 do j = 1, nunu
105 do i = 1, nv
106 ii = i + nv * (j - 1)
107 tmp = 0.0_rp
108 do k = 1, nu
109 tmp = tmp + a(i,k) * u(k + nu * (j - 1), ie)
110 end do
111 work(ii) = tmp
112 end do
113 end do
114
115 do i = 1, nu
116 do j = 1, nv
117 do l = 1, nv
118 ii = l + nv * (j - 1) + nvnv * (i - 1)
119 tmp = 0.0_rp
120 do k = 1, nu
121 jj = l + nv * (k - 1) + nvnu * (i - 1)
122 tmp = tmp + work(jj) * bt(k,j)
123 end do
124 work2(ii) = tmp
125 end do
126 end do
127 end do
128
129 do j = 1, nv
130 do i = 1, nvnv
131 jj = i + nvnv * (j - 1)
132 tmp = 0.0_rp
133 do k = 1, nu
134 ii = i + nvnv * (k - 1)
135 tmp = tmp + work2(ii) * ct(k, j)
136 end do
137 v(jj, ie) = tmp
138 end do
139 end do
140 end do
141
142 end subroutine tnsr3d_nvnu_sx
143
144 subroutine tnsr3d_nu2nv4_sx(v, u, A, Bt, Ct, nelv)
145 integer, parameter :: nu = 2
146 integer, parameter :: nv = 4
147 integer, parameter :: nunu = 4
148 integer, parameter :: nvnu = 8
149 integer, parameter :: nvnv = 16
150 integer, intent(in) :: nelv
151 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
152 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
153 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
154 real(kind=rp) :: work(nu**2*nv,nelv), work2(nu*nv**2,nelv), tmp
155 integer :: ie, i, j, k, l, ii, jj
156
157
158 do j = 1, nunu
159 do i = 1, nv
160 do ie = 1, nelv
161 ii = i + nv * (j - 1)
162 work(ii, ie) = a(i,1) * u(1 + nu * (j - 1), ie) &
163 + a(i,2) * u(2 + nu * (j - 1), ie)
164 end do
165 end do
166 end do
167
168 do i = 1, nu
169 do j = 1, nv
170 do l = 1, nv
171 do ie = 1, nelv
172 ii = l + nv * (j - 1) + nvnv * (i - 1)
173 tmp = 0.0_rp
174 !NEC$ unroll_completely
175 do k = 1, nu
176 jj = l + nv * (k - 1) + nvnu * (i - 1)
177 tmp = tmp + work(jj,ie) * bt(k,j)
178 end do
179 work2(ii, ie) = tmp
180 end do
181 end do
182 end do
183 end do
184
185 do j = 1, nv
186 do i = 1, nvnv
187 do ie = 1, nelv
188 jj = i + nvnv * (j - 1)
189 v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
190 + work2(i + nvnv * (2 - 1),ie) * ct(2, j)
191 end do
192 end do
193 end do
194
195 end subroutine tnsr3d_nu2nv4_sx
196
197 subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv)
198 integer, parameter :: nu = 4
199 integer, parameter :: nunu = 16
200 integer, intent(in) :: nv, nelv
201 real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv)
202 real(kind=rp), intent(in) :: u(nu*nu*nu,nelv)
203 real(kind=rp), intent(in) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
204 real(kind=rp) :: work(nu**2*nv, nelv), work2(nu*nv**2, nelv), tmp
205 integer :: ie, i, j, k, l, ii, jj
206 integer :: nvnu, nvnv
207
208 nvnu = nv * nu
209 nvnv = nv * nv
210
211 do j = 1, nunu
212 do i = 1, nv
213 do ie = 1, nelv
214 ii = i + nv * (j - 1)
215 work(ii, ie) = a(i,1) * u(1 + nu * (j - 1), ie) &
216 + a(i,2) * u(2 + nu * (j - 1), ie) &
217 + a(i,3) * u(3 + nu * (j - 1), ie) &
218 + a(i,4) * u(4 + nu * (j - 1), ie)
219 end do
220 end do
221 end do
222
223 do i = 1, nu
224 do j = 1, nv
225 do l = 1, nv
226 do ie = 1, nelv
227 ii = l + nv * (j - 1) + nvnv * (i - 1)
228 tmp = 0.0_rp
229 !NEC$ unroll_completely
230 do k = 1, nu
231 jj = l + nv * (k - 1) + nvnu * (i - 1)
232 tmp = tmp + work(jj,ie) * bt(k,j)
233 end do
234 work2(ii, ie) = tmp
235 end do
236 end do
237 end do
238 end do
239
240 do j = 1, nv
241 do i = 1, nvnv
242 do ie = 1, nelv
243 jj = i + nvnv * (j - 1)
244 v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
245 + work2(i + nvnv * (2 - 1),ie) * ct(2, j) &
246 + work2(i + nvnv * (3 - 1),ie) * ct(3, j) &
247 + work2(i + nvnv * (4 - 1),ie) * ct(4, j)
248 end do
249 end do
250 end do
251
252 end subroutine tnsr3d_nu4_sx
253
254 subroutine tnsr1_3d_sx(v, nv, nu, A, Bt, Ct, nelv)
255 integer, intent(in) :: nv, nu, nelv
256 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
257 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
258
259
260 if (nu .eq. 4 .and. nv .eq. 2) then
261 call tnsr1_3d_nu4nv2_sx(v, a, bt, ct, nelv)
262 else
263 call tnsr1_3d_nvnu_sx(v, nv, nu, a, bt, ct, nelv)
264 end if
265
266 end subroutine tnsr1_3d_sx
267
268 subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
269 integer, intent(in) :: nv, nu, nelv
270 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
271 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
272 real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2)
273 integer :: e, e0, ee, es, iu, iv, nu3, nv3
274 integer :: i, j, k, l, ii, jj, kk
275 integer :: nunu, nvnu, nvnv
276 real(kind=rp) :: tmp
277
278 nvnu = nv * nu
279 nunu = nu * nu
280 nvnv = nv * nv
281
282 e0 = 1
283 es = 1
284 ee = nelv
285
286 if (nv.gt.nu) then
287 e0 = nelv
288 es = -1
289 ee = 1
290 endif
291
292 nu3 = nu**3
293 nv3 = nv**3
294
295 do e = e0,ee,es
296 iu = (e-1)*nu3
297 iv = (e-1)*nv3
298
299 do j = 1, nunu
300 do i = 1, nv
301 ii = i + nv * (j - 1)
302 tmp = 0.0_rp
303 do k = 1, nu
304 kk = k + nu * (j - 1) + iu
305 tmp = tmp + a(i,k) * v(kk)
306 end do
307 work(ii) = tmp
308 end do
309 end do
310
311 do i = 1, nu
312 do j = 1, nv
313 do l = 1, nv
314 ii = l + nv * (j - 1) + nvnv * (i - 1)
315 tmp = 0.0_rp
316 do k = 1, nu
317 jj = l + nv * (k - 1) + nvnu * (i - 1)
318 tmp = tmp + work(jj) * bt(k,j)
319 end do
320 work2(ii) = tmp
321 end do
322 end do
323 end do
324
325 do j = 1, nv
326 do i = 1, nvnv
327 jj = i + nvnv * (j - 1) + iv
328 tmp = 0.0_rp
329 do k = 1, nu
330 ii = i + nvnv * (k - 1)
331 tmp = tmp + work2(ii) * ct(k, j)
332 end do
333 v(jj) = tmp
334 end do
335 end do
336 end do
337
338 end subroutine tnsr1_3d_nvnu_sx
339
340 subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv)
341 integer, parameter :: nu = 4
342 integer, parameter :: nv = 2
343 integer, parameter :: nunu = 16
344 integer, parameter :: nvnu = 8
345 integer, parameter :: nvnv = 4
346 integer, parameter :: nununu = 64
347 integer, parameter :: nvnvnv = 8
348 integer, intent(in) :: nelv
349 real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
350 real(kind=rp), intent(inout) :: a(nv,nu), bt(nu, nv), ct(nu,nv)
351 real(kind=rp) :: work(nu**2*nv,nelv), work2(nu*nv**2,nelv)
352 integer :: ie, iu, iv
353 integer :: i, j, k, l, ii, jj
354 real(kind=rp) :: tmp
355
356 do j = 1, nunu
357 do i = 1, nv
358 do ie = 1, nelv
359 iu = (ie-1)*nununu
360 ii = i + nv * (j - 1)
361 work(ii, ie) = a(i,1) * v(1 + nu * (j - 1) + iu) &
362 + a(i,2) * v(2 + nu * (j - 1) + iu) &
363 + a(i,3) * v(3 + nu * (j - 1) + iu) &
364 + a(i,4) * v(4 + nu * (j - 1) + iu)
365 end do
366 end do
367 end do
368
369 do i = 1, nu
370 do j = 1, nv
371 do l = 1, nv
372 do ie = 1, nelv
373 ii = l + nv * (j - 1) + nvnv * (i - 1)
374 tmp = 0.0_rp
375 !NEC$ unroll_completely
376 do k = 1, nu
377 jj = l + nv * (k - 1) + nvnu * (i - 1)
378 tmp = tmp + work(jj,ie) * bt(k,j)
379 end do
380 work2(ii,ie) = tmp
381 end do
382 end do
383 end do
384 end do
385
386 do j = 1, nv
387 do i = 1, nvnv
388 do ie = 1, nelv
389 iv = (ie-1)*nvnvnv
390 jj = i + nvnv * (j - 1) + iv
391 v(jj) = work2(i + nvnv * (1 - 1),ie) * ct(1, j) &
392 + work2(i + nvnv * (2 - 1),ie) * ct(2, j) &
393 + work2(i + nvnv * (3 - 1),ie) * ct(3, j) &
394 + work2(i + nvnv * (4 - 1),ie) * ct(4, j)
395 end do
396 end do
397 end do
398
399 end subroutine tnsr1_3d_nu4nv2_sx
400
401end module tensor_sx
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Tensor operations SX-Aurora backend.
Definition tensor_sx.f90:2
subroutine, public tnsr1_3d_sx(v, nv, nu, a, bt, ct, nelv)
subroutine tnsr3d_nvnu_sx(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:91
subroutine, public tnsr3d_sx(v, nv, u, nu, a, bt, ct, nelv)
Definition tensor_sx.f90:75
subroutine tnsr3d_nu2nv4_sx(v, u, a, bt, ct, nelv)
subroutine tnsr1_3d_nu4nv2_sx(v, a, bt, ct, nelv)
subroutine, public tnsr2d_el_sx(v, nv, u, nu, a, bt)
Definition tensor_sx.f90:13
subroutine tnsr3d_nu4_sx(v, nv, u, a, bt, ct, nelv)
subroutine tnsr1_3d_nvnu_sx(v, nv, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_el_sx(v, nv, u, nu, a, bt, ct)
Definition tensor_sx.f90:24