72 use libxsmm,
only: libxsmm_mmcall => libxsmm_dmmcall_abc, &
73 libxsmm_dmmfunction, libxsmm_dispatch, &
83 type(libxsmm_dmmfunction),
private :: lgrad_xmm1
84 type(libxsmm_dmmfunction),
private :: lgrad_xmm2
85 type(libxsmm_dmmfunction),
private :: lgrad_xmm3
91 type(
coef_t),
intent(in),
target :: coef
92 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(inout) :: du
93 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(in) :: u, dr, ds, dt
95 real(kind=
rp) :: drst(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz)
97 type(
mesh_t),
pointer :: msh
98 integer :: e, k, lxy, lyz, lxyz
99 type(libxsmm_dmmfunction),
save :: dudxyz_xmm1
100 type(libxsmm_dmmfunction),
save :: dudxyz_xmm2
101 type(libxsmm_dmmfunction),
save :: dudxyz_xmm3
102 logical,
save :: dudxyz_xsmm_init = .false.
108 lxyz = xh%lx*xh%ly*xh%lz
110 if (.not. dudxyz_xsmm_init)
then
111 call libxsmm_dispatch(dudxyz_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
112 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
113 call libxsmm_dispatch(dudxyz_xmm2, xh%lx, xh%ly, xh%ly, &
114 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
115 call libxsmm_dispatch(dudxyz_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
116 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
117 dudxyz_xsmm_init = .true.
121 if (msh%gdim .eq. 2)
then
122 call mxm(xh%dx, xh%lx, u(1,1,1,e), xh%lx, du(1,1,1,e), lyz)
123 call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
124 call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, drst, xh%ly)
125 call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
127 call libxsmm_mmcall(dudxyz_xmm1, xh%dx, u(1,1,1,e), du(1,1,1,e))
128 call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
130 call libxsmm_mmcall(dudxyz_xmm2, u(1,1,k,e), xh%dyt, drst(1,1,k))
132 call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
133 call libxsmm_mmcall(dudxyz_xmm3, u(1,1,1,e), xh%dzt, drst)
134 call addcol3(du(1,1,1,e), drst, dt(1,1,1,e), lxyz)
137 call col2(du, coef%jacinv, coef%dof%n_dofs)
143 type(
coef_t),
intent(in) :: coef
144 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
145 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
146 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
147 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
149 real(kind=
rp) :: ur(coef%Xh%lxyz)
150 real(kind=
rp) :: us(coef%Xh%lxyz)
151 real(kind=
rp) :: ut(coef%Xh%lxyz)
152 logical,
save :: lgrad_xsmm_init = .false.
153 integer,
save :: init_size = 0
158 if ((.not. lgrad_xsmm_init) .or. &
159 (init_size .gt. 0 .and. init_size .ne. n))
then
160 call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
161 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
162 call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
163 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
164 call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
165 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
166 lgrad_xsmm_init = .true.
170 do e = 1, coef%msh%nelv
171 if (coef%msh%gdim .eq. 3)
then
173 do i = 1, coef%Xh%lxyz
174 ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
175 + us(i) * coef%dsdx(i,1,1,e) &
176 + ut(i) * coef%dtdx(i,1,1,e) )
177 uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
178 + us(i) * coef%dsdy(i,1,1,e) &
179 + ut(i) * coef%dtdy(i,1,1,e) )
180 uz(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdz(i,1,1,e) &
181 + us(i) * coef%dsdz(i,1,1,e) &
182 + ut(i) * coef%dtdz(i,1,1,e) )
186 call local_grad2(ur, us, u(1,e), n, coef%Xh%dx, coef%Xh%dyt)
188 do i = 1, coef%Xh%lxyz
189 ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
190 + us(i) * coef%dsdx(i,1,1,e) )
191 uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
192 + us(i) * coef%dsdy(i,1,1,e) )
200 integer,
intent(in) :: n
201 real(kind=
rp),
intent(inout) :: ur(0:n, 0:n, 0:n)
202 real(kind=
rp),
intent(inout) :: us(0:n, 0:n, 0:n)
203 real(kind=
rp),
intent(inout) :: ut(0:n, 0:n, 0:n)
204 real(kind=
rp),
intent(in) :: u(0:n, 0:n, 0:n)
205 real(kind=
rp),
intent(in) :: d(0:n, 0:n)
206 real(kind=
rp),
intent(in) :: dt(0:n, 0:n)
213 call libxsmm_mmcall(lgrad_xmm1, d, u, ur)
215 call libxsmm_mmcall(lgrad_xmm2, u(0,0,k), dt, us(0,0,k))
217 call libxsmm_mmcall(lgrad_xmm3, u, dt, ut)
223 integer,
intent(in) :: n
224 real(kind=
rp),
intent(inout) :: ur(0:n, 0:n)
225 real(kind=
rp),
intent(inout) :: us(0:n, 0:n)
226 real(kind=
rp),
intent(in) :: u(0:n, 0:n)
227 real(kind=
rp),
intent(in) :: d(0:n, 0:n)
228 real(kind=
rp),
intent(in) :: dt(0:n, 0:n)
233 call mxm(d, m1, u, m1, ur, m1)
234 call mxm(u, m1, dt, m1, us, m1)
239 type(
coef_t),
intent(in) :: coef
240 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: dtx
241 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: x
242 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
243 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: ds
244 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dt
246 real(kind=
rp) :: wx(coef%Xh%lxyz)
247 real(kind=
rp) :: ta1(coef%Xh%lxyz)
248 real(kind=
rp) :: ta2(coef%Xh%lxyz)
249 real(kind=
rp) :: ta3(coef%Xh%lxyz)
250 integer :: e, i1,
i2, n1, n2, iz
253 type(libxsmm_dmmfunction),
save :: cdtp_xmm1
254 type(libxsmm_dmmfunction),
save :: cdtp_xmm2
255 type(libxsmm_dmmfunction),
save :: cdtp_xmm3
256 logical,
save :: cdtp_xsmm_init = .false.
262 if (.not. cdtp_xsmm_init)
then
263 call libxsmm_dispatch(cdtp_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
264 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
265 call libxsmm_dispatch(cdtp_xmm2, xh%lx, xh%ly, xh%ly, &
266 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
267 call libxsmm_dispatch(cdtp_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
268 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
269 cdtp_xsmm_init = .true.
272 do e = 1, coef%msh%nelv
273 call col3(wx, coef%B(1,1,1,e), x(1,e), xh%lxyz)
274 call invcol2(wx, coef%jac(1,1,1,e), xh%lxyz)
275 call col3(ta1, wx, dr(1,e), xh%lxyz)
276 call libxsmm_mmcall(cdtp_xmm1, xh%dxt, ta1, dtx(1,e))
277 call col3 (ta1, wx, ds(1,e), xh%lxyz)
281 call libxsmm_mmcall(cdtp_xmm2, ta1(
i2), xh%dy, ta2(i1))
285 call add2(dtx(1,e), ta2, xh%lxyz)
286 call col3(ta1, wx, dt(1,e), xh%lxyz)
287 call libxsmm_mmcall(cdtp_xmm3, ta1, xh%dz, ta2)
288 call add2 (dtx(1,e), ta2, xh%lxyz)
293 subroutine opr_xsmm_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
294 type(
space_t),
intent(in) :: xh
296 integer,
intent(in) :: nelv, gdim
297 real(kind=
rp),
intent(inout) :: du(xh%lxyz, nelv)
298 real(kind=
rp),
intent(in),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
299 real(kind=
rp),
intent(in),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
300 real(kind=
rp),
intent(in),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
301 real(kind=
rp),
intent(in),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
304 real(kind=
rp),
dimension(Xh%lx, Xh%ly, Xh%lz) :: dudr, duds, dudt
307 type(libxsmm_dmmfunction),
save :: conv1_xmm1
308 type(libxsmm_dmmfunction),
save :: conv1_xmm2
309 type(libxsmm_dmmfunction),
save :: conv1_xmm3
310 logical,
save :: conv1_xsmm_init = .false.
312 if (.not. conv1_xsmm_init)
then
313 call libxsmm_dispatch(conv1_xmm1, xh%lx, xh%ly*xh%lx, xh%lx, &
314 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
315 call libxsmm_dispatch(conv1_xmm2, xh%lx, xh%ly, xh%ly, &
316 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
317 call libxsmm_dispatch(conv1_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
318 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
319 conv1_xsmm_init = .true.
324 if (gdim .eq. 3)
then
325 call libxsmm_mmcall(conv1_xmm1, xh%dx, u(1,1,1, ie), dudr)
327 call libxsmm_mmcall(conv1_xmm2, u(1,1, iz, ie), xh%dyt,&
330 call libxsmm_mmcall(conv1_xmm3, u(1,1,1, ie), xh%dzt, dudt)
332 du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
334 coef%drdx(i,1,1, ie) * dudr(i,1,1) &
335 + coef%dsdx(i,1,1, ie) * duds(i,1,1) &
336 + coef%dtdx(i,1,1, ie) * dudt(i,1,1)) &
337 + vy(i,1,1, ie) * ( &
338 coef%drdy(i,1,1, ie) * dudr(i,1,1) &
339 + coef%dsdy(i,1,1, ie) * duds(i,1,1) &
340 + coef%dtdy(i,1,1, ie) * dudt(i,1,1)) &
341 + vz(i,1,1, ie) * ( &
342 coef%drdz(i,1,1, ie) * dudr(i,1,1) &
343 + coef%dsdz(i,1,1, ie) * duds(i,1,1) &
344 + coef%dtdz(i,1,1, ie) * dudt(i,1,1)))
348 call mxm(xh%dx, xh%lx, u(1,1,1, ie), xh%lx, dudr, xh%lyz)
349 call mxm(u(1,1,1, ie), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
351 du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
353 coef%drdx(i,1,1, ie) * dudr(i,1,1) &
354 + coef%dsdx(i,1,1, ie) * duds(i,1,1)) &
355 + vy(i,1,1, ie) * ( &
356 coef%drdy(i,1,1, ie) * dudr(i,1,1) &
357 + coef%dsdy(i,1,1, ie) * duds(i,1,1)))
367 coef_GLL, coef_GL, GLL_to_GL)
369 type(
space_t),
intent(in) :: xh_gll
370 type(
coef_t),
intent(in) :: coef_gll
371 type(
coef_t),
intent(in) :: coef_gl
373 real(kind=
rp),
intent(inout) :: du(xh_gll%lx, xh_gll%ly, xh_gll%lz, &
375 real(kind=
rp),
intent(inout) :: u(xh_gl%lxyz, coef_gl%msh%nelv)
376 real(kind=
rp),
intent(inout) :: cr(xh_gl%lxyz, coef_gl%msh%nelv)
377 real(kind=
rp),
intent(inout) :: cs(xh_gl%lxyz, coef_gl%msh%nelv)
378 real(kind=
rp),
intent(inout) :: ct(xh_gl%lxyz, coef_gl%msh%nelv)
379 real(kind=
rp) :: ur(xh_gl%lxyz)
380 real(kind=
rp) :: us(xh_gl%lxyz)
381 real(kind=
rp) :: ut(xh_gl%lxyz)
382 real(kind=
rp) :: ud(xh_gl%lxyz, coef_gl%msh%nelv)
383 logical,
save :: lgrad_xsmm_init = .false.
384 integer,
save :: init_size = 0
385 integer :: e, i, n, n_gll
386 n = coef_gl%Xh%lx - 1
387 n_gll = coef_gll%msh%nelv * xh_gll%lxyz
390 if ((.not. lgrad_xsmm_init) .or. &
391 (init_size .gt. 0 .and. init_size .ne. n))
then
392 call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
393 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
394 call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
395 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
396 call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
397 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
398 lgrad_xsmm_init = .true.
402 do e = 1, coef_gll%msh%nelv
405 ud(i,e) = cr(i,e) * ur(i) + cs(i,e) * us(i) + ct(i,e) * ut(i)
409 call gll_to_gl%map(du, ud, coef_gl%msh%nelv, xh_gll)
410 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
411 call col2(du, coef_gll%Binv, n_gll)
414 subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
415 type(
coef_t),
intent(in) :: c_xh
416 real(kind=
rp),
intent(inout), &
417 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w1
418 real(kind=
rp),
intent(inout), &
419 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w2
420 real(kind=
rp),
intent(inout), &
421 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: w3
422 real(kind=
rp),
intent(in), &
423 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u1
424 real(kind=
rp),
intent(in), &
425 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u2
426 real(kind=
rp),
intent(in), &
427 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: u3
428 real(kind=
rp),
intent(inout), &
429 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: work1
430 real(kind=
rp),
intent(inout), &
431 dimension(c_Xh%Xh%lx, c_Xh%Xh%ly, c_Xh%Xh%lz, c_Xh%msh%nelv) :: work2
439 if (gdim .eq. 3)
then
442 call sub3(w1, work1, work2, n)
444 call copy(w1, work1, n)
447 if (gdim .eq. 3)
then
452 call sub3(w2, work1, work2, n)
454 call rzero (work1, n)
457 call sub3(w2, work1, work2, n)
462 call sub3(w3, work1, work2, n)
465 call opcolv(w1, w2, w3, c_xh%B, gdim, n)
466 call c_xh%gs_h%op(w1, n, gs_op_add)
467 call c_xh%gs_h%op(w2, n, gs_op_add)
468 call c_xh%gs_h%op(w3, n, gs_op_add)
469 call opcolv(w1, w2, w3, c_xh%Binv, gdim, n)
474 type(
space_t),
intent(inout) :: xh
476 real(kind=
rp),
dimension(Xh%lxyz, coef%msh%nelv), &
477 intent(inout) :: cr, cs, ct
478 real(kind=
rp),
dimension(Xh%lxyz, coef%msh%nelv), &
479 intent(in) :: cx, cy, cz
480 integer :: e, i, t, nxyz
482 associate(drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, &
483 dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, &
484 dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz, &
485 nelv => coef%msh%nelv, lx => xh%lx, w3 => xh%w3)
489 cr(i,e) = w3(i,1,1) * (cx(i,e) * drdx(i,1,1,e) &
490 + cy(i,e) * drdy(i,1,1,e) &
491 + cz(i,e) * drdz(i,1,1,e))
492 cs(i,e) = w3(i,1,1) * (cx(i,e) * dsdx(i,1,1,e) &
493 + cy(i,e) * dsdy(i,1,1,e) &
494 + cz(i,e) * dsdz(i,1,1,e))
495 ct(i,e) = w3(i,1,1) * (cx(i,e) * dtdx(i,1,1,e) &
496 + cy(i,e) * dtdy(i,1,1,e) &
497 + cz(i,e) * dtdz(i,1,1,e))
Routines to interpolate between different spaces.
subroutine, public invcol2(a, b, n)
Vector division .
subroutine, public sub3(a, b, c, n)
Vector subtraction .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public addcol3(a, b, c, n)
Returns .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
subroutine, public rzero(a, n)
Zero a real vector.
Collection of vector field operations operating on and . Note that in general the indices and ....
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public i2
integer, parameter, public rp
Global precision used in computations.
Operators libxsmm backend.
subroutine local_grad3_xsmm(ur, us, ut, u, n, d, dt)
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
subroutine local_grad2(ur, us, u, n, d, dt)
Defines a function space.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Interpolation between two space::space_t.
The function space for the SEM solution fields.