73 use libxsmm,
only: libxsmm_mmcall => libxsmm_dmmcall_abc, &
74 libxsmm_dmmfunction, libxsmm_dispatch, &
84 type(libxsmm_dmmfunction),
private :: lgrad_xmm1
85 type(libxsmm_dmmfunction),
private :: lgrad_xmm2
86 type(libxsmm_dmmfunction),
private :: lgrad_xmm3
92 type(
coef_t),
intent(in),
target :: coef
93 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(inout) :: du
94 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, &
coef%msh%nelv),
intent(in) :: u, dr, ds, dt
96 real(kind=
rp) :: drst(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz)
98 type(
mesh_t),
pointer :: msh
99 integer :: e, k, lxy, lyz, lxyz
100 type(libxsmm_dmmfunction),
save :: dudxyz_xmm1
101 type(libxsmm_dmmfunction),
save :: dudxyz_xmm2
102 type(libxsmm_dmmfunction),
save :: dudxyz_xmm3
103 logical,
save :: dudxyz_xsmm_init = .false.
109 lxyz = xh%lx*xh%ly*xh%lz
111 if (.not. dudxyz_xsmm_init)
then
112 call libxsmm_dispatch(dudxyz_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
113 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
114 call libxsmm_dispatch(dudxyz_xmm2, xh%lx, xh%ly, xh%ly, &
115 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
116 call libxsmm_dispatch(dudxyz_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
117 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
118 dudxyz_xsmm_init = .true.
122 if (msh%gdim .eq. 2)
then
123 call mxm(xh%dx, xh%lx, u(1,1,1,e), xh%lx, du(1,1,1,e), lyz)
124 call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
125 call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, drst, xh%ly)
126 call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
128 call libxsmm_mmcall(dudxyz_xmm1, xh%dx, u(1,1,1,e), du(1,1,1,e))
129 call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
131 call libxsmm_mmcall(dudxyz_xmm2, u(1,1,k,e), xh%dyt, drst(1,1,k))
133 call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
134 call libxsmm_mmcall(dudxyz_xmm3, u(1,1,1,e), xh%dzt, drst)
135 call addcol3(du(1,1,1,e), drst, dt(1,1,1,e), lxyz)
138 call col2(du, coef%jacinv, coef%dof%n_dofs)
144 type(
coef_t),
intent(in) :: coef
145 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
146 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
147 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
148 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
150 real(kind=
rp) :: ur(coef%Xh%lxyz)
151 real(kind=
rp) :: us(coef%Xh%lxyz)
152 real(kind=
rp) :: ut(coef%Xh%lxyz)
153 logical,
save :: lgrad_xsmm_init = .false.
154 integer,
save :: init_size = 0
159 if ((.not. lgrad_xsmm_init) .or. &
160 (init_size .gt. 0 .and. init_size .ne. n))
then
161 call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
162 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
163 call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
164 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
165 call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
166 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
167 lgrad_xsmm_init = .true.
171 do e = 1, coef%msh%nelv
172 if (coef%msh%gdim .eq. 3)
then
174 do i = 1, coef%Xh%lxyz
175 ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
176 + us(i) * coef%dsdx(i,1,1,e) &
177 + ut(i) * coef%dtdx(i,1,1,e) )
178 uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
179 + us(i) * coef%dsdy(i,1,1,e) &
180 + ut(i) * coef%dtdy(i,1,1,e) )
181 uz(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdz(i,1,1,e) &
182 + us(i) * coef%dsdz(i,1,1,e) &
183 + ut(i) * coef%dtdz(i,1,1,e) )
187 call local_grad2(ur, us, u(1,e), n, coef%Xh%dx, coef%Xh%dyt)
189 do i = 1, coef%Xh%lxyz
190 ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
191 + us(i) * coef%dsdx(i,1,1,e) )
192 uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
193 + us(i) * coef%dsdy(i,1,1,e) )
201 integer,
intent(in) :: n
202 real(kind=
rp),
intent(inout) :: ur(0:n, 0:n, 0:n)
203 real(kind=
rp),
intent(inout) :: us(0:n, 0:n, 0:n)
204 real(kind=
rp),
intent(inout) :: ut(0:n, 0:n, 0:n)
205 real(kind=
rp),
intent(in) :: u(0:n, 0:n, 0:n)
206 real(kind=
rp),
intent(in) :: d(0:n, 0:n)
207 real(kind=
rp),
intent(in) :: dt(0:n, 0:n)
214 call libxsmm_mmcall(lgrad_xmm1, d, u, ur)
216 call libxsmm_mmcall(lgrad_xmm2, u(0,0,k), dt, us(0,0,k))
218 call libxsmm_mmcall(lgrad_xmm3, u, dt, ut)
224 integer,
intent(in) :: n
225 real(kind=
rp),
intent(inout) :: ur(0:n, 0:n)
226 real(kind=
rp),
intent(inout) :: us(0:n, 0:n)
227 real(kind=
rp),
intent(in) :: u(0:n, 0:n)
228 real(kind=
rp),
intent(in) :: d(0:n, 0:n)
229 real(kind=
rp),
intent(in) :: dt(0:n, 0:n)
234 call mxm(d, m1, u, m1, ur, m1)
235 call mxm(u, m1, dt, m1, us, m1)
240 type(
coef_t),
intent(in) :: coef
241 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: dtx
242 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: x
243 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
244 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: ds
245 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dt
247 real(kind=
rp) :: wx(coef%Xh%lxyz)
248 real(kind=
rp) :: ta1(coef%Xh%lxyz)
249 real(kind=
rp) :: ta2(coef%Xh%lxyz)
250 real(kind=
rp) :: ta3(coef%Xh%lxyz)
251 integer :: e, i1,
i2, n1, n2, iz
254 type(libxsmm_dmmfunction),
save :: cdtp_xmm1
255 type(libxsmm_dmmfunction),
save :: cdtp_xmm2
256 type(libxsmm_dmmfunction),
save :: cdtp_xmm3
257 logical,
save :: cdtp_xsmm_init = .false.
263 if (.not. cdtp_xsmm_init)
then
264 call libxsmm_dispatch(cdtp_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
265 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
266 call libxsmm_dispatch(cdtp_xmm2, xh%lx, xh%ly, xh%ly, &
267 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
268 call libxsmm_dispatch(cdtp_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
269 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
270 cdtp_xsmm_init = .true.
273 do e = 1, coef%msh%nelv
274 call col3(wx, coef%B(1,1,1,e), x(1,e), xh%lxyz)
275 call invcol2(wx, coef%jac(1,1,1,e), xh%lxyz)
276 call col3(ta1, wx, dr(1,e), xh%lxyz)
277 call libxsmm_mmcall(cdtp_xmm1, xh%dxt, ta1, dtx(1,e))
278 call col3 (ta1, wx, ds(1,e), xh%lxyz)
282 call libxsmm_mmcall(cdtp_xmm2, ta1(
i2), xh%dy, ta2(i1))
286 call add2(dtx(1,e), ta2, xh%lxyz)
287 call col3(ta1, wx, dt(1,e), xh%lxyz)
288 call libxsmm_mmcall(cdtp_xmm3, ta1, xh%dz, ta2)
289 call add2 (dtx(1,e), ta2, xh%lxyz)
294 subroutine opr_xsmm_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
295 type(
space_t),
intent(in) :: xh
297 integer,
intent(in) :: nelv, gdim
298 real(kind=
rp),
intent(inout) :: du(xh%lxyz, nelv)
299 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
300 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
301 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
302 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
305 real(kind=
rp),
dimension(Xh%lx, Xh%ly, Xh%lz) :: dudr, duds, dudt
308 type(libxsmm_dmmfunction),
save :: conv1_xmm1
309 type(libxsmm_dmmfunction),
save :: conv1_xmm2
310 type(libxsmm_dmmfunction),
save :: conv1_xmm3
311 logical,
save :: conv1_xsmm_init = .false.
313 if (.not. conv1_xsmm_init)
then
314 call libxsmm_dispatch(conv1_xmm1, xh%lx, xh%ly*xh%lx, xh%lx, &
315 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
316 call libxsmm_dispatch(conv1_xmm2, xh%lx, xh%ly, xh%ly, &
317 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
318 call libxsmm_dispatch(conv1_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
319 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
320 conv1_xsmm_init = .true.
325 if (gdim .eq. 3)
then
326 call libxsmm_mmcall(conv1_xmm1, xh%dx, u(1,1,1, ie), dudr)
328 call libxsmm_mmcall(conv1_xmm2, u(1,1, iz, ie), xh%dyt,&
331 call libxsmm_mmcall(conv1_xmm3, u(1,1,1, ie), xh%dzt, dudt)
333 du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
335 coef%drdx(i,1,1, ie) * dudr(i,1,1) &
336 + coef%dsdx(i,1,1, ie) * duds(i,1,1) &
337 + coef%dtdx(i,1,1, ie) * dudt(i,1,1)) &
338 + vy(i,1,1, ie) * ( &
339 coef%drdy(i,1,1, ie) * dudr(i,1,1) &
340 + coef%dsdy(i,1,1, ie) * duds(i,1,1) &
341 + coef%dtdy(i,1,1, ie) * dudt(i,1,1)) &
342 + vz(i,1,1, ie) * ( &
343 coef%drdz(i,1,1, ie) * dudr(i,1,1) &
344 + coef%dsdz(i,1,1, ie) * duds(i,1,1) &
345 + coef%dtdz(i,1,1, ie) * dudt(i,1,1)))
349 call mxm(xh%dx, xh%lx, u(1,1,1, ie), xh%lx, dudr, xh%lyz)
350 call mxm(u(1,1,1, ie), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
352 du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
354 coef%drdx(i,1,1, ie) * dudr(i,1,1) &
355 + coef%dsdx(i,1,1, ie) * duds(i,1,1)) &
356 + vy(i,1,1, ie) * ( &
357 coef%drdy(i,1,1, ie) * dudr(i,1,1) &
358 + coef%dsdy(i,1,1, ie) * duds(i,1,1)))
370 type(
space_t),
intent(in) :: xh_gll
371 type(
coef_t),
intent(in) :: coef_gll
372 type(
coef_t),
intent(in) :: coef_gl
374 real(kind=
rp),
intent(inout) :: du(xh_gll%lx, xh_gll%ly, xh_gll%lz, &
376 real(kind=
rp),
intent(inout) :: u(xh_gl%lxyz, coef_gl%msh%nelv)
377 real(kind=
rp),
intent(inout) :: c(xh_gl%lxyz, coef_gl%msh%nelv, 3)
378 real(kind=
rp) :: ur(xh_gl%lxyz)
379 real(kind=
rp) :: us(xh_gl%lxyz)
380 real(kind=
rp) :: ut(xh_gl%lxyz)
381 real(kind=
rp) :: ud(xh_gl%lxyz, coef_gl%msh%nelv)
382 logical,
save :: lgrad_xsmm_init = .false.
383 integer,
save :: init_size = 0
384 integer :: e, i, n, n_gll
385 n = coef_gl%Xh%lx - 1
386 n_gll = coef_gll%msh%nelv * xh_gll%lxyz
389 if ((.not. lgrad_xsmm_init) .or. &
390 (init_size .gt. 0 .and. init_size .ne. n))
then
391 call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
392 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
393 call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
394 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
395 call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
396 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
397 lgrad_xsmm_init = .true.
401 do e = 1, coef_gll%msh%nelv
404 ud(i,e) = c(i,e,1) * ur(i) + c(i,e,2) * us(i) + c(i,e,3) * ut(i)
408 call gll_to_gl%map(du, ud, coef_gl%msh%nelv, xh_gll)
409 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
410 call col2(du, coef_gll%Binv, n_gll)
413 subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
414 type(
field_t),
intent(inout) :: w1
416 type(
field_t),
intent(inout) :: w3
417 type(
field_t),
intent(in) :: u1
418 type(
field_t),
intent(in) :: u2
419 type(
field_t),
intent(in) :: u3
420 type(
field_t),
intent(inout) :: work1
421 type(
field_t),
intent(inout) :: work2
422 type(
coef_t),
intent(in) :: c_xh
429 call opr_xsmm_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
430 if (gdim .eq. 3)
then
433 call sub3(w1%x, work1%x, work2%x, n)
435 call copy(w1%x, work1%x, n)
438 if (gdim .eq. 3)
then
443 call sub3(w2%x, work1%x, work2%x, n)
445 call rzero (work1%x, n)
448 call sub3(w2%x, work1%x, work2%x, n)
451 call opr_xsmm_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
452 call opr_xsmm_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
453 call sub3(w3%x, work1%x, work2%x, n)
456 call opcolv(w1%x, w2%x, w3%x, c_xh%B, gdim, n)
457 call c_xh%gs_h%op(w1, gs_op_add)
458 call c_xh%gs_h%op(w2, gs_op_add)
459 call c_xh%gs_h%op(w3, gs_op_add)
460 call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
465 type(
space_t),
intent(inout) :: xh
467 real(kind=
rp),
dimension(Xh%lxyz, coef%msh%nelv), &
468 intent(inout) :: cr, cs, ct
469 real(kind=
rp),
dimension(Xh%lxyz, coef%msh%nelv), &
470 intent(in) :: cx, cy, cz
471 integer :: e, i, t, nxyz
473 associate(drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, &
474 dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, &
475 dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz, &
476 nelv => coef%msh%nelv, lx => xh%lx, w3 => xh%w3)
480 cr(i,e) = w3(i,1,1) * (cx(i,e) * drdx(i,1,1,e) &
481 + cy(i,e) * drdy(i,1,1,e) &
482 + cz(i,e) * drdz(i,1,1,e))
483 cs(i,e) = w3(i,1,1) * (cx(i,e) * dsdx(i,1,1,e) &
484 + cy(i,e) * dsdy(i,1,1,e) &
485 + cz(i,e) * dsdz(i,1,1,e))
486 ct(i,e) = w3(i,1,1) * (cx(i,e) * dtdx(i,1,1,e) &
487 + cy(i,e) * dtdy(i,1,1,e) &
488 + cz(i,e) * dtdz(i,1,1,e))
Routines to interpolate between different spaces.
subroutine, public invcol2(a, b, n)
Vector division .
subroutine, public sub3(a, b, c, n)
Vector subtraction .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public addcol3(a, b, c, n)
Returns .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
subroutine, public rzero(a, n)
Zero a real vector.
Collection of vector field operations operating on and . Note that in general the indices and ....
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public i2
integer, parameter, public rp
Global precision used in computations.
Operators libxsmm backend.
subroutine local_grad3_xsmm(ur, us, ut, u, n, d, dt)
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
subroutine, public opr_xsmm_convect_scalar(du, u, c, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine local_grad2(ur, us, u, n, d, dt)
Defines a function space.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Interpolation between two space::space_t.
The function space for the SEM solution fields.