72 use libxsmm, libxsmm_mmcall => libxsmm_dmmcall_abc
80 type(libxsmm_dmmfunction),
private :: lgrad_xmm1
81 type(libxsmm_dmmfunction),
private :: lgrad_xmm2
82 type(libxsmm_dmmfunction),
private :: lgrad_xmm3
88 type(
coef_t),
intent(in),
target :: coef
89 real(kind=
rp),
dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv),
intent(inout) :: du
90 real(kind=
rp),
dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv),
intent(in) :: u, dr, ds, dt
91 real(kind=
rp) :: drst(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz)
93 type(
mesh_t),
pointer :: msh
94 integer :: e, k, lxy, lyz, lxyz
96 type(libxsmm_dmmfunction),
save :: dudxyz_xmm1
97 type(libxsmm_dmmfunction),
save :: dudxyz_xmm2
98 type(libxsmm_dmmfunction),
save :: dudxyz_xmm3
99 logical,
save :: dudxyz_xsmm_init = .false.
105 lxyz = xh%lx*xh%ly*xh%lz
107 if (.not. dudxyz_xsmm_init)
then
108 call libxsmm_dispatch(dudxyz_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
109 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
110 call libxsmm_dispatch(dudxyz_xmm2, xh%lx, xh%ly, xh%ly, &
111 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
112 call libxsmm_dispatch(dudxyz_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
113 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
114 dudxyz_xsmm_init = .true.
118 if (msh%gdim .eq. 2)
then
119 call mxm(xh%dx, xh%lx, u(1,1,1,e), xh%lx, du(1,1,1,e), lyz)
120 call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
121 call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, drst, xh%ly)
122 call addcol3(du(1,1,1,e), drst, ds(1,1,1,e),lxyz)
124 call libxsmm_mmcall(dudxyz_xmm1, xh%dx, u(1,1,1,e), du(1,1,1,e))
125 call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
127 call libxsmm_mmcall(dudxyz_xmm2, u(1,1,k,e), xh%dyt, drst(1,1,k))
129 call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
130 call libxsmm_mmcall(dudxyz_xmm3, u(1,1,1,e), xh%dzt, drst)
131 call addcol3(du(1,1,1,e), drst, dt(1,1,1,e), lxyz)
134 call col2(du, coef%jacinv, coef%dof%n_dofs)
140 type(
coef_t),
intent(in) :: coef
141 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(inout) :: ux
142 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(inout) :: uy
143 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(inout) :: uz
144 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(in) :: u
145 real(kind=
rp) :: ur(coef%Xh%lxyz)
146 real(kind=
rp) :: us(coef%Xh%lxyz)
147 real(kind=
rp) :: ut(coef%Xh%lxyz)
148 logical,
save :: lgrad_xsmm_init = .false.
149 integer,
save :: init_size = 0
154 if ((.not. lgrad_xsmm_init) .or. &
155 (init_size .gt. 0 .and. init_size .ne. n))
then
156 call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
157 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
158 call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
159 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
160 call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
161 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
162 lgrad_xsmm_init = .true.
168 if(coef%msh%gdim .eq. 3)
then
171 ux(i,e) = coef%Xh%w3(i,1,1)*(ur(i)*coef%drdx(i,1,1,e) &
172 + us(i)*coef%dsdx(i,1,1,e) &
173 + ut(i)*coef%dtdx(i,1,1,e) )
174 uy(i,e) = coef%Xh%w3(i,1,1)*(ur(i)*coef%drdy(i,1,1,e) &
175 + us(i)*coef%dsdy(i,1,1,e) &
176 + ut(i)*coef%dtdy(i,1,1,e) )
177 uz(i,e) = coef%Xh%w3(i,1,1)*(ur(i)*coef%drdz(i,1,1,e) &
178 + us(i)*coef%dsdz(i,1,1,e) &
179 + ut(i)*coef%dtdz(i,1,1,e) )
183 call local_grad2(ur, us, u(1,e), n, coef%Xh%dx, coef%Xh%dyt)
186 ux(i,e) = coef%Xh%w3(i,1,1)*(ur(i)*coef%drdx(i,1,1,e) &
187 + us(i)*coef%dsdx(i,1,1,e) )
188 uy(i,e) = coef%Xh%w3(i,1,1)*(ur(i)*coef%drdy(i,1,1,e) &
189 + us(i)*coef%dsdy(i,1,1,e) )
196 integer,
intent(in) :: n
197 real(kind=
rp),
intent(inout) :: ur(0:n, 0:n, 0:n)
198 real(kind=
rp),
intent(inout) :: us(0:n, 0:n, 0:n)
199 real(kind=
rp),
intent(inout) :: ut(0:n, 0:n, 0:n)
200 real(kind=
rp),
intent(in) :: u(0:n, 0:n, 0:n)
201 real(kind=
rp),
intent(in) :: d(0:n, 0:n)
202 real(kind=
rp),
intent(in) :: dt(0:n, 0:n)
208 call libxsmm_mmcall(lgrad_xmm1, d, u, ur)
210 call libxsmm_mmcall(lgrad_xmm2, u(0,0,k), dt, us(0,0,k))
212 call libxsmm_mmcall(lgrad_xmm3, u, dt, ut)
218 integer,
intent(in) :: n
219 real(kind=
rp),
intent(inout) :: ur(0:n, 0:n)
220 real(kind=
rp),
intent(inout) :: us(0:n, 0:n)
221 real(kind=
rp),
intent(in) :: u(0:n, 0:n)
222 real(kind=
rp),
intent(in) :: d(0:n, 0:n)
223 real(kind=
rp),
intent(in) :: dt(0:n, 0:n)
228 call mxm(d ,m1,u,m1,ur,m1)
229 call mxm(u,m1,dt,m1,us,m1)
234 type(
coef_t),
intent(in) :: coef
235 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(inout) :: dtx
236 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(inout) :: x
237 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(in) :: dr
238 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(in) :: ds
239 real(kind=
rp),
dimension(coef%Xh%lxyz,coef%msh%nelv),
intent(in) :: dt
240 real(kind=
rp) :: wx(coef%Xh%lxyz)
241 real(kind=
rp) :: ta1(coef%Xh%lxyz)
242 real(kind=
rp) :: ta2(coef%Xh%lxyz)
243 real(kind=
rp) :: ta3(coef%Xh%lxyz)
244 integer :: e, i1, i2, n1, n2, iz
247 type(libxsmm_dmmfunction),
save :: cdtp_xmm1
248 type(libxsmm_dmmfunction),
save :: cdtp_xmm2
249 type(libxsmm_dmmfunction),
save :: cdtp_xmm3
250 logical,
save :: cdtp_xsmm_init = .false.
256 if (.not. cdtp_xsmm_init)
then
257 call libxsmm_dispatch(cdtp_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
258 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
259 call libxsmm_dispatch(cdtp_xmm2, xh%lx, xh%ly, xh%ly, &
260 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
261 call libxsmm_dispatch(cdtp_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
262 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
263 cdtp_xsmm_init = .true.
267 call col3(wx, coef%B(1,1,1,e), x(1,e), xh%lxyz)
268 call invcol2(wx, coef%jac(1,1,1,e), xh%lxyz)
269 call col3(ta1, wx, dr(1,e), xh%lxyz)
270 call libxsmm_mmcall(cdtp_xmm1, xh%dxt, ta1, dtx(1,e))
271 call col3 (ta1, wx, ds(1,e), xh%lxyz)
275 call libxsmm_mmcall(cdtp_xmm2, ta1(i2), xh%dy, ta2(i1))
279 call add2(dtx(1,e), ta2, xh%lxyz)
280 call col3(ta1, wx, dt(1,e), xh%lxyz)
281 call libxsmm_mmcall(cdtp_xmm3, ta1, xh%dz, ta2)
282 call add2 (dtx(1,e), ta2, xh%lxyz)
288 type(
space_t),
intent(in) :: xh
289 type(
coef_t),
intent(in) :: coef
290 integer,
intent(in) :: nelv, gdim
291 real(kind=
rp),
intent(inout) :: du(xh%lxyz,nelv)
292 real(kind=
rp),
intent(inout),
dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: u
293 real(kind=
rp),
intent(inout),
dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: vx
294 real(kind=
rp),
intent(inout),
dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: vy
295 real(kind=
rp),
intent(inout),
dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: vz
297 real(kind=
rp),
dimension(Xh%lx,Xh%ly,Xh%lz) :: dudr, duds, dudt
300 type(libxsmm_dmmfunction),
save :: conv1_xmm1
301 type(libxsmm_dmmfunction),
save :: conv1_xmm2
302 type(libxsmm_dmmfunction),
save :: conv1_xmm3
303 logical,
save :: conv1_xsmm_init = .false.
305 if (.not. conv1_xsmm_init)
then
306 call libxsmm_dispatch(conv1_xmm1, xh%lx, xh%ly*xh%lx, xh%lx, &
307 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
308 call libxsmm_dispatch(conv1_xmm2, xh%lx, xh%ly, xh%ly, &
309 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
310 call libxsmm_dispatch(conv1_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
311 alpha=1d0, beta=0d0, prefetch=libxsmm_prefetch_auto)
312 conv1_xsmm_init = .true.
317 if (gdim .eq. 3)
then
318 call libxsmm_mmcall(conv1_xmm1, xh%dx, u(1,1,1,ie), dudr)
320 call libxsmm_mmcall(conv1_xmm2, u(1,1,iz,ie), xh%dyt, duds(1,1,iz))
322 call libxsmm_mmcall(conv1_xmm3, u(1,1,1,ie), xh%dzt, dudt)
324 du(i,ie) = coef%jacinv(i,1,1,ie)*( &
326 coef%drdx(i,1,1,ie)*dudr(i,1,1) &
327 + coef%dsdx(i,1,1,ie)*duds(i,1,1) &
328 + coef%dtdx(i,1,1,ie)*dudt(i,1,1)) &
330 coef%drdy(i,1,1,ie)*dudr(i,1,1) &
331 + coef%dsdy(i,1,1,ie)*duds(i,1,1) &
332 + coef%dtdy(i,1,1,ie)*dudt(i,1,1)) &
334 coef%drdz(i,1,1,ie)*dudr(i,1,1) &
335 + coef%dsdz(i,1,1,ie)*duds(i,1,1) &
336 + coef%dtdz(i,1,1,ie)*dudt(i,1,1)))
340 call mxm(xh%dx, xh%lx, u(1,1,1,ie), xh%lx, dudr, xh%lyz)
341 call mxm(u(1,1,1,ie), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
343 du(i,ie) = coef%jacinv(i,1,1,ie)*( &
345 coef%drdx(i,1,1,ie)*dudr(i,1,1) &
346 + coef%dsdx(i,1,1,ie)*duds(i,1,1)) &
348 coef%drdy(i,1,1,ie)*dudr(i,1,1) &
349 + coef%dsdy(i,1,1,ie)*duds(i,1,1)))
359 type(
field_t),
intent(inout) :: w1
360 type(
field_t),
intent(inout) :: w2
361 type(
field_t),
intent(inout) :: w3
362 type(
field_t),
intent(inout) :: u1
363 type(
field_t),
intent(inout) :: u2
364 type(
field_t),
intent(inout) :: u3
365 type(
field_t),
intent(inout) :: work1
366 type(
field_t),
intent(inout) :: work2
367 type(
coef_t),
intent(in) :: c_xh
374 call opr_xsmm_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
375 if (gdim .eq. 3)
then
376 call opr_xsmm_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
377 call sub3(w1%x, work1%x, work2%x, n)
379 call copy(w1%x, work1%x, n)
382 if (gdim .eq. 3)
then
383 call opr_xsmm_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
384 call opr_xsmm_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
385 call sub3(w2%x, work1%x, work2%x, n)
387 call rzero (work1%x, n)
388 call opr_xsmm_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
389 call sub3(w2%x, work1%x, work2%x, n)
392 call opr_xsmm_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
393 call opr_xsmm_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
394 call sub3(w3%x, work1%x, work2%x, n)
397 call opcolv(w1%x,w2%x,w3%x,c_xh%B, gdim, n)
398 call c_xh%gs_h%op(w1, gs_op_add)
399 call c_xh%gs_h%op(w2, gs_op_add)
400 call c_xh%gs_h%op(w3, gs_op_add)
401 call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
subroutine, public invcol2(a, b, n)
Vector division .
subroutine, public sub3(a, b, c, n)
Vector subtraction .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public addcol3(a, b, c, n)
Returns .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
subroutine, public rzero(a, n)
Zero a real vector.
Collection of vector field operations operating on and . Note that in general the indices and ....
subroutine opcolv(a1, a2, a3, c, gdim, n)
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Operators libxsmm backend.
subroutine local_grad3_xsmm(ur, us, ut, u, n, D, Dt)
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
subroutine local_grad2(ur, us, u, n, D, Dt)
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
Defines a function space.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.