Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_xsmm.F90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
62 use num_types, only : rp
63 use mxm_wrapper, only : mxm
64 use space, only : space_t
65 use coefs, only : coef_t
66 use math, only : rzero, col2, col3, sub3, add2, addcol3, invcol2, copy
67 use mesh, only : mesh_t
68 use field, only : field_t
70 use gather_scatter, only : gs_t, gs_op_add
71 use mathops, only : opcolv
72#ifdef HAVE_LIBXSMM
73 use libxsmm, only: libxsmm_mmcall => libxsmm_dmmcall_abc, &
74 libxsmm_dmmfunction, libxsmm_dispatch, &
75 libxsmm_prefetch_auto
76#endif
77 implicit none
78 private
79
82
83#ifdef HAVE_LIBXSMM
84 type(libxsmm_dmmfunction), private :: lgrad_xmm1
85 type(libxsmm_dmmfunction), private :: lgrad_xmm2
86 type(libxsmm_dmmfunction), private :: lgrad_xmm3
87#endif
88
89contains
90
91 subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
92 type(coef_t), intent(in), target :: coef
93 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: du
94 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: u, dr, ds, dt
95#ifdef HAVE_LIBXSMM
96 real(kind=rp) :: drst(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz)
97 type(space_t), pointer :: xh
98 type(mesh_t), pointer :: msh
99 integer :: e, k, lxy, lyz, lxyz
100 type(libxsmm_dmmfunction), save :: dudxyz_xmm1
101 type(libxsmm_dmmfunction), save :: dudxyz_xmm2
102 type(libxsmm_dmmfunction), save :: dudxyz_xmm3
103 logical, save :: dudxyz_xsmm_init = .false.
104
105 xh => coef%Xh
106 msh => coef%msh
107 lxy = xh%lx*xh%ly
108 lyz = xh%ly*xh%lz
109 lxyz = xh%lx*xh%ly*xh%lz
110
111 if (.not. dudxyz_xsmm_init) then
112 call libxsmm_dispatch(dudxyz_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
113 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
114 call libxsmm_dispatch(dudxyz_xmm2, xh%lx, xh%ly, xh%ly, &
115 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
116 call libxsmm_dispatch(dudxyz_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
117 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
118 dudxyz_xsmm_init = .true.
119 end if
120
121 do e = 1, msh%nelv
122 if (msh%gdim .eq. 2) then
123 call mxm(xh%dx, xh%lx, u(1,1,1,e), xh%lx, du(1,1,1,e), lyz)
124 call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
125 call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, drst, xh%ly)
126 call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
127 else
128 call libxsmm_mmcall(dudxyz_xmm1, xh%dx, u(1,1,1,e), du(1,1,1,e))
129 call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
130 do k = 1, xh%lz
131 call libxsmm_mmcall(dudxyz_xmm2, u(1,1,k,e), xh%dyt, drst(1,1,k))
132 end do
133 call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
134 call libxsmm_mmcall(dudxyz_xmm3, u(1,1,1,e), xh%dzt, drst)
135 call addcol3(du(1,1,1,e), drst, dt(1,1,1,e), lxyz)
136 end if
137 end do
138 call col2(du, coef%jacinv, coef%dof%n_dofs)
139
140#endif
141 end subroutine opr_xsmm_dudxyz
142
143 subroutine opr_xsmm_opgrad(ux, uy, uz, u, coef)
144 type(coef_t), intent(in) :: coef
145 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
146 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
147 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
148 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
149#ifdef HAVE_LIBXSMM
150 real(kind=rp) :: ur(coef%Xh%lxyz)
151 real(kind=rp) :: us(coef%Xh%lxyz)
152 real(kind=rp) :: ut(coef%Xh%lxyz)
153 logical, save :: lgrad_xsmm_init = .false.
154 integer, save :: init_size = 0
155 integer :: e, i, n
156 n = coef%Xh%lx - 1
157
158
159 if ((.not. lgrad_xsmm_init) .or. &
160 (init_size .gt. 0 .and. init_size .ne. n)) then
161 call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
162 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
163 call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
164 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
165 call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
166 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
167 lgrad_xsmm_init = .true.
168 init_size = n
169 end if
170
171 do e = 1, coef%msh%nelv
172 if (coef%msh%gdim .eq. 3) then
173 call local_grad3_xsmm(ur, us, ut, u(1,e), n, coef%Xh%dx, coef%Xh%dxt)
174 do i = 1, coef%Xh%lxyz
175 ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
176 + us(i) * coef%dsdx(i,1,1,e) &
177 + ut(i) * coef%dtdx(i,1,1,e) )
178 uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
179 + us(i) * coef%dsdy(i,1,1,e) &
180 + ut(i) * coef%dtdy(i,1,1,e) )
181 uz(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdz(i,1,1,e) &
182 + us(i) * coef%dsdz(i,1,1,e) &
183 + ut(i) * coef%dtdz(i,1,1,e) )
184 end do
185 else
186
187 call local_grad2(ur, us, u(1,e), n, coef%Xh%dx, coef%Xh%dyt)
188
189 do i = 1, coef%Xh%lxyz
190 ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
191 + us(i) * coef%dsdx(i,1,1,e) )
192 uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
193 + us(i) * coef%dsdy(i,1,1,e) )
194 end do
195 end if
196 end do
197#endif
198 end subroutine opr_xsmm_opgrad
199
200 subroutine local_grad3_xsmm(ur, us, ut, u, n, D, Dt)
201 integer, intent(in) :: n
202 real(kind=rp), intent(inout) :: ur(0:n, 0:n, 0:n)
203 real(kind=rp), intent(inout) :: us(0:n, 0:n, 0:n)
204 real(kind=rp), intent(inout) :: ut(0:n, 0:n, 0:n)
205 real(kind=rp), intent(in) :: u(0:n, 0:n, 0:n)
206 real(kind=rp), intent(in) :: d(0:n, 0:n)
207 real(kind=rp), intent(in) :: dt(0:n, 0:n)
208#ifdef HAVE_LIBXSMM
209 integer :: m1, m2, k
210
211 m1 = n + 1
212 m2 = m1*m1
213
214 call libxsmm_mmcall(lgrad_xmm1, d, u, ur)
215 do k = 0, n
216 call libxsmm_mmcall(lgrad_xmm2, u(0,0,k), dt, us(0,0,k))
217 end do
218 call libxsmm_mmcall(lgrad_xmm3, u, dt, ut)
219#endif
220
221 end subroutine local_grad3_xsmm
222
223 subroutine local_grad2(ur, us, u, n, D, Dt)
224 integer, intent(in) :: n
225 real(kind=rp), intent(inout) :: ur(0:n, 0:n)
226 real(kind=rp), intent(inout) :: us(0:n, 0:n)
227 real(kind=rp), intent(in) :: u(0:n, 0:n)
228 real(kind=rp), intent(in) :: d(0:n, 0:n)
229 real(kind=rp), intent(in) :: dt(0:n, 0:n)
230 integer :: m1
231
232 m1 = n + 1
233
234 call mxm(d, m1, u, m1, ur, m1)
235 call mxm(u, m1, dt, m1, us, m1)
236
237 end subroutine local_grad2
238
239 subroutine opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
240 type(coef_t), intent(in) :: coef
241 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
242 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
243 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
244 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
245 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
246#ifdef HAVE_LIBXSMM
247 real(kind=rp) :: wx(coef%Xh%lxyz)
248 real(kind=rp) :: ta1(coef%Xh%lxyz)
249 real(kind=rp) :: ta2(coef%Xh%lxyz)
250 real(kind=rp) :: ta3(coef%Xh%lxyz)
251 integer :: e, i1, i2, n1, n2, iz
252 type(space_t), pointer :: xh
253
254 type(libxsmm_dmmfunction), save :: cdtp_xmm1
255 type(libxsmm_dmmfunction), save :: cdtp_xmm2
256 type(libxsmm_dmmfunction), save :: cdtp_xmm3
257 logical, save :: cdtp_xsmm_init = .false.
258
259 xh => coef%Xh
260 n1 = xh%lx*xh%ly
261 n2 = xh%lx*xh%ly
262
263 if (.not. cdtp_xsmm_init) then
264 call libxsmm_dispatch(cdtp_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
265 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
266 call libxsmm_dispatch(cdtp_xmm2, xh%lx, xh%ly, xh%ly, &
267 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
268 call libxsmm_dispatch(cdtp_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
269 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
270 cdtp_xsmm_init = .true.
271 end if
272
273 do e = 1, coef%msh%nelv
274 call col3(wx, coef%B(1,1,1,e), x(1,e), xh%lxyz)
275 call invcol2(wx, coef%jac(1,1,1,e), xh%lxyz)
276 call col3(ta1, wx, dr(1,e), xh%lxyz)
277 call libxsmm_mmcall(cdtp_xmm1, xh%dxt, ta1, dtx(1,e))
278 call col3 (ta1, wx, ds(1,e), xh%lxyz)
279 i1 = 1
280 i2 = 1
281 do iz = 1, xh%lz
282 call libxsmm_mmcall(cdtp_xmm2, ta1(i2), xh%dy, ta2(i1))
283 i1 = i1 + n1
284 i2 = i2 + n2
285 end do
286 call add2(dtx(1,e), ta2, xh%lxyz)
287 call col3(ta1, wx, dt(1,e), xh%lxyz)
288 call libxsmm_mmcall(cdtp_xmm3, ta1, xh%dz, ta2)
289 call add2 (dtx(1,e), ta2, xh%lxyz)
290 end do
291#endif
292 end subroutine opr_xsmm_cdtp
293
294 subroutine opr_xsmm_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
295 type(space_t), intent(in) :: xh
296 type(coef_t), intent(in) :: coef
297 integer, intent(in) :: nelv, gdim
298 real(kind=rp), intent(inout) :: du(xh%lxyz, nelv)
299 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
300 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
301 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
302 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
303#ifdef HAVE_LIBXSMM
304 ! Store the inverse jacobian to speed this operation up
305 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz) :: dudr, duds, dudt
306 integer :: ie, iz, i
307
308 type(libxsmm_dmmfunction), save :: conv1_xmm1
309 type(libxsmm_dmmfunction), save :: conv1_xmm2
310 type(libxsmm_dmmfunction), save :: conv1_xmm3
311 logical, save :: conv1_xsmm_init = .false.
312
313 if (.not. conv1_xsmm_init) then
314 call libxsmm_dispatch(conv1_xmm1, xh%lx, xh%ly*xh%lx, xh%lx, &
315 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
316 call libxsmm_dispatch(conv1_xmm2, xh%lx, xh%ly, xh%ly, &
317 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
318 call libxsmm_dispatch(conv1_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
319 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
320 conv1_xsmm_init = .true.
321 end if
322
323 ! Compute vel.grad(u)
324 do ie = 1, nelv
325 if (gdim .eq. 3) then
326 call libxsmm_mmcall(conv1_xmm1, xh%dx, u(1,1,1, ie), dudr)
327 do iz = 1, xh%lz
328 call libxsmm_mmcall(conv1_xmm2, u(1,1, iz, ie), xh%dyt,&
329 duds(1,1, iz))
330 end do
331 call libxsmm_mmcall(conv1_xmm3, u(1,1,1, ie), xh%dzt, dudt)
332 do i = 1, xh%lxyz
333 du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
334 vx(i,1,1, ie) * ( &
335 coef%drdx(i,1,1, ie) * dudr(i,1,1) &
336 + coef%dsdx(i,1,1, ie) * duds(i,1,1) &
337 + coef%dtdx(i,1,1, ie) * dudt(i,1,1)) &
338 + vy(i,1,1, ie) * ( &
339 coef%drdy(i,1,1, ie) * dudr(i,1,1) &
340 + coef%dsdy(i,1,1, ie) * duds(i,1,1) &
341 + coef%dtdy(i,1,1, ie) * dudt(i,1,1)) &
342 + vz(i,1,1, ie) * ( &
343 coef%drdz(i,1,1, ie) * dudr(i,1,1) &
344 + coef%dsdz(i,1,1, ie) * duds(i,1,1) &
345 + coef%dtdz(i,1,1, ie) * dudt(i,1,1)))
346 end do
347 else
348 ! 2D
349 call mxm(xh%dx, xh%lx, u(1,1,1, ie), xh%lx, dudr, xh%lyz)
350 call mxm(u(1,1,1, ie), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
351 do i = 1, xh%lxyz
352 du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
353 vx(i,1,1, ie) * ( &
354 coef%drdx(i,1,1, ie) * dudr(i,1,1) &
355 + coef%dsdx(i,1,1, ie) * duds(i,1,1)) &
356 + vy(i,1,1, ie) * ( &
357 coef%drdy(i,1,1, ie) * dudr(i,1,1) &
358 + coef%dsdy(i,1,1, ie) * duds(i,1,1)))
359 end do
360 end if
361 end do
362
363#endif
364
365 end subroutine opr_xsmm_conv1
366
367 subroutine opr_xsmm_convect_scalar(du, u, cr, cs, ct, Xh_GLL, Xh_GL, &
368 coef_GLL, coef_GL, GLL_to_GL)
369 type(space_t), intent(in) :: xh_gl
370 type(space_t), intent(in) :: xh_gll
371 type(coef_t), intent(in) :: coef_gll
372 type(coef_t), intent(in) :: coef_gl
373 type(interpolator_t), intent(inout) :: gll_to_gl
374 real(kind=rp), intent(inout) :: du(xh_gll%lx, xh_gll%ly, xh_gll%lz, &
375 coef_gl%msh%nelv)
376 real(kind=rp), intent(inout) :: u(xh_gl%lxyz, coef_gl%msh%nelv)
377 real(kind=rp), intent(inout) :: cr(xh_gl%lxyz, coef_gl%msh%nelv)
378 real(kind=rp), intent(inout) :: cs(xh_gl%lxyz, coef_gl%msh%nelv)
379 real(kind=rp), intent(inout) :: ct(xh_gl%lxyz, coef_gl%msh%nelv)
380 real(kind=rp) :: ur(xh_gl%lxyz)
381 real(kind=rp) :: us(xh_gl%lxyz)
382 real(kind=rp) :: ut(xh_gl%lxyz)
383 real(kind=rp) :: ud(xh_gl%lxyz, coef_gl%msh%nelv)
384 logical, save :: lgrad_xsmm_init = .false.
385 integer, save :: init_size = 0
386 integer :: e, i, n, n_gll
387 n = coef_gl%Xh%lx - 1
388 n_gll = coef_gll%msh%nelv * xh_gll%lxyz
389
390#ifdef HAVE_LIBXSMM
391 if ((.not. lgrad_xsmm_init) .or. &
392 (init_size .gt. 0 .and. init_size .ne. n)) then
393 call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
394 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
395 call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
396 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
397 call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
398 alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
399 lgrad_xsmm_init = .true.
400 init_size = n
401 end if
402
403 do e = 1, coef_gll%msh%nelv
404 call local_grad3_xsmm(ur, us, ut, u(1,e), n, xh_gl%dx, xh_gl%dxt)
405 do i = 1, xh_gl%lxyz
406 ud(i,e) = cr(i,e) * ur(i) + cs(i,e) * us(i) + ct(i,e) * ut(i)
407 end do
408 end do
409#endif
410 call gll_to_gl%map(du, ud, coef_gl%msh%nelv, xh_gll)
411 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
412 call col2(du, coef_gll%Binv, n_gll)
413 end subroutine opr_xsmm_convect_scalar
414
415 subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
416 type(field_t), intent(inout) :: w1
417 type(field_t), intent(inout) :: w2
418 type(field_t), intent(inout) :: w3
419 type(field_t), intent(in) :: u1
420 type(field_t), intent(in) :: u2
421 type(field_t), intent(in) :: u3
422 type(field_t), intent(inout) :: work1
423 type(field_t), intent(inout) :: work2
424 type(coef_t), intent(in) :: c_xh
425 integer :: gdim, n
426
427 n = w1%dof%size()
428 gdim = c_xh%msh%gdim
429
430 ! this%work1=dw/dy ; this%work2=dv/dz
431 call opr_xsmm_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
432 if (gdim .eq. 3) then
433 call opr_xsmm_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, &
434 c_xh%dtdz, c_xh)
435 call sub3(w1%x, work1%x, work2%x, n)
436 else
437 call copy(w1%x, work1%x, n)
438 end if
439 ! this%work1=du/dz ; this%work2=dw/dx
440 if (gdim .eq. 3) then
441 call opr_xsmm_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, &
442 c_xh%dtdz, c_xh)
443 call opr_xsmm_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
444 c_xh%dtdx, c_xh)
445 call sub3(w2%x, work1%x, work2%x, n)
446 else
447 call rzero (work1%x, n)
448 call opr_xsmm_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
449 c_xh%dtdx, c_xh)
450 call sub3(w2%x, work1%x, work2%x, n)
451 end if
452 ! this%work1=dv/dx ; this%work2=du/dy
453 call opr_xsmm_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
454 call opr_xsmm_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
455 call sub3(w3%x, work1%x, work2%x, n)
456 !! BC dependent, Needs to change if cyclic
457
458 call opcolv(w1%x, w2%x, w3%x, c_xh%B, gdim, n)
459 call c_xh%gs_h%op(w1, gs_op_add)
460 call c_xh%gs_h%op(w2, gs_op_add)
461 call c_xh%gs_h%op(w3, gs_op_add)
462 call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
463
464 end subroutine opr_xsmm_curl
465
466 subroutine opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
467 type(space_t), intent(inout) :: xh
468 type(coef_t), intent(inout) :: coef
469 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
470 intent(inout) :: cr, cs, ct
471 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
472 intent(in) :: cx, cy, cz
473 integer :: e, i, t, nxyz
474
475 associate(drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, &
476 dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, &
477 dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz, &
478 nelv => coef%msh%nelv, lx => xh%lx, w3 => xh%w3)
479 nxyz = lx * lx * lx
480 do e = 1, nelv
481 do i = 1, nxyz
482 cr(i,e) = w3(i,1,1) * (cx(i,e) * drdx(i,1,1,e) &
483 + cy(i,e) * drdy(i,1,1,e) &
484 + cz(i,e) * drdz(i,1,1,e))
485 cs(i,e) = w3(i,1,1) * (cx(i,e) * dsdx(i,1,1,e) &
486 + cy(i,e) * dsdy(i,1,1,e) &
487 + cz(i,e) * dsdz(i,1,1,e))
488 ct(i,e) = w3(i,1,1) * (cx(i,e) * dtdx(i,1,1,e) &
489 + cy(i,e) * dtdy(i,1,1,e) &
490 + cz(i,e) * dtdz(i,1,1,e))
491 end do
492 end do
493 end associate
494
495 end subroutine opr_xsmm_set_convect_rst
496
497end module opr_xsmm
498
Coefficients.
Definition coef.f90:34
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
Definition math.f90:60
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:840
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:781
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:958
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
Definition math.f90:867
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition mathops.f90:65
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition mathops.f90:97
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public i2
Definition num_types.f90:5
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators libxsmm backend.
Definition opr_xsmm.F90:61
subroutine local_grad3_xsmm(ur, us, ut, u, n, d, dt)
Definition opr_xsmm.F90:203
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Definition opr_xsmm.F90:371
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
Definition opr_xsmm.F90:297
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition opr_xsmm.F90:242
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_xsmm.F90:418
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition opr_xsmm.F90:92
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition opr_xsmm.F90:146
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Definition opr_xsmm.F90:469
subroutine local_grad2(ur, us, u, n, d, dt)
Definition opr_xsmm.F90:226
Defines a function space.
Definition space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:62