Neko  0.9.0
A portable framework for high-order spectral element flow simulations
opr_xsmm.F90
Go to the documentation of this file.
1 ! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2 !
3 ! The UChicago Argonne, LLC as Operator of Argonne National
4 ! Laboratory holds copyright in the Software. The copyright holder
5 ! reserves all rights except those expressly granted to licensees,
6 ! and U.S. Government license rights.
7 !
8 ! Redistribution and use in source and binary forms, with or without
9 ! modification, are permitted provided that the following conditions
10 ! are met:
11 !
12 ! 1. Redistributions of source code must retain the above copyright
13 ! notice, this list of conditions and the disclaimer below.
14 !
15 ! 2. Redistributions in binary form must reproduce the above copyright
16 ! notice, this list of conditions and the disclaimer (as noted below)
17 ! in the documentation and/or other materials provided with the
18 ! distribution.
19 !
20 ! 3. Neither the name of ANL nor the names of its contributors
21 ! may be used to endorse or promote products derived from this software
22 ! without specific prior written permission.
23 !
24 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28 ! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29 ! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 ! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31 ! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 ! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 ! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 ! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 !
37 ! Additional BSD Notice
38 ! ---------------------
39 ! 1. This notice is required to be provided under our contract with
40 ! the U.S. Department of Energy (DOE). This work was produced at
41 ! Argonne National Laboratory under Contract
42 ! No. DE-AC02-06CH11357 with the DOE.
43 !
44 ! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45 ! LLC nor any of their employees, makes any warranty,
46 ! express or implied, or assumes any liability or responsibility for the
47 ! accuracy, completeness, or usefulness of any information, apparatus,
48 ! product, or process disclosed, or represents that its use would not
49 ! infringe privately-owned rights.
50 !
51 ! 3. Also, reference herein to any specific commercial products, process,
52 ! or services by trade name, trademark, manufacturer or otherwise does
53 ! not necessarily constitute or imply its endorsement, recommendation,
54 ! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55 ! The views and opinions of authors expressed
56 ! herein do not necessarily state or reflect those of the United States
57 ! Government or UCHICAGO ARGONNE, LLC, and shall
58 ! not be used for advertising or product endorsement purposes.
59 !
61 module opr_xsmm
62  use num_types, only : rp
63  use mxm_wrapper, only : mxm
64  use space, only : space_t
65  use coefs, only : coef_t
66  use math, only : rzero, col2, col3, sub3, add2, addcol3, invcol2, copy
67  use mesh, only : mesh_t
68  use field, only : field_t
69  use interpolation, only : interpolator_t
70  use gather_scatter, only : gs_t, gs_op_add
71  use mathops, only : opcolv
72 #ifdef HAVE_LIBXSMM
73  use libxsmm, only: libxsmm_mmcall => libxsmm_dmmcall_abc, &
74  libxsmm_dmmfunction, libxsmm_dispatch, &
75  libxsmm_prefetch_auto
76 #endif
77  implicit none
78  private
79 
82 
83 #ifdef HAVE_LIBXSMM
84  type(libxsmm_dmmfunction), private :: lgrad_xmm1
85  type(libxsmm_dmmfunction), private :: lgrad_xmm2
86  type(libxsmm_dmmfunction), private :: lgrad_xmm3
87 #endif
88 
89 contains
90 
91  subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
92  type(coef_t), intent(in), target :: coef
93  real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: du
94  real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: u, dr, ds, dt
95 #ifdef HAVE_LIBXSMM
96  real(kind=rp) :: drst(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz)
97  type(space_t), pointer :: xh
98  type(mesh_t), pointer :: msh
99  integer :: e, k, lxy, lyz, lxyz
100  type(libxsmm_dmmfunction), save :: dudxyz_xmm1
101  type(libxsmm_dmmfunction), save :: dudxyz_xmm2
102  type(libxsmm_dmmfunction), save :: dudxyz_xmm3
103  logical, save :: dudxyz_xsmm_init = .false.
104 
105  xh => coef%Xh
106  msh => coef%msh
107  lxy = xh%lx*xh%ly
108  lyz = xh%ly*xh%lz
109  lxyz = xh%lx*xh%ly*xh%lz
110 
111  if (.not. dudxyz_xsmm_init) then
112  call libxsmm_dispatch(dudxyz_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
113  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
114  call libxsmm_dispatch(dudxyz_xmm2, xh%lx, xh%ly, xh%ly, &
115  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
116  call libxsmm_dispatch(dudxyz_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
117  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
118  dudxyz_xsmm_init = .true.
119  end if
120 
121  do e = 1, msh%nelv
122  if (msh%gdim .eq. 2) then
123  call mxm(xh%dx, xh%lx, u(1,1,1,e), xh%lx, du(1,1,1,e), lyz)
124  call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
125  call mxm(u(1,1,1,e), xh%lx, xh%dyt, xh%ly, drst, xh%ly)
126  call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
127  else
128  call libxsmm_mmcall(dudxyz_xmm1, xh%dx, u(1,1,1,e), du(1,1,1,e))
129  call col2(du(1,1,1,e), dr(1,1,1,e), lxyz)
130  do k = 1, xh%lz
131  call libxsmm_mmcall(dudxyz_xmm2, u(1,1,k,e), xh%dyt, drst(1,1,k))
132  end do
133  call addcol3(du(1,1,1,e), drst, ds(1,1,1,e), lxyz)
134  call libxsmm_mmcall(dudxyz_xmm3, u(1,1,1,e), xh%dzt, drst)
135  call addcol3(du(1,1,1,e), drst, dt(1,1,1,e), lxyz)
136  end if
137  end do
138  call col2(du, coef%jacinv, coef%dof%n_dofs)
139 
140 #endif
141  end subroutine opr_xsmm_dudxyz
142 
143  subroutine opr_xsmm_opgrad(ux, uy, uz, u, coef)
144  type(coef_t), intent(in) :: coef
145  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
146  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
147  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
148  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
149 #ifdef HAVE_LIBXSMM
150  real(kind=rp) :: ur(coef%Xh%lxyz)
151  real(kind=rp) :: us(coef%Xh%lxyz)
152  real(kind=rp) :: ut(coef%Xh%lxyz)
153  logical, save :: lgrad_xsmm_init = .false.
154  integer, save :: init_size = 0
155  integer :: e, i, n
156  n = coef%Xh%lx - 1
157 
158 
159  if ((.not. lgrad_xsmm_init) .or. &
160  (init_size .gt. 0 .and. init_size .ne. n)) then
161  call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
162  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
163  call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
164  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
165  call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
166  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
167  lgrad_xsmm_init = .true.
168  init_size = n
169  end if
170 
171  do e = 1, coef%msh%nelv
172  if (coef%msh%gdim .eq. 3) then
173  call local_grad3_xsmm(ur, us, ut, u(1,e), n, coef%Xh%dx, coef%Xh%dxt)
174  do i = 1, coef%Xh%lxyz
175  ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
176  + us(i) * coef%dsdx(i,1,1,e) &
177  + ut(i) * coef%dtdx(i,1,1,e) )
178  uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
179  + us(i) * coef%dsdy(i,1,1,e) &
180  + ut(i) * coef%dtdy(i,1,1,e) )
181  uz(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdz(i,1,1,e) &
182  + us(i) * coef%dsdz(i,1,1,e) &
183  + ut(i) * coef%dtdz(i,1,1,e) )
184  end do
185  else
186 
187  call local_grad2(ur, us, u(1,e), n, coef%Xh%dx, coef%Xh%dyt)
188 
189  do i = 1, coef%Xh%lxyz
190  ux(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdx(i,1,1,e) &
191  + us(i) * coef%dsdx(i,1,1,e) )
192  uy(i,e) = coef%Xh%w3(i,1,1) * (ur(i) * coef%drdy(i,1,1,e) &
193  + us(i) * coef%dsdy(i,1,1,e) )
194  end do
195  end if
196  end do
197 #endif
198  end subroutine opr_xsmm_opgrad
199 
200  subroutine local_grad3_xsmm(ur, us, ut, u, n, D, Dt)
201  integer, intent(in) :: n
202  real(kind=rp), intent(inout) :: ur(0:n, 0:n, 0:n)
203  real(kind=rp), intent(inout) :: us(0:n, 0:n, 0:n)
204  real(kind=rp), intent(inout) :: ut(0:n, 0:n, 0:n)
205  real(kind=rp), intent(in) :: u(0:n, 0:n, 0:n)
206  real(kind=rp), intent(in) :: d(0:n, 0:n)
207  real(kind=rp), intent(in) :: dt(0:n, 0:n)
208 #ifdef HAVE_LIBXSMM
209  integer :: m1, m2, k
210 
211  m1 = n + 1
212  m2 = m1*m1
213 
214  call libxsmm_mmcall(lgrad_xmm1, d, u, ur)
215  do k = 0, n
216  call libxsmm_mmcall(lgrad_xmm2, u(0,0,k), dt, us(0,0,k))
217  end do
218  call libxsmm_mmcall(lgrad_xmm3, u, dt, ut)
219 #endif
220 
221  end subroutine local_grad3_xsmm
222 
223  subroutine local_grad2(ur, us, u, n, D, Dt)
224  integer, intent(in) :: n
225  real(kind=rp), intent(inout) :: ur(0:n, 0:n)
226  real(kind=rp), intent(inout) :: us(0:n, 0:n)
227  real(kind=rp), intent(in) :: u(0:n, 0:n)
228  real(kind=rp), intent(in) :: d(0:n, 0:n)
229  real(kind=rp), intent(in) :: dt(0:n, 0:n)
230  integer :: m1
231 
232  m1 = n + 1
233 
234  call mxm(d, m1, u, m1, ur, m1)
235  call mxm(u, m1, dt, m1, us, m1)
236 
237  end subroutine local_grad2
238 
239  subroutine opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
240  type(coef_t), intent(in) :: coef
241  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
242  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
243  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
244  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
245  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
246 #ifdef HAVE_LIBXSMM
247  real(kind=rp) :: wx(coef%Xh%lxyz)
248  real(kind=rp) :: ta1(coef%Xh%lxyz)
249  real(kind=rp) :: ta2(coef%Xh%lxyz)
250  real(kind=rp) :: ta3(coef%Xh%lxyz)
251  integer :: e, i1, i2, n1, n2, iz
252  type(space_t), pointer :: xh
253 
254  type(libxsmm_dmmfunction), save :: cdtp_xmm1
255  type(libxsmm_dmmfunction), save :: cdtp_xmm2
256  type(libxsmm_dmmfunction), save :: cdtp_xmm3
257  logical, save :: cdtp_xsmm_init = .false.
258 
259  xh => coef%Xh
260  n1 = xh%lx*xh%ly
261  n2 = xh%lx*xh%ly
262 
263  if (.not. cdtp_xsmm_init) then
264  call libxsmm_dispatch(cdtp_xmm1, xh%lx, xh%ly*xh%lz, xh%lx, &
265  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
266  call libxsmm_dispatch(cdtp_xmm2, xh%lx, xh%ly, xh%ly, &
267  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
268  call libxsmm_dispatch(cdtp_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
269  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
270  cdtp_xsmm_init = .true.
271  end if
272 
273  do e = 1, coef%msh%nelv
274  call col3(wx, coef%B(1,1,1,e), x(1,e), xh%lxyz)
275  call invcol2(wx, coef%jac(1,1,1,e), xh%lxyz)
276  call col3(ta1, wx, dr(1,e), xh%lxyz)
277  call libxsmm_mmcall(cdtp_xmm1, xh%dxt, ta1, dtx(1,e))
278  call col3 (ta1, wx, ds(1,e), xh%lxyz)
279  i1 = 1
280  i2 = 1
281  do iz = 1, xh%lz
282  call libxsmm_mmcall(cdtp_xmm2, ta1(i2), xh%dy, ta2(i1))
283  i1 = i1 + n1
284  i2 = i2 + n2
285  end do
286  call add2(dtx(1,e), ta2, xh%lxyz)
287  call col3(ta1, wx, dt(1,e), xh%lxyz)
288  call libxsmm_mmcall(cdtp_xmm3, ta1, xh%dz, ta2)
289  call add2 (dtx(1,e), ta2, xh%lxyz)
290  end do
291 #endif
292  end subroutine opr_xsmm_cdtp
293 
294  subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)
295  type(space_t), intent(in) :: xh
296  type(coef_t), intent(in) :: coef
297  integer, intent(in) :: nelv, gdim
298  real(kind=rp), intent(inout) :: du(xh%lxyz, nelv)
299  real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
300  real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
301  real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
302  real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
303 #ifdef HAVE_LIBXSMM
304  ! Store the inverse jacobian to speed this operation up
305  real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz) :: dudr, duds, dudt
306  integer :: ie, iz, i
307 
308  type(libxsmm_dmmfunction), save :: conv1_xmm1
309  type(libxsmm_dmmfunction), save :: conv1_xmm2
310  type(libxsmm_dmmfunction), save :: conv1_xmm3
311  logical, save :: conv1_xsmm_init = .false.
312 
313  if (.not. conv1_xsmm_init) then
314  call libxsmm_dispatch(conv1_xmm1, xh%lx, xh%ly*xh%lx, xh%lx, &
315  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
316  call libxsmm_dispatch(conv1_xmm2, xh%lx, xh%ly, xh%ly, &
317  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
318  call libxsmm_dispatch(conv1_xmm3, xh%lx*xh%ly, xh%lz, xh%lz, &
319  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
320  conv1_xsmm_init = .true.
321  end if
322 
323  ! Compute vel.grad(u)
324  do ie = 1, nelv
325  if (gdim .eq. 3) then
326  call libxsmm_mmcall(conv1_xmm1, xh%dx, u(1,1,1, ie), dudr)
327  do iz = 1, xh%lz
328  call libxsmm_mmcall(conv1_xmm2, u(1,1, iz, ie), xh%dyt,&
329  duds(1,1, iz))
330  end do
331  call libxsmm_mmcall(conv1_xmm3, u(1,1,1, ie), xh%dzt, dudt)
332  do i = 1, xh%lxyz
333  du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
334  vx(i,1,1, ie) * ( &
335  coef%drdx(i,1,1, ie) * dudr(i,1,1) &
336  + coef%dsdx(i,1,1, ie) * duds(i,1,1) &
337  + coef%dtdx(i,1,1, ie) * dudt(i,1,1)) &
338  + vy(i,1,1, ie) * ( &
339  coef%drdy(i,1,1, ie) * dudr(i,1,1) &
340  + coef%dsdy(i,1,1, ie) * duds(i,1,1) &
341  + coef%dtdy(i,1,1, ie) * dudt(i,1,1)) &
342  + vz(i,1,1, ie) * ( &
343  coef%drdz(i,1,1, ie) * dudr(i,1,1) &
344  + coef%dsdz(i,1,1, ie) * duds(i,1,1) &
345  + coef%dtdz(i,1,1, ie) * dudt(i,1,1)))
346  end do
347  else
348  ! 2D
349  call mxm(xh%dx, xh%lx, u(1,1,1, ie), xh%lx, dudr, xh%lyz)
350  call mxm(u(1,1,1, ie), xh%lx, xh%dyt, xh%ly, duds, xh%ly)
351  do i = 1, xh%lxyz
352  du(i, ie) = coef%jacinv(i,1,1, ie) * ( &
353  vx(i,1,1, ie) * ( &
354  coef%drdx(i,1,1, ie) * dudr(i,1,1) &
355  + coef%dsdx(i,1,1, ie) * duds(i,1,1)) &
356  + vy(i,1,1, ie) * ( &
357  coef%drdy(i,1,1, ie) * dudr(i,1,1) &
358  + coef%dsdy(i,1,1, ie) * duds(i,1,1)))
359  end do
360  end if
361  end do
362 
363 #endif
364 
365  end subroutine opr_xsmm_conv1
366 
367  subroutine opr_xsmm_convect_scalar(du, u, c, Xh_GLL, Xh_GL, coef_GLL, &
368  coef_GL, GLL_to_GL)
369  type(space_t), intent(in) :: xh_gl
370  type(space_t), intent(in) :: xh_gll
371  type(coef_t), intent(in) :: coef_gll
372  type(coef_t), intent(in) :: coef_gl
373  type(interpolator_t), intent(inout) :: gll_to_gl
374  real(kind=rp), intent(inout) :: du(xh_gll%lx, xh_gll%ly, xh_gll%lz, &
375  coef_gl%msh%nelv)
376  real(kind=rp), intent(inout) :: u(xh_gl%lxyz, coef_gl%msh%nelv)
377  real(kind=rp), intent(inout) :: c(xh_gl%lxyz, coef_gl%msh%nelv, 3)
378  real(kind=rp) :: ur(xh_gl%lxyz)
379  real(kind=rp) :: us(xh_gl%lxyz)
380  real(kind=rp) :: ut(xh_gl%lxyz)
381  real(kind=rp) :: ud(xh_gl%lxyz, coef_gl%msh%nelv)
382  logical, save :: lgrad_xsmm_init = .false.
383  integer, save :: init_size = 0
384  integer :: e, i, n, n_gll
385  n = coef_gl%Xh%lx - 1
386  n_gll = coef_gll%msh%nelv * xh_gll%lxyz
387 
388 #ifdef HAVE_LIBXSMM
389  if ((.not. lgrad_xsmm_init) .or. &
390  (init_size .gt. 0 .and. init_size .ne. n)) then
391  call libxsmm_dispatch(lgrad_xmm1, (n+1), (n+1)**2, (n+1), &
392  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
393  call libxsmm_dispatch(lgrad_xmm2, (n+1), (n+1), (n+1), &
394  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
395  call libxsmm_dispatch(lgrad_xmm3, (n+1)**2, (n+1), (n+1), &
396  alpha = 1d0, beta = 0d0, prefetch = libxsmm_prefetch_auto)
397  lgrad_xsmm_init = .true.
398  init_size = n
399  end if
400 
401  do e = 1, coef_gll%msh%nelv
402  call local_grad3_xsmm(ur, us, ut, u(1,e), n, xh_gl%dx, xh_gl%dxt)
403  do i = 1, xh_gl%lxyz
404  ud(i,e) = c(i,e,1) * ur(i) + c(i,e,2) * us(i) + c(i,e,3) * ut(i)
405  end do
406  end do
407 #endif
408  call gll_to_gl%map(du, ud, coef_gl%msh%nelv, xh_gll)
409  call coef_gll%gs_h%op(du, n_gll, gs_op_add)
410  call col2(du, coef_gll%Binv, n_gll)
411  end subroutine opr_xsmm_convect_scalar
412 
413  subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
414  type(field_t), intent(inout) :: w1
415  type(field_t), intent(inout) :: w2
416  type(field_t), intent(inout) :: w3
417  type(field_t), intent(inout) :: u1
418  type(field_t), intent(inout) :: u2
419  type(field_t), intent(inout) :: u3
420  type(field_t), intent(inout) :: work1
421  type(field_t), intent(inout) :: work2
422  type(coef_t), intent(in) :: c_xh
423  integer :: gdim, n
424 
425  n = w1%dof%size()
426  gdim = c_xh%msh%gdim
427 
428  ! this%work1=dw/dy ; this%work2=dv/dz
429  call opr_xsmm_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
430  if (gdim .eq. 3) then
431  call opr_xsmm_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, &
432  c_xh%dtdz, c_xh)
433  call sub3(w1%x, work1%x, work2%x, n)
434  else
435  call copy(w1%x, work1%x, n)
436  end if
437  ! this%work1=du/dz ; this%work2=dw/dx
438  if (gdim .eq. 3) then
439  call opr_xsmm_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, &
440  c_xh%dtdz, c_xh)
441  call opr_xsmm_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
442  c_xh%dtdx, c_xh)
443  call sub3(w2%x, work1%x, work2%x, n)
444  else
445  call rzero (work1%x, n)
446  call opr_xsmm_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, &
447  c_xh%dtdx, c_xh)
448  call sub3(w2%x, work1%x, work2%x, n)
449  end if
450  ! this%work1=dv/dx ; this%work2=du/dy
451  call opr_xsmm_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
452  call opr_xsmm_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
453  call sub3(w3%x, work1%x, work2%x, n)
454  !! BC dependent, Needs to change if cyclic
455 
456  call opcolv(w1%x, w2%x, w3%x, c_xh%B, gdim, n)
457  call c_xh%gs_h%op(w1, gs_op_add)
458  call c_xh%gs_h%op(w2, gs_op_add)
459  call c_xh%gs_h%op(w3, gs_op_add)
460  call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
461 
462  end subroutine opr_xsmm_curl
463 
464  subroutine opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
465  type(space_t), intent(inout) :: xh
466  type(coef_t), intent(inout) :: coef
467  real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
468  intent(inout) :: cr, cs, ct
469  real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
470  intent(in) :: cx, cy, cz
471  integer :: e, i, t, nxyz
472 
473  associate(drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, &
474  dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, &
475  dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz, &
476  nelv => coef%msh%nelv, lx => xh%lx, w3 => xh%w3)
477  nxyz = lx * lx * lx
478  do e = 1, nelv
479  do i = 1, nxyz
480  cr(i,e) = w3(i,1,1) * (cx(i,e) * drdx(i,1,1,e) &
481  + cy(i,e) * drdy(i,1,1,e) &
482  + cz(i,e) * drdz(i,1,1,e))
483  cs(i,e) = w3(i,1,1) * (cx(i,e) * dsdx(i,1,1,e) &
484  + cy(i,e) * dsdy(i,1,1,e) &
485  + cz(i,e) * dsdz(i,1,1,e))
486  ct(i,e) = w3(i,1,1) * (cx(i,e) * dtdx(i,1,1,e) &
487  + cy(i,e) * dtdy(i,1,1,e) &
488  + cz(i,e) * dtdz(i,1,1,e))
489  end do
490  end do
491  end associate
492 
493  end subroutine opr_xsmm_set_convect_rst
494 
495 end module opr_xsmm
496 
Coefficients.
Definition: coef.f90:34
Defines a field.
Definition: field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
Definition: math.f90:60
subroutine, public invcol2(a, b, n)
Vector division .
Definition: math.f90:715
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition: math.f90:642
subroutine, public add2(a, b, n)
Vector addition .
Definition: math.f90:587
subroutine, public addcol3(a, b, c, n)
Returns .
Definition: math.f90:801
subroutine, public col2(a, b, n)
Vector multiplication .
Definition: math.f90:729
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:239
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
Definition: math.f90:742
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:195
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition: mathops.f90:65
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition: mathops.f90:97
Defines a mesh.
Definition: mesh.f90:34
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Operators libxsmm backend.
Definition: opr_xsmm.F90:61
subroutine, public opr_xsmm_convect_scalar(du, u, c, Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
Definition: opr_xsmm.F90:371
subroutine local_grad3_xsmm(ur, us, ut, u, n, D, Dt)
Definition: opr_xsmm.F90:203
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition: opr_xsmm.F90:242
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
Definition: opr_xsmm.F90:467
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition: opr_xsmm.F90:92
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition: opr_xsmm.F90:146
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_xsmm.F90:416
subroutine local_grad2(ur, us, u, n, D, Dt)
Definition: opr_xsmm.F90:226
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
Definition: opr_xsmm.F90:297
Defines a function space.
Definition: space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition: space.f90:62