Neko  0.8.1
A portable framework for high-order spectral element flow simulations
opr_cpu.f90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2022, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module opr_cpu
35  use cpu_dudxyz
36  use cpu_opgrad
37  use cpu_cdtp
38  use cpu_conv1
39  use num_types, only : rp
40  use space, only : space_t
41  use coefs, only : coef_t
42  use math
43  use field, only : field_t
44  use gather_scatter
45  use mathops
46  implicit none
47  private
48 
51 
52 contains
53 
54  subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
55  type(coef_t), intent(in), target :: coef
56  real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), &
57  intent(inout) :: du
58  real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), &
59  intent(in) :: u, dr, ds, dt
60 
61  associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
62  select case(coef%Xh%lx)
63  case(14)
64  call cpu_dudxyz_lx14(du, u, dr, ds, dt, &
65  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
66  case(13)
67  call cpu_dudxyz_lx13(du, u, dr, ds, dt, &
68  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
69  case(12)
70  call cpu_dudxyz_lx12(du, u, dr, ds, dt, &
71  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
72  case(11)
73  call cpu_dudxyz_lx11(du, u, dr, ds, dt, &
74  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
75  case(10)
76  call cpu_dudxyz_lx10(du, u, dr, ds, dt, &
77  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
78  case(9)
79  call cpu_dudxyz_lx9(du, u, dr, ds, dt, &
80  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
81  case(8)
82  call cpu_dudxyz_lx8(du, u, dr, ds, dt, &
83  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
84  case(7)
85  call cpu_dudxyz_lx7(du, u, dr, ds, dt, &
86  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
87  case(6)
88  call cpu_dudxyz_lx6(du, u, dr, ds, dt, &
89  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
90  case(5)
91  call cpu_dudxyz_lx5(du, u, dr, ds, dt, &
92  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
93  case(4)
94  call cpu_dudxyz_lx4(du, u, dr, ds, dt, &
95  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
96  case(3)
97  call cpu_dudxyz_lx3(du, u, dr, ds, dt, &
98  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
99  case(2)
100  call cpu_dudxyz_lx2(du, u, dr, ds, dt, &
101  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv)
102  case default
103  call cpu_dudxyz_lx(du, u, dr, ds, dt, &
104  xh%dx, xh%dy, xh%dz, coef%jacinv, msh%nelv, xh%lx)
105  end select
106 
107  end associate
108 
109  end subroutine opr_cpu_dudxyz
110 
111  subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
112  type(coef_t), intent(in) :: coef
113  integer, intent(in) :: e_start, e_end
114  real(kind=rp), dimension(coef%Xh%lxyz,e_end-e_start+1), intent(inout) :: ux
115  real(kind=rp), dimension(coef%Xh%lxyz,e_end-e_start+1), intent(inout) :: uy
116  real(kind=rp), dimension(coef%Xh%lxyz,e_end-e_start+1), intent(inout) :: uz
117  real(kind=rp), dimension(coef%Xh%lxyz,e_end-e_start+1), intent(in) :: u
118  integer :: e_len
119  e_len = e_end-e_start+1
120  associate(xh => coef%Xh, msh => coef%msh, &
121  drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, &
122  dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, &
123  dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz)
124 
125  select case(xh%lx)
126  case(18)
127  call cpu_opgrad_lx18(ux, uy, uz, u, &
128  xh%dx, xh%dy, xh%dz, &
129  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
130  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
131  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
132  xh%w3, e_len)
133  case(17)
134  call cpu_opgrad_lx17(ux, uy, uz, u, &
135  xh%dx, xh%dy, xh%dz, &
136  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
137  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
138  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
139  xh%w3, e_len)
140  case(16)
141  call cpu_opgrad_lx16(ux, uy, uz, u, &
142  xh%dx, xh%dy, xh%dz, &
143  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
144  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
145  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
146  xh%w3, e_len)
147  case(15)
148  call cpu_opgrad_lx15(ux, uy, uz, u, &
149  xh%dx, xh%dy, xh%dz, &
150  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
151  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
152  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
153  xh%w3, e_len)
154  case(14)
155  call cpu_opgrad_lx14(ux, uy, uz, u, &
156  xh%dx, xh%dy, xh%dz, &
157  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
158  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
159  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
160  xh%w3, e_len)
161  case(13)
162  call cpu_opgrad_lx13(ux, uy, uz, u, &
163  xh%dx, xh%dy, xh%dz, &
164  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
165  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
166  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
167  xh%w3, e_len)
168  case(12)
169  call cpu_opgrad_lx12(ux, uy, uz, u, &
170  xh%dx, xh%dy, xh%dz, &
171  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
172  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
173  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
174  xh%w3, e_len)
175  case(11)
176  call cpu_opgrad_lx11(ux, uy, uz, u, &
177  xh%dx, xh%dy, xh%dz, &
178  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
179  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
180  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
181  xh%w3, e_len)
182  case(10)
183  call cpu_opgrad_lx10(ux, uy, uz, u, &
184  xh%dx, xh%dy, xh%dz, &
185  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
186  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
187  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
188  xh%w3, e_len)
189 
190  case(9)
191  call cpu_opgrad_lx9(ux, uy, uz, u, &
192  xh%dx, xh%dy, xh%dz, &
193  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
194  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
195  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
196  xh%w3, e_len)
197  case(8)
198  call cpu_opgrad_lx8(ux, uy, uz, u, &
199  xh%dx, xh%dy, xh%dz, &
200  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
201  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
202  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
203  xh%w3, e_len)
204  case(7)
205  call cpu_opgrad_lx7(ux, uy, uz, u, &
206  xh%dx, xh%dy, xh%dz, &
207  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
208  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
209  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
210  xh%w3, e_len)
211  case(6)
212  call cpu_opgrad_lx6(ux, uy, uz, u, &
213  xh%dx, xh%dy, xh%dz, &
214  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
215  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
216  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
217  xh%w3, e_len)
218  case(5)
219  call cpu_opgrad_lx5(ux, uy, uz, u, &
220  xh%dx, xh%dy, xh%dz, &
221  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
222  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
223  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
224  xh%w3, e_len)
225  case(4)
226  call cpu_opgrad_lx4(ux, uy, uz, u, &
227  xh%dx, xh%dy, xh%dz, &
228  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
229  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
230  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
231  xh%w3, e_len)
232  case(3)
233  call cpu_opgrad_lx3(ux, uy, uz, u, &
234  xh%dx, xh%dy, xh%dz, &
235  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
236  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
237  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
238  xh%w3, e_len)
239  case(2)
240  call cpu_opgrad_lx2(ux, uy, uz, u, &
241  xh%dx, xh%dy, xh%dz, &
242  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
243  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
244  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
245  xh%w3, e_len)
246  case default
247  call cpu_opgrad_lx(ux, uy, uz, u, &
248  xh%dx, xh%dy, xh%dz, &
249  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
250  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
251  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
252  xh%w3, e_len, xh%lx)
253  end select
254  end associate
255 
256  end subroutine opr_cpu_opgrad
257 
258  subroutine opr_cpu_cdtp(dtx, x, dr, ds, dt, coef)
259  type(coef_t), intent(in) :: coef
260  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: dtx
261  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: x
262  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: dr
263  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: ds
264  real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: dt
265 
266  associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
267  select case(xh%lx)
268  case(14)
269  call cpu_cdtp_lx14(dtx, x, dr, ds, dt, &
270  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
271  case(13)
272  call cpu_cdtp_lx13(dtx, x, dr, ds, dt, &
273  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
274  case(12)
275  call cpu_cdtp_lx12(dtx, x, dr, ds, dt, &
276  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
277  case(11)
278  call cpu_cdtp_lx11(dtx, x, dr, ds, dt, &
279  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
280  case(10)
281  call cpu_cdtp_lx10(dtx, x, dr, ds, dt, &
282  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
283  case(9)
284  call cpu_cdtp_lx9(dtx, x, dr, ds, dt, &
285  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
286  case(8)
287  call cpu_cdtp_lx8(dtx, x, dr, ds, dt, &
288  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
289  case(7)
290  call cpu_cdtp_lx7(dtx, x, dr, ds, dt, &
291  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
292  case(6)
293  call cpu_cdtp_lx6(dtx, x, dr, ds, dt, &
294  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
295  case(5)
296  call cpu_cdtp_lx5(dtx, x, dr, ds, dt, &
297  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
298  case(4)
299  call cpu_cdtp_lx4(dtx, x, dr, ds, dt, &
300  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
301  case(3)
302  call cpu_cdtp_lx3(dtx, x, dr, ds, dt, &
303  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
304  case(2)
305  call cpu_cdtp_lx2(dtx, x, dr, ds, dt, &
306  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv)
307  case default
308  call cpu_cdtp_lx(dtx, x, dr, ds, dt, &
309  xh%dxt, xh%dyt, xh%dzt, coef%B, coef%jac, msh%nelv, xh%lx)
310  end select
311  end associate
312 
313  end subroutine opr_cpu_cdtp
314 
315  subroutine opr_cpu_conv1(du, u, vx, vy, vz, Xh, coef, e_start, e_end)
316  type(space_t), intent(in) :: xh
317  type(coef_t), intent(in) :: coef
318  integer, intent(in) :: e_start, e_end
319  real(kind=rp), intent(inout) :: du(xh%lxyz,e_end-e_start+1)
320  real(kind=rp), intent(inout), dimension(Xh%lx,Xh%ly,Xh%lz,e_end-e_start+1) :: u
321  real(kind=rp), intent(inout), dimension(Xh%lx,Xh%ly,Xh%lz,e_end-e_start+1) :: vx
322  real(kind=rp), intent(inout), dimension(Xh%lx,Xh%ly,Xh%lz,e_end-e_start+1) :: vy
323  real(kind=rp), intent(inout), dimension(Xh%lx,Xh%ly,Xh%lz,e_end-e_start+1) :: vz
324  integer :: e_len
325 
326  e_len = e_end-e_start+1
327  associate(drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, &
328  dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, &
329  dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz, &
330  jacinv => coef%jacinv)
331  select case(xh%lx)
332  case(14)
333  call cpu_conv1_lx14(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
334  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
335  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
336  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
337  jacinv(1,1,1,e_start), e_len)
338  case(13)
339  call cpu_conv1_lx13(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
340  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
341  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
342  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
343  jacinv(1,1,1,e_start), e_len)
344  case(12)
345  call cpu_conv1_lx12(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
346  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
347  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
348  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
349  jacinv(1,1,1,e_start), e_len)
350  case(11)
351  call cpu_conv1_lx11(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
352  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
353  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
354  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
355  jacinv(1,1,1,e_start), e_len)
356  case(10)
357  call cpu_conv1_lx10(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
358  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
359  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
360  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
361  jacinv(1,1,1,e_start), e_len)
362  case(9)
363  call cpu_conv1_lx9(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
364  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
365  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
366  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
367  jacinv(1,1,1,e_start), e_len)
368  case(8)
369  call cpu_conv1_lx8(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
370  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
371  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
372  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
373  jacinv(1,1,1,e_start), e_len)
374  case(7)
375  call cpu_conv1_lx7(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
376  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
377  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
378  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
379  jacinv(1,1,1,e_start), e_len)
380  case(6)
381  call cpu_conv1_lx6(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
382  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
383  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
384  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
385  jacinv(1,1,1,e_start), e_len)
386  case(5)
387  call cpu_conv1_lx5(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
388  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
389  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
390  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
391  jacinv(1,1,1,e_start), e_len)
392  case(4)
393  call cpu_conv1_lx4(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
394  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
395  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
396  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
397  jacinv(1,1,1,e_start), e_len)
398  case(3)
399  call cpu_conv1_lx3(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
400  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
401  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
402  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
403  jacinv(1,1,1,e_start), e_len)
404  case(2)
405  call cpu_conv1_lx2(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
406  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
407  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
408  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
409  jacinv(1,1,1,e_start), e_len)
410  case default
411  call cpu_conv1_lx(du, u, vx, vy, vz, xh%dx, xh%dy, xh%dz, &
412  drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
413  drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
414  drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
415  jacinv(1,1,1,e_start), e_len, xh%lx)
416  end select
417  end associate
418 
419  end subroutine opr_cpu_conv1
420 
421  subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
422  type(field_t), intent(inout) :: w1
423  type(field_t), intent(inout) :: w2
424  type(field_t), intent(inout) :: w3
425  type(field_t), intent(inout) :: u1
426  type(field_t), intent(inout) :: u2
427  type(field_t), intent(inout) :: u3
428  type(field_t), intent(inout) :: work1
429  type(field_t), intent(inout) :: work2
430  type(coef_t), intent(in) :: c_xh
431  integer :: gdim, n
432 
433  n = w1%dof%size()
434  gdim = c_xh%msh%gdim
435 
436  ! this%work1=dw/dy ; this%work2=dv/dz
437  call opr_cpu_dudxyz(work1%x, u3%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
438  if (gdim .eq. 3) then
439  call opr_cpu_dudxyz(work2%x, u2%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
440  call sub3(w1%x, work1%x, work2%x, n)
441  else
442  call copy(w1%x, work1%x, n)
443  end if
444  ! this%work1=du/dz ; this%work2=dw/dx
445  if (gdim .eq. 3) then
446  call opr_cpu_dudxyz(work1%x, u1%x, c_xh%drdz, c_xh%dsdz, c_xh%dtdz, c_xh)
447  call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
448  call sub3(w2%x, work1%x, work2%x, n)
449  else
450  call rzero(work1%x, n)
451  call opr_cpu_dudxyz(work2%x, u3%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
452  call sub3(w2%x, work1%x, work2%x, n)
453  end if
454  ! this%work1=dv/dx ; this%work2=du/dy
455  call opr_cpu_dudxyz(work1%x, u2%x, c_xh%drdx, c_xh%dsdx, c_xh%dtdx, c_xh)
456  call opr_cpu_dudxyz(work2%x, u1%x, c_xh%drdy, c_xh%dsdy, c_xh%dtdy, c_xh)
457  call sub3(w3%x, work1%x, work2%x, n)
458  !! BC dependent, Needs to change if cyclic
459 
460  call opcolv(w1%x, w2%x, w3%x, c_xh%B, gdim, n)
461  call c_xh%gs_h%op(w1, gs_op_add)
462  call c_xh%gs_h%op(w2, gs_op_add)
463  call c_xh%gs_h%op(w3, gs_op_add)
464  call opcolv(w1%x, w2%x, w3%x, c_xh%Binv, gdim, n)
465 
466  end subroutine opr_cpu_curl
467 
468  function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
469  type(space_t) :: xh
470  type(coef_t) :: coef
471  integer :: nelv, gdim
472  real(kind=rp) :: dt
473  real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: u, v, w
474  real(kind=rp) :: cflr, cfls, cflt, cflm
475  real(kind=rp) :: ur, us, ut
476  real(kind=rp) :: cfl
477  integer :: i, j, k, e
478  cfl = 0d0
479  if (gdim .eq. 3) then
480  do e = 1,nelv
481  do k = 1,xh%lz
482  do j = 1,xh%ly
483  do i = 1,xh%lx
484  ur = ( u(i,j,k,e)*coef%drdx(i,j,k,e) &
485  + v(i,j,k,e)*coef%drdy(i,j,k,e) &
486  + w(i,j,k,e)*coef%drdz(i,j,k,e) ) * coef%jacinv(i,j,k,e)
487  us = ( u(i,j,k,e)*coef%dsdx(i,j,k,e) &
488  + v(i,j,k,e)*coef%dsdy(i,j,k,e) &
489  + w(i,j,k,e)*coef%dsdz(i,j,k,e) ) * coef%jacinv(i,j,k,e)
490  ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
491  + v(i,j,k,e)*coef%dtdy(i,j,k,e) &
492  + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) * coef%jacinv(i,j,k,e)
493 
494  cflr = abs(dt*ur*xh%dr_inv(i))
495  cfls = abs(dt*us*xh%ds_inv(j))
496  cflt = abs(dt*ut*xh%dt_inv(k))
497 
498  cflm = cflr + cfls + cflt
499  cfl = max(cfl,cflm)
500  end do
501  end do
502  end do
503  end do
504  else
505  do e = 1,nelv
506  do j = 1,xh%ly
507  do i = 1,xh%lx
508  ur = ( u(i,j,1,e)*coef%drdx(i,j,1,e) &
509  + v(i,j,1,e)*coef%drdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
510  us = ( u(i,j,1,e)*coef%dsdx(i,j,1,e) &
511  + v(i,j,1,e)*coef%dsdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
512 
513  cflr = abs(dt*ur*xh%dr_inv(i))
514  cfls = abs(dt*us*xh%ds_inv(j))
515 
516  cflm = cflr + cfls
517  cfl = max(cfl,cflm)
518 
519  end do
520  end do
521  end do
522  end if
523  end function opr_cpu_cfl
524 
525  subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
526  type(coef_t), intent(in) :: coef
527  type(field_t), intent(inout) :: lambda2
528  type(field_t), intent(in) :: u, v, w
529  real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
530  integer :: temp_indices(9), e, i, ind_sort(3)
531  real(kind=rp) :: eigen(3), b, c, d, q, r, theta, l2
532  real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
533  real(kind=rp) :: a11, a22, a33, a12, a13, a23
534  real(kind=rp) :: msk1, msk2, msk3
535 
536  do e = 1, coef%msh%nelv
537  call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
538  u%x(1,1,1,e),coef,e,e)
539  call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
540  v%x(1,1,1,e),coef,e,e)
541  call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
542  w%x(1,1,1,e),coef,e,e)
543 
544  do i = 1, coef%Xh%lxyz
545  s11 = grad(i,1,1)
546  s22 = grad(i,2,2)
547  s33 = grad(i,3,3)
548 
549 
550  s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
551  s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
552  s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
553 
554  o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
555  o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
556  o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
557 
558  a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
559  a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23
560  a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23
561 
562  a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
563  a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
564  a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
565 
566 
567  b = -(a11 + a22 + a33)
568  c = -(a12*a12 + a13*a13 + a23*a23 &
569  - a11 * a22 - a11 * a33 - a22 * a33)
570  d = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
571  - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33)
572 
573 
574  q = (3.0 * c - b*b) / 9.0
575  r = (9.0 * c * b - 27.0 * d - 2.0 * b*b*b) / 54.0
576  theta = acos( r / sqrt(-q*q*q) )
577 
578  eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - b / 3.0
579  eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - b / 3.0
580  eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - b / 3.0
581 
582  msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
583  .and. eigen(1) .le. eigen(3) .or. eigen(3) &
584  .le. eigen(1) .and. eigen(1) .le. eigen(2) )
585  msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
586  .and. eigen(2) .le. eigen(3) .or. eigen(3) &
587  .le. eigen(2) .and. eigen(2) .le. eigen(1))
588  msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
589  .and. eigen(3) .le. eigen(2) .or. eigen(2) &
590  .le. eigen(3) .and. eigen(3) .le. eigen(1))
591 
592  l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
593 
594  lambda2%x(i,1,1,e) = l2/(coef%B(i,1,1,e)**2)
595  end do
596  end do
597 
598  end subroutine opr_cpu_lambda2
599 
600 end module opr_cpu
Coefficients.
Definition: coef.f90:34
DT*X kernels.
Definition: cdtp.f90:34
conv1 kernels
Definition: conv1.f90:34
Derivative kernels.
Definition: dudxyz.f90:34
subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:869
subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:281
subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:194
subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:446
subroutine cpu_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:365
subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:104
subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:671
subroutine cpu_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:986
subroutine cpu_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:524
subroutine cpu_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:929
subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:740
subroutine cpu_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, lx)
Definition: dudxyz.f90:41
subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:599
subroutine cpu_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
Definition: dudxyz.f90:806
Gradient kernels.
Definition: opgrad.f90:34
Defines a field.
Definition: field.f90:34
Gather-scatter.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition: lambda2.f90:37
Definition: math.f90:60
Collection of vector field operations operating on and . Note that in general the indices and ....
Definition: mathops.f90:65
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Operators CPU backend.
Definition: opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
Definition: opr_cpu.f90:422
subroutine, public opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
Definition: opr_cpu.f90:55
subroutine, public opr_cpu_conv1(du, u, vx, vy, vz, Xh, coef, e_start, e_end)
Definition: opr_cpu.f90:316
subroutine, public opr_cpu_cdtp(dtx, x, dr, ds, dt, coef)
Definition: opr_cpu.f90:259
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition: opr_cpu.f90:526
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
Definition: opr_cpu.f90:469
subroutine, public opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
Definition: opr_cpu.f90:112
Defines a function space.
Definition: space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:54
The function space for the SEM solution fields.
Definition: space.f90:62
#define max(a, b)
Definition: tensor.cu:40