Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
operators.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use num_types, only : rp, i8
38 use opr_cpu, only : opr_cpu_cfl, opr_cpu_curl, opr_cpu_opgrad, &
39 opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
40 opr_cpu_dudxyz, opr_cpu_lambda2, opr_cpu_set_convect_rst, &
42 use opr_sx, only : opr_sx_cfl, opr_sx_curl, opr_sx_opgrad, &
43 opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
44 opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
52 use space, only : space_t
53 use coefs, only : coef_t
54 use field, only : field_t
55 use field_list, only : field_list_t
56 use field_math, only : field_rzero
58 use math, only : glsum, cmult, add2, add3s2, cadd, copy, col2, invcol2, &
65 use vector, only : vector_t
67 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_max, mpi_sum
68 use, intrinsic :: iso_c_binding, only : c_ptr
69 implicit none
70 private
71
74
75 interface rotate_cyc
76 module procedure rotate_cyc_r1
77 module procedure rotate_cyc_r4
78 end interface rotate_cyc
79
80contains
81
89 subroutine dudxyz (du, u, dr, ds, dt, coef)
90 type(coef_t), intent(in), target :: coef
91 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: du
92 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: u, dr, ds, dt
93
94 if (neko_bcknd_sx .eq. 1) then
95 call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
96 else if (neko_bcknd_xsmm .eq. 1) then
97 call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
98 else if (neko_bcknd_device .eq. 1) then
99 call opr_device_dudxyz(du, u, dr, ds, dt, coef)
100 else
101 call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
102 end if
103
104 end subroutine dudxyz
105
112 subroutine div(res, ux, uy, uz, coef)
113 type(coef_t), intent(in), target :: coef
114 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: res
115 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: ux, uy, uz
116 type(field_t), pointer :: work
117 integer :: ind
118 type(c_ptr) :: res_d
119
120 if (neko_bcknd_device .eq. 1) then
121 res_d = device_get_ptr(res)
122 end if
123
124 call neko_scratch_registry%request_field(work, ind, .false.)
125
126 ! Get dux / dx
127 call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)
128
129 ! Get duy / dy
130 call dudxyz(work%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
131 if (neko_bcknd_device .eq. 1) then
132 call device_add2(res_d, work%x_d, work%size())
133 else
134 call add2(res, work%x, work%size())
135 end if
136
137 ! Get dux / dz
138 call dudxyz(work%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
139 if (neko_bcknd_device .eq. 1) then
140 call device_add2(res_d, work%x_d, work%size())
141 else
142 call add2(res, work%x, work%size())
143 end if
144
145 call neko_scratch_registry%relinquish_field(ind)
146
147 end subroutine div
148
155 subroutine grad(ux, uy, uz, u, coef)
156 type(coef_t), intent(in) :: coef
157 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
158 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
159 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
160 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
161
162 call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
163 call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
164 call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
165
166 end subroutine grad
167
180 subroutine opgrad(ux, uy, uz, u, coef, es, ee)
181 type(coef_t), intent(in) :: coef
182 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
183 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
184 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
185 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
186 integer, optional :: es, ee
187 integer :: eblk_start, eblk_end
188
189 if (present(es)) then
190 eblk_start = es
191 else
192 eblk_start = 1
193 end if
194
195 if (present(ee)) then
196 eblk_end = ee
197 else
198 eblk_end = coef%msh%nelv
199 end if
200
201 if (neko_bcknd_sx .eq. 1) then
202 call opr_sx_opgrad(ux, uy, uz, u, coef)
203 else if (neko_bcknd_xsmm .eq. 1) then
204 call opr_xsmm_opgrad(ux, uy, uz, u, coef)
205 else if (neko_bcknd_device .eq. 1) then
206 call opr_device_opgrad(ux, uy, uz, u, coef)
207 else
208 call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
209 end if
210
211 end subroutine opgrad
212
217 subroutine ortho(x, glb_n_points, n)
218 integer, intent(in) :: n
219 integer(kind=i8), intent(in) :: glb_n_points
220 real(kind=rp), dimension(n), intent(inout) :: x
221 real(kind=rp) :: c
222 type(c_ptr) :: x_d
223 if (neko_bcknd_device .eq. 1) then
224 x_d = device_get_ptr(x)
225 c = device_glsum(x_d, n)/glb_n_points
226 call device_cadd(x_d, -c, n)
227 else
228 c = glsum(x, n)/glb_n_points
229 call cadd(x, -c, n)
230 end if
231
232 end subroutine ortho
233
245 subroutine cdtp (dtx, x, dr, ds, dt, coef, es, ee)
246 type(coef_t), intent(in) :: coef
247 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
248 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
249 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
250 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
251 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
252 integer, optional :: es, ee
253 integer :: eblk_start, eblk_end
254
255 if (present(es)) then
256 eblk_start = es
257 else
258 eblk_start = 1
259 end if
260
261 if (present(ee)) then
262 eblk_end = ee
263 else
264 eblk_end = coef%msh%nelv
265 end if
266
267 if (neko_bcknd_sx .eq. 1) then
268 call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
269 else if (neko_bcknd_xsmm .eq. 1) then
270 call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
271 else if (neko_bcknd_device .eq. 1) then
272 call opr_device_cdtp(dtx, x, dr, ds, dt, coef)
273 else
274 call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
275 end if
276
277 end subroutine cdtp
278
289 subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
290 type(space_t), intent(in) :: xh
291 type(coef_t), intent(in) :: coef
292 real(kind=rp), intent(inout) :: du(xh%lxyz, coef%msh%nelv)
293 real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
294 real(kind=rp), intent(inout) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
295 real(kind=rp), intent(inout) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
296 real(kind=rp), intent(inout) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
297 integer, optional :: es, ee
298 integer :: eblk_end, eblk_start
299
300 associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
301 if (present(es)) then
302 eblk_start = es
303 else
304 eblk_start = 1
305 end if
306
307 if (present(ee)) then
308 eblk_end = ee
309 else
310 eblk_end = coef%msh%nelv
311 end if
312
313 if (neko_bcknd_sx .eq. 1) then
314 call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
315 else if (neko_bcknd_xsmm .eq. 1) then
316 call opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
317 else if (neko_bcknd_device .eq. 1) then
318 call opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
319 else
320 call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
321 end if
322 end associate
323
324 end subroutine conv1
325
343 subroutine convect_scalar(du, u, cr, cs, ct, Xh_GLL, Xh_GL, coef_GLL, &
344 coef_GL, GLL_to_GL)
345 type(space_t), intent(in) :: xh_gl
346 type(space_t), intent(in) :: xh_gll
347 type(coef_t), intent(in) :: coef_GLL
348 type(coef_t), intent(in) :: coef_GL
349 type(interpolator_t), intent(inout) :: GLL_to_GL
350 real(kind=rp), intent(inout) :: &
351 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
352 real(kind=rp), intent(inout) :: &
353 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
354 type(field_t), intent(inout) :: cr, cs, ct
355 type(c_ptr) :: u_d
356
357 if (neko_bcknd_sx .eq. 1) then
358 call opr_sx_convect_scalar(du, u, cr%x, cs%x, ct%x, &
359 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
360 else if (neko_bcknd_xsmm .eq. 1) then
361 call opr_xsmm_convect_scalar(du, u, cr%x, cs%x, ct%x, &
362 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
363 else if (neko_bcknd_device .eq. 1) then
364 u_d = device_get_ptr(u)
365 call opr_device_convect_scalar(du, u_d, cr%x_d, cs%x_d, ct%x_d, &
366 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
367 else
368 call opr_cpu_convect_scalar(du, u, cr%x, cs%x, ct%x, &
369 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
370 end if
371
372 end subroutine convect_scalar
373
374 !! Compute the curl fo a vector field.
375 !! @param w1 Will store the x component of the curl.
376 !! @param w2 Will store the y component of the curl.
377 !! @param w3 Will store the z component of the curl.
378 !! @param u1 The x component of the vector field.
379 !! @param u2 The y component of the vector field.
380 !! @param u3 The z component of the vector field.
381 !! @param work1 A temporary array for computations.
382 !! @param work2 A temporary array for computations.
383 !! @param coef The SEM coefficients.
384 subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
385 type(field_t), intent(inout) :: w1
386 type(field_t), intent(inout) :: w2
387 type(field_t), intent(inout) :: w3
388 type(field_t), intent(in) :: u1
389 type(field_t), intent(in) :: u2
390 type(field_t), intent(in) :: u3
391 type(field_t), intent(inout) :: work1
392 type(field_t), intent(inout) :: work2
393 type(coef_t), intent(in) :: coef
394 type(c_ptr), optional, intent(inout) :: event
395
396 if (neko_bcknd_sx .eq. 1) then
397 call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
398 else if (neko_bcknd_xsmm .eq. 1) then
399 call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
400 else if (neko_bcknd_device .eq. 1) then
401 if (present(event)) then
402 call opr_device_curl(w1, w2, w3, u1, u2, u3, &
403 work1, work2, coef, event)
404 else
405 call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
406 end if
407 else
408 call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
409 end if
410
411 end subroutine curl
412
413 !! Compute the CFL number
414 !! @param dt The timestep.
415 !! @param u The x component of velocity.
416 !! @param v The y component of velocity.
417 !! @param w The z component of velocity.
418 !! @param Xh The SEM function space.
419 !! @param coef The SEM coefficients.
420 !! @param nelv The total number of elements.
421 !! @param gdim Number of geometric dimensions.
422 function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
423 type(space_t), intent(in) :: xh
424 type(coef_t), intent(in) :: coef
425 integer, intent(in) :: nelv, gdim
426 real(kind=rp), intent(in) :: dt
427 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: u, v, w
428 real(kind=rp) :: cfl
429 integer :: ierr
430
431 if (neko_bcknd_sx .eq. 1) then
432 cfl = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
433 else if (neko_bcknd_device .eq. 1) then
434 cfl = opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
435 else
436 cfl = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
437 end if
438
439 if (.not. neko_device_mpi) then
440 call mpi_allreduce(mpi_in_place, cfl, 1, &
441 mpi_real_precision, mpi_max, neko_comm, ierr)
442 end if
443
444 end function cfl
445
446 !! Compute the CFL number for compressible flows
447 !! @param dt The timestep.
448 !! @param max_wave_speed The precomputed maximum wave speed field.
449 !! @param Xh The SEM function space.
450 !! @param coef The SEM coefficients.
451 !! @param nelv The total number of elements.
452 !! @param gdim Number of geometric dimensions.
453 function cfl_compressible(dt, max_wave_speed, Xh, coef, nelv, gdim)
454 type(space_t), intent(in) :: xh
455 type(coef_t), intent(in) :: coef
456 integer, intent(in) :: nelv, gdim
457 real(kind=rp), intent(in) :: dt
458 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: max_wave_speed
459 real(kind=rp) :: cfl_compressible
460 integer :: ierr, n
461 type(field_t), pointer :: zero_vector
462 integer :: ind
463
464 n = xh%lx * xh%ly * xh%lz * nelv
465
466 ! Request a scratch field for zero vector
467 call neko_scratch_registry%request_field(zero_vector, ind, .false.)
468
469 ! Initialize zero vector
470 call field_rzero(zero_vector)
471
472 ! Use incompressible CFL with max_wave_speed as u-component, zero v and w
473 cfl_compressible = cfl(dt, max_wave_speed, zero_vector%x, zero_vector%x, xh, coef, nelv, gdim)
474
475 ! Release the scratch field
476 call neko_scratch_registry%relinquish_field(ind)
477
478 end function cfl_compressible
479
492 subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
493 type(field_t), intent(in) :: u, v, w
494 type(coef_t), intent(in) :: coef
495 real(kind=rp), intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
496 real(kind=rp), intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
497 real(kind=rp), intent(inout) :: s33(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
498 real(kind=rp), intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
499 real(kind=rp), intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
500 real(kind=rp), intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
501
502 type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
503
504 integer :: nelv, lxyz
505
506 if (neko_bcknd_device .eq. 1) then
507 s11_d = device_get_ptr(s11)
508 s22_d = device_get_ptr(s22)
509 s33_d = device_get_ptr(s33)
510 s12_d = device_get_ptr(s12)
511 s23_d = device_get_ptr(s23)
512 s13_d = device_get_ptr(s13)
513 end if
514
515 nelv = u%msh%nelv
516 lxyz = u%Xh%lxyz
517
518 ! we use s11 as a work array here
519 call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
520 call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
521 if (neko_bcknd_device .eq. 1) then
522 call device_add2(s12_d, s11_d, nelv*lxyz)
523 else
524 call add2(s12, s11, nelv*lxyz)
525 end if
526
527 call dudxyz (s13, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
528 call dudxyz (s11, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
529 if (neko_bcknd_device .eq. 1) then
530 call device_add2(s13_d, s11_d, nelv*lxyz)
531 else
532 call add2(s13, s11, nelv*lxyz)
533 end if
534
535 call dudxyz (s23, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
536 call dudxyz (s11, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
537 if (neko_bcknd_device .eq. 1) then
538 call device_add2(s23_d, s11_d, nelv*lxyz)
539 else
540 call add2(s23, s11, nelv*lxyz)
541 end if
542
543 call dudxyz (s11, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
544 call dudxyz (s22, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
545 call dudxyz (s33, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
546
547 if (neko_bcknd_device .eq. 1) then
548 call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
549 call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
550 call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
551 else
552 call cmult(s12, 0.5_rp, nelv*lxyz)
553 call cmult(s13, 0.5_rp, nelv*lxyz)
554 call cmult(s23, 0.5_rp, nelv*lxyz)
555 end if
556
557 end subroutine strain_rate
558
565 subroutine lambda2op(lambda2, u, v, w, coef)
566 type(coef_t), intent(in) :: coef
567 type(field_t), intent(inout) :: lambda2
568 type(field_t), intent(in) :: u, v, w
570 if (neko_bcknd_sx .eq. 1) then
571 call opr_sx_lambda2(lambda2, u, v, w, coef)
572 else if (neko_bcknd_device .eq. 1) then
573 call opr_device_lambda2(lambda2, u, v, w, coef)
574 else
575 call opr_cpu_lambda2(lambda2, u, v, w, coef)
576 end if
577
578 end subroutine lambda2op
579
590 subroutine set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
591 type(space_t), intent(inout) :: xh
592 type(coef_t), intent(inout) :: coef
593 type(field_t), intent(inout) :: cr, cs, ct
594 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
595 intent(in) :: cx, cy, cz
596 type(c_ptr) :: cx_d, cy_d, cz_d
597
598 if (neko_bcknd_sx .eq. 1) then
599 call opr_sx_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
600 else if (neko_bcknd_xsmm .eq. 1) then
601 call opr_xsmm_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
602 else if (neko_bcknd_device .eq. 1) then
603 cx_d = device_get_ptr(cx)
604 cy_d = device_get_ptr(cy)
605 cz_d = device_get_ptr(cz)
606 call opr_device_set_convect_rst(cr%x_d, cs%x_d, ct%x_d, &
607 cx_d, cy_d, cz_d, xh, coef)
608 else
609 call opr_cpu_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
610 end if
611
612 end subroutine set_convect_rst
613
629 subroutine runge_kutta(phi, conv_k1, conv_k23, conv_k4, Xh_GLL, Xh_GL, &
630 coef, coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
631 type(space_t), intent(in) :: xh_gll
632 type(space_t), intent(inout) :: xh_gl
633 type(coef_t), intent(in) :: coef
634 type(coef_t), intent(inout) :: coef_gl
635 type(interpolator_t) :: gll_to_gl
636 real(kind=rp), intent(inout) :: tau, dtau
637 integer, intent(in) :: n, nel, n_gl
638 type(field_t), intent(inout) :: phi
639 type(field_list_t) :: conv_k1, conv_k23, conv_k4
640 real(kind=rp) :: c1, c2, c3
641 type(field_t), pointer :: u1, k1, k2, k3, k4
642 type(vector_t), pointer :: u1_gl
643 integer :: ind(6), i, e
644
645 call neko_scratch_registry%request_field(u1, ind(1), .false.)
646 call neko_scratch_registry%request_field(k1, ind(2), .false.)
647 call neko_scratch_registry%request_field(k2, ind(3), .false.)
648 call neko_scratch_registry%request_field(k3, ind(4), .false.)
649 call neko_scratch_registry%request_field(k4, ind(5), .false.)
650 call neko_scratch_registry%request_vector(u1_gl, ind(6), n_gl, .false.)
651
652 c1 = 1.0_rp
653 c2 = -dtau/2.
654 c3 = -dtau
655
656 if (neko_bcknd_device .eq. 1) then
657
658 ! Stage 1:
659 call device_invcol3(u1%x_d, phi%x_d, coef%B_d, n)
660 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
661 call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
662 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
663 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
664 call device_col2(k1%x_d, coef%B_d, n)
665
666 ! Stage 2:
667 call device_add3s2(u1%x_d, phi%x_d, k1%x_d, c1, c2, n)
668 call device_invcol2(u1%x_d, coef%B_d, n)
669 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
670 call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
671 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
672 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
673 call device_col2(k2%x_d, coef%B_d, n)
674
675 ! Stage 3:
676 call device_add3s2(u1%x_d, phi%x_d, k2%x_d, c1, c2, n)
677 call device_invcol2(u1%x_d, coef%B_d, n)
678 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
679 call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
680 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
681 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
682 call device_col2(k3%x_d, coef%B_d, n)
683
684 ! Stage 4:
685 call device_add3s2(u1%x_d, phi%x_d, k3%x_d, c1, c3, n)
686 call device_invcol2(u1%x_d, coef%B_d, n)
687 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
688 call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
689 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
690 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
691 call device_col2(k4%x_d, coef%B_d, n)
692
693 c1 = -dtau/6.
694 c2 = -dtau/3.
695
696 call device_add5s4(phi%x_d, k1%x_d, k2%x_d, k3%x_d, k4%x_d, &
697 c1, c2, c2, c1, n)
698
699 else
700
701 ! Stage 1:
702 call invcol3(u1%x, phi%x, coef%B, n)
703 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
704 call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
705 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
706 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
707 call col2(k1%x, coef%B, n)
708
709 ! Stage 2:
710 call add3s2(u1%x, phi%x, k1%x, c1, c2, n)
711 call invcol2(u1%x, coef%B, n)
712 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
713 call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
714 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
715 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
716 call col2(k2%x, coef%B, n)
717
718 ! Stage 3:
719 call add3s2(u1%x, phi%x, k2%x, c1, c2, n)
720 call invcol2(u1%x, coef%B, n)
721 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
722 call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
723 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
724 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
725 call col2(k3%x, coef%B, n)
726
727 ! Stage 4:
728 call add3s2(u1%x, phi%x, k3%x, c1, c3, n)
729 call invcol2(u1%x, coef%B, n)
730 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
731 call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
732 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
733 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
734 call col2(k4%x, coef%B, n)
735
736 c1 = -dtau/6.
737 c2 = -dtau/3.
738 call add5s4(phi%x, k1%x, k2%x, k3%x, k4%x, c1, c2, c2, c1, n)
739 end if
740
741 call neko_scratch_registry%relinquish_field(ind)
742
743 end subroutine runge_kutta
744
745 subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
746 real(kind=rp), dimension(:), intent(inout) :: vx, vy, vz
747 integer, intent(in) :: idir
748 type(coef_t), intent(in) :: coef
749 if (coef%cyclic) then
750 if (neko_bcknd_device .eq. 1) then
751 call opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
752 else
753 call opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
754 end if
755 end if
756 end subroutine rotate_cyc_r1
757
758 subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
759 real(kind=rp), dimension(:,:,:,:), intent(inout) :: vx, vy, vz
760 integer, intent(in) :: idir
761 type(coef_t), intent(in) :: coef
762 if (coef%cyclic) then
763 if (neko_bcknd_device .eq. 1) then
764 call opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
765 else
766 call opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
767 end if
768 end if
769 end subroutine rotate_cyc_r4
770
771
772end module operators
773
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
subroutine, public field_rzero(a, n)
Zero a real vector.
Defines a field.
Definition field.f90:34
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:840
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition math.f90:462
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public invcol3(a, b, c, n)
Invert a vector .
Definition math.f90:626
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:895
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns .
Definition math.f90:926
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_xsmm
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
subroutine, public ortho(x, glb_n_points, n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
real(kind=rp) function, public cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the to the scalar field u, used in the OIFS scheme.
subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
subroutine, public div(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
real(kind=rp) function, public cfl_compressible(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
subroutine, public grad(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e 0.5 * du_i/dx_j + du_j/dx_i.
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
Definition operators.f90:90
subroutine, public runge_kutta(phi, conv_k1, conv_k23, conv_k4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:128
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:314
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:181
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:241
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:342
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Operators SX-Aurora backend.
Definition opr_sx.f90:2
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_sx.f90:102
Operators libxsmm backend.
Definition opr_xsmm.F90:61
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Definition opr_xsmm.F90:371
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
Definition opr_xsmm.F90:297
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition opr_xsmm.F90:242
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_xsmm.F90:418
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition opr_xsmm.F90:92
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition opr_xsmm.F90:146
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Definition opr_xsmm.F90:469
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
Defines a vector.
Definition vector.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
field_list_t, To be able to group fields together
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63