Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
operators.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use num_types, only : rp, i8
38 use opr_cpu, only : opr_cpu_cfl, opr_cpu_curl, opr_cpu_opgrad, &
39 opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
40 opr_cpu_dudxyz, opr_cpu_lambda2, opr_cpu_set_convect_rst, &
42 use opr_sx, only : opr_sx_cfl, opr_sx_curl, opr_sx_opgrad, &
43 opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
44 opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
52 use space, only : space_t
53 use coefs, only : coef_t
54 use field, only : field_t
55 use field_list, only : field_list_t
56 use field_math, only : field_rzero
58 use math, only : glsum, cmult, add2, add3s2, cadd, copy, col2, invcol2, &
65 use vector, only : vector_t
67 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_max, mpi_sum
68 use, intrinsic :: iso_c_binding, only : c_ptr
69 implicit none
70 private
71
75
76 interface rotate_cyc
77 module procedure rotate_cyc_r1
78 module procedure rotate_cyc_r4
79 end interface rotate_cyc
80
81contains
82
90 subroutine dudxyz (du, u, dr, ds, dt, coef)
91 type(coef_t), intent(in), target :: coef
92 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: du
93 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: u, dr, ds, dt
94
95 if (neko_bcknd_sx .eq. 1) then
96 call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
97 else if (neko_bcknd_xsmm .eq. 1) then
98 call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
99 else if (neko_bcknd_device .eq. 1) then
100 call opr_device_dudxyz(du, u, dr, ds, dt, coef)
101 else
102 call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
103 end if
104
105 end subroutine dudxyz
106
113 subroutine div(res, ux, uy, uz, coef)
114 type(coef_t), intent(in), target :: coef
115 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: res
116 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: ux, uy, uz
117 type(field_t), pointer :: work
118 integer :: ind
119 type(c_ptr) :: res_d
120
121 if (neko_bcknd_device .eq. 1) then
122 res_d = device_get_ptr(res)
123 end if
124
125 call neko_scratch_registry%request_field(work, ind, .false.)
126
127 ! Get dux / dx
128 call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)
129
130 ! Get duy / dy
131 call dudxyz(work%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
132 if (neko_bcknd_device .eq. 1) then
133 call device_add2(res_d, work%x_d, work%size())
134 else
135 call add2(res, work%x, work%size())
136 end if
137
138 ! Get dux / dz
139 call dudxyz(work%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
140 if (neko_bcknd_device .eq. 1) then
141 call device_add2(res_d, work%x_d, work%size())
142 else
143 call add2(res, work%x, work%size())
144 end if
145
146 call neko_scratch_registry%relinquish_field(ind)
147
148 end subroutine div
149
156 subroutine grad(ux, uy, uz, u, coef)
157 type(coef_t), intent(in) :: coef
158 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
159 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
160 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
161 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
162
163 call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
164 call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
165 call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
166
167 end subroutine grad
168
181 subroutine opgrad(ux, uy, uz, u, coef, es, ee)
182 type(coef_t), intent(in) :: coef
183 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
184 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
185 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
186 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
187 integer, optional :: es, ee
188 integer :: eblk_start, eblk_end
189
190 if (present(es)) then
191 eblk_start = es
192 else
193 eblk_start = 1
194 end if
195
196 if (present(ee)) then
197 eblk_end = ee
198 else
199 eblk_end = coef%msh%nelv
200 end if
201
202 if (neko_bcknd_sx .eq. 1) then
203 call opr_sx_opgrad(ux, uy, uz, u, coef)
204 else if (neko_bcknd_xsmm .eq. 1) then
205 call opr_xsmm_opgrad(ux, uy, uz, u, coef)
206 else if (neko_bcknd_device .eq. 1) then
207 call opr_device_opgrad(ux, uy, uz, u, coef)
208 else
209 call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
210 end if
211
212 end subroutine opgrad
213
218 subroutine ortho(x, glb_n_points, n)
219 integer, intent(in) :: n
220 integer(kind=i8), intent(in) :: glb_n_points
221 real(kind=rp), dimension(n), intent(inout) :: x
222 real(kind=rp) :: c
223 type(c_ptr) :: x_d
224 if (neko_bcknd_device .eq. 1) then
225 x_d = device_get_ptr(x)
226 c = device_glsum(x_d, n)/glb_n_points
227 call device_cadd(x_d, -c, n)
228 else
229 c = glsum(x, n)/glb_n_points
230 call cadd(x, -c, n)
231 end if
232
233 end subroutine ortho
234
246 subroutine cdtp (dtx, x, dr, ds, dt, coef, es, ee)
247 type(coef_t), intent(in) :: coef
248 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
249 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
250 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
251 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
252 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
253 integer, optional :: es, ee
254 integer :: eblk_start, eblk_end
255
256 if (present(es)) then
257 eblk_start = es
258 else
259 eblk_start = 1
260 end if
261
262 if (present(ee)) then
263 eblk_end = ee
264 else
265 eblk_end = coef%msh%nelv
266 end if
267
268 if (neko_bcknd_sx .eq. 1) then
269 call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
270 else if (neko_bcknd_xsmm .eq. 1) then
271 call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
272 else if (neko_bcknd_device .eq. 1) then
273 call opr_device_cdtp(dtx, x, dr, ds, dt, coef)
274 else
275 call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
276 end if
277
278 end subroutine cdtp
279
290 subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
291 type(space_t), intent(in) :: xh
292 type(coef_t), intent(in) :: coef
293 real(kind=rp), intent(inout) :: du(xh%lxyz, coef%msh%nelv)
294 real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
295 real(kind=rp), intent(inout) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
296 real(kind=rp), intent(inout) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
297 real(kind=rp), intent(inout) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
298 integer, optional :: es, ee
299 integer :: eblk_end, eblk_start
300
301 associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
302 if (present(es)) then
303 eblk_start = es
304 else
305 eblk_start = 1
306 end if
307
308 if (present(ee)) then
309 eblk_end = ee
310 else
311 eblk_end = coef%msh%nelv
312 end if
313
314 if (neko_bcknd_sx .eq. 1) then
315 call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
316 else if (neko_bcknd_xsmm .eq. 1) then
317 call opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
318 else if (neko_bcknd_device .eq. 1) then
319 call opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
320 else
321 call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
322 end if
323 end associate
324
325 end subroutine conv1
326
344 subroutine convect_scalar(du, u, cr, cs, ct, Xh_GLL, Xh_GL, coef_GLL, &
345 coef_GL, GLL_to_GL)
346 type(space_t), intent(in) :: xh_gl
347 type(space_t), intent(in) :: xh_gll
348 type(coef_t), intent(in) :: coef_GLL
349 type(coef_t), intent(in) :: coef_GL
350 type(interpolator_t), intent(inout) :: GLL_to_GL
351 real(kind=rp), intent(inout) :: &
352 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
353 real(kind=rp), intent(inout) :: &
354 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
355 type(field_t), intent(inout) :: cr, cs, ct
356 type(c_ptr) :: u_d
357
358 if (neko_bcknd_sx .eq. 1) then
359 call opr_sx_convect_scalar(du, u, cr%x, cs%x, ct%x, &
360 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
361 else if (neko_bcknd_xsmm .eq. 1) then
362 call opr_xsmm_convect_scalar(du, u, cr%x, cs%x, ct%x, &
363 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
364 else if (neko_bcknd_device .eq. 1) then
365 u_d = device_get_ptr(u)
366 call opr_device_convect_scalar(du, u_d, cr%x_d, cs%x_d, ct%x_d, &
367 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
368 else
369 call opr_cpu_convect_scalar(du, u, cr%x, cs%x, ct%x, &
370 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
371 end if
372
373 end subroutine convect_scalar
374
375 !! Compute the curl fo a vector field.
376 !! @param w1 Will store the x component of the curl.
377 !! @param w2 Will store the y component of the curl.
378 !! @param w3 Will store the z component of the curl.
379 !! @param u1 The x component of the vector field.
380 !! @param u2 The y component of the vector field.
381 !! @param u3 The z component of the vector field.
382 !! @param work1 A temporary array for computations.
383 !! @param work2 A temporary array for computations.
384 !! @param coef The SEM coefficients.
385 subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
386 type(field_t), intent(inout) :: w1
387 type(field_t), intent(inout) :: w2
388 type(field_t), intent(inout) :: w3
389 type(field_t), intent(in) :: u1
390 type(field_t), intent(in) :: u2
391 type(field_t), intent(in) :: u3
392 type(field_t), intent(inout) :: work1
393 type(field_t), intent(inout) :: work2
394 type(coef_t), intent(in) :: coef
395 type(c_ptr), optional, intent(inout) :: event
396
397 if (neko_bcknd_sx .eq. 1) then
398 call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
399 else if (neko_bcknd_xsmm .eq. 1) then
400 call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
401 else if (neko_bcknd_device .eq. 1) then
402 if (present(event)) then
403 call opr_device_curl(w1, w2, w3, u1, u2, u3, &
404 work1, work2, coef, event)
405 else
406 call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
407 end if
408 else
409 call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
410 end if
411
412 end subroutine curl
413
414 !! Compute the CFL number
415 !! @param dt The timestep.
416 !! @param u The x component of velocity.
417 !! @param v The y component of velocity.
418 !! @param w The z component of velocity.
419 !! @param Xh The SEM function space.
420 !! @param coef The SEM coefficients.
421 !! @param nelv The total number of elements.
422 !! @param gdim Number of geometric dimensions.
423 function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
424 type(space_t), intent(in) :: xh
425 type(coef_t), intent(in) :: coef
426 integer, intent(in) :: nelv, gdim
427 real(kind=rp), intent(in) :: dt
428 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: u, v, w
429 real(kind=rp) :: cfl
430 integer :: ierr
431
432 if (neko_bcknd_sx .eq. 1) then
433 cfl = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
434 else if (neko_bcknd_device .eq. 1) then
435 cfl = opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
436 else
437 cfl = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
438 end if
439
440 if (.not. neko_device_mpi) then
441 call mpi_allreduce(mpi_in_place, cfl, 1, &
442 mpi_real_precision, mpi_max, neko_comm, ierr)
443 end if
444
445 end function cfl
446
447 !! Compute the CFL number for compressible flows
448 !! @param dt The timestep.
449 !! @param max_wave_speed The precomputed maximum wave speed field.
450 !! @param Xh The SEM function space.
451 !! @param coef The SEM coefficients.
452 !! @param nelv The total number of elements.
453 !! @param gdim Number of geometric dimensions.
454 function cfl_compressible(dt, max_wave_speed, Xh, coef, nelv, gdim)
455 type(space_t), intent(in) :: xh
456 type(coef_t), intent(in) :: coef
457 integer, intent(in) :: nelv, gdim
458 real(kind=rp), intent(in) :: dt
459 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: &
460 max_wave_speed
461 real(kind=rp) :: cfl_compressible
462
463 cfl_compressible = cfl(dt, max_wave_speed, max_wave_speed, max_wave_speed, &
464 xh, coef, nelv, gdim)
465
466 end function cfl_compressible
467
480 subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
481 type(field_t), intent(in) :: u, v, w
482 type(coef_t), intent(in) :: coef
483 real(kind=rp), intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
484 real(kind=rp), intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
485 real(kind=rp), intent(inout) :: s33(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
486 real(kind=rp), intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
487 real(kind=rp), intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
488 real(kind=rp), intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
489
490 type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
491
492 integer :: nelv, lxyz
493
494 if (neko_bcknd_device .eq. 1) then
495 s11_d = device_get_ptr(s11)
496 s22_d = device_get_ptr(s22)
497 s33_d = device_get_ptr(s33)
498 s12_d = device_get_ptr(s12)
499 s23_d = device_get_ptr(s23)
500 s13_d = device_get_ptr(s13)
501 end if
502
503 nelv = u%msh%nelv
504 lxyz = u%Xh%lxyz
505
506 ! we use s11 as a work array here
507 call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
508 call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
509 if (neko_bcknd_device .eq. 1) then
510 call device_add2(s12_d, s11_d, nelv*lxyz)
511 else
512 call add2(s12, s11, nelv*lxyz)
513 end if
514
515 call dudxyz (s13, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
516 call dudxyz (s11, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
517 if (neko_bcknd_device .eq. 1) then
518 call device_add2(s13_d, s11_d, nelv*lxyz)
519 else
520 call add2(s13, s11, nelv*lxyz)
521 end if
522
523 call dudxyz (s23, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
524 call dudxyz (s11, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
525 if (neko_bcknd_device .eq. 1) then
526 call device_add2(s23_d, s11_d, nelv*lxyz)
527 else
528 call add2(s23, s11, nelv*lxyz)
529 end if
530
531 call dudxyz (s11, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
532 call dudxyz (s22, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
533 call dudxyz (s33, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
534
535 if (neko_bcknd_device .eq. 1) then
536 call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
537 call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
538 call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
539 else
540 call cmult(s12, 0.5_rp, nelv*lxyz)
541 call cmult(s13, 0.5_rp, nelv*lxyz)
542 call cmult(s23, 0.5_rp, nelv*lxyz)
543 end if
544
545 end subroutine strain_rate
546
553 subroutine lambda2op(lambda2, u, v, w, coef)
554 type(coef_t), intent(in) :: coef
555 type(field_t), intent(inout) :: lambda2
556 type(field_t), intent(in) :: u, v, w
558 if (neko_bcknd_sx .eq. 1) then
559 call opr_sx_lambda2(lambda2, u, v, w, coef)
560 else if (neko_bcknd_device .eq. 1) then
561 call opr_device_lambda2(lambda2, u, v, w, coef)
562 else
563 call opr_cpu_lambda2(lambda2, u, v, w, coef)
564 end if
565
566 end subroutine lambda2op
567
578 subroutine set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
579 type(space_t), intent(inout) :: xh
580 type(coef_t), intent(inout) :: coef
581 type(field_t), intent(inout) :: cr, cs, ct
582 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
583 intent(in) :: cx, cy, cz
584 type(c_ptr) :: cx_d, cy_d, cz_d
585
586 if (neko_bcknd_sx .eq. 1) then
587 call opr_sx_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
588 else if (neko_bcknd_xsmm .eq. 1) then
589 call opr_xsmm_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
590 else if (neko_bcknd_device .eq. 1) then
591 cx_d = device_get_ptr(cx)
592 cy_d = device_get_ptr(cy)
593 cz_d = device_get_ptr(cz)
594 call opr_device_set_convect_rst(cr%x_d, cs%x_d, ct%x_d, &
595 cx_d, cy_d, cz_d, xh, coef)
596 else
597 call opr_cpu_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
598 end if
599
600 end subroutine set_convect_rst
601
617 subroutine runge_kutta(phi, conv_k1, conv_k23, conv_k4, Xh_GLL, Xh_GL, &
618 coef, coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
619 type(space_t), intent(in) :: xh_gll
620 type(space_t), intent(inout) :: xh_gl
621 type(coef_t), intent(in) :: coef
622 type(coef_t), intent(inout) :: coef_gl
623 type(interpolator_t) :: gll_to_gl
624 real(kind=rp), intent(inout) :: tau, dtau
625 integer, intent(in) :: n, nel, n_gl
626 type(field_t), intent(inout) :: phi
627 type(field_list_t) :: conv_k1, conv_k23, conv_k4
628 real(kind=rp) :: c1, c2, c3
629 type(field_t), pointer :: u1, k1, k2, k3, k4
630 type(vector_t), pointer :: u1_gl
631 integer :: ind(6), i, e
632
633 call neko_scratch_registry%request_field(u1, ind(1), .false.)
634 call neko_scratch_registry%request_field(k1, ind(2), .false.)
635 call neko_scratch_registry%request_field(k2, ind(3), .false.)
636 call neko_scratch_registry%request_field(k3, ind(4), .false.)
637 call neko_scratch_registry%request_field(k4, ind(5), .false.)
638 call neko_scratch_registry%request_vector(u1_gl, ind(6), n_gl, .false.)
639
640 c1 = 1.0_rp
641 c2 = -dtau/2.
642 c3 = -dtau
643
644 if (neko_bcknd_device .eq. 1) then
645
646 ! Stage 1:
647 call device_invcol3(u1%x_d, phi%x_d, coef%B_d, n)
648 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
649 call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
650 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
651 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
652 call device_col2(k1%x_d, coef%B_d, n)
653
654 ! Stage 2:
655 call device_add3s2(u1%x_d, phi%x_d, k1%x_d, c1, c2, n)
656 call device_invcol2(u1%x_d, coef%B_d, n)
657 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
658 call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
659 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
660 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
661 call device_col2(k2%x_d, coef%B_d, n)
662
663 ! Stage 3:
664 call device_add3s2(u1%x_d, phi%x_d, k2%x_d, c1, c2, n)
665 call device_invcol2(u1%x_d, coef%B_d, n)
666 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
667 call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
668 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
669 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
670 call device_col2(k3%x_d, coef%B_d, n)
671
672 ! Stage 4:
673 call device_add3s2(u1%x_d, phi%x_d, k3%x_d, c1, c3, n)
674 call device_invcol2(u1%x_d, coef%B_d, n)
675 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
676 call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
677 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
678 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
679 call device_col2(k4%x_d, coef%B_d, n)
680
681 c1 = -dtau/6.
682 c2 = -dtau/3.
683
684 call device_add5s4(phi%x_d, k1%x_d, k2%x_d, k3%x_d, k4%x_d, &
685 c1, c2, c2, c1, n)
686
687 else
688
689 ! Stage 1:
690 call invcol3(u1%x, phi%x, coef%B, n)
691 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
692 call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
693 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
694 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
695 call col2(k1%x, coef%B, n)
696
697 ! Stage 2:
698 call add3s2(u1%x, phi%x, k1%x, c1, c2, n)
699 call invcol2(u1%x, coef%B, n)
700 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
701 call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
702 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
703 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
704 call col2(k2%x, coef%B, n)
705
706 ! Stage 3:
707 call add3s2(u1%x, phi%x, k2%x, c1, c2, n)
708 call invcol2(u1%x, coef%B, n)
709 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
710 call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
711 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
712 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
713 call col2(k3%x, coef%B, n)
714
715 ! Stage 4:
716 call add3s2(u1%x, phi%x, k3%x, c1, c3, n)
717 call invcol2(u1%x, coef%B, n)
718 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
719 call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
720 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
721 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
722 call col2(k4%x, coef%B, n)
723
724 c1 = -dtau/6.
725 c2 = -dtau/3.
726 call add5s4(phi%x, k1%x, k2%x, k3%x, k4%x, c1, c2, c2, c1, n)
727 end if
728
729 call neko_scratch_registry%relinquish_field(ind)
730
731 end subroutine runge_kutta
732
733 subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
734 real(kind=rp), dimension(:), intent(inout) :: vx, vy, vz
735 integer, intent(in) :: idir
736 type(coef_t), intent(in) :: coef
737 if (coef%cyclic) then
738 if (neko_bcknd_device .eq. 1) then
739 call opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
740 else
741 call opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
742 end if
743 end if
744 end subroutine rotate_cyc_r1
745
746 subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
747 real(kind=rp), dimension(:,:,:,:), intent(inout) :: vx, vy, vz
748 integer, intent(in) :: idir
749 type(coef_t), intent(in) :: coef
750 if (coef%cyclic) then
751 if (neko_bcknd_device .eq. 1) then
752 call opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
753 else
754 call opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
755 end if
756 end if
757 end subroutine rotate_cyc_r4
758
759
760end module operators
761
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
subroutine, public field_rzero(a, n)
Zero a real vector.
Defines a field.
Definition field.f90:34
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:840
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition math.f90:462
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public invcol3(a, b, c, n)
Invert a vector .
Definition math.f90:626
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:895
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns .
Definition math.f90:926
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_xsmm
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
subroutine, public ortho(x, glb_n_points, n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
real(kind=rp) function, public cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the to the scalar field u, used in the OIFS scheme.
subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
subroutine, public div(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
real(kind=rp) function, public cfl_compressible(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
subroutine, public grad(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e 0.5 * du_i/dx_j + du_j/dx_i.
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
Definition operators.f90:91
subroutine, public runge_kutta(phi, conv_k1, conv_k23, conv_k4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:128
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:313
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:178
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:238
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:340
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Operators SX-Aurora backend.
Definition opr_sx.f90:2
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_sx.f90:102
Operators libxsmm backend.
Definition opr_xsmm.F90:61
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Definition opr_xsmm.F90:371
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
Definition opr_xsmm.F90:297
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition opr_xsmm.F90:242
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_xsmm.F90:418
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition opr_xsmm.F90:92
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition opr_xsmm.F90:146
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Definition opr_xsmm.F90:469
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
Defines a vector.
Definition vector.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
field_list_t, To be able to group fields together
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63