39 opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
43 opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
44 opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
67 use mpi_f08,
only : mpi_allreduce, mpi_in_place, mpi_max, mpi_sum
68 use,
intrinsic :: iso_c_binding, only : c_ptr
87 module procedure div_d
99 module procedure cfl_d
100 module procedure cfl_f
134 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(inout) :: du
135 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(in) :: u, dr, ds, dt
136 type(
coef_t),
intent(in) :: coef
137 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
140 call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
144 call neko_log%deprecated(
'Operator: dudxyz, implicit device', &
145 '2.0.0',
'Please call opr_device_dudxyz instead.')
155 call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
168 type(
field_t),
intent(inout) :: du
169 type(
field_t),
intent(in) :: u, dr, ds, dt
170 type(
coef_t),
intent(in) :: coef
173 call opr_sx_dudxyz(du%x, u%x, dr%x, ds%x, dt%x, coef)
179 call opr_cpu_dudxyz(du%x, u%x, dr%x, ds%x, dt%x, coef)
191 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(inout) :: res
192 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(in) :: ux, uy, uz
193 type(
coef_t),
intent(in),
target :: coef
199 call neko_log%deprecated(
'Operator: div, implicit device', &
200 '2.0.0',
'Please call div_d instead.')
207 call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)
210 call dudxyz(work%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
214 call add2(res, work%x, work%size())
218 call dudxyz(work%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
222 call add2(res, work%x, work%size())
235 subroutine div_d(res_d, ux_d, uy_d, uz_d, coef)
236 type(c_ptr),
intent(inout) :: res_d
237 type(c_ptr),
intent(in) :: ux_d, uy_d, uz_d
238 type(
coef_t),
intent(in),
target :: coef
245 call dudxyz(res_d, ux_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
248 call dudxyz(work%x_d, uy_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
252 call dudxyz(work%x_d, uz_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
266 type(
coef_t),
intent(in) :: coef
267 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(inout) :: ux
268 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(inout) :: uy
269 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(inout) :: uz
270 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(in) :: u
272 call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
273 call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
274 call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
284 subroutine grad_d(ux_d, uy_d, uz_d, u_d, coef)
285 type(
coef_t),
intent(in) :: coef
286 type(c_ptr),
intent(inout) :: ux_d, uy_d, uz_d
287 type(c_ptr),
intent(in) :: u_d
289 call dudxyz(ux_d, u_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
290 call dudxyz(uy_d, u_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
291 call dudxyz(uz_d, u_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
307 subroutine opgrad(ux, uy, uz, u, coef, es, ee)
308 type(
coef_t),
intent(in) :: coef
309 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
310 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
311 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
312 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
313 integer,
optional :: es, ee
314 integer :: eblk_start, eblk_end
315 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
317 if (
present(es))
then
323 if (
present(ee))
then
326 eblk_end = coef%msh%nelv
330 call opr_sx_opgrad(ux, uy, uz, u, coef)
340 call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
349 subroutine ortho(x, glb_n_points, n)
350 integer,
intent(in) :: n
351 integer(kind=i8),
intent(in) :: glb_n_points
352 real(kind=
rp),
dimension(n),
intent(inout) :: x
357 call neko_log%deprecated(
'Operator: ortho, implicit device', &
358 '2.0.0',
'Please call device_ortho instead.')
364 c =
glsum(x, n) / glb_n_points
381 subroutine cdtp(dtx, x, dr, ds, dt, coef, es, ee)
382 type(
coef_t),
intent(in) :: coef
383 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: dtx
384 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: x
385 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
386 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: ds
387 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dt
388 integer,
optional :: es, ee
389 integer :: eblk_start, eblk_end
390 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
392 if (
present(es))
then
398 if (
present(ee))
then
401 eblk_end = coef%msh%nelv
405 call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
416 call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
431 subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
432 type(
space_t),
intent(in) :: xh
433 type(
coef_t),
intent(in) :: coef
434 real(kind=
rp),
intent(inout) :: du(xh%lxyz, coef%msh%nelv)
435 real(kind=
rp),
intent(in) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
436 real(kind=
rp),
intent(in) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
437 real(kind=
rp),
intent(in) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
438 real(kind=
rp),
intent(in) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
439 integer,
optional :: es, ee
440 integer :: eblk_end, eblk_start
441 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
443 associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
444 if (
present(es))
then
450 if (
present(ee))
then
453 eblk_end = coef%msh%nelv
457 call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
468 call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
493 type(space_t),
intent(in) :: Xh_GL
494 type(space_t),
intent(in) :: Xh_GLL
495 type(coef_t),
intent(in) :: coef_GLL
496 type(coef_t),
intent(in) :: coef_GL
497 type(interpolator_t),
intent(inout) :: GLL_to_GL
498 real(kind=rp),
intent(inout) :: &
499 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
500 real(kind=rp),
intent(inout) :: &
501 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
502 type(field_t),
intent(inout) :: cr, cs, ct
505 if (neko_bcknd_sx .eq. 1)
then
506 call opr_sx_convect_scalar(du, u, cr%x, cs%x, ct%x, &
507 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
508 else if (neko_bcknd_xsmm .eq. 1)
then
509 call opr_xsmm_convect_scalar(du, u, cr%x, cs%x, ct%x, &
510 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
511 else if (neko_bcknd_device .eq. 1)
then
512 u_d = device_get_ptr(u)
513 call opr_device_convect_scalar(du, u_d, cr%x_d, cs%x_d, ct%x_d, &
514 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
516 call opr_cpu_convect_scalar(du, u, cr%x, cs%x, ct%x, &
517 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
532 subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
533 type(field_t),
intent(inout) :: w1
534 type(field_t),
intent(inout) :: w2
535 type(field_t),
intent(inout) :: w3
536 type(field_t),
intent(in) :: u1
537 type(field_t),
intent(in) :: u2
538 type(field_t),
intent(in) :: u3
539 type(field_t),
intent(inout) :: work1
540 type(field_t),
intent(inout) :: work2
541 type(coef_t),
intent(in) :: coef
542 type(c_ptr),
optional,
intent(inout) :: event
544 if (neko_bcknd_sx .eq. 1)
then
545 call opr_sx_curl(w1%x, w2%x, w3%x, u1%x, u2%x, u3%x, &
546 work1%x, work2%x, coef)
547 else if (neko_bcknd_xsmm .eq. 1)
then
548 call opr_xsmm_curl(w1%x, w2%x, w3%x, u1%x, u2%x, u3%x, &
549 work1%x, work2%x, coef)
550 else if (neko_bcknd_device .eq. 1)
then
551 call opr_device_curl(w1, w2, w3, u1, u2, u3, &
552 work1, work2, coef, event)
554 call opr_cpu_curl(w1%x, w2%x, w3%x, u1%x, u2%x, u3%x, &
555 work1%x, work2%x, coef)
569 function cfl_r4(dt, u, v, w, Xh, coef, nelv, gdim)
570 real(kind=rp),
intent(in) :: dt
571 real(kind=rp),
contiguous,
dimension(:,:,:,:),
intent(in) :: u, v, w
572 type(space_t),
intent(in) :: xh
573 type(coef_t),
intent(in) :: coef
574 integer,
intent(in) :: nelv, gdim
577 type(c_ptr) :: u_d, v_d, w_d
579 if (neko_bcknd_sx .eq. 1)
then
580 cfl_r4 = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
581 else if (neko_bcknd_device .eq. 1)
then
582 call neko_log%deprecated(
'Operator: cfl_r4, implicit device', &
583 '2.0.0',
'Please call cfl_d instead.')
585 u_d = device_get_ptr(u)
586 v_d = device_get_ptr(v)
587 w_d = device_get_ptr(w)
589 cfl_r4 = opr_device_cfl(dt, u_d, v_d, w_d, xh, coef, nelv, gdim)
591 cfl_r4 = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
594 if (.not. neko_device_mpi)
then
595 call mpi_allreduce(mpi_in_place,
cfl_r4, 1, &
596 mpi_real_precision, mpi_max, neko_comm, ierr)
601 function cfl_d(dt, u_d, v_d, w_d, Xh, coef, nelv, gdim)
602 real(kind=rp),
intent(in) :: dt
603 type(c_ptr),
intent(in) :: u_d, v_d, w_d
604 type(space_t),
intent(in) :: xh
605 type(coef_t),
intent(in) :: coef
606 integer,
intent(in) :: nelv, gdim
607 real(kind=rp) :: cfl_d
610 cfl_d = opr_device_cfl(dt, u_d, v_d, w_d, xh, coef, nelv, gdim)
612 if (.not. neko_device_mpi)
then
613 call mpi_allreduce(mpi_in_place, cfl_d, 1, &
614 mpi_real_precision, mpi_max, neko_comm, ierr)
619 function cfl_f(dt, u, v, w, Xh, coef, nelv, gdim)
620 real(kind=rp),
intent(in) :: dt
621 type(field_t),
intent(in) :: u, v, w
622 type(space_t),
intent(in) :: xh
623 type(coef_t),
intent(in) :: coef
624 integer,
intent(in) :: nelv, gdim
625 real(kind=rp) ::
cfl_f
628 if (neko_bcknd_sx .eq. 1)
then
629 cfl_f = opr_sx_cfl(dt, u%x, v%x, w%x, xh, coef, nelv)
630 else if (neko_bcknd_device .eq. 1)
then
631 cfl_f = opr_device_cfl(dt, u%x_d, v%x_d, w%x_d, xh, coef, nelv, gdim)
633 cfl_f = opr_cpu_cfl(dt, u%x, v%x, w%x, xh, coef, nelv, gdim)
636 if (.not. neko_device_mpi)
then
637 call mpi_allreduce(mpi_in_place,
cfl_f, 1, &
638 mpi_real_precision, mpi_max, neko_comm, ierr)
651 real(kind=rp),
intent(in) :: dt
652 real(kind=rp),
contiguous,
dimension(:,:,:,:),
intent(in) :: max_wave_speed
653 type(space_t),
intent(in) :: xh
654 type(coef_t),
intent(in) :: coef
655 integer,
intent(in) :: nelv, gdim
659 max_wave_speed, xh, coef, nelv, gdim)
671 real(kind=rp),
intent(in) :: dt
672 type(c_ptr),
intent(in) :: max_wave_speed
673 type(space_t),
intent(in) :: xh
674 type(coef_t),
intent(in) :: coef
675 integer,
intent(in) :: nelv, gdim
679 max_wave_speed, xh, coef, nelv, gdim)
691 real(kind=rp),
intent(in) :: dt
692 type(field_t),
intent(in) :: max_wave_speed
693 type(space_t),
intent(in) :: xh
694 type(coef_t),
intent(in) :: coef
695 integer,
intent(in) :: nelv, gdim
699 max_wave_speed, xh, coef, nelv, gdim)
716 real(kind=rp),
contiguous,
intent(inout) :: s11(:,:,:,:)
717 real(kind=rp),
contiguous,
intent(inout) :: s22(:,:,:,:)
718 real(kind=rp),
contiguous,
intent(inout) :: s33(:,:,:,:)
719 real(kind=rp),
contiguous,
intent(inout) :: s12(:,:,:,:)
720 real(kind=rp),
contiguous,
intent(inout) :: s13(:,:,:,:)
721 real(kind=rp),
contiguous,
intent(inout) :: s23(:,:,:,:)
722 real(kind=rp),
contiguous,
intent(in) :: u(:,:,:,:), v(:,:,:,:), w(:,:,:,:)
723 type(coef_t),
intent(in) :: coef
725 type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d, u_d, v_d, w_d
727 integer :: nelv, lxyz
732 if (neko_bcknd_device .eq. 1)
then
733 call neko_log%deprecated(
'Operator: strain_rate_r4, implicit device', &
734 '2.0.0',
'Please call strain_rate_d instead.')
735 s11_d = device_get_ptr(s11)
736 s22_d = device_get_ptr(s22)
737 s33_d = device_get_ptr(s33)
738 s12_d = device_get_ptr(s12)
739 s23_d = device_get_ptr(s23)
740 s13_d = device_get_ptr(s13)
741 u_d = device_get_ptr(u)
742 v_d = device_get_ptr(v)
743 w_d = device_get_ptr(w)
745 call dudxyz(s12_d, u_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
746 call dudxyz(s11_d, v_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
747 call device_add2(s12_d, s11_d, nelv*lxyz)
749 call dudxyz(s13_d, u_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
750 call dudxyz(s11_d, w_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
751 call device_add2(s13_d, s11_d, nelv*lxyz)
753 call dudxyz(s23_d, v_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
754 call dudxyz(s11_d, w_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
755 call device_add2(s23_d, s11_d, nelv*lxyz)
757 call dudxyz(s11_d, u_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
758 call dudxyz(s22_d, v_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
759 call dudxyz(s33_d, w_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
760 call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
761 call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
762 call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
764 call dudxyz(s12, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
765 call dudxyz(s11, v, coef%drdx, coef%dsdx, coef%dtdx, coef)
766 call add2(s12, s11, nelv*lxyz)
768 call dudxyz(s13, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
769 call dudxyz(s11, w, coef%drdx, coef%dsdx, coef%dtdx, coef)
770 call add2(s13, s11, nelv*lxyz)
772 call dudxyz(s23, v, coef%drdz, coef%dsdz, coef%dtdz, coef)
773 call dudxyz(s11, w, coef%drdy, coef%dsdy, coef%dtdy, coef)
774 call add2(s23, s11, nelv*lxyz)
776 call dudxyz(s11, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
777 call dudxyz(s22, v, coef%drdy, coef%dsdy, coef%dtdy, coef)
778 call dudxyz(s33, w, coef%drdz, coef%dsdz, coef%dtdz, coef)
779 call cmult(s12, 0.5_rp, nelv*lxyz)
780 call cmult(s13, 0.5_rp, nelv*lxyz)
781 call cmult(s23, 0.5_rp, nelv*lxyz)
800 type(c_ptr),
intent(inout) :: s11_d, s22_d, s33_d, s12_d, s13_d, s23_d
801 type(c_ptr),
intent(in) :: u_d, v_d, w_d
802 type(coef_t),
intent(in) :: coef
804 integer :: nelv, lxyz
809 call dudxyz(s12_d, u_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
810 call dudxyz(s11_d, v_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
811 call device_add2(s12_d, s11_d, nelv*lxyz)
813 call dudxyz(s13_d, u_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
814 call dudxyz(s11_d, w_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
815 call device_add2(s13_d, s11_d, nelv*lxyz)
817 call dudxyz(s23_d, v_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
818 call dudxyz(s11_d, w_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
819 call device_add2(s23_d, s11_d, nelv*lxyz)
821 call dudxyz(s11_d, u_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
822 call dudxyz(s22_d, v_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
823 call dudxyz(s33_d, w_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
824 call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
825 call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
826 call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
843 type(field_t),
intent(inout) :: s11, s22, s33, s12, s13, s23
844 type(field_t),
intent(in) :: u, v, w
845 type(coef_t),
intent(in) :: coef
848 n = coef%Xh%lxyz * coef%msh%nelv
850 if (neko_bcknd_device .eq. 1)
then
851 call dudxyz(s12%x_d, u%x_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
852 call dudxyz(s11%x_d, v%x_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
853 call device_add2(s12%x_d, s11%x_d, n)
855 call dudxyz(s13%x_d, u%x_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
856 call dudxyz(s11%x_d, w%x_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
857 call device_add2(s13%x_d, s11%x_d, n)
859 call dudxyz(s23%x_d, v%x_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
860 call dudxyz(s11%x_d, w%x_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
861 call device_add2(s23%x_d, s11%x_d, n)
863 call dudxyz(s11%x_d, u%x_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
864 call dudxyz(s22%x_d, v%x_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
865 call dudxyz(s33%x_d, w%x_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
866 call device_cmult(s12%x_d, 0.5_rp, n)
867 call device_cmult(s13%x_d, 0.5_rp, n)
868 call device_cmult(s23%x_d, 0.5_rp, n)
870 call dudxyz(s12%x, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
871 call dudxyz(s11%x, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
872 call add2(s12%x, s11%x, n)
874 call dudxyz(s13%x, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
875 call dudxyz(s11%x, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
876 call add2(s13%x, s11%x, n)
878 call dudxyz(s23%x, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
879 call dudxyz(s11%x, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
880 call add2(s23%x, s11%x, n)
882 call dudxyz(s11%x, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
883 call dudxyz(s22%x, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
884 call dudxyz(s33%x, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
885 call cmult(s12%x, 0.5_rp, n)
886 call cmult(s13%x, 0.5_rp, n)
887 call cmult(s23%x, 0.5_rp, n)
899 type(coef_t),
intent(in) :: coef
900 type(field_t),
intent(inout) ::
lambda2
901 type(field_t),
intent(in) :: u, v, w
903 if (neko_bcknd_sx .eq. 1)
then
904 call opr_sx_lambda2(
lambda2%x, u%x, v%x, w%x, coef)
905 else if (neko_bcknd_device .eq. 1)
then
906 call opr_device_lambda2(
lambda2%x_d, u%x_d, v%x_d, w%x_d, coef)
908 call opr_cpu_lambda2(
lambda2%x, u%x, v%x, w%x, coef)
924 type(space_t),
intent(inout) :: xh
925 type(coef_t),
intent(inout) :: coef
926 type(field_t),
intent(inout) :: cr, cs, ct
927 real(kind=rp),
dimension(Xh%lxyz, coef%msh%nelv), &
928 intent(in) :: cx, cy, cz
929 type(c_ptr) :: cx_d, cy_d, cz_d
931 if (neko_bcknd_sx .eq. 1)
then
932 call opr_sx_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
933 else if (neko_bcknd_xsmm .eq. 1)
then
934 call opr_xsmm_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
935 else if (neko_bcknd_device .eq. 1)
then
936 cx_d = device_get_ptr(cx)
937 cy_d = device_get_ptr(cy)
938 cz_d = device_get_ptr(cz)
939 call opr_device_set_convect_rst(cr%x_d, cs%x_d, ct%x_d, &
940 cx_d, cy_d, cz_d, xh, coef)
942 call opr_cpu_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
962 subroutine runge_kutta(phi, conv_k1, conv_k23, conv_k4, Xh_GLL, Xh_GL, &
963 coef, coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
964 type(space_t),
intent(in) :: xh_gll
965 type(space_t),
intent(inout) :: xh_gl
966 type(coef_t),
intent(in) :: coef
967 type(coef_t),
intent(inout) :: coef_gl
968 type(interpolator_t) :: gll_to_gl
969 real(kind=rp),
intent(inout) :: tau, dtau
970 integer,
intent(in) :: n, nel, n_gl
971 type(field_t),
intent(inout) :: phi
972 type(field_list_t) :: conv_k1, conv_k23, conv_k4
973 real(kind=rp) :: c1, c2, c3
974 type(field_t),
pointer :: u1, k1, k2, k3, k4
975 type(vector_t),
pointer :: u1_gl
976 integer :: ind(6), i, e
978 call neko_scratch_registry%request_field(u1, ind(1), .false.)
979 call neko_scratch_registry%request_field(k1, ind(2), .false.)
980 call neko_scratch_registry%request_field(k2, ind(3), .false.)
981 call neko_scratch_registry%request_field(k3, ind(4), .false.)
982 call neko_scratch_registry%request_field(k4, ind(5), .false.)
983 call neko_scratch_registry%request_vector(u1_gl, ind(6), n_gl, .false.)
989 if (neko_bcknd_device .eq. 1)
then
992 call device_invcol3(u1%x_d, phi%x_d, coef%B_d, n)
993 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
995 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
996 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
997 call device_col2(k1%x_d, coef%B_d, n)
1000 call device_add3s2(u1%x_d, phi%x_d, k1%x_d, c1, c2, n)
1001 call device_invcol2(u1%x_d, coef%B_d, n)
1002 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
1004 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
1005 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
1006 call device_col2(k2%x_d, coef%B_d, n)
1009 call device_add3s2(u1%x_d, phi%x_d, k2%x_d, c1, c2, n)
1010 call device_invcol2(u1%x_d, coef%B_d, n)
1011 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
1013 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
1014 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
1015 call device_col2(k3%x_d, coef%B_d, n)
1018 call device_add3s2(u1%x_d, phi%x_d, k3%x_d, c1, c3, n)
1019 call device_invcol2(u1%x_d, coef%B_d, n)
1020 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
1022 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
1023 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
1024 call device_col2(k4%x_d, coef%B_d, n)
1029 call device_add5s4(phi%x_d, k1%x_d, k2%x_d, k3%x_d, k4%x_d, &
1035 call invcol3(u1%x, phi%x, coef%B, n)
1036 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
1038 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
1039 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
1040 call col2(k1%x, coef%B, n)
1043 call add3s2(u1%x, phi%x, k1%x, c1, c2, n)
1044 call invcol2(u1%x, coef%B, n)
1045 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
1047 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
1048 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
1049 call col2(k2%x, coef%B, n)
1052 call add3s2(u1%x, phi%x, k2%x, c1, c2, n)
1053 call invcol2(u1%x, coef%B, n)
1054 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
1056 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
1057 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
1058 call col2(k3%x, coef%B, n)
1061 call add3s2(u1%x, phi%x, k3%x, c1, c3, n)
1062 call invcol2(u1%x, coef%B, n)
1063 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
1065 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
1066 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
1067 call col2(k4%x, coef%B, n)
1071 call add5s4(phi%x, k1%x, k2%x, k3%x, k4%x, c1, c2, c2, c1, n)
1074 call neko_scratch_registry%relinquish(ind)
1079 real(kind=rp),
contiguous,
dimension(:),
intent(inout) :: vx, vy, vz
1080 integer,
intent(in) :: idir
1081 type(coef_t),
intent(in) :: coef
1082 type(c_ptr) :: vx_d, vy_d, vz_d
1084 if (coef%cyclic .and. coef%cyc_msk(0) .gt. 1)
then
1085 if (neko_bcknd_device .eq. 1)
then
1086 call neko_log%deprecated(
'Operator: rotate_cyc_r1, implicit device', &
1087 '2.0.0',
'Please call rotate_cyc_d instead.')
1089 vx_d = device_get_ptr(vx)
1090 vy_d = device_get_ptr(vy)
1091 vz_d = device_get_ptr(vz)
1092 call opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
1094 call opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
1100 real(kind=rp),
contiguous,
dimension(:,:,:,:),
intent(inout) :: vx, vy, vz
1101 integer,
intent(in) :: idir
1102 type(coef_t),
intent(in) :: coef
1103 type(c_ptr) :: vx_d, vy_d, vz_d
1105 if (coef%cyclic .and. coef%cyc_msk(0) .gt. 1)
then
1106 if (neko_bcknd_device .eq. 1)
then
1107 call neko_log%deprecated(
'Operator: rotate_cyc_r4, implicit device', &
1108 '2.0.0',
'Please call rotate_cyc_d instead.')
1110 vx_d = device_get_ptr(vx)
1111 vy_d = device_get_ptr(vy)
1112 vz_d = device_get_ptr(vz)
1113 call opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
1115 call opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
1121 type(c_ptr),
intent(inout) :: vx_d, vy_d, vz_d
1122 integer,
intent(in) :: idir
1123 type(coef_t),
intent(in) :: coef
1125 if (coef%cyclic .and. coef%cyc_msk(0) .gt. 1)
then
1126 call opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
1131 type(field_t),
intent(inout) :: vx, vy, vz
1132 integer,
intent(in) :: idir
1133 type(coef_t),
intent(in) :: coef
1135 if (coef%cyclic .and. coef%cyc_msk(0) .gt. 1)
then
1136 if (neko_bcknd_device .eq. 1)
then
1137 call opr_device_rotate_cyc(vx%x_d, vy%x_d, vz%x_d, idir, coef)
1139 call opr_cpu_rotate_cyc_r4(vx%x, vy%x, vz%x, idir, coef)
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Compute CFL condition for compressible flow.
Compute the divergence of a vector field.
Compute derivative of a scalar field along a single direction.
Compute the gradient of a scalar field, multiplied by the mass matrix.
Apply cyclic boundary condition to a vector field.
Compute the strain rate tensor of a vector field.
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition $a = a + b$.
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns $a = c_1 b + c_2 c$.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c, $a = c \cdot a$.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector, $a = b$.
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division $a = b / c$.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication $a = a \cdot b$.
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns $a = a + c_1 b + c_2 c + c_3 d + c_4 e$.
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division $a = a / b$.
Device abstraction, common interface for various accelerators.
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public field_rzero(a, n)
Zero a real vector.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
type(log_t), public neko_log
Global log stream.
subroutine, public cmult(a, c, n)
Multiplication by constant c, $a = c \cdot a$.
subroutine, public invcol2(a, b, n)
Vector division $a = a / b$.
subroutine, public cadd(a, s, n)
Add a scalar to a vector, $a_i = a_i + s$.
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
subroutine, public add2(a, b, n)
Vector addition $a = a + b$.
subroutine, public invcol3(a, b, c, n)
Invert a vector, $a = b / c$.
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns $a = c_1 b + c_2 c$.
subroutine, public col2(a, b, n)
Vector multiplication $a = a \cdot b$.
subroutine, public copy(a, b, n)
Copy a vector, $a = b$.
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns $a = a + c_1 b + c_2 c + c_3 d + c_4 e$.
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_xsmm
integer, parameter, public i8
integer, parameter, public rp
Global precision used in computations.
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
subroutine, public ortho(x, glb_n_points, n)
Orthogonalize with respect to the vector (1,1,1,1,1,1...,1)^T.
subroutine strain_rate_d(s11_d, s22_d, s33_d, s12_d, s13_d, s23_d, u_d, v_d, w_d, coef)
Compute the strain rate tensor, i.e. 0.5 * (du_i/dx_j + du_j/dx_i).
subroutine dudxyz_r4(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
real(kind=rp) function cfl_compressible_r4(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine div_r4(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine grad_r4(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
subroutine strain_rate_r4(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e. 0.5 * (du_i/dx_j + du_j/dx_i).
subroutine convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the scalar field u, used in the OIFS scheme.
subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function cfl_compressible_d(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine grad_d(ux_d, uy_d, uz_d, u_d, coef)
Compute the gradient of a scalar field.
real(kind=rp) function cfl_compressible_f(dt, max_wave_speed, xh, coef, nelv, gdim)
real(kind=rp) function cfl_f(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
subroutine div_d(res_d, ux_d, uy_d, uz_d, coef)
Compute the divergence of a vector field.
real(kind=rp) function cfl_r4(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine strain_rate_f(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e. 0.5 * (du_i/dx_j + du_j/dx_i).
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
subroutine rotate_cyc_d(vx_d, vy_d, vz_d, idir, coef)
subroutine dudxyz_f(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine rotate_cyc_f(vx, vy, vz, idir, coef)
subroutine, public runge_kutta(phi, conv_k1, conv_k23, conv_k4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, coef)
subroutine, public opr_device_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, coef)
real(kind=rp) function, public opr_device_cfl(dt, u_d, v_d, w_d, xh, coef, nelv, gdim)
subroutine, public opr_device_conv1(du_d, u_d, vx_d, vy_d, vz_d, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_lambda2(lambda2_d, u_d, v_d, w_d, coef)
subroutine, public opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
subroutine, public opr_device_opgrad(ux_d, uy_d, uz_d, u_d, coef)
Operators SX-Aurora backend.
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Operators libxsmm backend.
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
field_list_t, To be able to group fields together
Interpolation between two space::space_t.
The function space for the SEM solution fields.