! Source listing: d6/d4c/opr__cpu_8f90_source.html

! Copyright (c) 2021-2024, The Neko Authors

! All rights reserved.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

!   * Redistributions of source code must retain the above copyright

!     notice, this list of conditions and the following disclaimer.

!

!   * Redistributions in binary form must reproduce the above

!     copyright notice, this list of conditions and the following

!     disclaimer in the documentation and/or other materials provided

!     with the distribution.

!

!   * Neither the name of the authors nor the names of its

!     contributors may be used to endorse or promote products derived

!     from this software without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN

! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

! POSSIBILITY OF SUCH DAMAGE.

!

module opr_cpu

  use num_types, only : rp, dp, xp

  use space, only : space_t

  use coefs, only : coef_t

  use math, only : sub3, copy, rzero, pi

  use field, only : field_t

  use gather_scatter, only : gs_op_add

  use interpolation, only : interpolator_t

  use mathops, only : opcolv

  implicit none

  private


  public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, &

       opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2, &

       opr_cpu_convect_scalar, opr_cpu_set_convect_rst


  ! Explicit interfaces of the separate module procedures made public by
  ! this module. Only the interfaces are declared here; the procedure
  ! bodies are not part of this listing.
  interface

     !> Derivative kernel used by opr_cpu_curl.
     !! In opr_cpu_curl it is called with the metric factors of one
     !! physical direction (e.g. drdy, dsdy, dtdy) and the result in `du`
     !! is used as the derivative of `u` in that direction.
     !! @param du Result array (modified).
     !! @param u Input array.
     !! @param dr, ds, dt Metric factor arrays.
     !! @param coef Coefficients; provides the array extents.
     module subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
       type(coef_t), intent(in), target :: coef
       real(kind=rp), intent(inout) :: &
            du(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv)
       real(kind=rp), intent(in) :: &
            u(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv), &
            dr(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv), &
            ds(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv), &
            dt(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv)
     end subroutine opr_cpu_dudxyz

     !> Gradient kernel on the element range e_start..e_end.
     !! opr_cpu_lambda2 calls it one element at a time and uses
     !! (ux, uy, uz) as the three components of the gradient of `u`.
     !! All arrays hold e_end - e_start + 1 elements of lxyz points each.
     module subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
       type(coef_t), intent(in) :: coef
       integer, intent(in) :: e_start, e_end
       real(kind=rp), dimension(coef%Xh%lxyz, e_end - e_start + 1), &
            intent(inout) :: ux, uy, uz
       real(kind=rp), dimension(coef%Xh%lxyz, e_end - e_start + 1), &
            intent(in) :: u
     end subroutine opr_cpu_opgrad

     !> Interface of the `cdtp` kernel on the element range
     !! e_start..e_end. `dtx` and `x` may be modified; `dr`, `ds`, `dt`
     !! are read only.
     !! NOTE(review): the operation itself is not visible in this file.
     module subroutine opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, e_start, e_end)
       type(coef_t), intent(in) :: coef
       integer, intent(in) :: e_start, e_end
       real(kind=rp), dimension(coef%Xh%lxyz, e_end - e_start + 1), &
            intent(inout) :: dtx, x
       real(kind=rp), dimension(coef%Xh%lxyz, e_end - e_start + 1), &
            intent(in) :: dr, ds, dt
     end subroutine opr_cpu_cdtp

     !> Interface of the `conv1` kernel on the element range
     !! e_start..e_end. `du` is addressed with a flattened (lxyz) first
     !! dimension while `u`, `vx`, `vy`, `vz` use (lx, ly, lz).
     !! NOTE(review): presumably the convective term of `u` advected by
     !! (vx, vy, vz) -- confirm against the implementation.
     module subroutine opr_cpu_conv1(du, u, vx, vy, vz, Xh, &
          coef, e_start, e_end)
       type(space_t), intent(in) :: Xh
       type(coef_t), intent(in) :: coef
       integer, intent(in) :: e_start, e_end
       real(kind=rp), dimension(Xh%lxyz, e_end - e_start + 1), &
            intent(inout) :: du
       real(kind=rp), &
            dimension(Xh%lx, Xh%ly, Xh%lz, e_end - e_start + 1), &
            intent(inout) :: u, vx, vy, vz
     end subroutine opr_cpu_conv1

     !> Interface of the scalar convection kernel working with two
     !! spaces (Xh_GLL, Xh_GL), their coefficients and an interpolator
     !! between them.
     !! NOTE(review): `u` is dimensioned (lx, lx, lx) of Xh_GL whereas
     !! `du` uses (lx, ly, lz) of Xh_GLL. Kept as is so that it matches
     !! the procedure body, which is not visible here -- confirm that the
     !! repeated lx is intended.
     module subroutine opr_cpu_convect_scalar(du, u, c, Xh_GLL, Xh_GL, &
          coef_GLL, coef_GL, GLL_to_GL)
       type(space_t), intent(in) :: Xh_GL, Xh_GLL
       type(coef_t), intent(in) :: coef_GLL, coef_GL
       type(interpolator_t), intent(inout) :: GLL_to_GL
       real(kind=rp), &
            dimension(Xh_GLL%lx, Xh_GLL%ly, Xh_GLL%lz, coef_GL%msh%nelv), &
            intent(inout) :: du
       real(kind=rp), &
            dimension(Xh_GL%lx, Xh_GL%lx, Xh_GL%lx, coef_GL%msh%nelv), &
            intent(inout) :: u
       real(kind=rp), dimension(Xh_GL%lxyz, coef_GL%msh%nelv, 3), &
            intent(inout) :: c
     end subroutine opr_cpu_convect_scalar

     !> Interface of the kernel that fills (cr, cs, ct) from (cx, cy, cz).
     !! NOTE(review): presumably maps the convecting field to the
     !! reference (r, s, t) directions -- confirm against the
     !! implementation.
     module subroutine opr_cpu_set_convect_rst(cr, cs, ct, cx, cy, cz, &
          Xh, coef)
       type(space_t), intent(inout) :: Xh
       type(coef_t), intent(inout) :: coef
       real(kind=rp), intent(inout) :: &
            cr(Xh%lxyz, coef%msh%nelv), &
            cs(Xh%lxyz, coef%msh%nelv), &
            ct(Xh%lxyz, coef%msh%nelv)
       real(kind=rp), intent(in) :: &
            cx(Xh%lxyz, coef%msh%nelv), &
            cy(Xh%lxyz, coef%msh%nelv), &
            cz(Xh%lxyz, coef%msh%nelv)
     end subroutine opr_cpu_set_convect_rst

  end interface


contains


  !> Compute the curl (w1, w2, w3) of the vector field (u1, u2, u3).
  !! In 3D:
  !!   w1 = du3/dy - du2/dz, w2 = du1/dz - du3/dx, w3 = du2/dx - du1/dy.
  !! When the mesh dimension is not 3 the z-derivatives are dropped:
  !!   w1 = du3/dy, w2 = -du3/dx, w3 = du2/dx - du1/dy.
  !! The result is then multiplied by `c_Xh%B`, summed with the
  !! gather-scatter `gs_op_add` operation and multiplied by `c_Xh%Binv`.
  !! @param w1, w2, w3 Components of the curl (output).
  !! @param u1, u2, u3 Components of the input vector field.
  !! @param work1, work2 Scratch fields, overwritten.
  !! @param c_Xh Coefficients (metric factors, B, Binv, gather-scatter).
  subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
    type(field_t), intent(inout) :: w1
    type(field_t), intent(inout) :: w2
    type(field_t), intent(inout) :: w3
    type(field_t), intent(in) :: u1
    type(field_t), intent(in) :: u2
    type(field_t), intent(in) :: u3
    type(field_t), intent(inout) :: work1
    type(field_t), intent(inout) :: work2
    type(coef_t), intent(in) :: c_Xh
    integer :: gdim, n

    n = w1%dof%size()
    gdim = c_Xh%msh%gdim

    ! First component: work1 = dw/dy ; work2 = dv/dz
    call opr_cpu_dudxyz(work1%x, u3%x, c_Xh%drdy, c_Xh%dsdy, c_Xh%dtdy, c_Xh)
    if (gdim .eq. 3) then
       call opr_cpu_dudxyz(work2%x, u2%x, c_Xh%drdz, c_Xh%dsdz, &
                           c_Xh%dtdz, c_Xh)
       call sub3(w1%x, work1%x, work2%x, n)
    else
       ! No z-derivative available: w1 = dw/dy
       call copy(w1%x, work1%x, n)
    end if

    ! Second component: work1 = du/dz ; work2 = dw/dx
    if (gdim .eq. 3) then
       call opr_cpu_dudxyz(work1%x, u1%x, c_Xh%drdz, c_Xh%dsdz, &
                           c_Xh%dtdz, c_Xh)
       call opr_cpu_dudxyz(work2%x, u3%x, c_Xh%drdx, c_Xh%dsdx, &
                           c_Xh%dtdx, c_Xh)
       call sub3(w2%x, work1%x, work2%x, n)
    else
       ! du/dz is taken as zero, so w2 = -dw/dx
       call rzero(work1%x, n)
       call opr_cpu_dudxyz(work2%x, u3%x, c_Xh%drdx, c_Xh%dsdx, &
                           c_Xh%dtdx, c_Xh)
       call sub3(w2%x, work1%x, work2%x, n)
    end if

    ! Third component: work1 = dv/dx ; work2 = du/dy
    call opr_cpu_dudxyz(work1%x, u2%x, c_Xh%drdx, c_Xh%dsdx, c_Xh%dtdx, c_Xh)
    call opr_cpu_dudxyz(work2%x, u1%x, c_Xh%drdy, c_Xh%dsdy, c_Xh%dtdy, c_Xh)
    call sub3(w3%x, work1%x, work2%x, n)

    !! BC dependent, needs to change if cyclic

    ! Weight with B, add up the contributions through gather-scatter and
    ! rescale with Binv.
    ! NOTE(review): presumably a B-weighted average over points shared
    ! between elements -- confirm against opcolv and gs_h%op.
    call opcolv(w1%x, w2%x, w3%x, c_Xh%B, gdim, n)
    call c_Xh%gs_h%op(w1, gs_op_add)
    call c_Xh%gs_h%op(w2, gs_op_add)
    call c_Xh%gs_h%op(w3, gs_op_add)
    call opcolv(w1%x, w2%x, w3%x, c_Xh%Binv, gdim, n)

  end subroutine opr_cpu_curl


  !> Compute the maximum CFL number over all points of the local elements.
  !! At every point the velocity (u, v, w) is combined with the metric
  !! factors of `coef` and scaled by `coef%jacinv` to give the components
  !! (ur, us, ut). The local value is
  !!   |dt*ur*dr_inv(i)| + |dt*us*ds_inv(j)| + |dt*ut*dt_inv(k)|
  !! and the largest such value is returned. When `gdim` is not 3 only
  !! the plane k = 1 and the (r, s) contributions are used.
  !! @param dt Time-step size.
  !! @param u, v, w Velocity components, shape (lx, ly, lz, nelv).
  !! @param Xh Function space; provides lx, ly, lz and dr_inv, ds_inv,
  !! dt_inv.
  !! @param coef Coefficients; provides the metric factors and jacinv.
  !! @param nelv Number of elements.
  !! @param gdim Geometric dimension.
  !! @return cfl Maximum of the local values; 0 if there are no points.
  function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
    type(space_t), intent(in) :: Xh
    type(coef_t), intent(in) :: coef
    integer, intent(in) :: nelv, gdim
    real(kind=rp), intent(in) :: dt
    real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: &
         u, v, w
    real(kind=rp) :: cflr, cfls, cflt, cflm
    real(kind=rp) :: ur, us, ut
    real(kind=rp) :: cfl
    integer :: i, j, k, e

    ! Literal in the working precision instead of a double-precision 0d0
    cfl = 0.0_rp

    if (gdim .eq. 3) then
       do e = 1, nelv
          do k = 1, Xh%lz
             do j = 1, Xh%ly
                do i = 1, Xh%lx
                   ur = ( u(i,j,k,e)*coef%drdx(i,j,k,e) &
                        + v(i,j,k,e)*coef%drdy(i,j,k,e) &
                        + w(i,j,k,e)*coef%drdz(i,j,k,e) ) &
                        * coef%jacinv(i,j,k,e)
                   us = ( u(i,j,k,e)*coef%dsdx(i,j,k,e) &
                        + v(i,j,k,e)*coef%dsdy(i,j,k,e) &
                        + w(i,j,k,e)*coef%dsdz(i,j,k,e) ) &
                        * coef%jacinv(i,j,k,e)
                   ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
                        + v(i,j,k,e)*coef%dtdy(i,j,k,e) &
                        + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) &
                        * coef%jacinv(i,j,k,e)

                   cflr = abs(dt*ur*Xh%dr_inv(i))
                   cfls = abs(dt*us*Xh%ds_inv(j))
                   cflt = abs(dt*ut*Xh%dt_inv(k))

                   cflm = cflr + cfls + cflt
                   cfl = max(cfl, cflm)
                end do
             end do
          end do
       end do
    else
       ! Only the k = 1 plane and the (r, s) directions contribute
       do e = 1, nelv
          do j = 1, Xh%ly
             do i = 1, Xh%lx
                ur = ( u(i,j,1,e)*coef%drdx(i,j,1,e) &
                     + v(i,j,1,e)*coef%drdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)
                us = ( u(i,j,1,e)*coef%dsdx(i,j,1,e) &
                     + v(i,j,1,e)*coef%dsdy(i,j,1,e) ) * coef%jacinv(i,j,1,e)

                cflr = abs(dt*ur*Xh%dr_inv(i))
                cfls = abs(dt*us*Xh%ds_inv(j))

                cflm = cflr + cfls
                cfl = max(cfl, cflm)
             end do
          end do
       end do
    end if

  end function opr_cpu_cfl


  !> Compute a lambda2 field from the velocity field (u, v, w).
  !! For every point the velocity gradient is split into its symmetric
  !! part S and antisymmetric part O, the symmetric matrix A = S*S + O*O
  !! is formed, and its three real eigenvalues are obtained from the
  !! trigonometric solution of the characteristic cubic
  !!   x**3 + b*x**2 + c*x + d = 0.
  !! The median eigenvalue divided by `coef%B**2` is stored in `lambda2`.
  !! @param lambda2 Output field.
  !! @param u, v, w Velocity components.
  !! @param coef Coefficients; provides the space, the mesh and B.
  subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
    type(coef_t), intent(in) :: coef
    type(field_t), intent(inout) :: lambda2
    type(field_t), intent(in) :: u, v, w
    ! grad(:, a, b) holds the b-th gradient component of velocity
    ! component a for the element being processed
    real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
    integer :: e, i
    real(kind=xp) :: eigen(3), b, c, d, q, r, theta, l2
    real(kind=xp) :: sqrt_mq, denom
    real(kind=xp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
    real(kind=xp) :: a11, a22, a33, a12, a13, a23

    do e = 1, coef%msh%nelv
       ! Gradients of u, v and w on element e only (e_start = e_end = e)
       call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
            u%x(1,1,1,e), coef,e,e)
       call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
            v%x(1,1,1,e), coef,e,e)
       call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
            w%x(1,1,1,e), coef,e,e)

       do i = 1, coef%Xh%lxyz
          ! Symmetric part S of the velocity gradient
          s11 = grad(i,1,1)
          s22 = grad(i,2,2)
          s33 = grad(i,3,3)

          s12 = 0.5_xp*(grad(i,1,2) + grad(i,2,1))
          s13 = 0.5_xp*(grad(i,1,3) + grad(i,3,1))
          s23 = 0.5_xp*(grad(i,2,3) + grad(i,3,2))

          ! Antisymmetric part O of the velocity gradient
          o12 = 0.5_xp*(grad(i,1,2) - grad(i,2,1))
          o13 = 0.5_xp*(grad(i,1,3) - grad(i,3,1))
          o23 = 0.5_xp*(grad(i,2,3) - grad(i,3,2))

          ! Upper triangle of the symmetric matrix A = S*S + O*O
          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
          a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23
          a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23

          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23

          ! Coefficients of the characteristic polynomial of A
          b = -(a11 + a22 + a33)
          c = -(a12*a12 + a13*a13 + a23*a23 &
               - a11 * a22 - a11 * a33 - a22 * a33)
          d = -(2.0_xp * a12 * a13 * a23 - a11 * a23*a23 &
               - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33)

          q = (3.0_xp * c - b*b) / 9.0_xp
          r = (9.0_xp * c * b - 27.0_xp * d - 2.0_xp * b*b*b) / 54.0_xp

          ! For a symmetric matrix q <= 0 and |r| <= sqrt(-q**3) hold in
          ! exact arithmetic. Guard against q >= 0 (all eigenvalues equal,
          ! e.g. a vanishing gradient) and against round-off pushing the
          ! acos argument outside [-1, 1]; both would give NaN.
          if (q .lt. 0.0_xp) then
             sqrt_mq = sqrt(-q)
             denom = sqrt(-q*q*q)
          else
             sqrt_mq = 0.0_xp
             denom = 0.0_xp
          end if

          if (denom .gt. 0.0_xp) then
             theta = acos(max(-1.0_xp, min(1.0_xp, r / denom)))
          else
             theta = 0.0_xp
          end if

          eigen(1) = 2.0_xp * sqrt_mq * cos(theta / 3.0_xp) - b / 3.0_xp
          eigen(2) = 2.0_xp * sqrt_mq &
               * cos((theta + 2.0_xp * pi) / 3.0_xp) - b / 3.0_xp
          eigen(3) = 2.0_xp * sqrt_mq &
               * cos((theta + 4.0_xp * pi) / 3.0_xp) - b / 3.0_xp

          ! Median of the three eigenvalues. min/max picks exactly one
          ! value even when eigenvalues coincide, whereas summing one
          ! 0/1 mask per eigenvalue would count tied values several times.
          l2 = max(min(eigen(1), eigen(2)), &
               min(max(eigen(1), eigen(2)), eigen(3)))

          ! NOTE(review): i runs over all lxyz points through the first
          ! index of rank-4 arrays; this relies on contiguous storage and
          ! will trip run-time bounds checking if the first extent is lx.
          lambda2%x(i,1,1,e) = l2/(real(coef%B(i,1,1,e)**2,xp))
       end do
    end do

  end subroutine opr_cpu_lambda2


end module opr_cpu

real
double real
Definition device_config.h:12

coefs
Coefficients.
Definition coef.f90:34

field
Defines a field.
Definition field.f90:34

gather_scatter
Gather-scatter.
Definition gather_scatter.f90:34

interpolation
Routines to interpolate between different spaces.
Definition interpolation.f90:34

lambda2
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37

math
Definition math.f90:60

math::pi
real(kind=rp), parameter, public pi
Definition math.f90:75

math::sub3
subroutine, public sub3(a, b, c, n)
Vector subtraction $a = b - c$.
Definition math.f90:641

math::copy
subroutine, public copy(a, b, n)
Copy a vector $a = b$.
Definition math.f90:238

math::rzero
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194

mathops
Collection of vector field operations operating on  and . Note that in general the indices  and ....
Definition mathops.f90:65

mathops::opcolv
subroutine, public opcolv(a1, a2, a3, c, gdim, n)
Definition mathops.f90:97

num_types
Definition num_types.f90:1

num_types::xp
integer, parameter, public xp
Definition num_types.f90:14

num_types::dp
integer, parameter, public dp
Definition num_types.f90:9

num_types::rp
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12

opr_cpu
Operators CPU backend.
Definition opr_cpu.f90:34

opr_cpu::opr_cpu_curl
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:124

opr_cpu::opr_cpu_cfl
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:175

opr_cpu::opr_cpu_lambda2
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:235

space
Defines a function space.
Definition space.f90:34

coefs::coef_t
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55

field::field_t
Definition field.f90:47

interpolation::interpolator_t
Interpolation between two space::space_t.
Definition interpolation.f90:53

space::space_t
The function space for the SEM solution fields.
Definition space.f90:62

max
#define max(a, b)
Definition tensor.cu:40