d6/ddf/gs__mpi_8f90_source.html

! Copyright (c) 2020-2022, The Neko Authors

! All rights reserved.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

!   * Redistributions of source code must retain the above copyright

!     notice, this list of conditions and the following disclaimer.

!

!   * Redistributions in binary form must reproduce the above

!     copyright notice, this list of conditions and the following

!     disclaimer in the documentation and/or other materials provided

!     with the distribution.

!

!   * Neither the name of the authors nor the names of its

!     contributors may be used to endorse or promote products derived

!     from this software without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN

! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

! POSSIBILITY OF SUCH DAMAGE.

!

module gs_mpi

  use num_types, only : rp

  use gs_comm, only : gs_comm_t, gs_comm_mpi, gs_comm_mpigpu

  use gs_ops, only : gs_op_add, gs_op_max, gs_op_min, gs_op_mul

  use stack, only : stack_i4_t

  use comm

  use, intrinsic :: iso_c_binding

  !$ use omp_lib

  implicit none

  private


  !> MPI buffer for non-blocking operations
  type, private :: gs_comm_mpi_t
     !> Status object filled in when a pending receive is tested
     type(mpi_status) :: status
     !> Request handle of the posted non-blocking operation
     type(mpi_request) :: request
     !> Completion flag, cleared when the operation is posted and
     !! set by the completion test
     logical :: flag
     !> Values being sent or received
     real(kind=rp), dimension(:), allocatable :: data
  end type gs_comm_mpi_t


  !> Gather-scatter communication using MPI
  type, public, extends(gs_comm_t) :: gs_mpi_t
     !> One buffer per entry of the send order
     type(gs_comm_mpi_t), dimension(:), allocatable :: send_buf
     !> One buffer per entry of the receive order
     type(gs_comm_mpi_t), dimension(:), allocatable :: recv_buf
   contains
     !> Allocate the send and receive buffers
     procedure, pass(this) :: init => gs_mpi_init
     !> Release the buffers and the inherited order/dof data
     procedure, pass(this) :: free => gs_mpi_free
     !> Pack and post the non-blocking sends
     procedure, pass(this) :: nbsend => gs_nbsend_mpi
     !> Post the non-blocking receives
     procedure, pass(this) :: nbrecv => gs_nbrecv_mpi
     !> Complete all pending operations and reduce received data
     procedure, pass(this) :: nbwait => gs_nbwait_mpi
  end type gs_mpi_t


contains


  !> Initialise MPI based communication method
  !! @param send_pe Stack whose entries index `this%send_dof`; one send
  !! buffer is allocated per entry.
  !! @param recv_pe Stack whose entries index `this%recv_dof`; one
  !! receive buffer is allocated per entry.
  subroutine gs_mpi_init(this, send_pe, recv_pe)
    class(gs_mpi_t), intent(inout) :: this
    type(stack_i4_t), intent(inout) :: send_pe
    type(stack_i4_t), intent(inout) :: recv_pe
    ! Local names chosen so that they do not hide the kind parameter rp
    integer, pointer :: send_ids(:), recv_ids(:)
    integer :: i, n_send, n_recv

    call this%init_order(send_pe, recv_pe)

    n_send = send_pe%size()
    n_recv = recv_pe%size()

    allocate(this%send_buf(n_send))
    allocate(this%recv_buf(n_recv))

    ! Each send buffer holds as many values as the matching dof list
    send_ids => send_pe%array()
    do i = 1, n_send
       allocate(this%send_buf(i)%data(this%send_dof(send_ids(i))%size()))
    end do

    ! Same sizing rule for the receive side
    recv_ids => recv_pe%array()
    do i = 1, n_recv
       allocate(this%recv_buf(i)%data(this%recv_dof(recv_ids(i))%size()))
    end do

  end subroutine gs_mpi_init


  !> Deallocate MPI based communication method
  !! Safe to call on a partially initialised object: every
  !! deallocation is guarded by an `allocated` check.
  subroutine gs_mpi_free(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: k

    ! Send side: release each payload, then the buffer array itself
    if (allocated(this%send_buf)) then
       do k = 1, size(this%send_buf)
          if (allocated(this%send_buf(k)%data)) deallocate(this%send_buf(k)%data)
       end do
       deallocate(this%send_buf)
    end if

    ! Receive side: same procedure
    if (allocated(this%recv_buf)) then
       do k = 1, size(this%recv_buf)
          if (allocated(this%recv_buf(k)%data)) deallocate(this%recv_buf(k)%data)
       end do
       deallocate(this%recv_buf)
    end if

    ! Release the data owned by the parent type
    call this%free_order()
    call this%free_dofs()

  end subroutine gs_mpi_free


  !> Post non-blocking send operations
  !! For every entry of the send order, the values of `u` at the
  !! indices listed in the matching `send_dof` list are packed into the
  !! send buffer and handed to `mpi_isend` on `neko_comm`.
  !! @param u Data vector the outgoing values are gathered from
  !! @param n Length of `u`
  !! @param deps Not referenced by this routine
  !! @param strm Not referenced by this routine
  subroutine gs_nbsend_mpi(this, u, n, deps, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: deps
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, ierr, dst, tag, n_dof
    integer, pointer :: dof(:)

    ! The MPI tag is the OpenMP thread number (0 without OpenMP)
    tag = 0
    !$ tag = omp_get_thread_num()

    do i = 1, size(this%send_pe)
       dst = this%send_pe(i)
       dof => this%send_dof(dst)%array()
       n_dof = this%send_dof(dst)%size()

       ! Pack the outgoing values for this destination
       do concurrent (j = 1:n_dof)
          this%send_buf(i)%data(j) = u(dof(j))
       end do

       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(send_data => this%send_buf(i)%data)
         call mpi_isend(send_data, size(send_data), &
              mpi_real_precision, dst, tag, &
              neko_comm, this%send_buf(i)%request, ierr)
       end associate

       ! Mark the request as pending for the completion test
       this%send_buf(i)%flag = .false.
    end do

  end subroutine gs_nbsend_mpi


  !> Post non-blocking receive operations
  !! One `mpi_irecv` on `neko_comm` is posted per entry of the receive
  !! order, receiving directly into the matching receive buffer.
  subroutine gs_nbrecv_mpi(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i, ierr, src, tag

    ! The MPI tag is the OpenMP thread number (0 without OpenMP)
    tag = 0
    !$ tag = omp_get_thread_num()

    do i = 1, size(this%recv_pe)
       src = this%recv_pe(i)

       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(recv_data => this%recv_buf(i)%data)
         call mpi_irecv(recv_data, size(recv_data), &
              mpi_real_precision, src, tag, &
              neko_comm, this%recv_buf(i)%request, ierr)
       end associate

       ! Mark the request as pending for the completion test
       this%recv_buf(i)%flag = .false.
    end do

  end subroutine gs_nbrecv_mpi


  !> Wait for non-blocking operations
  !! Polls the pending receives with `mpi_test`; as soon as one has
  !! completed, its buffer is combined into `u` with the reduction
  !! selected by `op`. Afterwards the pending sends are polled until
  !! all of them have completed.
  !! @param u Data vector the received values are reduced into
  !! @param n Length of `u`
  !! @param op Reduction selector: `gs_op_add`, `gs_op_mul`,
  !! `gs_op_min` or `gs_op_max`. Any other value leaves `u` untouched.
  !! @param strm Not referenced by this routine
  subroutine gs_nbwait_mpi(this, u, n, op, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, src, ierr
    ! NOTE(review): no intent is declared for op; presumably this
    ! mirrors the interface declared in gs_comm_t -- confirm before
    ! adding intent(in), since the characteristics must match.
    integer :: op
    integer , pointer :: sp(:)
    integer :: nreqs, nrecv

    nreqs = size(this%recv_pe)

    ! Poll until every receive has completed and has been reduced
    do while (nreqs .gt. 0)
       do i = 1, size(this%recv_pe)
          if (.not. this%recv_buf(i)%flag) then
             call mpi_test(this%recv_buf(i)%request, this%recv_buf(i)%flag, &
                  this%recv_buf(i)%status, ierr)
             if (this%recv_buf(i)%flag) then
                nreqs = nreqs - 1
                src = this%recv_pe(i)
                sp => this%recv_dof(src)%array()
                ! The trip count must come from the receive dof list:
                ! it is the list that sp points to and the one the
                ! receive buffer was sized from at initialisation.
                nrecv = this%recv_dof(src)%size()
                select case(op)
                case (gs_op_add)
                   !NEC$ IVDEP
                   do concurrent (j = 1:nrecv)
                      u(sp(j)) = u(sp(j)) + this%recv_buf(i)%data(j)
                   end do
                case (gs_op_mul)
                   !NEC$ IVDEP
                   do concurrent (j = 1:nrecv)
                      u(sp(j)) = u(sp(j)) * this%recv_buf(i)%data(j)
                   end do
                case (gs_op_min)
                   !NEC$ IVDEP
                   do concurrent (j = 1:nrecv)
                      u(sp(j)) = min(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                case (gs_op_max)
                   !NEC$ IVDEP
                   do concurrent (j = 1:nrecv)
                      u(sp(j)) = max(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                end select
             end if
          end if
       end do
    end do

    ! Poll until every send has completed; no status is needed here
    nreqs = size(this%send_pe)
    do while (nreqs .gt. 0)
       do i = 1, size(this%send_pe)
          if (.not. this%send_buf(i)%flag) then
             call mpi_test(this%send_buf(i)%request, this%send_buf(i)%flag, &
                  mpi_status_ignore, ierr)
             if (this%send_buf(i)%flag) nreqs = nreqs - 1
          end if
       end do
    end do

  end subroutine gs_nbwait_mpi


end module gs_mpi

comm
Definition comm.F90:1

comm::neko_comm
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16

comm::mpi_real_precision
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:23

gs_comm
Defines a gather-scatter communication method.
Definition gs_comm.f90:34

gs_comm::gs_comm_mpigpu
integer, parameter, public gs_comm_mpigpu
Definition gs_comm.f90:42

gs_comm::gs_comm_mpi
integer, parameter, public gs_comm_mpi
Definition gs_comm.f90:42

gs_mpi
Defines MPI gather-scatter communication.
Definition gs_mpi.f90:34

gs_mpi::gs_nbwait_mpi
subroutine gs_nbwait_mpi(this, u, n, op, strm)
Wait for non-blocking operations.
Definition gs_mpi.f90:176

gs_mpi::gs_nbrecv_mpi
subroutine gs_nbrecv_mpi(this)
Post non-blocking receive operations.
Definition gs_mpi.f90:154

gs_mpi::gs_mpi_init
subroutine gs_mpi_init(this, send_pe, recv_pe)
Initialise MPI based communication method.
Definition gs_mpi.f90:69

gs_mpi::gs_mpi_free
subroutine gs_mpi_free(this)
Deallocate MPI based communication method.
Definition gs_mpi.f90:95

gs_mpi::gs_nbsend_mpi
subroutine gs_nbsend_mpi(this, u, n, deps, strm)
Post non-blocking send operations.
Definition gs_mpi.f90:123

gs_ops
Defines Gather-scatter operations.
Definition gs_ops.f90:34

gs_ops::gs_op_add
integer, parameter, public gs_op_add
Definition gs_ops.f90:36

gs_ops::gs_op_max
integer, parameter, public gs_op_max
Definition gs_ops.f90:36

gs_ops::gs_op_min
integer, parameter, public gs_op_min
Definition gs_ops.f90:36

gs_ops::gs_op_mul
integer, parameter, public gs_op_mul
Definition gs_ops.f90:36

num_types
Definition num_types.f90:1

num_types::rp
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12

stack
Implements a dynamic stack ADT.
Definition stack.f90:35

gs_comm::gs_comm_t
Gather-scatter communication method.
Definition gs_comm.f90:45

gs_mpi::gs_comm_mpi_t
MPI buffer for non-blocking operations.
Definition gs_mpi.f90:46

gs_mpi::gs_mpi_t
Gather-scatter communication using MPI.
Definition gs_mpi.f90:54

stack::stack_i4_t
Integer based stack.
Definition stack.f90:63

max
#define max(a, b)
Definition tensor.cu:40