Neko 0.9.0
A portable framework for high-order spectral element flow simulations
gs_mpi.f90
! Copyright (c) 2020-2022, The Neko Authors
! All rights reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
!
!   * Redistributions of source code must retain the above copyright
!     notice, this list of conditions and the following disclaimer.
!
!   * Redistributions in binary form must reproduce the above
!     copyright notice, this list of conditions and the following
!     disclaimer in the documentation and/or other materials provided
!     with the distribution.
!
!   * Neither the name of the authors nor the names of its
!     contributors may be used to endorse or promote products derived
!     from this software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!> Defines MPI gather-scatter communication
module gs_mpi
  use num_types, only : rp
  use gs_comm, only : gs_comm_t
  use gs_ops
  use stack, only : stack_i4_t
  use comm
  use, intrinsic :: iso_c_binding
  !$ use omp_lib
  implicit none
  private

  !> MPI buffer for non-blocking operations
  type, private :: gs_comm_mpi_t
     type(mpi_status) :: status
     type(mpi_request) :: request
     logical :: flag
     real(kind=rp), allocatable :: data(:)
  end type gs_comm_mpi_t

  !> Gather-scatter communication using MPI
  type, public, extends(gs_comm_t) :: gs_mpi_t
     type(gs_comm_mpi_t), allocatable :: send_buf(:)
     type(gs_comm_mpi_t), allocatable :: recv_buf(:)
   contains
     procedure, pass(this) :: init => gs_mpi_init
     procedure, pass(this) :: free => gs_mpi_free
     procedure, pass(this) :: nbsend => gs_nbsend_mpi
     procedure, pass(this) :: nbrecv => gs_nbrecv_mpi
     procedure, pass(this) :: nbwait => gs_nbwait_mpi
  end type gs_mpi_t

contains

  !> Initialise MPI based communication method
  subroutine gs_mpi_init(this, send_pe, recv_pe)
    class(gs_mpi_t), intent(inout) :: this
    type(stack_i4_t), intent(inout) :: send_pe
    type(stack_i4_t), intent(inout) :: recv_pe
    integer, pointer :: sp(:), rp(:)
    integer :: i

    call this%init_order(send_pe, recv_pe)

    ! Allocate one send buffer per destination rank, sized by the number
    ! of degrees of freedom shared with that rank
    allocate(this%send_buf(send_pe%size()))

    sp => send_pe%array()
    do i = 1, send_pe%size()
       allocate(this%send_buf(i)%data(this%send_dof(sp(i))%size()))
    end do

    ! Allocate one receive buffer per source rank
    allocate(this%recv_buf(recv_pe%size()))

    rp => recv_pe%array()
    do i = 1, recv_pe%size()
       allocate(this%recv_buf(i)%data(this%recv_dof(rp(i))%size()))
    end do

  end subroutine gs_mpi_init

  !> Deallocate MPI based communication method
  subroutine gs_mpi_free(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i

    if (allocated(this%send_buf)) then
       do i = 1, size(this%send_buf)
          if (allocated(this%send_buf(i)%data)) then
             deallocate(this%send_buf(i)%data)
          end if
       end do
       deallocate(this%send_buf)
    end if

    if (allocated(this%recv_buf)) then
       do i = 1, size(this%recv_buf)
          if (allocated(this%recv_buf(i)%data)) then
             deallocate(this%recv_buf(i)%data)
          end if
       end do
       deallocate(this%recv_buf)
    end if

    call this%free_order()
    call this%free_dofs()

  end subroutine gs_mpi_free

  !> Post non-blocking send operations
  subroutine gs_nbsend_mpi(this, u, n, deps, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: deps
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, ierr, dst, thrdid
    integer, pointer :: sp(:)

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

    do i = 1, size(this%send_pe)
       dst = this%send_pe(i)
       sp => this%send_dof(dst)%array()
       ! Pack the local values shared with rank dst into the send buffer
       do concurrent (j = 1:this%send_dof(dst)%size())
          this%send_buf(i)%data(j) = u(sp(j))
       end do
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(send_data => this%send_buf(i)%data)
         call mpi_isend(send_data, size(send_data), &
              mpi_real_precision, this%send_pe(i), thrdid, &
              neko_comm, this%send_buf(i)%request, ierr)
       end associate
       this%send_buf(i)%flag = .false.
    end do
  end subroutine gs_nbsend_mpi

  !> Post non-blocking receive operations
  subroutine gs_nbrecv_mpi(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i, ierr, thrdid

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

    do i = 1, size(this%recv_pe)
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(recv_data => this%recv_buf(i)%data)
         call mpi_irecv(recv_data, size(recv_data), &
              mpi_real_precision, this%recv_pe(i), thrdid, &
              neko_comm, this%recv_buf(i)%request, ierr)
       end associate
       this%recv_buf(i)%flag = .false.
    end do

  end subroutine gs_nbrecv_mpi

  !> Wait for non-blocking operations
  subroutine gs_nbwait_mpi(this, u, n, op, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    integer, intent(in) :: op
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, src, ierr
    integer, pointer :: sp(:)
    integer :: nreqs

    ! Wait for all receives, unpacking and combining each message with
    ! the requested gather-scatter operation as soon as it completes
    nreqs = size(this%recv_pe)

    do while (nreqs .gt. 0)
       do i = 1, size(this%recv_pe)
          if (.not. this%recv_buf(i)%flag) then
             call mpi_test(this%recv_buf(i)%request, this%recv_buf(i)%flag, &
                  this%recv_buf(i)%status, ierr)
             if (this%recv_buf(i)%flag) then
                nreqs = nreqs - 1
                src = this%recv_pe(i)
                sp => this%recv_dof(src)%array()
                select case (op)
                case (gs_op_add)
                   !NEC$ IVDEP
                   do concurrent (j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = u(sp(j)) + this%recv_buf(i)%data(j)
                   end do
                case (gs_op_mul)
                   !NEC$ IVDEP
                   do concurrent (j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = u(sp(j)) * this%recv_buf(i)%data(j)
                   end do
                case (gs_op_min)
                   !NEC$ IVDEP
                   do concurrent (j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = min(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                case (gs_op_max)
                   !NEC$ IVDEP
                   do concurrent (j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = max(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                end select
             end if
          end if
       end do
    end do

    ! Wait for all sends to complete before the buffers can be reused
    nreqs = size(this%send_pe)
    do while (nreqs .gt. 0)
       do i = 1, size(this%send_pe)
          if (.not. this%send_buf(i)%flag) then
             call mpi_test(this%send_buf(i)%request, this%send_buf(i)%flag, &
                  mpi_status_ignore, ierr)
             if (this%send_buf(i)%flag) nreqs = nreqs - 1
          end if
       end do
    end do

  end subroutine gs_nbwait_mpi

end module gs_mpi
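
The split into nbsend, nbrecv and nbwait lets a caller overlap the MPI exchange with local work: receives are posted first, sends are packed and posted, and the wait call unpacks each message as it completes and combines it with the chosen reduction operation. The sketch below illustrates that call sequence under stated assumptions; the subroutine name example_exchange, the object gs, the array u and the null dependency/stream pointers are hypothetical and not part of this file, and the gs_mpi_t instance is assumed to have been set up (PE ordering and dof stacks) by the parent gs_comm_t machinery before init is called.

! Hedged usage sketch only: names below are illustrative assumptions.
subroutine example_exchange(gs, u, n)
  use gs_mpi, only : gs_mpi_t
  use gs_ops, only : gs_op_add
  use num_types, only : rp
  use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr
  implicit none
  type(gs_mpi_t), intent(inout) :: gs      ! assumed already initialised via gs%init
  integer, intent(in) :: n
  real(kind=rp), intent(inout) :: u(n)     ! field values to gather-scatter
  type(c_ptr) :: deps, strm

  ! The MPI backend ignores the device dependency/stream handles
  deps = c_null_ptr
  strm = c_null_ptr

  call gs%nbrecv()                         ! post all non-blocking receives first
  call gs%nbsend(u, n, deps, strm)         ! pack shared dofs and post sends
  ! ...purely local gather-scatter work could overlap with communication here...
  call gs%nbwait(u, n, gs_op_add, strm)    ! unpack and sum incoming contributions
end subroutine example_exchange

Because gs_nbwait_mpi polls the outstanding receives with mpi_test, messages are consumed in completion order rather than posting order, which is what makes the overlap above worthwhile.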