Neko 0.8.1
A portable framework for high-order spectral element flow simulations
gs_mpi.f90
! Copyright (c) 2020-2022, The Neko Authors
! All rights reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
!
! * Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
!
! * Redistributions in binary form must reproduce the above
! copyright notice, this list of conditions and the following
! disclaimer in the documentation and/or other materials provided
! with the distribution.
!
! * Neither the name of the authors nor the names of its
! contributors may be used to endorse or promote products derived
! from this software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!> Defines MPI gather-scatter communication
module gs_mpi
  use neko_config
  use num_types
  use gs_comm
  use gs_ops
  use stack, only : stack_i4_t
  use comm
  use, intrinsic :: iso_c_binding
  !$ use omp_lib
  implicit none
  private

  !> MPI buffer for non-blocking operations
  type, private :: gs_comm_mpi_t
     type(mpi_status) :: status
     type(mpi_request) :: request
     logical :: flag
     real(kind=rp), allocatable :: data(:)
  end type gs_comm_mpi_t

  !> Gather-scatter communication using MPI
  type, public, extends(gs_comm_t) :: gs_mpi_t
     type(gs_comm_mpi_t), allocatable :: send_buf(:)
     type(gs_comm_mpi_t), allocatable :: recv_buf(:)
   contains
     procedure, pass(this) :: init => gs_mpi_init
     procedure, pass(this) :: free => gs_mpi_free
     procedure, pass(this) :: nbsend => gs_nbsend_mpi
     procedure, pass(this) :: nbrecv => gs_nbrecv_mpi
     procedure, pass(this) :: nbwait => gs_nbwait_mpi
  end type gs_mpi_t

contains

  !> Initialise MPI based communication method
  subroutine gs_mpi_init(this, send_pe, recv_pe)
    class(gs_mpi_t), intent(inout) :: this
    type(stack_i4_t), intent(inout) :: send_pe
    type(stack_i4_t), intent(inout) :: recv_pe
    integer, pointer :: sp(:), rp(:)
    integer :: i

    call this%init_order(send_pe, recv_pe)

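    ! Allocate one contiguous exchange buffer per neighbouring PE,
    ! sized by the number of degrees of freedom shared with that PE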
    allocate(this%send_buf(send_pe%size()))

    sp => send_pe%array()
    do i = 1, send_pe%size()
       allocate(this%send_buf(i)%data(this%send_dof(sp(i))%size()))
    end do

    allocate(this%recv_buf(recv_pe%size()))

    rp => recv_pe%array()
    do i = 1, recv_pe%size()
       allocate(this%recv_buf(i)%data(this%recv_dof(rp(i))%size()))
    end do

  end subroutine gs_mpi_init

  !> Deallocate MPI based communication method
  subroutine gs_mpi_free(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i

    if (allocated(this%send_buf)) then
       do i = 1, size(this%send_buf)
          if (allocated(this%send_buf(i)%data)) then
             deallocate(this%send_buf(i)%data)
          end if
       end do
       deallocate(this%send_buf)
    end if

    if (allocated(this%recv_buf)) then
       do i = 1, size(this%recv_buf)
          if (allocated(this%recv_buf(i)%data)) then
             deallocate(this%recv_buf(i)%data)
          end if
       end do
       deallocate(this%recv_buf)
    end if

    call this%free_order()
    call this%free_dofs()

  end subroutine gs_mpi_free

  !> Post non-blocking send operations
  subroutine gs_nbsend_mpi(this, u, n, deps, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: deps
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, ierr, dst, thrdid
    integer, pointer :: sp(:)

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

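    ! Gather the outgoing values for each destination PE into its
    ! contiguous send buffer before posting the send; the OpenMP
    ! thread id is used as the MPI tag, so exchanges issued by
    ! different threads are matched pairwise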
    do i = 1, size(this%send_pe)
       dst = this%send_pe(i)
       sp => this%send_dof(dst)%array()
       do j = 1, this%send_dof(dst)%size()
          this%send_buf(i)%data(j) = u(sp(j))
       end do
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(send_data => this%send_buf(i)%data)
         call mpi_isend(send_data, size(send_data), &
              mpi_real_precision, this%send_pe(i), thrdid, &
              neko_comm, this%send_buf(i)%request, ierr)
       end associate
       this%send_buf(i)%flag = .false.
    end do
  end subroutine gs_nbsend_mpi

  !> Post non-blocking receive operations
  subroutine gs_nbrecv_mpi(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i, ierr, thrdid

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

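    ! Post one receive per source PE, with the thread id as the
    ! matching MPI tag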
    do i = 1, size(this%recv_pe)
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(recv_data => this%recv_buf(i)%data)
         call mpi_irecv(recv_data, size(recv_data), &
              mpi_real_precision, this%recv_pe(i), thrdid, &
              neko_comm, this%recv_buf(i)%request, ierr)
       end associate
       this%recv_buf(i)%flag = .false.
    end do

  end subroutine gs_nbrecv_mpi

  !> Wait for non-blocking operations
  subroutine gs_nbwait_mpi(this, u, n, op, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    integer, intent(in) :: op
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, src, ierr
    integer, pointer :: sp(:)
    integer :: nreqs

    nreqs = size(this%recv_pe)

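    ! Poll the posted receives with mpi_test; as each one completes,
    ! scatter its payload back into u, combining entries with the
    ! requested gather-scatter reduction (add, mul, min or max)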
    do while (nreqs .gt. 0)
       do i = 1, size(this%recv_pe)
          if (.not. this%recv_buf(i)%flag) then
             call mpi_test(this%recv_buf(i)%request, this%recv_buf(i)%flag, &
                  this%recv_buf(i)%status, ierr)
             if (this%recv_buf(i)%flag) then
                nreqs = nreqs - 1
                src = this%recv_pe(i)
                sp => this%recv_dof(src)%array()
                select case(op)
                case (gs_op_add)
                   !NEC$ IVDEP
                   do j = 1, this%recv_dof(src)%size()
                      u(sp(j)) = u(sp(j)) + this%recv_buf(i)%data(j)
                   end do
                case (gs_op_mul)
                   !NEC$ IVDEP
                   do j = 1, this%recv_dof(src)%size()
                      u(sp(j)) = u(sp(j)) * this%recv_buf(i)%data(j)
                   end do
                case (gs_op_min)
                   !NEC$ IVDEP
                   do j = 1, this%recv_dof(src)%size()
                      u(sp(j)) = min(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                case (gs_op_max)
                   !NEC$ IVDEP
                   do j = 1, this%recv_dof(src)%size()
                      u(sp(j)) = max(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                end select
             end if
          end if
       end do
    end do

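    ! Finally, drain all outstanding sends so that the send buffers
    ! can safely be reused by the next exchange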
    nreqs = size(this%send_pe)
    do while (nreqs .gt. 0)
       do i = 1, size(this%send_pe)
          if (.not. this%send_buf(i)%flag) then
             call mpi_test(this%send_buf(i)%request, this%send_buf(i)%flag, &
                  mpi_status_ignore, ierr)
             if (this%send_buf(i)%flag) nreqs = nreqs - 1
          end if
       end do
    end do

  end subroutine gs_nbwait_mpi

end module gs_mpi
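
A plausible call sequence for one complete exchange, sketched under stated assumptions (not part of the module): `gs` is an initialised gs_mpi_t, `u` the local dof array of length `n`, and `deps`/`strm` device handles that this CPU backend accepts but never touches.

    call gs%nbrecv()                        ! post all receives first
    call gs%nbsend(u, n, deps, strm)        ! pack buffers and post sends
    call gs%nbwait(u, n, gs_op_add, strm)   ! poll, unpack and sum shared dofs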