Neko 1.99.1
A portable framework for high-order spectral element flow simulations
gs_mpi.f90
! Copyright (c) 2020-2022, The Neko Authors
! All rights reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
!
!   * Redistributions of source code must retain the above copyright
!     notice, this list of conditions and the following disclaimer.
!
!   * Redistributions in binary form must reproduce the above
!     copyright notice, this list of conditions and the following
!     disclaimer in the documentation and/or other materials provided
!     with the distribution.
!
!   * Neither the name of the authors nor the names of its
!     contributors may be used to endorse or promote products derived
!     from this software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!> Defines MPI gather-scatter communication
module gs_mpi
  use num_types, only : rp
  use gs_comm, only : gs_comm_t
  use gs_ops, only : gs_op_add, gs_op_mul, gs_op_min, gs_op_max
  use stack, only : stack_i4_t
  use mpi_f08, only : mpi_test, mpi_status_ignore, mpi_status, &
       mpi_request, mpi_isend, mpi_irecv
  use comm, only : neko_comm, mpi_real_precision
  use, intrinsic :: iso_c_binding
  use utils, only : neko_error
  !$ use omp_lib
  implicit none
  private

  !> MPI buffer for non-blocking operations
  type, private :: gs_comm_mpi_t
     !> MPI status of the pending operation
     type(mpi_status) :: status
     !> MPI request handle of the pending operation
     type(mpi_request) :: request
     !> True once the operation has been tested as complete
     logical :: flag
     !> Packed send/receive data
     real(kind=rp), allocatable :: data(:)
  end type gs_comm_mpi_t

  !> Gather-scatter communication using MPI
  type, public, extends(gs_comm_t) :: gs_mpi_t
     !> Buffers for non-blocking sends, one per destination PE
     type(gs_comm_mpi_t), allocatable :: send_buf(:)
     !> Buffers for non-blocking receives, one per source PE
     type(gs_comm_mpi_t), allocatable :: recv_buf(:)
   contains
     procedure, pass(this) :: init => gs_mpi_init
     procedure, pass(this) :: free => gs_mpi_free
     procedure, pass(this) :: nbsend => gs_nbsend_mpi
     procedure, pass(this) :: nbrecv => gs_nbrecv_mpi
     procedure, pass(this) :: nbwait => gs_nbwait_mpi
  end type gs_mpi_t

contains

  !> Initialise MPI based communication method
  !! See gs_comm.f90 for details
  subroutine gs_mpi_init(this, send_pe, recv_pe)
    class(gs_mpi_t), intent(inout) :: this
    type(stack_i4_t), intent(inout) :: send_pe
    type(stack_i4_t), intent(inout) :: recv_pe
    integer, pointer :: sp(:), rp(:)
    integer :: i

    call this%init_order(send_pe, recv_pe)

    ! Allocate one send buffer per destination PE, sized by the number
    ! of degrees of freedom shared with that PE
    allocate(this%send_buf(send_pe%size()))

    sp => send_pe%array()
    do i = 1, send_pe%size()
       allocate(this%send_buf(i)%data(this%send_dof(sp(i))%size()))
    end do

    ! Allocate one receive buffer per source PE
    allocate(this%recv_buf(recv_pe%size()))

    rp => recv_pe%array()
    do i = 1, recv_pe%size()
       allocate(this%recv_buf(i)%data(this%recv_dof(rp(i))%size()))
    end do

  end subroutine gs_mpi_init

  !> Deallocate MPI based communication method
  subroutine gs_mpi_free(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i

    if (allocated(this%send_buf)) then
       do i = 1, size(this%send_buf)
          if (allocated(this%send_buf(i)%data)) then
             deallocate(this%send_buf(i)%data)
          end if
       end do
       deallocate(this%send_buf)
    end if

    if (allocated(this%recv_buf)) then
       do i = 1, size(this%recv_buf)
          if (allocated(this%recv_buf(i)%data)) then
             deallocate(this%recv_buf(i)%data)
          end if
       end do
       deallocate(this%recv_buf)
    end if

    call this%free_order()
    call this%free_dofs()

  end subroutine gs_mpi_free

  !> Post non-blocking send operations
  subroutine gs_nbsend_mpi(this, u, n, deps, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: deps
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, ierr, dst, thrdid
    integer, pointer :: sp(:)

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

    do i = 1, size(this%send_pe)
       dst = this%send_pe(i)
       ! Gather data from u into buffers according to indices in send_dof.
       ! We want to send contiguous data to each process in send_pe
       sp => this%send_dof(dst)%array()
       do concurrent(j = 1:this%send_dof(dst)%size())
          this%send_buf(i)%data(j) = u(sp(j))
       end do
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(send_data => this%send_buf(i)%data)
         call mpi_isend(send_data, size(send_data), &
              mpi_real_precision, this%send_pe(i), thrdid, &
              neko_comm, this%send_buf(i)%request, ierr)
       end associate
       this%send_buf(i)%flag = .false.
    end do
  end subroutine gs_nbsend_mpi

  !> Post non-blocking receive operations
  subroutine gs_nbrecv_mpi(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i, ierr, thrdid

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

    do i = 1, size(this%recv_pe)
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       ! Issue recv requests; we will later check that these have finished
       ! in nbwait
       associate(recv_data => this%recv_buf(i)%data)
         call mpi_irecv(recv_data, size(recv_data), &
              mpi_real_precision, this%recv_pe(i), thrdid, &
              neko_comm, this%recv_buf(i)%request, ierr)
       end associate
       this%recv_buf(i)%flag = .false.
    end do

  end subroutine gs_nbrecv_mpi

  !> Wait for non-blocking operations
  subroutine gs_nbwait_mpi(this, u, n, op, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, src, ierr
    integer :: op
    integer, pointer :: sp(:)
    integer :: nreqs

    nreqs = size(this%recv_pe)

    do while (nreqs .gt. 0)
       do i = 1, size(this%recv_pe)
          if (.not. this%recv_buf(i)%flag) then
             ! Check if we have received the data we want
             call mpi_test(this%recv_buf(i)%request, this%recv_buf(i)%flag, &
                  this%recv_buf(i)%status, ierr)
             ! If it has been received
             if (this%recv_buf(i)%flag) then
                ! One more request has been successful
                nreqs = nreqs - 1
                src = this%recv_pe(i)
                sp => this%recv_dof(src)%array()
                ! Do operation with data in buffer on dof specified by recv_dof
                select case(op)
                case (gs_op_add)
                   !NEC$ IVDEP
                   do concurrent(j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = u(sp(j)) + this%recv_buf(i)%data(j)
                   end do
                case (gs_op_mul)
                   !NEC$ IVDEP
                   do concurrent(j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = u(sp(j)) * this%recv_buf(i)%data(j)
                   end do
                case (gs_op_min)
                   !NEC$ IVDEP
                   do concurrent(j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = min(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                case (gs_op_max)
                   !NEC$ IVDEP
                   do concurrent(j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = max(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                case default
                   call neko_error("Unknown operation in gs_nbwait_mpi")
                end select
             end if
          end if
       end do
    end do
    ! Finally, check that the non-blocking sends this rank has issued have
    ! also completed successfully
    nreqs = size(this%send_pe)
    do while (nreqs .gt. 0)
       do i = 1, size(this%send_pe)
          if (.not. this%send_buf(i)%flag) then
             call mpi_test(this%send_buf(i)%request, this%send_buf(i)%flag, &
                  mpi_status_ignore, ierr)
             if (this%send_buf(i)%flag) nreqs = nreqs - 1
          end if
       end do
    end do

  end subroutine gs_nbwait_mpi

end module gs_mpi
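
Taken together, the three type-bound procedures implement a standard non-blocking neighbour exchange: nbrecv posts one mpi_irecv per source PE, nbsend packs the shared values into contiguous per-PE buffers and posts the matching mpi_isend calls, and nbwait polls the outstanding requests with mpi_test and folds each arriving buffer into u with the requested gather-scatter operation. The sketch below shows how a caller might drive one such exchange with an additive reduction. It is illustrative only: the subroutine name exchange_shared_dofs, the assumption that gs is an already initialised gs_mpi_t, and the use of null C pointers for the unused deps and strm arguments are not part of Neko.

! Hypothetical driver (not part of Neko): one gather-scatter exchange that
! sums the values of shared degrees of freedom across ranks.
subroutine exchange_shared_dofs(gs, u, n)
  use num_types, only : rp
  use gs_mpi, only : gs_mpi_t
  use gs_ops, only : gs_op_add
  use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr
  implicit none
  type(gs_mpi_t), intent(inout) :: gs ! assumed to be initialised elsewhere
  integer, intent(in) :: n
  real(kind=rp), dimension(n), intent(inout) :: u
  type(c_ptr) :: deps, strm

  ! The MPI backend never dereferences the device dependency/stream handles
  deps = c_null_ptr
  strm = c_null_ptr

  ! Post receives first so incoming messages have somewhere to land
  call gs%nbrecv()
  ! Pack shared values into per-PE buffers and post the sends
  call gs%nbsend(u, n, deps, strm)
  ! Poll until everything has arrived, summing received values into u
  call gs%nbwait(u, n, gs_op_add, strm)

end subroutine exchange_shared_dofs

Posting the receives before the matching sends is not required for correctness, but it lets incoming messages land directly in the posted buffers rather than in MPI's unexpected-message queue; the call order actually used by Neko's gather-scatter kernels may differ.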