Neko 1.99.1
A portable framework for high-order spectral element flow simulations
gs_mpi.f90
! Copyright (c) 2020-2022, The Neko Authors
! All rights reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
!
!   * Redistributions of source code must retain the above copyright
!     notice, this list of conditions and the following disclaimer.
!
!   * Redistributions in binary form must reproduce the above
!     copyright notice, this list of conditions and the following
!     disclaimer in the documentation and/or other materials provided
!     with the distribution.
!
!   * Neither the name of the authors nor the names of its
!     contributors may be used to endorse or promote products derived
!     from this software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!> Defines MPI gather-scatter communication
module gs_mpi
  use num_types, only : rp
  use gs_comm, only : gs_comm_t
  use gs_ops, only : gs_op_add, gs_op_max, gs_op_min, gs_op_mul
  use stack, only : stack_i4_t
  use comm, only : neko_comm, mpi_real_precision
  use mpi_f08, only : mpi_test, mpi_status_ignore, mpi_status, &
       mpi_request, mpi_isend, mpi_irecv
  use, intrinsic :: iso_c_binding
  !$ use omp_lib
  implicit none
  private

  !> MPI buffer for non-blocking operations
  type, private :: gs_comm_mpi_t
     !> Status of the posted send/receive
     type(mpi_status) :: status
     !> Request handle for the non-blocking operation
     type(mpi_request) :: request
     !> True once the operation has completed
     logical :: flag
     !> Packed send/receive data
     real(kind=rp), allocatable :: data(:)
  end type gs_comm_mpi_t

  !> Gather-scatter communication using MPI
  type, public, extends(gs_comm_t) :: gs_mpi_t
     !> Send buffers, one per destination rank
     type(gs_comm_mpi_t), allocatable :: send_buf(:)
     !> Receive buffers, one per source rank
     type(gs_comm_mpi_t), allocatable :: recv_buf(:)
   contains
     procedure, pass(this) :: init => gs_mpi_init
     procedure, pass(this) :: free => gs_mpi_free
     procedure, pass(this) :: nbsend => gs_nbsend_mpi
     procedure, pass(this) :: nbrecv => gs_nbrecv_mpi
     procedure, pass(this) :: nbwait => gs_nbwait_mpi
  end type gs_mpi_t

contains

  !> Initialise MPI based communication method
  !! See gs_comm.f90 for details
  subroutine gs_mpi_init(this, send_pe, recv_pe)
    class(gs_mpi_t), intent(inout) :: this
    type(stack_i4_t), intent(inout) :: send_pe
    type(stack_i4_t), intent(inout) :: recv_pe
    integer, pointer :: sp(:), rp(:)
    integer :: i

    call this%init_order(send_pe, recv_pe)

    allocate(this%send_buf(send_pe%size()))

    sp => send_pe%array()
    do i = 1, send_pe%size()
       allocate(this%send_buf(i)%data(this%send_dof(sp(i))%size()))
    end do

    allocate(this%recv_buf(recv_pe%size()))

    rp => recv_pe%array()
    do i = 1, recv_pe%size()
       allocate(this%recv_buf(i)%data(this%recv_dof(rp(i))%size()))
    end do

  end subroutine gs_mpi_init

  !> Deallocate MPI based communication method
  subroutine gs_mpi_free(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i

    if (allocated(this%send_buf)) then
       do i = 1, size(this%send_buf)
          if (allocated(this%send_buf(i)%data)) then
             deallocate(this%send_buf(i)%data)
          end if
       end do
       deallocate(this%send_buf)
    end if

    if (allocated(this%recv_buf)) then
       do i = 1, size(this%recv_buf)
          if (allocated(this%recv_buf(i)%data)) then
             deallocate(this%recv_buf(i)%data)
          end if
       end do
       deallocate(this%recv_buf)
    end if

    call this%free_order()
    call this%free_dofs()

  end subroutine gs_mpi_free

  !> Post non-blocking send operations
  subroutine gs_nbsend_mpi(this, u, n, deps, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: deps
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, ierr, dst, thrdid
    integer, pointer :: sp(:)

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

    do i = 1, size(this%send_pe)
       dst = this%send_pe(i)
       ! Gather data from u into buffers according to the indices in send_dof,
       ! so that each process in send_pe is sent a contiguous chunk of data
       sp => this%send_dof(dst)%array()
       do concurrent(j = 1:this%send_dof(dst)%size())
          this%send_buf(i)%data(j) = u(sp(j))
       end do
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but it throws an
       ! ICE with NAG.
       associate(send_data => this%send_buf(i)%data)
         call mpi_isend(send_data, size(send_data), &
              mpi_real_precision, this%send_pe(i), thrdid, &
              neko_comm, this%send_buf(i)%request, ierr)
       end associate
       this%send_buf(i)%flag = .false.
    end do
  end subroutine gs_nbsend_mpi

  !> Post non-blocking receive operations
  subroutine gs_nbrecv_mpi(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i, ierr, thrdid

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

    do i = 1, size(this%recv_pe)
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but it throws an
       ! ICE with NAG.
       ! Issue recv requests; we will later check in nbwait that these
       ! have finished
       associate(recv_data => this%recv_buf(i)%data)
         call mpi_irecv(recv_data, size(recv_data), &
              mpi_real_precision, this%recv_pe(i), thrdid, &
              neko_comm, this%recv_buf(i)%request, ierr)
       end associate
       this%recv_buf(i)%flag = .false.
    end do

  end subroutine gs_nbrecv_mpi

  !> Wait for non-blocking operations
  subroutine gs_nbwait_mpi(this, u, n, op, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, src, ierr
    integer :: op
    integer, pointer :: sp(:)
    integer :: nreqs

    nreqs = size(this%recv_pe)

    do while (nreqs .gt. 0)
       do i = 1, size(this%recv_pe)
          if (.not. this%recv_buf(i)%flag) then
             ! Check if we have received the data we want
             call mpi_test(this%recv_buf(i)%request, this%recv_buf(i)%flag, &
                  this%recv_buf(i)%status, ierr)
             ! If it has been received
             if (this%recv_buf(i)%flag) then
                ! One more request has been successful
                nreqs = nreqs - 1
                src = this%recv_pe(i)
                sp => this%recv_dof(src)%array()
                ! Apply the operation to the buffered data on the dofs
                ! specified by recv_dof
                select case(op)
                case (gs_op_add)
                   !NEC$ IVDEP
                   do concurrent(j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = u(sp(j)) + this%recv_buf(i)%data(j)
                   end do
                case (gs_op_mul)
                   !NEC$ IVDEP
                   do concurrent(j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = u(sp(j)) * this%recv_buf(i)%data(j)
                   end do
                case (gs_op_min)
                   !NEC$ IVDEP
                   do concurrent(j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = min(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                case (gs_op_max)
                   !NEC$ IVDEP
                   do concurrent(j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = max(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                end select
             end if
          end if
       end do
    end do
    ! Finally, check that the non-blocking sends this rank has issued have
    ! also completed successfully
    nreqs = size(this%send_pe)
    do while (nreqs .gt. 0)
       do i = 1, size(this%send_pe)
          if (.not. this%send_buf(i)%flag) then
             call mpi_test(this%send_buf(i)%request, this%send_buf(i)%flag, &
                  mpi_status_ignore, ierr)
             if (this%send_buf(i)%flag) nreqs = nreqs - 1
          end if
       end do
    end do

  end subroutine gs_nbwait_mpi

end module gs_mpi
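
The three type-bound procedures above follow the standard non-blocking exchange pattern: gs_nbrecv_mpi posts mpi_irecv for every neighbouring rank, gs_nbsend_mpi packs the owned values into contiguous buffers and posts mpi_isend, and gs_nbwait_mpi polls with mpi_test until every transfer has finished, folding each received buffer into u through the recv_dof index map. The following stand-alone program is a minimal sketch of the same pattern outside of Neko; the two-rank setup, the identity index map and all names in it (nb_exchange_sketch, peer, map) are illustrative assumptions and not part of the gs_mpi API.

program nb_exchange_sketch
  use mpi_f08
  implicit none
  integer, parameter :: dp = kind(1.0d0)
  integer, parameter :: n = 4
  real(kind=dp) :: u(n), send_data(n), recv_data(n)
  integer :: map(n)
  type(mpi_request) :: send_req, recv_req
  type(mpi_status) :: status
  logical :: done
  integer :: rank, nranks, peer, j, ierr

  call mpi_init(ierr)
  call mpi_comm_rank(mpi_comm_world, rank, ierr)
  call mpi_comm_size(mpi_comm_world, nranks, ierr)
  if (nranks .ne. 2) then
     if (rank .eq. 0) print *, 'run this sketch with exactly 2 ranks'
     call mpi_finalize(ierr)
     stop
  end if

  peer = 1 - rank            ! the single neighbouring rank in this sketch
  map = [ (j, j = 1, n) ]    ! identity index map, standing in for send_dof/recv_dof
  u = real(rank + 1, kind=dp)

  ! "nbrecv": post the receive before any data arrives
  call mpi_irecv(recv_data, n, mpi_double_precision, peer, 0, &
       mpi_comm_world, recv_req, ierr)

  ! "nbsend": gather u into a contiguous buffer and post the send
  do j = 1, n
     send_data(j) = u(map(j))
  end do
  call mpi_isend(send_data, n, mpi_double_precision, peer, 0, &
       mpi_comm_world, send_req, ierr)

  ! "nbwait": poll until the receive has completed, then apply an
  ! add operation, as gs_nbwait_mpi does for gs_op_add
  done = .false.
  do while (.not. done)
     call mpi_test(recv_req, done, status, ierr)
  end do
  do j = 1, n
     u(map(j)) = u(map(j)) + recv_data(j)
  end do

  ! the send must also complete before its buffer can be reused
  call mpi_wait(send_req, mpi_status_ignore, ierr)

  if (rank .eq. 0) print *, 'u after exchange:', u
  call mpi_finalize(ierr)
end program nb_exchange_sketch

In gs_mpi the same three stages are split across separate procedures so that packing, communication and the gather-scatter operation can be overlapped with other work on the calling side.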