Neko 0.9.99
A portable framework for high-order spectral element flow simulations
gs_mpi.f90
! Copyright (c) 2020-2022, The Neko Authors
! All rights reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
!
!   * Redistributions of source code must retain the above copyright
!     notice, this list of conditions and the following disclaimer.
!
!   * Redistributions in binary form must reproduce the above
!     copyright notice, this list of conditions and the following
!     disclaimer in the documentation and/or other materials provided
!     with the distribution.
!
!   * Neither the name of the authors nor the names of its
!     contributors may be used to endorse or promote products derived
!     from this software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!> Defines MPI gather-scatter communication
module gs_mpi
  use num_types, only : rp
  use gs_comm, only : gs_comm_t
  use gs_ops, only : gs_op_add, gs_op_mul, gs_op_min, gs_op_max
  use stack, only : stack_i4_t
  use comm
  use, intrinsic :: iso_c_binding
  !$ use omp_lib
  implicit none
  private

  !> MPI buffer for non-blocking operations
  type, private :: gs_comm_mpi_t
     type(mpi_status) :: status
     type(mpi_request) :: request
     logical :: flag
     real(kind=rp), allocatable :: data(:)
  end type gs_comm_mpi_t

  !> Gather-scatter communication using MPI
  type, public, extends(gs_comm_t) :: gs_mpi_t
     type(gs_comm_mpi_t), allocatable :: send_buf(:)
     type(gs_comm_mpi_t), allocatable :: recv_buf(:)
   contains
     procedure, pass(this) :: init => gs_mpi_init
     procedure, pass(this) :: free => gs_mpi_free
     procedure, pass(this) :: nbsend => gs_nbsend_mpi
     procedure, pass(this) :: nbrecv => gs_nbrecv_mpi
     procedure, pass(this) :: nbwait => gs_nbwait_mpi
  end type gs_mpi_t

contains

  !> Initialise MPI based communication method
  subroutine gs_mpi_init(this, send_pe, recv_pe)
    class(gs_mpi_t), intent(inout) :: this
    type(stack_i4_t), intent(inout) :: send_pe
    type(stack_i4_t), intent(inout) :: recv_pe
    integer, pointer :: sp(:), rp(:)
    integer :: i

    call this%init_order(send_pe, recv_pe)

    ! One send buffer per destination PE, sized by the number of
    ! dofs shared with that PE
    allocate(this%send_buf(send_pe%size()))

    sp => send_pe%array()
    do i = 1, send_pe%size()
       allocate(this%send_buf(i)%data(this%send_dof(sp(i))%size()))
    end do

    ! Likewise, one receive buffer per source PE
    allocate(this%recv_buf(recv_pe%size()))

    rp => recv_pe%array()
    do i = 1, recv_pe%size()
       allocate(this%recv_buf(i)%data(this%recv_dof(rp(i))%size()))
    end do

  end subroutine gs_mpi_init

  !> Deallocate MPI based communication method
  subroutine gs_mpi_free(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i

    if (allocated(this%send_buf)) then
       do i = 1, size(this%send_buf)
          if (allocated(this%send_buf(i)%data)) then
             deallocate(this%send_buf(i)%data)
          end if
       end do
       deallocate(this%send_buf)
    end if

    if (allocated(this%recv_buf)) then
       do i = 1, size(this%recv_buf)
          if (allocated(this%recv_buf(i)%data)) then
             deallocate(this%recv_buf(i)%data)
          end if
       end do
       deallocate(this%recv_buf)
    end if

    call this%free_order()
    call this%free_dofs()

  end subroutine gs_mpi_free

  !> Post non-blocking send operations
  subroutine gs_nbsend_mpi(this, u, n, deps, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    type(c_ptr), intent(inout) :: deps
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, ierr, dst, thrdid
    integer, pointer :: sp(:)

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

    do i = 1, size(this%send_pe)
       dst = this%send_pe(i)
       sp => this%send_dof(dst)%array()
       ! Gather the dofs destined for this PE into a contiguous buffer
       do concurrent (j = 1:this%send_dof(dst)%size())
          this%send_buf(i)%data(j) = u(sp(j))
       end do
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(send_data => this%send_buf(i)%data)
         call mpi_isend(send_data, size(send_data), &
              mpi_real_precision, this%send_pe(i), thrdid, &
              neko_comm, this%send_buf(i)%request, ierr)
       end associate
       this%send_buf(i)%flag = .false.
    end do
  end subroutine gs_nbsend_mpi

  !> Post non-blocking receive operations
  subroutine gs_nbrecv_mpi(this)
    class(gs_mpi_t), intent(inout) :: this
    integer :: i, ierr, thrdid

    thrdid = 0
    !$ thrdid = omp_get_thread_num()

    do i = 1, size(this%recv_pe)
       ! We should not need this extra associate block, and it works
       ! great without it for GNU, Intel, NEC and Cray, but throws an
       ! ICE with NAG.
       associate(recv_data => this%recv_buf(i)%data)
         call mpi_irecv(recv_data, size(recv_data), &
              mpi_real_precision, this%recv_pe(i), thrdid, &
              neko_comm, this%recv_buf(i)%request, ierr)
       end associate
       this%recv_buf(i)%flag = .false.
    end do

  end subroutine gs_nbrecv_mpi

  !> Wait for non-blocking operations
  subroutine gs_nbwait_mpi(this, u, n, op, strm)
    class(gs_mpi_t), intent(inout) :: this
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(inout) :: u
    integer, intent(in) :: op
    type(c_ptr), intent(inout) :: strm
    integer :: i, j, src, ierr
    integer, pointer :: sp(:)
    integer :: nreqs

    ! Poll the posted receives until all have completed, scattering
    ! each finished buffer into u with the requested reduction
    nreqs = size(this%recv_pe)

    do while (nreqs .gt. 0)
       do i = 1, size(this%recv_pe)
          if (.not. this%recv_buf(i)%flag) then
             call mpi_test(this%recv_buf(i)%request, this%recv_buf(i)%flag, &
                  this%recv_buf(i)%status, ierr)
             if (this%recv_buf(i)%flag) then
                nreqs = nreqs - 1
                src = this%recv_pe(i)
                sp => this%recv_dof(src)%array()
                select case (op)
                case (gs_op_add)
                   !NEC$ IVDEP
                   do concurrent (j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = u(sp(j)) + this%recv_buf(i)%data(j)
                   end do
                case (gs_op_mul)
                   !NEC$ IVDEP
                   do concurrent (j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = u(sp(j)) * this%recv_buf(i)%data(j)
                   end do
                case (gs_op_min)
                   !NEC$ IVDEP
                   do concurrent (j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = min(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                case (gs_op_max)
                   !NEC$ IVDEP
                   do concurrent (j = 1:this%recv_dof(src)%size())
                      u(sp(j)) = max(u(sp(j)), this%recv_buf(i)%data(j))
                   end do
                end select
             end if
          end if
       end do
    end do

    ! Drain the outstanding sends as well before returning
    nreqs = size(this%send_pe)
    do while (nreqs .gt. 0)
       do i = 1, size(this%send_pe)
          if (.not. this%send_buf(i)%flag) then
             call mpi_test(this%send_buf(i)%request, this%send_buf(i)%flag, &
                  mpi_status_ignore, ierr)
             if (this%send_buf(i)%flag) nreqs = nreqs - 1
          end if
       end do
    end do

  end subroutine gs_nbwait_mpi

end module gs_mpi
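
The request-polling pattern above (post the receives first, then the sends, then spin on mpi_test until every request completes) is the core of the non-blocking exchange. The standalone program below is a minimal sketch of that same pattern outside of Neko, assuming a simple ring exchange between ranks; the program name, peer arithmetic and buffer sizes are illustrative only and are not part of this module.

program ring_exchange
  use mpi_f08
  use, intrinsic :: iso_fortran_env, only : real64
  implicit none
  integer :: rank, nranks, next, prev
  logical :: done
  type(MPI_Request) :: send_req, recv_req
  type(MPI_Status) :: status
  real(kind=real64) :: send_buf(4), recv_buf(4)

  call MPI_Init()
  call MPI_Comm_rank(MPI_COMM_WORLD, rank)
  call MPI_Comm_size(MPI_COMM_WORLD, nranks)
  next = mod(rank + 1, nranks)
  prev = mod(rank - 1 + nranks, nranks)
  send_buf = real(rank, real64)

  ! Post the receive before the send, as gs_nbrecv_mpi/gs_nbsend_mpi do,
  ! so the incoming message can land directly in its buffer
  call MPI_Irecv(recv_buf, size(recv_buf), MPI_REAL8, prev, 0, &
       MPI_COMM_WORLD, recv_req)
  call MPI_Isend(send_buf, size(send_buf), MPI_REAL8, next, 0, &
       MPI_COMM_WORLD, send_req)

  ! Poll with MPI_Test until the receive completes, mirroring the
  ! nreqs countdown loop in gs_nbwait_mpi
  done = .false.
  do while (.not. done)
     call MPI_Test(recv_req, done, status)
  end do

  ! Drain the send as well before the buffer could be reused
  call MPI_Wait(send_req, MPI_STATUS_IGNORE)

  call MPI_Finalize()
end program ring_exchange

Polling with mpi_test rather than blocking in mpi_waitall lets gs_nbwait_mpi scatter each receive into u as soon as that particular message arrives, instead of waiting for the slowest peer before doing any local work.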