Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cartesian_pe_finder.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
33
37 use num_types, only: rp, dp, xp, i8
39 use space, only: space_t
40 use pe_finder, only: pe_finder_t
41 use stack, only: stack_i4_t, stack_i8_t
43 use tuple, only: tuple_i4_t
44 use htable, only: htable_i8_t
45 use point, only: point_t
47 use mpi_f08, only: mpi_max, mpi_allreduce, mpi_comm, mpi_comm_rank, &
48 mpi_comm_size, mpi_wtime, mpi_integer, mpi_integer8, &
49 mpi_min, mpi_sum, mpi_irecv, mpi_isend, mpi_wait, &
50 mpi_exscan, mpi_request, mpi_status, &
51 mpi_alltoall, mpi_in_place, mpi_barrier
52 implicit none
53 private
54
56 integer, public, parameter :: glob_map_size = 4096
57
58 type, private :: i8_mpi_t
60 type(mpi_status) :: status
62 type(mpi_request) :: request
65 logical :: flag
67 real(kind=dp), allocatable :: data(:)
68 integer :: size = 0
69 contains
70 procedure, pass(this) :: free => i8_mpi_free
71 end type i8_mpi_t
72
73
75 type, public, extends(pe_finder_t) :: cartesian_pe_finder_t
77 real(kind=dp) :: padding
79 integer(kind=i8) :: glob_n_boxes
80 integer(kind=i8) :: n_boxes
81 !Number of local boxes in x direction
82 integer :: n_boxes_per_pe
83 integer :: nelv
84 integer :: offset_el
85 real(kind=rp) :: max_x_global, max_y_global, max_z_global
86 real(kind=rp) :: min_x_global, min_y_global, min_z_global
87
88 ! Resolution of the boxes
89 real(kind=xp) :: res_x, res_y, res_z
90
91 type(i8_mpi_t), allocatable :: recv_buf(:)
92 type(i8_mpi_t), allocatable :: send_buf(:)
93 type(stack_i4_t), allocatable :: pe_map(:)
94
95 contains
96 procedure, pass(this) :: init => cartesian_pe_finder_init
97 procedure, pass(this) :: free => cartesian_pe_finder_free
98 procedure, pass(this) :: find => cartesian_pe_finder_find
99 procedure, pass(this) :: find_batch => cartesian_pe_finder_find_batch
100
101 end type cartesian_pe_finder_t
102
103contains
104
106 subroutine i8_mpi_free(this)
107 class(i8_mpi_t), intent(inout) :: this
108
109 if (allocated(this%data)) then
110 deallocate(this%data)
111 end if
112
113 end subroutine i8_mpi_free
114
124 subroutine cartesian_pe_finder_init(this, x, y, z, nelv, Xh, comm, n_boxes, padding)
125 class(cartesian_pe_finder_t), intent(inout) :: this
126 real(kind=rp), intent(in), target :: x(:)
127 real(kind=rp), intent(in), target :: y(:)
128 real(kind=rp), intent(in), target :: z(:)
129 integer, intent(in) :: nelv
130 type(mpi_comm), intent(in), optional :: comm
131 type(space_t), intent(in), target :: Xh
132 integer, intent(in) :: n_boxes
133 real(kind=dp), intent(in) :: padding
134 integer :: ierr
135 integer :: i, j, k, e, i2, j2, k2
136 integer :: pe_id, lin_idx
137 integer :: lxyz, lx2
138 integer(kind=i8) :: glob_id, loc_id
139 integer(kind=i8) :: min_id(3), max_id(3)
140 real(kind=rp) :: center_x, center_y, center_z
141 type(stack_i8_t), allocatable :: glob_ids(:), recv_ids(:)
142 integer(i8), pointer :: glb_ids(:)
143 integer(kind=i8) :: htable_data, temp ! We just use it as a set
144 integer, allocatable :: n_recv(:), n_send(:)
145 type(htable_i8_t) :: marked_box
146 real(kind=rp) :: min_bb_x, max_bb_x
147 real(kind=rp) :: min_bb_y, max_bb_y
148 real(kind=rp) :: min_bb_z, max_bb_z
149 real(kind=rp) :: el_x(xh%lxyz), el_y(xh%lxyz), el_z(xh%lxyz)
150
151 call this%free()
152 this%comm = comm
153 this%padding = padding
154
155 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
156 call mpi_comm_size(this%comm, this%pe_size, ierr)
157
158 this%glob_n_boxes = int(n_boxes, i8)**3
159 this%n_boxes = n_boxes
160 this%nelv = nelv
161 this%n_boxes_per_pe = (this%glob_n_boxes + int(this%pe_size - 1, i8))/ &
162 int(this%pe_size, i8)
163
164 allocate(this%send_buf(0:this%pe_size - 1))
165 allocate(this%recv_buf(0:this%pe_size - 1))
166 allocate(this%pe_map(0:this%n_boxes_per_pe - 1))
167 do i = 0, this%n_boxes_per_pe - 1
168 call this%pe_map(i)%init()
169 end do
170 call mpi_exscan(this%nelv, this%offset_el, 1, &
171 mpi_integer, mpi_sum, this%comm, ierr)
172 if (this%nelv .gt. 0) then
173 this%max_x_global = maxval(x(1:nelv*xh%lxyz))
174 this%max_y_global = maxval(y(1:nelv*xh%lxyz))
175 this%max_z_global = maxval(z(1:nelv*xh%lxyz))
176 this%min_x_global = minval(x(1:nelv*xh%lxyz))
177 this%min_y_global = minval(y(1:nelv*xh%lxyz))
178 this%min_z_global = minval(z(1:nelv*xh%lxyz))
179 else
180 this%max_x_global = -1e20
181 this%max_y_global = -1e20
182 this%max_z_global = -1e20
183 this%min_x_global = 1e20
184 this%min_y_global = 1e20
185 this%min_z_global = 1e20
186 end if
187
188
189 call mpi_allreduce(mpi_in_place, this%max_x_global, 1, mpi_real_precision, &
190 mpi_max, this%comm, ierr)
191 call mpi_allreduce(mpi_in_place, this%max_y_global, 1, mpi_real_precision, &
192 mpi_max, this%comm, ierr)
193 call mpi_allreduce(mpi_in_place, this%max_z_global, 1, mpi_real_precision, &
194 mpi_max, this%comm, ierr)
195 call mpi_allreduce(mpi_in_place, this%min_x_global, 1, mpi_real_precision, &
196 mpi_min, this%comm, ierr)
197 call mpi_allreduce(mpi_in_place, this%min_y_global, 1, mpi_real_precision, &
198 mpi_min, this%comm, ierr)
199 call mpi_allreduce(mpi_in_place, this%min_z_global, 1, mpi_real_precision, &
200 mpi_min, this%comm, ierr)
201
202 center_x = (this%max_x_global + this%min_x_global) / 2.0_dp
203 center_y = (this%max_y_global + this%min_y_global) / 2.0_dp
204 center_z = (this%max_z_global + this%min_z_global) / 2.0_dp
205
206 this%max_x_global = this%max_x_global - center_x
207 this%max_y_global = this%max_y_global - center_y
208 this%max_z_global = this%max_z_global - center_z
209 this%min_x_global = this%min_x_global - center_x
210 this%min_y_global = this%min_y_global - center_y
211 this%min_z_global = this%min_z_global - center_z
212 this%max_x_global = this%max_x_global*(1.0_xp+this%padding) + center_x
213 this%max_y_global = this%max_y_global*(1.0_xp+this%padding) + center_y
214 this%max_z_global = this%max_z_global*(1.0_xp+this%padding) + center_z
215 this%min_x_global = this%min_x_global*(1.0_xp+this%padding) + center_x
216 this%min_y_global = this%min_y_global*(1.0_xp+this%padding) + center_y
217 this%min_z_global = this%min_z_global*(1.0_xp+this%padding) + center_z
218
219
220 this%res_x = (this%max_x_global - this%min_x_global) / real(this%n_boxes, xp)
221 this%res_y = (this%max_y_global - this%min_y_global) / real(this%n_boxes, xp)
222 this%res_z = (this%max_z_global - this%min_z_global) / real(this%n_boxes, xp)
223 if (allocated(recv_ids)) then
224 do i = 0, this%pe_size-1
225 call recv_ids(i)%free()
226 end do
227 deallocate(recv_ids)
228 end if
229 if (allocated(glob_ids)) then
230 do i = 0, this%pe_size-1
231 call glob_ids(i)%free()
232 end do
233 deallocate(glob_ids)
234 end if
235 if (allocated(n_recv)) deallocate(n_recv)
236 if (allocated(n_send)) deallocate(n_send)
237 allocate(n_recv(0:this%pe_size-1))
238 allocate(n_send(0:this%pe_size-1))
239 allocate(recv_ids(0:this%pe_size-1))
240 allocate(glob_ids(0:this%pe_size-1))
241 do i = 0, this%pe_size-1
242 call recv_ids(i)%init()
243 call glob_ids(i)%init()
244 if (allocated(this%recv_buf(i)%data)) then
245 deallocate(this%recv_buf(i)%data)
246 end if
247 if (allocated(this%send_buf(i)%data)) then
248 deallocate(this%send_buf(i)%data)
249 end if
250 this%send_buf(i)%size = 0
251 this%recv_buf(i)%size = 0
252 end do
253 n_recv = 0
254 n_send = 0
255
256 lxyz = xh%lxyz
257 call marked_box%init(this%nelv*lxyz,htable_data)
258
259 do e = 1, this%nelv
260 !move it to do scaling
261 lx2 = xh%lx/2
262 if (mod(xh%lx,2) .eq. 0) then
263 lin_idx = linear_index(lx2, lx2, lx2, e, xh%lx, xh%lx, xh%lx)
264 center_x = x(lin_idx)
265 center_y = y(lin_idx)
266 center_z = z(lin_idx)
267 else
268 center_x = 0d0
269 center_y = 0d0
270 center_z = 0d0
271 do i = lx2, lx2+1
272 do j = lx2, lx2 + 1
273 do k = lx2, lx2 + 1
274 lin_idx = linear_index(i, j, k, e, xh%lx, xh%lx, xh%lx)
275 center_x = center_x + x(lin_idx)
276 center_y = center_y + y(lin_idx)
277 center_z = center_z + z(lin_idx)
278 end do
279 end do
280 end do
281 center_x = center_x / 8.0_xp
282 center_y = center_y / 8.0_xp
283 center_z = center_z / 8.0_xp
284 end if
285
286 !Maybe this is a stupid way to do it
287
288 el_x = x((e-1)*lxyz+1:e*lxyz) - center_x
289 el_y = y((e-1)*lxyz+1:e*lxyz) - center_y
290 el_z = z((e-1)*lxyz+1:e*lxyz) - center_z
291 el_x = el_x * (1.0_rp+padding) + center_x
292 el_y = el_y * (1.0_rp+padding) + center_y
293 el_z = el_z * (1.0_rp+padding) + center_z
294 !Padded and ready
295
296 !Now we go the bounding boxes of all subboxes in the element
297 do i = 1, xh%lx - 1
298 do j = 1, xh%ly - 1
299 do k = 1, xh%lz - 1
300 lin_idx = linear_index(i, j, k, 1, xh%lx, xh%lx, xh%lx)
301 max_bb_x = el_x(lin_idx)
302 min_bb_x = el_x(lin_idx)
303 max_bb_y = el_y(lin_idx)
304 min_bb_y = el_y(lin_idx)
305 max_bb_z = el_z(lin_idx)
306 min_bb_z = el_z(lin_idx)
307 do i2 = 0, 1
308 do j2 = 0, 1
309 do k2 = 0, 1
310 lin_idx = linear_index(i+i2,j+j2,k+k2, 1, xh%lx, xh%lx, xh%lx)
311 max_bb_x = max(max_bb_x, el_x(lin_idx))
312 min_bb_x = min(min_bb_x, el_x(lin_idx))
313 max_bb_y = max(max_bb_y, el_y(lin_idx))
314 min_bb_y = min(min_bb_y, el_y(lin_idx))
315 max_bb_z = max(max_bb_z, el_z(lin_idx))
316 min_bb_z = min(min_bb_z, el_z(lin_idx))
317 end do
318 end do
319 end do
320
321
322 min_id = get_global_idx(this, min_bb_x, min_bb_y, min_bb_z)
323 max_id = get_global_idx(this, max_bb_x, max_bb_y, max_bb_z)
324 do i2 = min_id(1), max_id(1)
325 do j2 = min_id(2), max_id(2)
326 do k2 = min_id(3), max_id(3)
327 if (i2 .ge. 0 .and. i2 .lt. this%n_boxes .and. &
328 j2 .ge. 0 .and. j2 .lt. this%n_boxes .and. &
329 k2 .ge. 0 .and. k2 .lt. this%n_boxes) then
330 glob_id = i2 + j2*this%n_boxes + k2*this%n_boxes**2
331 pe_id = get_pe_idx(this, glob_id)
332 if (marked_box%get(glob_id,htable_data) .ne. 0) then
333 call glob_ids(pe_id)%push(glob_id)
334 n_send(pe_id) = n_send(pe_id) + 1
335 call marked_box%set(glob_id, htable_data)
336 end if
337 end if
338 end do
339 end do
340 end do
341 end do
342 end do
343 end do
344 end do
345 !temp = 0
346 !do i = 0, this%pe_size-1
347 ! temp = temp + glob_ids(i)%size()
348 !end do
349 !print *, 'size of globids', temp, this%pe_rank
350 call send_recv_data(this, recv_ids, n_recv, glob_ids, n_send)
351
352 !temp = 0
353 !do i = 0, this%pe_size-1
354 ! temp = temp + recv_ids(i)%size()
355 !end do
356 !print *, 'size of globids', temp, this%pe_rank
357 !Buffers are likely way too large for other calls
358 do i = 0, this%pe_size-1
359 if (allocated(this%recv_buf(i)%data)) then
360 deallocate(this%recv_buf(i)%data)
361 end if
362 if (allocated(this%send_buf(i)%data)) then
363 deallocate(this%send_buf(i)%data)
364 end if
365 this%send_buf(i)%size = 0
366 this%recv_buf(i)%size = 0
367 end do
368
369
370 call mpi_barrier(this%comm, ierr)
371 do i = 0, this%pe_size-1
372 if (n_recv(i) .gt. 0) then
373 glb_ids => recv_ids(i)%array()
374 do j = 1, n_recv(i)
375 glob_id = glb_ids(j)
376 loc_id = glob_id - int(this%pe_rank,i8) * &
377 int(this%n_boxes_per_pe,i8)
378 if (loc_id .ge. this%n_boxes_per_pe .or. loc_id < 0) then
379 call neko_warning('loc_id out of bounds')
380 end if
381 call this%pe_map(int(loc_id))%push(i)
382 end do
383 end if
384 end do
385
386 if (allocated(recv_ids)) then
387 do i = 0, this%pe_size-1
388 call recv_ids(i)%free()
389 end do
390 deallocate(recv_ids)
391 end if
392 if (allocated(glob_ids)) then
393 do i = 0, this%pe_size-1
394 call glob_ids(i)%free()
395 end do
396 deallocate(glob_ids)
397 end if
398 if (allocated(n_recv)) deallocate(n_recv)
399 if (allocated(n_send)) deallocate(n_send)
400 end subroutine cartesian_pe_finder_init
401
402 subroutine cartesian_pe_finder_find(this, my_point, pe_candidates)
403 class(cartesian_pe_finder_t), intent(inout) :: this
404 type(point_t), intent(in) :: my_point
405 type(stack_i4_t), intent(inout) :: pe_candidates
406
407 call neko_error('cartesian_pe_finder_find not implemented')
408 end subroutine cartesian_pe_finder_find
409
410 function get_global_idx(this, x, y, z) result(global_box_id)
411 class(cartesian_pe_finder_t), intent(in) :: this
412 real(kind=rp), intent(in) :: x, y, z
413 integer(kind=i8) :: global_box_id(3)
414 integer(kind=i8) :: pe_size, n_boxes
415
416 pe_size = this%pe_size
417 n_boxes = this%n_boxes
418
419
420 global_box_id(1) = int((x - this%min_x_global) / this%res_x, i8)
421 global_box_id(2) = int((y - this%min_y_global) / this%res_y, i8)
422 global_box_id(3) = int((z - this%min_z_global) / this%res_z, i8)
423 end function get_global_idx
424
425 function get_pe_idx(this, global_idx) result(pe_id)
426 class(cartesian_pe_finder_t), intent(in) :: this
427 integer(kind=i8), intent(in) :: global_idx
428 integer :: pe_id
429 !Get x id and then divide by the number of x boxes per rank to get the correct pe id
430 pe_id = global_idx / int(this%n_boxes_per_pe, i8)
431 end function get_pe_idx
432
433
436 class(cartesian_pe_finder_t), intent(inout) :: this
437 integer :: i
438
439 if (allocated(this%send_buf)) then
440 do i = 0, this%pe_size-1
441 call this%send_buf(i)%free()
442 end do
443 deallocate(this%send_buf)
444 end if
445 if (allocated(this%recv_buf)) then
446 do i = 0, this%pe_size-1
447 call this%recv_buf(i)%free()
448 end do
449 deallocate(this%recv_buf)
450 end if
451 if (allocated(this%pe_map)) then
452 do i = 0, this%n_boxes_per_pe-1
453 call this%pe_map(i)%free()
454 end do
455 deallocate(this%pe_map)
456 end if
457
458 end subroutine cartesian_pe_finder_free
459
460 subroutine cartesian_pe_finder_find_batch(this, points, n_points, points_at_pe, n_points_pe)
461 class(cartesian_pe_finder_t), intent(inout) :: this
462 integer, intent(in) :: n_points
463 real(kind=rp), intent(in) :: points(3,n_points)
464 type(stack_i4_t), intent(inout) :: points_at_pe(0:(this%pe_size-1))
465 integer, intent(inout) :: n_points_pe(0:(this%pe_size-1))
466 integer :: i, j, k
467 integer(kind=i8) :: glob_id(3)
468 integer(kind=i8) :: pe_id
469 integer(kind=i8) :: lin_glob_id
470 integer(kind=i8) :: loc_id
471 type(stack_i8_t), allocatable :: work_pe_ids(:), work_pt_ids(:)
472 type(stack_i8_t), allocatable :: temp_pe_ids(:), temp_pt_ids(:)
473 integer, allocatable :: n_work_ids(:), n_temp_ids(:)
474 integer(i8), pointer :: ids(:)
475 integer(i8), pointer :: pt_id(:)
476 integer, pointer :: pe_cands(:)
477 integer(i8), pointer :: pe_cands8(:)
478 integer(i8), pointer :: pt_ids(:)
479 integer :: ierr
480
481 allocate(work_pe_ids(0:this%pe_size-1))
482 allocate(work_pt_ids(0:this%pe_size-1))
483 allocate(temp_pe_ids(0:this%pe_size-1))
484 allocate(temp_pt_ids(0:this%pe_size-1))
485 allocate(n_temp_ids(0:this%pe_size-1))
486 allocate(n_work_ids(0:this%pe_size-1))
487
488 do i = 0, this%pe_size-1
489 n_work_ids(i) = 0
490 n_temp_ids(i) = 0
491 call work_pt_ids(i)%init()
492 call work_pe_ids(i)%init()
493 call temp_pe_ids(i)%init()
494 call temp_pt_ids(i)%init()
495 end do
496
497 do i = 0, this%pe_size-1
498 call points_at_pe(i)%clear()
499 n_points_pe(i) = 0
500 end do
501
502 ! Compute global ids for the points
503 ! and the pe id for each point
504 n_work_ids = 0
505 do i = 1, n_points
506 glob_id = get_global_idx(this, points(1,i), points(2,i), points(3,i))
507 lin_glob_id = glob_id(1) + &
508 glob_id(2)*this%n_boxes + &
509 glob_id(3)*this%n_boxes**2
510 pe_id = get_pe_idx(this, lin_glob_id)
511 if (glob_id(1) .ge. 0 .and. glob_id(1) .lt. this%n_boxes .and. &
512 glob_id(2) .ge. 0 .and. glob_id(2) .lt. this%n_boxes .and. &
513 glob_id(3) .ge. 0 .and. glob_id(3) .lt. this%n_boxes) then
514 call work_pe_ids(pe_id)%push(lin_glob_id)
515 call work_pt_ids(pe_id)%push(int(i,i8))
516 n_work_ids(pe_id) = n_work_ids(pe_id) + 1
517 else
518 print *, 'Point found outside domain:', points(1,i), &
519 points(2,i), points(3,i), &
520 'Computed global id:', lin_glob_id, &
521 'Global box id (x,y,z):', glob_id(1), glob_id(2), glob_id(3)
522 end if
523 end do
524
525
526 ! Send the global ids to the correct pe
527 ! and get the global ids from the other pes with points I own
528 ! Also send point ids to the other pes
529 call send_recv_data(this, temp_pe_ids, n_temp_ids, work_pe_ids, n_work_ids)
530 call send_recv_data(this, temp_pt_ids, n_temp_ids, work_pt_ids, n_work_ids)
531 call mpi_barrier(this%comm, ierr)
532 ! Get the local ids for the points I own
533 ! and the pe candidates for the points I own
534 n_work_ids = 0
535 do i = 0, this%pe_size-1
536 call work_pe_ids(i)%clear()
537 call work_pt_ids(i)%clear()
538 end do
539
540 do i =0 , this%pe_size-1
541
542 if (n_temp_ids(i) .gt. 0) then
543 ids => temp_pe_ids(i)%array()
544 pt_ids => temp_pt_ids(i)%array()
545 do j = 1, n_temp_ids(i)
546 loc_id = ids(j) - int(this%pe_rank,i8) * &
547 int(this%n_boxes_per_pe,i8)
548 pe_cands => this%pe_map(int(loc_id))%array()
549 do k = 1, this%pe_map(int(loc_id))%size()
550 call work_pe_ids(i)%push(int(pe_cands(k),i8))
551 call work_pt_ids(i)%push(pt_ids(j))
552 n_work_ids(i) = n_work_ids(i) + 1
553 end do
554 if (this%pe_map(int(loc_id))%size() .lt. 1) then
555 print *, 'No PE candidates found for point:', &
556 points(1,pt_ids(j)), &
557 points(2,pt_ids(j)), points(3,pt_ids(j))
558 end if
559 end do
560 end if
561 end do
562 ! pe candidates found for the points I am responsible for
563 ! Now i need to send the data to the other pes
564 ! and get the candidates from the other pes for my own points
565 call send_recv_data(this, temp_pe_ids, n_temp_ids, work_pe_ids, n_work_ids)
566 call send_recv_data(this, temp_pt_ids, n_temp_ids, work_pt_ids, n_work_ids)
567 ! Gotten candidates for my points
568 ! Now I organize the candidates for the points I own
569 n_points_pe = 0
570 do i = 0, this%pe_size-1
571 if (n_temp_ids(i) .gt. 0) then
572 pe_cands8 => temp_pe_ids(i)%array()
573 pt_ids => temp_pt_ids(i)%array()
574 do j = 1, n_temp_ids(i)
575 pe_id = pe_cands8(j)
576 call points_at_pe(pe_id)%push(int(pt_ids(j)))
577 n_points_pe(pe_id) = n_points_pe(pe_id) + 1
578 end do
579 end if
580 end do
581
582 if (allocated(work_pe_ids)) then
583 do i = 0, this%pe_size-1
584 call work_pe_ids(i)%free()
585 end do
586 deallocate(work_pe_ids)
587 end if
588 if (allocated(temp_pe_ids)) then
589 do i = 0, this%pe_size-1
590 call temp_pe_ids(i)%free()
591 end do
592 deallocate(temp_pe_ids)
593 end if
594 if (allocated(temp_pt_ids)) then
595 do i = 0, this%pe_size-1
596 call temp_pt_ids(i)%free()
597 end do
598 deallocate(temp_pt_ids)
599 end if
600 if (allocated(work_pt_ids)) then
601 do i = 0, this%pe_size-1
602 call work_pt_ids(i)%free()
603 end do
604 deallocate(work_pt_ids)
605 end if
606
607 if (allocated(n_work_ids)) then
608 deallocate(n_work_ids)
609 end if
610
611 if (allocated(n_temp_ids)) then
612 deallocate(n_temp_ids)
613 end if
614 end subroutine cartesian_pe_finder_find_batch
615
616
617 subroutine send_recv_data(this, recv_values, n_recv_values, &
618 send_values, n_send_values)
619 class(cartesian_pe_finder_t), intent(inout) :: this
620 type(stack_i8_t), intent(inout) :: recv_values(0:this%pe_size-1)
621 type(stack_i8_t), intent(inout) :: send_values(0:this%pe_size-1)
622 integer, intent(inout) :: n_recv_values(0:this%pe_size-1)
623 integer, intent(inout) :: n_send_values(0:this%pe_size-1)
624 integer :: i, j, ierr
625 integer(i8) :: idx
626 integer(i8) , pointer :: sp(:)
627
628 call mpi_alltoall(n_send_values, 1, mpi_integer, &
629 n_recv_values, 1, mpi_integer, this%comm, ierr)
630
631 do i = 0, this%pe_size-1
632 if (n_recv_values(i) .gt. 0) then
633 if (this%recv_buf(i)%size .lt. n_recv_values(i)) then
634 if (allocated(this%recv_buf(i)%data)) deallocate(this%recv_buf(i)%data)
635 allocate(this%recv_buf(i)%data(n_recv_values(i)))
636 this%recv_buf(i)%size = n_recv_values(i)
637 end if
638 call mpi_irecv(this%recv_buf(i)%data, n_recv_values(i), mpi_integer8, &
639 i, 0, this%comm, this%recv_buf(i)%request, ierr)
640 end if
641 end do
642
643 do i = 0, this%pe_size-1
644 if (n_send_values(i) .gt. 0) then
645 if (this%send_buf(i)%size .lt. n_send_values(i)) then
646 if (allocated(this%send_buf(i)%data)) deallocate(this%send_buf(i)%data)
647 allocate(this%send_buf(i)%data(n_send_values(i)))
648 this%send_buf(i)%size = n_send_values(i)
649 end if
650 ! Copy the data to the send buffer
651 sp => send_values(i)%array()
652 do j = 1, n_send_values(i)
653 this%send_buf(i)%data(j) = sp(j)
654 end do
655 call mpi_isend(this%send_buf(i)%data, n_send_values(i), mpi_integer8, &
656 i, 0, this%comm, this%send_buf(i)%request, ierr)
657 end if
658 end do
659
660 do i = 0, this%pe_size-1
661 call recv_values(i)%clear()
662 end do
663
664 do i = 0, this%pe_size-1
665 if (n_recv_values(i) .gt. 0) then
666 call mpi_wait(this%recv_buf(i)%request, this%recv_buf(i)%status, ierr)
667 do j = 1, n_recv_values(i)
668 idx = this%recv_buf(i)%data(j)
669 call recv_values(i)%push(idx)
670 end do
671 end if
672 end do
673 do i = 0, this%pe_size-1
674 if (n_send_values(i) .gt. 0) then
675 call mpi_wait(this%send_buf(i)%request, this%send_buf(i)%status, ierr)
676 end if
677 end do
678
679
680 end subroutine send_recv_data
681end module cartesian_pe_finder
double real
Implements cartesian_pe_finder given a dofmap.
subroutine cartesian_pe_finder_find(this, my_point, pe_candidates)
integer, parameter, public glob_map_size
Minimum number of total boxes in the aabb tree.
subroutine cartesian_pe_finder_init(this, x, y, z, nelv, xh, comm, n_boxes, padding)
Initialize the global interpolation object on a set of coordinates.
subroutine i8_mpi_free(this)
Destructor for i8_mpi_t.
subroutine send_recv_data(this, recv_values, n_recv_values, send_values, n_send_values)
subroutine cartesian_pe_finder_find_batch(this, points, n_points, points_at_pe, n_points_pe)
integer function get_pe_idx(this, global_idx)
subroutine cartesian_pe_finder_free(this)
Destructor.
integer(kind=i8) function, dimension(3) get_global_idx(this, x, y, z)
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
integer, public pe_size
MPI size of communicator.
Definition comm.F90:59
integer, public pe_rank
MPI rank.
Definition comm.F90:56
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
Implements a hash table ADT.
Definition htable.f90:36
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements pe_finder given a dofmap.
Definition pe_finder.f90:35
Implements a point.
Definition point.f90:35
Defines a function space.
Definition space.f90:34
Implements a dynamic stack ADT.
Definition stack.f90:35
Implements a n-tuple.
Definition tuple.f90:34
Utilities.
Definition utils.f90:35
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Implements global interpolation for arbitrary points in the domain.
Integer*8 based hash table.
Definition htable.f90:92
Implements global interpolation for arbitrary points in the domain.
Definition pe_finder.f90:45
A point in with coordinates .
Definition point.f90:43
The function space for the SEM solution fields.
Definition space.f90:63
Integer based stack.
Definition stack.f90:63
Integer*8 based stack.
Definition stack.f90:70
Integer based 2-tuple.
Definition tuple.f90:51
#define max(a, b)
Definition tensor.cu:40