Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cartesian_pe_finder.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
33
37 use num_types, only: rp, dp, xp, i8
39 use space, only: space_t
40 use pe_finder, only: pe_finder_t
41 use stack, only: stack_i4_t, stack_i8_t
43 use tuple, only: tuple_i4_t
44 use htable, only: htable_i8_t
45 use point, only: point_t
47 use mpi_f08, only: mpi_max, mpi_allreduce, mpi_comm, mpi_comm_rank, &
48 mpi_comm_size, mpi_wtime, mpi_integer, mpi_integer8, &
49 mpi_min, mpi_sum, mpi_irecv, mpi_isend, mpi_wait, &
50 mpi_exscan, mpi_request, mpi_status, &
51 mpi_alltoall, mpi_in_place, mpi_barrier
52 implicit none
53 private
54
56 integer, public, parameter :: glob_map_size = 4096
57
58 type, private :: i8_mpi_t
60 type(mpi_status) :: status
62 type(mpi_request) :: request
65 logical :: flag
67 real(kind=dp), allocatable :: data(:)
68 integer :: size = 0
69 contains
70 procedure, pass(this) :: free => i8_mpi_free
71 end type i8_mpi_t
72
73
75 type, public, extends(pe_finder_t) :: cartesian_pe_finder_t
77 real(kind=dp) :: padding
79 integer(kind=i8) :: glob_n_boxes
80 integer(kind=i8) :: n_boxes
81 !Number of local boxes in x direction
82 integer :: n_boxes_per_pe
83 integer :: nelv
84 integer :: offset_el
85 real(kind=rp) :: max_x_global, max_y_global, max_z_global
86 real(kind=rp) :: min_x_global, min_y_global, min_z_global
87
88 ! Resolution of the boxes
89 real(kind=xp) :: res_x, res_y, res_z
90
91 type(i8_mpi_t), allocatable :: recv_buf(:)
92 type(i8_mpi_t), allocatable :: send_buf(:)
93 type(stack_i4_t), allocatable :: pe_map(:)
94
95 contains
96 procedure, pass(this) :: init => cartesian_pe_finder_init
97 procedure, pass(this) :: free => cartesian_pe_finder_free
98 procedure, pass(this) :: find => cartesian_pe_finder_find
99 procedure, pass(this) :: find_batch => cartesian_pe_finder_find_batch
100
101 end type cartesian_pe_finder_t
102
103contains
104
106 subroutine i8_mpi_free(this)
107 class(i8_mpi_t), intent(inout) :: this
108
109 if (allocated(this%data)) then
110 deallocate(this%data)
111 end if
112
113 end subroutine i8_mpi_free
114
124 subroutine cartesian_pe_finder_init(this, x, y, z, nelv, Xh, comm, n_boxes, padding)
125 class(cartesian_pe_finder_t), intent(inout) :: this
126 real(kind=rp), intent(in), target :: x(:)
127 real(kind=rp), intent(in), target :: y(:)
128 real(kind=rp), intent(in), target :: z(:)
129 integer, intent(in) :: nelv
130 type(mpi_comm), intent(in), optional :: comm
131 type(space_t), intent(in), target :: Xh
132 integer, intent(in) :: n_boxes
133 real(kind=dp), intent(in) :: padding
134 integer :: ierr
135 integer :: i, j, k, e, i2, j2, k2
136 integer :: pe_id, lin_idx
137 integer :: lxyz, lx2
138 integer(kind=i8) :: glob_id, loc_id
139 integer(kind=i8) :: min_id(3), max_id(3)
140 real(kind=rp) :: center_x, center_y, center_z
141 type(stack_i8_t), allocatable :: glob_ids(:), recv_ids(:)
142 integer(i8), pointer :: glb_ids(:)
143 integer(kind=i8) :: htable_data, temp ! We just use it as a set
144 integer, allocatable :: n_recv(:), n_send(:)
145 type(htable_i8_t) :: marked_box
146 real(kind=rp) :: min_bb_x, max_bb_x
147 real(kind=rp) :: min_bb_y, max_bb_y
148 real(kind=rp) :: min_bb_z, max_bb_z
149 real(kind=rp) :: el_x(xh%lxyz), el_y(xh%lxyz), el_z(xh%lxyz)
150
151 call this%free()
152 this%comm = comm
153 this%padding = padding
154
155 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
156 call mpi_comm_size(this%comm, this%pe_size, ierr)
157
158 this%glob_n_boxes = int(n_boxes,i8)**3
159 this%n_boxes = n_boxes
160 this%nelv = nelv
161 this%n_boxes_per_pe = (this%glob_n_boxes+int(this%pe_size-1,i8))/int(this%pe_size,i8)
162
163 allocate(this%send_buf(0:this%pe_size-1))
164 allocate(this%recv_buf(0:this%pe_size-1))
165 allocate(this%pe_map(0:this%n_boxes_per_pe-1))
166 do i = 0, this%n_boxes_per_pe-1
167 call this%pe_map(i)%init()
168 end do
169 call mpi_exscan(this%nelv, this%offset_el, 1, &
170 mpi_integer, mpi_sum, this%comm, ierr)
171 if (this%nelv .gt. 0) then
172 this%max_x_global = maxval(x(1:nelv*xh%lxyz))
173 this%max_y_global = maxval(y(1:nelv*xh%lxyz))
174 this%max_z_global = maxval(z(1:nelv*xh%lxyz))
175 this%min_x_global = minval(x(1:nelv*xh%lxyz))
176 this%min_y_global = minval(y(1:nelv*xh%lxyz))
177 this%min_z_global = minval(z(1:nelv*xh%lxyz))
178 else
179 this%max_x_global = -1e20
180 this%max_y_global = -1e20
181 this%max_z_global = -1e20
182 this%min_x_global = 1e20
183 this%min_y_global = 1e20
184 this%min_z_global = 1e20
185 end if
186
187
188 call mpi_allreduce(mpi_in_place, this%max_x_global, 1, mpi_real_precision, &
189 mpi_max, this%comm, ierr)
190 call mpi_allreduce(mpi_in_place, this%max_y_global, 1, mpi_real_precision, &
191 mpi_max, this%comm, ierr)
192 call mpi_allreduce(mpi_in_place, this%max_z_global, 1, mpi_real_precision, &
193 mpi_max, this%comm, ierr)
194 call mpi_allreduce(mpi_in_place, this%min_x_global, 1, mpi_real_precision, &
195 mpi_min, this%comm, ierr)
196 call mpi_allreduce(mpi_in_place, this%min_y_global, 1, mpi_real_precision, &
197 mpi_min, this%comm, ierr)
198 call mpi_allreduce(mpi_in_place, this%min_z_global, 1, mpi_real_precision, &
199 mpi_min, this%comm, ierr)
200
201 center_x = (this%max_x_global + this%min_x_global) / 2.0_dp
202 center_y = (this%max_y_global + this%min_y_global) / 2.0_dp
203 center_z = (this%max_z_global + this%min_z_global) / 2.0_dp
204
205 this%max_x_global = this%max_x_global - center_x
206 this%max_y_global = this%max_y_global - center_y
207 this%max_z_global = this%max_z_global - center_z
208 this%min_x_global = this%min_x_global - center_x
209 this%min_y_global = this%min_y_global - center_y
210 this%min_z_global = this%min_z_global - center_z
211 this%max_x_global = this%max_x_global*(1.0_xp+this%padding) + center_x
212 this%max_y_global = this%max_y_global*(1.0_xp+this%padding) + center_y
213 this%max_z_global = this%max_z_global*(1.0_xp+this%padding) + center_z
214 this%min_x_global = this%min_x_global*(1.0_xp+this%padding) + center_x
215 this%min_y_global = this%min_y_global*(1.0_xp+this%padding) + center_y
216 this%min_z_global = this%min_z_global*(1.0_xp+this%padding) + center_z
217
218
219 this%res_x = (this%max_x_global - this%min_x_global) / real(this%n_boxes, xp)
220 this%res_y = (this%max_y_global - this%min_y_global) / real(this%n_boxes, xp)
221 this%res_z = (this%max_z_global - this%min_z_global) / real(this%n_boxes, xp)
222 if (allocated(recv_ids)) then
223 do i = 0, this%pe_size-1
224 call recv_ids(i)%free()
225 end do
226 deallocate(recv_ids)
227 end if
228 if (allocated(glob_ids)) then
229 do i = 0, this%pe_size-1
230 call glob_ids(i)%free()
231 end do
232 deallocate(glob_ids)
233 end if
234 if (allocated(n_recv)) deallocate(n_recv)
235 if (allocated(n_send)) deallocate(n_send)
236 allocate(n_recv(0:this%pe_size-1))
237 allocate(n_send(0:this%pe_size-1))
238 allocate(recv_ids(0:this%pe_size-1))
239 allocate(glob_ids(0:this%pe_size-1))
240 do i = 0, this%pe_size-1
241 call recv_ids(i)%init()
242 call glob_ids(i)%init()
243 if (allocated(this%recv_buf(i)%data)) then
244 deallocate(this%recv_buf(i)%data)
245 end if
246 if (allocated(this%send_buf(i)%data)) then
247 deallocate(this%send_buf(i)%data)
248 end if
249 this%send_buf(i)%size = 0
250 this%recv_buf(i)%size = 0
251 end do
252 n_recv = 0
253 n_send = 0
254
255 lxyz = xh%lxyz
256 call marked_box%init(this%nelv*lxyz,htable_data)
257
258 do e = 1, this%nelv
259 !move it to do scaling
260 lx2 = xh%lx/2
261 if (mod(xh%lx,2) .eq. 0) then
262 lin_idx = linear_index(lx2, lx2, lx2, e, xh%lx, xh%lx, xh%lx)
263 center_x = x(lin_idx)
264 center_y = y(lin_idx)
265 center_z = z(lin_idx)
266 else
267 center_x = 0d0
268 center_y = 0d0
269 center_z = 0d0
270 do i = lx2, lx2+1
271 do j = lx2, lx2 + 1
272 do k = lx2, lx2 + 1
273 lin_idx = linear_index(i, j, k, e, xh%lx, xh%lx, xh%lx)
274 center_x = center_x + x(lin_idx)
275 center_y = center_y + y(lin_idx)
276 center_z = center_z + z(lin_idx)
277 end do
278 end do
279 end do
280 center_x = center_x / 8.0_xp
281 center_y = center_y / 8.0_xp
282 center_z = center_z / 8.0_xp
283 end if
284
285 !Maybe this is a stupid way to do it
286
287 el_x = x((e-1)*lxyz+1:e*lxyz) - center_x
288 el_y = y((e-1)*lxyz+1:e*lxyz) - center_y
289 el_z = z((e-1)*lxyz+1:e*lxyz) - center_z
290 el_x = el_x * (1.0_rp+padding) + center_x
291 el_y = el_y * (1.0_rp+padding) + center_y
292 el_z = el_z * (1.0_rp+padding) + center_z
293 !Padded and ready
294
295 !Now we go the bounding boxes of all subboxes in the element
296 do i = 1, xh%lx - 1
297 do j = 1, xh%ly - 1
298 do k = 1, xh%lz - 1
299 lin_idx = linear_index(i, j, k, 1, xh%lx, xh%lx, xh%lx)
300 max_bb_x = el_x(lin_idx)
301 min_bb_x = el_x(lin_idx)
302 max_bb_y = el_y(lin_idx)
303 min_bb_y = el_y(lin_idx)
304 max_bb_z = el_z(lin_idx)
305 min_bb_z = el_z(lin_idx)
306 do i2 = 0, 1
307 do j2 = 0, 1
308 do k2 = 0, 1
309 lin_idx = linear_index(i+i2,j+j2,k+k2, 1, xh%lx, xh%lx, xh%lx)
310 max_bb_x = max(max_bb_x, el_x(lin_idx))
311 min_bb_x = min(min_bb_x, el_x(lin_idx))
312 max_bb_y = max(max_bb_y, el_y(lin_idx))
313 min_bb_y = min(min_bb_y, el_y(lin_idx))
314 max_bb_z = max(max_bb_z, el_z(lin_idx))
315 min_bb_z = min(min_bb_z, el_z(lin_idx))
316 end do
317 end do
318 end do
319
320
321 min_id = get_global_idx(this, min_bb_x, min_bb_y, min_bb_z)
322 max_id = get_global_idx(this, max_bb_x, max_bb_y, max_bb_z)
323 do i2 = min_id(1), max_id(1)
324 do j2 = min_id(2), max_id(2)
325 do k2 = min_id(3), max_id(3)
326 if (i2 .ge. 0 .and. i2 .lt. this%n_boxes .and. &
327 j2 .ge. 0 .and. j2 .lt. this%n_boxes .and. &
328 k2 .ge. 0 .and. k2 .lt. this%n_boxes) then
329 glob_id = i2 + j2*this%n_boxes + k2*this%n_boxes**2
330 pe_id = get_pe_idx(this, glob_id)
331 if (marked_box%get(glob_id,htable_data) .ne. 0) then
332 call glob_ids(pe_id)%push(glob_id)
333 n_send(pe_id) = n_send(pe_id) + 1
334 call marked_box%set(glob_id, htable_data)
335 end if
336 end if
337 end do
338 end do
339 end do
340 end do
341 end do
342 end do
343 end do
344 !temp = 0
345 !do i = 0, this%pe_size-1
346 ! temp = temp + glob_ids(i)%size()
347 !end do
348 !print *, 'size of globids', temp, this%pe_rank
349 call send_recv_data(this, recv_ids, n_recv, glob_ids, n_send)
350
351 !temp = 0
352 !do i = 0, this%pe_size-1
353 ! temp = temp + recv_ids(i)%size()
354 !end do
355 !print *, 'size of globids', temp, this%pe_rank
356 !Buffers are likely way too large for other calls
357 do i = 0, this%pe_size-1
358 if (allocated(this%recv_buf(i)%data)) then
359 deallocate(this%recv_buf(i)%data)
360 end if
361 if (allocated(this%send_buf(i)%data)) then
362 deallocate(this%send_buf(i)%data)
363 end if
364 this%send_buf(i)%size = 0
365 this%recv_buf(i)%size = 0
366 end do
367
368
369 call mpi_barrier(this%comm, ierr)
370 do i = 0, this%pe_size-1
371 if (n_recv(i) .gt. 0) then
372 glb_ids => recv_ids(i)%array()
373 do j = 1, n_recv(i)
374 glob_id = glb_ids(j)
375 loc_id = glob_id - int(this%pe_rank,i8) * &
376 int(this%n_boxes_per_pe,i8)
377 if (loc_id .ge. this%n_boxes_per_pe .or. loc_id < 0) then
378 call neko_warning('loc_id out of bounds')
379 end if
380 call this%pe_map(int(loc_id))%push(i)
381 end do
382 end if
383 end do
384
385 if (allocated(recv_ids)) then
386 do i = 0, this%pe_size-1
387 call recv_ids(i)%free()
388 end do
389 deallocate(recv_ids)
390 end if
391 if (allocated(glob_ids)) then
392 do i = 0, this%pe_size-1
393 call glob_ids(i)%free()
394 end do
395 deallocate(glob_ids)
396 end if
397 if (allocated(n_recv)) deallocate(n_recv)
398 if (allocated(n_send)) deallocate(n_send)
399 end subroutine cartesian_pe_finder_init
400
401 subroutine cartesian_pe_finder_find(this, my_point, pe_candidates)
402 class(cartesian_pe_finder_t), intent(inout) :: this
403 type(point_t), intent(in) :: my_point
404 type(stack_i4_t), intent(inout) :: pe_candidates
405
406 call neko_error('cartesian_pe_finder_find not implemented')
407 end subroutine cartesian_pe_finder_find
408
409 function get_global_idx(this, x, y, z) result(global_box_id)
410 class(cartesian_pe_finder_t), intent(in) :: this
411 real(kind=rp), intent(in) :: x, y, z
412 integer(kind=i8) :: global_box_id(3)
413 integer(kind=i8) :: pe_size, n_boxes
414
415 pe_size = this%pe_size
416 n_boxes = this%n_boxes
417
418
419 global_box_id(1) = int((x - this%min_x_global) / this%res_x, i8)
420 global_box_id(2) = int((y - this%min_y_global) / this%res_y, i8)
421 global_box_id(3) = int((z - this%min_z_global) / this%res_z, i8)
422 end function get_global_idx
423
424 function get_pe_idx(this, global_idx) result(pe_id)
425 class(cartesian_pe_finder_t), intent(in) :: this
426 integer(kind=i8), intent(in) :: global_idx
427 integer :: pe_id
428 !Get x id and then divide by the number of x boxes per rank to get the correct pe id
429 pe_id = global_idx / int(this%n_boxes_per_pe, i8)
430 end function get_pe_idx
431
432
435 class(cartesian_pe_finder_t), intent(inout) :: this
436 integer :: i
437
438 if (allocated(this%send_buf)) then
439 do i = 0, this%pe_size-1
440 call this%send_buf(i)%free()
441 end do
442 deallocate(this%send_buf)
443 end if
444 if (allocated(this%recv_buf)) then
445 do i = 0, this%pe_size-1
446 call this%recv_buf(i)%free()
447 end do
448 deallocate(this%recv_buf)
449 end if
450 if (allocated(this%pe_map)) then
451 do i = 0, this%n_boxes_per_pe-1
452 call this%pe_map(i)%free()
453 end do
454 deallocate(this%pe_map)
455 end if
456
457 end subroutine cartesian_pe_finder_free
458
459 subroutine cartesian_pe_finder_find_batch(this, points, n_points, points_at_pe, n_points_pe)
460 class(cartesian_pe_finder_t), intent(inout) :: this
461 integer, intent(in) :: n_points
462 real(kind=rp), intent(in) :: points(3,n_points)
463 type(stack_i4_t), intent(inout) :: points_at_pe(0:(this%pe_size-1))
464 integer, intent(inout) :: n_points_pe(0:(this%pe_size-1))
465 integer :: i, j, k
466 integer(kind=i8) :: glob_id(3)
467 integer(kind=i8) :: pe_id
468 integer(kind=i8) :: lin_glob_id
469 integer(kind=i8) :: loc_id
470 type(stack_i8_t), allocatable :: work_pe_ids(:), work_pt_ids(:)
471 type(stack_i8_t), allocatable :: temp_pe_ids(:), temp_pt_ids(:)
472 integer, allocatable :: n_work_ids(:), n_temp_ids(:)
473 integer(i8), pointer :: ids(:)
474 integer(i8), pointer :: pt_id(:)
475 integer, pointer :: pe_cands(:)
476 integer(i8), pointer :: pe_cands8(:)
477 integer(i8), pointer :: pt_ids(:)
478 integer :: ierr
479
480 allocate(work_pe_ids(0:this%pe_size-1))
481 allocate(work_pt_ids(0:this%pe_size-1))
482 allocate(temp_pe_ids(0:this%pe_size-1))
483 allocate(temp_pt_ids(0:this%pe_size-1))
484 allocate(n_temp_ids(0:this%pe_size-1))
485 allocate(n_work_ids(0:this%pe_size-1))
486
487 do i = 0, this%pe_size-1
488 n_work_ids(i) = 0
489 n_temp_ids(i) = 0
490 call work_pt_ids(i)%init()
491 call work_pe_ids(i)%init()
492 call temp_pe_ids(i)%init()
493 call temp_pt_ids(i)%init()
494 end do
495
496 do i = 0, this%pe_size-1
497 call points_at_pe(i)%clear()
498 n_points_pe(i) = 0
499 end do
500
501 ! Compute global ids for the points
502 ! and the pe id for each point
503 n_work_ids = 0
504 do i = 1, n_points
505 glob_id = get_global_idx(this, points(1,i), points(2,i), points(3,i))
506 lin_glob_id = glob_id(1) + &
507 glob_id(2)*this%n_boxes + &
508 glob_id(3)*this%n_boxes**2
509 pe_id = get_pe_idx(this, lin_glob_id)
510 if (glob_id(1) .ge. 0 .and. glob_id(1) .lt. this%n_boxes .and. &
511 glob_id(2) .ge. 0 .and. glob_id(2) .lt. this%n_boxes .and. &
512 glob_id(3) .ge. 0 .and. glob_id(3) .lt. this%n_boxes) then
513 call work_pe_ids(pe_id)%push(lin_glob_id)
514 call work_pt_ids(pe_id)%push(int(i,i8))
515 n_work_ids(pe_id) = n_work_ids(pe_id) + 1
516 else
517 print *, 'Point found outside domain:', points(1,i), &
518 points(2,i), points(3,i), &
519 'Computed global id:', lin_glob_id, &
520 'Global box id (x,y,z):', glob_id(1), glob_id(2), glob_id(3)
521 end if
522 end do
523
524
525 ! Send the global ids to the correct pe
526 ! and get the global ids from the other pes with points I own
527 ! Also send point ids to the other pes
528 call send_recv_data(this, temp_pe_ids, n_temp_ids, work_pe_ids, n_work_ids)
529 call send_recv_data(this, temp_pt_ids, n_temp_ids, work_pt_ids, n_work_ids)
530 call mpi_barrier(this%comm, ierr)
531 ! Get the local ids for the points I own
532 ! and the pe candidates for the points I own
533 n_work_ids = 0
534 do i = 0, this%pe_size-1
535 call work_pe_ids(i)%clear()
536 call work_pt_ids(i)%clear()
537 end do
538
539 do i =0 , this%pe_size-1
540
541 if (n_temp_ids(i) .gt. 0) then
542 ids => temp_pe_ids(i)%array()
543 pt_ids => temp_pt_ids(i)%array()
544 do j = 1, n_temp_ids(i)
545 loc_id = ids(j) - int(this%pe_rank,i8) * &
546 int(this%n_boxes_per_pe,i8)
547 pe_cands => this%pe_map(int(loc_id))%array()
548 do k = 1, this%pe_map(int(loc_id))%size()
549 call work_pe_ids(i)%push(int(pe_cands(k),i8))
550 call work_pt_ids(i)%push(pt_ids(j))
551 n_work_ids(i) = n_work_ids(i) + 1
552 end do
553 if (this%pe_map(int(loc_id))%size() .lt. 1) then
554 print *, 'No PE candidates found for point:', &
555 points(1,pt_ids(j)), &
556 points(2,pt_ids(j)), points(3,pt_ids(j))
557 end if
558 end do
559 end if
560 end do
561 ! pe candidates found for the points I am responsible for
562 ! Now i need to send the data to the other pes
563 ! and get the candidates from the other pes for my own points
564 call send_recv_data(this, temp_pe_ids, n_temp_ids, work_pe_ids, n_work_ids)
565 call send_recv_data(this, temp_pt_ids, n_temp_ids, work_pt_ids, n_work_ids)
566 ! Gotten candidates for my points
567 ! Now I organize the candidates for the points I own
568 n_points_pe = 0
569 do i = 0, this%pe_size-1
570 if (n_temp_ids(i) .gt. 0) then
571 pe_cands8 => temp_pe_ids(i)%array()
572 pt_ids => temp_pt_ids(i)%array()
573 do j = 1, n_temp_ids(i)
574 pe_id = pe_cands8(j)
575 call points_at_pe(pe_id)%push(int(pt_ids(j)))
576 n_points_pe(pe_id) = n_points_pe(pe_id) + 1
577 end do
578 end if
579 end do
580
581 if (allocated(work_pe_ids)) then
582 do i = 0, this%pe_size-1
583 call work_pe_ids(i)%free()
584 end do
585 deallocate(work_pe_ids)
586 end if
587 if (allocated(temp_pe_ids)) then
588 do i = 0, this%pe_size-1
589 call temp_pe_ids(i)%free()
590 end do
591 deallocate(temp_pe_ids)
592 end if
593 if (allocated(temp_pt_ids)) then
594 do i = 0, this%pe_size-1
595 call temp_pt_ids(i)%free()
596 end do
597 deallocate(temp_pt_ids)
598 end if
599 if (allocated(work_pt_ids)) then
600 do i = 0, this%pe_size-1
601 call work_pt_ids(i)%free()
602 end do
603 deallocate(work_pt_ids)
604 end if
605
606 if (allocated(n_work_ids)) then
607 deallocate(n_work_ids)
608 end if
609
610 if (allocated(n_temp_ids)) then
611 deallocate(n_temp_ids)
612 end if
613 end subroutine cartesian_pe_finder_find_batch
614
615
616 subroutine send_recv_data(this, recv_values, n_recv_values, &
617 send_values, n_send_values)
618 class(cartesian_pe_finder_t), intent(inout) :: this
619 type(stack_i8_t), intent(inout) :: recv_values(0:this%pe_size-1)
620 type(stack_i8_t), intent(inout) :: send_values(0:this%pe_size-1)
621 integer, intent(inout) :: n_recv_values(0:this%pe_size-1)
622 integer, intent(inout) :: n_send_values(0:this%pe_size-1)
623 integer :: i, j, ierr
624 integer(i8) :: idx
625 integer(i8) , pointer :: sp(:)
626
627 call mpi_alltoall(n_send_values, 1, mpi_integer, &
628 n_recv_values, 1, mpi_integer, this%comm, ierr)
629
630 do i = 0, this%pe_size-1
631 if (n_recv_values(i) .gt. 0) then
632 if (this%recv_buf(i)%size .lt. n_recv_values(i)) then
633 if (allocated(this%recv_buf(i)%data)) deallocate(this%recv_buf(i)%data)
634 allocate(this%recv_buf(i)%data(n_recv_values(i)))
635 this%recv_buf(i)%size = n_recv_values(i)
636 end if
637 call mpi_irecv(this%recv_buf(i)%data, n_recv_values(i), mpi_integer8, &
638 i, 0, this%comm, this%recv_buf(i)%request, ierr)
639 end if
640 end do
641
642 do i = 0, this%pe_size-1
643 if (n_send_values(i) .gt. 0) then
644 if (this%send_buf(i)%size .lt. n_send_values(i)) then
645 if (allocated(this%send_buf(i)%data)) deallocate(this%send_buf(i)%data)
646 allocate(this%send_buf(i)%data(n_send_values(i)))
647 this%send_buf(i)%size = n_send_values(i)
648 end if
649 ! Copy the data to the send buffer
650 sp => send_values(i)%array()
651 do j = 1, n_send_values(i)
652 this%send_buf(i)%data(j) = sp(j)
653 end do
654 call mpi_isend(this%send_buf(i)%data, n_send_values(i), mpi_integer8, &
655 i, 0, this%comm, this%send_buf(i)%request, ierr)
656 end if
657 end do
658
659 do i = 0, this%pe_size-1
660 call recv_values(i)%clear()
661 end do
662
663 do i = 0, this%pe_size-1
664 if (n_recv_values(i) .gt. 0) then
665 call mpi_wait(this%recv_buf(i)%request, this%recv_buf(i)%status, ierr)
666 do j = 1, n_recv_values(i)
667 idx = this%recv_buf(i)%data(j)
668 call recv_values(i)%push(idx)
669 end do
670 end if
671 end do
672 do i = 0, this%pe_size-1
673 if (n_send_values(i) .gt. 0) then
674 call mpi_wait(this%send_buf(i)%request, this%send_buf(i)%status, ierr)
675 end if
676 end do
677
678
679 end subroutine send_recv_data
680end module cartesian_pe_finder
double real
Implements cartesian_pe_finder given a dofmap.
subroutine cartesian_pe_finder_find(this, my_point, pe_candidates)
integer, parameter, public glob_map_size
Minimum number of total boxes in the aabb tree.
subroutine cartesian_pe_finder_init(this, x, y, z, nelv, xh, comm, n_boxes, padding)
Initialize the global interpolation object on a set of coordinates.
subroutine i8_mpi_free(this)
Destructor for i8_mpi_t.
subroutine send_recv_data(this, recv_values, n_recv_values, send_values, n_send_values)
subroutine cartesian_pe_finder_find_batch(this, points, n_points, points_at_pe, n_points_pe)
integer function get_pe_idx(this, global_idx)
subroutine cartesian_pe_finder_free(this)
Destructor.
integer(kind=i8) function, dimension(3) get_global_idx(this, x, y, z)
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
integer, public pe_size
MPI size of communicator.
Definition comm.F90:59
integer, public pe_rank
MPI rank.
Definition comm.F90:56
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
Implements a hash table ADT.
Definition htable.f90:36
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements pe_finder given a dofmap.
Definition pe_finder.f90:35
Implements a point.
Definition point.f90:35
Defines a function space.
Definition space.f90:34
Implements a dynamic stack ADT.
Definition stack.f90:35
Implements a n-tuple.
Definition tuple.f90:34
Utilities.
Definition utils.f90:35
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Implements global interpolation for arbitrary points in the domain.
Integer*8 based hash table.
Definition htable.f90:92
Implements global interpolation for arbitrary points in the domain.
Definition pe_finder.f90:45
A point in with coordinates .
Definition point.f90:43
The function space for the SEM solution fields.
Definition space.f90:63
Integer based stack.
Definition stack.f90:63
Integer*8 based stack.
Definition stack.f90:70
Integer based 2-tuple.
Definition tuple.f90:51
#define max(a, b)
Definition tensor.cu:40