Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cartesian_pe_finder.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
33
37 use num_types, only: rp, dp, xp, i8
39 use space, only: space_t
40 use pe_finder, only: pe_finder_t
41 use stack, only: stack_i4_t, stack_i8_t
43 use tuple, only: tuple_i4_t
44 use htable, only: htable_i8_t
45 use point, only: point_t
47 use mpi_f08, only: mpi_max, mpi_allreduce, mpi_comm, mpi_comm_rank, &
48 mpi_comm_size, mpi_wtime, mpi_integer, mpi_integer8, &
49 mpi_min, mpi_sum, mpi_irecv, mpi_isend, mpi_wait, &
50 mpi_exscan, mpi_request, mpi_status, &
51 mpi_alltoall, mpi_in_place, mpi_barrier
52 implicit none
53 private
54
56 integer, public, parameter :: glob_map_size = 4096
57
58 type, private :: i8_mpi_t
60 type(mpi_status) :: status
62 type(mpi_request) :: request
65 logical :: flag
67 real(kind=dp), allocatable :: data(:)
68 integer :: size = 0
69 end type i8_mpi_t
70
71
72
74 type, public, extends(pe_finder_t) :: cartesian_pe_finder_t
76 real(kind=dp) :: padding
78 integer(kind=i8) :: glob_n_boxes
79 integer(kind=i8) :: n_boxes
80 !Number of local boxes in x direction
81 integer :: n_boxes_per_pe
82 integer :: nelv
83 integer :: offset_el
84 real(kind=rp) :: max_x_global, max_y_global, max_z_global
85 real(kind=rp) :: min_x_global, min_y_global, min_z_global
86
87 ! Resolution of the boxes
88 real(kind=xp) :: res_x, res_y, res_z
89
90 type(i8_mpi_t), allocatable :: recv_buf(:)
91 type(i8_mpi_t), allocatable :: send_buf(:)
92 type(stack_i4_t), allocatable :: pe_map(:)
93
94 contains
95 procedure, pass(this) :: init => cartesian_pe_finder_init
96 procedure, pass(this) :: free => cartesian_pe_finder_free
97 procedure, pass(this) :: find => cartesian_pe_finder_find
98 procedure, pass(this) :: find_batch => cartesian_pe_finder_find_batch
99
100 end type cartesian_pe_finder_t
101
102contains
103
113 subroutine cartesian_pe_finder_init(this, x, y, z, nelv, Xh, comm, n_boxes, padding)
114 class(cartesian_pe_finder_t), intent(inout) :: this
115 real(kind=rp), intent(in), target :: x(:)
116 real(kind=rp), intent(in), target :: y(:)
117 real(kind=rp), intent(in), target :: z(:)
118 integer, intent(in) :: nelv
119 type(mpi_comm), intent(in), optional :: comm
120 type(space_t), intent(in), target :: Xh
121 integer, intent(in) :: n_boxes
122 real(kind=dp), intent(in) :: padding
123 integer :: ierr
124 integer :: i, j, k, e, i2, j2, k2
125 integer :: pe_id, lin_idx
126 integer :: lxyz, lx2
127 integer(kind=i8) :: glob_id, loc_id
128 integer(kind=i8) :: min_id(3), max_id(3)
129 real(kind=rp) :: center_x, center_y, center_z
130 type(stack_i8_t), allocatable :: glob_ids(:), recv_ids(:)
131 integer(i8), pointer :: glb_ids(:)
132 integer(kind=i8) :: htable_data, temp ! We just use it as a set
133 integer, allocatable :: n_recv(:), n_send(:)
134 type(htable_i8_t) :: marked_box
135 real(kind=rp) :: min_bb_x, max_bb_x
136 real(kind=rp) :: min_bb_y, max_bb_y
137 real(kind=rp) :: min_bb_z, max_bb_z
138 real(kind=rp) :: el_x(xh%lxyz), el_y(xh%lxyz), el_z(xh%lxyz)
139
140 call this%free()
141 this%comm = comm
142 this%padding = padding
143
144 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
145 call mpi_comm_size(this%comm, this%pe_size, ierr)
146
147 this%glob_n_boxes = int(n_boxes,i8)**3
148 this%n_boxes = n_boxes
149 this%nelv = nelv
150 this%n_boxes_per_pe = (this%glob_n_boxes+int(this%pe_size-1,i8))/int(this%pe_size,i8)
151
152 allocate(this%send_buf(0:this%pe_size-1))
153 allocate(this%recv_buf(0:this%pe_size-1))
154 allocate(this%pe_map(0:this%n_boxes_per_pe-1))
155 do i = 0, this%n_boxes_per_pe-1
156 call this%pe_map(i)%init()
157 end do
158 call mpi_exscan(this%nelv, this%offset_el, 1, &
159 mpi_integer, mpi_sum, this%comm, ierr)
160 if (this%nelv .gt. 0) then
161 this%max_x_global = maxval(x(1:nelv*xh%lxyz))
162 this%max_y_global = maxval(y(1:nelv*xh%lxyz))
163 this%max_z_global = maxval(z(1:nelv*xh%lxyz))
164 this%min_x_global = minval(x(1:nelv*xh%lxyz))
165 this%min_y_global = minval(y(1:nelv*xh%lxyz))
166 this%min_z_global = minval(z(1:nelv*xh%lxyz))
167 else
168 this%max_x_global = -1e20
169 this%max_y_global = -1e20
170 this%max_z_global = -1e20
171 this%min_x_global = 1e20
172 this%min_y_global = 1e20
173 this%min_z_global = 1e20
174 end if
175
176
177 call mpi_allreduce(mpi_in_place, this%max_x_global, 1, mpi_real_precision, &
178 mpi_max, this%comm, ierr)
179 call mpi_allreduce(mpi_in_place, this%max_y_global, 1, mpi_real_precision, &
180 mpi_max, this%comm, ierr)
181 call mpi_allreduce(mpi_in_place, this%max_z_global, 1, mpi_real_precision, &
182 mpi_max, this%comm, ierr)
183 call mpi_allreduce(mpi_in_place, this%min_x_global, 1, mpi_real_precision, &
184 mpi_min, this%comm, ierr)
185 call mpi_allreduce(mpi_in_place, this%min_y_global, 1, mpi_real_precision, &
186 mpi_min, this%comm, ierr)
187 call mpi_allreduce(mpi_in_place, this%min_z_global, 1, mpi_real_precision, &
188 mpi_min, this%comm, ierr)
189
190 center_x = (this%max_x_global + this%min_x_global) / 2.0_dp
191 center_y = (this%max_y_global + this%min_y_global) / 2.0_dp
192 center_z = (this%max_z_global + this%min_z_global) / 2.0_dp
193
194 this%max_x_global = this%max_x_global - center_x
195 this%max_y_global = this%max_y_global - center_y
196 this%max_z_global = this%max_z_global - center_z
197 this%min_x_global = this%min_x_global - center_x
198 this%min_y_global = this%min_y_global - center_y
199 this%min_z_global = this%min_z_global - center_z
200 this%max_x_global = this%max_x_global*(1.0_xp+this%padding) + center_x
201 this%max_y_global = this%max_y_global*(1.0_xp+this%padding) + center_y
202 this%max_z_global = this%max_z_global*(1.0_xp+this%padding) + center_z
203 this%min_x_global = this%min_x_global*(1.0_xp+this%padding) + center_x
204 this%min_y_global = this%min_y_global*(1.0_xp+this%padding) + center_y
205 this%min_z_global = this%min_z_global*(1.0_xp+this%padding) + center_z
206
207
208 this%res_x = (this%max_x_global - this%min_x_global) / real(this%n_boxes, xp)
209 this%res_y = (this%max_y_global - this%min_y_global) / real(this%n_boxes, xp)
210 this%res_z = (this%max_z_global - this%min_z_global) / real(this%n_boxes, xp)
211 if (allocated(recv_ids)) then
212 do i = 0, this%pe_size-1
213 call recv_ids(i)%free()
214 end do
215 deallocate(recv_ids)
216 end if
217 if (allocated(glob_ids)) then
218 do i = 0, this%pe_size-1
219 call glob_ids(i)%free()
220 end do
221 deallocate(glob_ids)
222 end if
223 if (allocated(n_recv)) deallocate(n_recv)
224 if (allocated(n_send)) deallocate(n_send)
225 allocate(n_recv(0:this%pe_size-1))
226 allocate(n_send(0:this%pe_size-1))
227 allocate(recv_ids(0:this%pe_size-1))
228 allocate(glob_ids(0:this%pe_size-1))
229 do i = 0, this%pe_size-1
230 call recv_ids(i)%init()
231 call glob_ids(i)%init()
232 if (allocated(this%recv_buf(i)%data)) then
233 deallocate(this%recv_buf(i)%data)
234 end if
235 if (allocated(this%send_buf(i)%data)) then
236 deallocate(this%send_buf(i)%data)
237 end if
238 this%send_buf(i)%size = 0
239 this%recv_buf(i)%size = 0
240 end do
241 n_recv = 0
242 n_send = 0
243
244 lxyz = xh%lxyz
245 call marked_box%init(this%nelv*lxyz,htable_data)
246
247 do e = 1, this%nelv
248 !move it to do scaling
249 lx2 = xh%lx/2
250 if (mod(xh%lx,2) .eq. 0) then
251 lin_idx = linear_index(lx2, lx2, lx2, e, xh%lx, xh%lx, xh%lx)
252 center_x = x(lin_idx)
253 center_y = y(lin_idx)
254 center_z = z(lin_idx)
255 else
256 center_x = 0d0
257 center_y = 0d0
258 center_z = 0d0
259 do i = lx2, lx2+1
260 do j = lx2, lx2 + 1
261 do k = lx2, lx2 + 1
262 lin_idx = linear_index(i, j, k, e, xh%lx, xh%lx, xh%lx)
263 center_x = center_x + x(lin_idx)
264 center_y = center_y + y(lin_idx)
265 center_z = center_z + z(lin_idx)
266 end do
267 end do
268 end do
269 center_x = center_x / 8.0_xp
270 center_y = center_y / 8.0_xp
271 center_z = center_z / 8.0_xp
272 end if
273
274 !Maybe this is a stupid way to do it
275
276 el_x = x((e-1)*lxyz+1:e*lxyz) - center_x
277 el_y = y((e-1)*lxyz+1:e*lxyz) - center_y
278 el_z = z((e-1)*lxyz+1:e*lxyz) - center_z
279 el_x = el_x * (1.0_rp+padding) + center_x
280 el_y = el_y * (1.0_rp+padding) + center_y
281 el_z = el_z * (1.0_rp+padding) + center_z
282 !Padded and ready
283
284 !Now we go the bounding boxes of all subboxes in the element
285 do i = 1, xh%lx - 1
286 do j = 1, xh%ly - 1
287 do k = 1, xh%lz - 1
288 lin_idx = linear_index(i, j, k, 1, xh%lx, xh%lx, xh%lx)
289 max_bb_x = el_x(lin_idx)
290 min_bb_x = el_x(lin_idx)
291 max_bb_y = el_y(lin_idx)
292 min_bb_y = el_y(lin_idx)
293 max_bb_z = el_z(lin_idx)
294 min_bb_z = el_z(lin_idx)
295 do i2 = 0, 1
296 do j2 = 0, 1
297 do k2 = 0, 1
298 lin_idx = linear_index(i+i2,j+j2,k+k2, 1, xh%lx, xh%lx, xh%lx)
299 max_bb_x = max(max_bb_x, el_x(lin_idx))
300 min_bb_x = min(min_bb_x, el_x(lin_idx))
301 max_bb_y = max(max_bb_y, el_y(lin_idx))
302 min_bb_y = min(min_bb_y, el_y(lin_idx))
303 max_bb_z = max(max_bb_z, el_z(lin_idx))
304 min_bb_z = min(min_bb_z, el_z(lin_idx))
305 end do
306 end do
307 end do
308
309
310 min_id = get_global_idx(this, min_bb_x, min_bb_y, min_bb_z)
311 max_id = get_global_idx(this, max_bb_x, max_bb_y, max_bb_z)
312 do i2 = min_id(1), max_id(1)
313 do j2 = min_id(2), max_id(2)
314 do k2 = min_id(3), max_id(3)
315 if (i2 .ge. 0 .and. i2 .lt. this%n_boxes .and. &
316 j2 .ge. 0 .and. j2 .lt. this%n_boxes .and. &
317 k2 .ge. 0 .and. k2 .lt. this%n_boxes) then
318 glob_id = i2 + j2*this%n_boxes + k2*this%n_boxes**2
319 pe_id = get_pe_idx(this, glob_id)
320 if (marked_box%get(glob_id,htable_data) .ne. 0) then
321 call glob_ids(pe_id)%push(glob_id)
322 n_send(pe_id) = n_send(pe_id) + 1
323 call marked_box%set(glob_id, htable_data)
324 end if
325 end if
326 end do
327 end do
328 end do
329 end do
330 end do
331 end do
332 end do
333 !temp = 0
334 !do i = 0, this%pe_size-1
335 ! temp = temp + glob_ids(i)%size()
336 !end do
337 !print *, 'size of globids', temp, this%pe_rank
338 call send_recv_data(this, recv_ids, n_recv, glob_ids, n_send)
339
340 !temp = 0
341 !do i = 0, this%pe_size-1
342 ! temp = temp + recv_ids(i)%size()
343 !end do
344 !print *, 'size of globids', temp, this%pe_rank
345 !Buffers are likely way too large for other calls
346 do i = 0, this%pe_size-1
347 if (allocated(this%recv_buf(i)%data)) then
348 deallocate(this%recv_buf(i)%data)
349 end if
350 if (allocated(this%send_buf(i)%data)) then
351 deallocate(this%send_buf(i)%data)
352 end if
353 this%send_buf(i)%size = 0
354 this%recv_buf(i)%size = 0
355 end do
356
357
358 call mpi_barrier(this%comm, ierr)
359 do i = 0, this%pe_size-1
360 if (n_recv(i) .gt. 0) then
361 glb_ids => recv_ids(i)%array()
362 do j = 1, n_recv(i)
363 glob_id = glb_ids(j)
364 loc_id = glob_id - int(this%pe_rank,i8) * &
365 int(this%n_boxes_per_pe,i8)
366 if (loc_id .ge. this%n_boxes_per_pe .or. loc_id < 0) then
367 call neko_warning('loc_id out of bounds')
368 end if
369 call this%pe_map(int(loc_id))%push(i)
370 end do
371 end if
372 end do
373
374 if (allocated(recv_ids)) then
375 do i = 0, this%pe_size-1
376 call recv_ids(i)%free()
377 end do
378 deallocate(recv_ids)
379 end if
380 if (allocated(glob_ids)) then
381 do i = 0, this%pe_size-1
382 call glob_ids(i)%free()
383 end do
384 deallocate(glob_ids)
385 end if
386 if (allocated(n_recv)) deallocate(n_recv)
387 if (allocated(n_send)) deallocate(n_send)
388 end subroutine cartesian_pe_finder_init
389
390 subroutine cartesian_pe_finder_find(this, my_point, pe_candidates)
391 class(cartesian_pe_finder_t), intent(inout) :: this
392 type(point_t), intent(in) :: my_point
393 type(stack_i4_t), intent(inout) :: pe_candidates
394
395 call neko_error('cartesian_pe_finder_find not implemented')
396 end subroutine cartesian_pe_finder_find
397
398 function get_global_idx(this, x, y, z) result(global_box_id)
399 class(cartesian_pe_finder_t), intent(in) :: this
400 real(kind=rp), intent(in) :: x, y, z
401 integer(kind=i8) :: global_box_id(3)
402 integer(kind=i8) :: pe_size, n_boxes
403
404 pe_size = this%pe_size
405 n_boxes = this%n_boxes
406
407
408 global_box_id(1) = int((x - this%min_x_global) / this%res_x, i8)
409 global_box_id(2) = int((y - this%min_y_global) / this%res_y, i8)
410 global_box_id(3) = int((z - this%min_z_global) / this%res_z, i8)
411 end function get_global_idx
412
413 function get_pe_idx(this, global_idx) result(pe_id)
414 class(cartesian_pe_finder_t), intent(in) :: this
415 integer(kind=i8), intent(in) :: global_idx
416 integer :: pe_id
417 !Get x id and then divide by the number of x boxes per rank to get the correct pe id
418 pe_id = global_idx / int(this%n_boxes_per_pe, i8)
419 end function get_pe_idx
420
421
424 class(cartesian_pe_finder_t), intent(inout) :: this
425 integer :: i
426
427 if (allocated(this%send_buf)) then
428 do i = 0, this%pe_size-1
429 if (allocated(this%send_buf(i)%data)) then
430 deallocate(this%send_buf(i)%data)
431 end if
432 end do
433 deallocate(this%send_buf)
434 end if
435 if (allocated(this%recv_buf)) then
436 do i = 0, this%pe_size-1
437 if (allocated(this%recv_buf(i)%data)) then
438 deallocate(this%recv_buf(i)%data)
439 end if
440 end do
441 deallocate(this%recv_buf)
442 end if
443 if (allocated(this%pe_map)) then
444 do i = 0, this%n_boxes_per_pe-1
445 call this%pe_map(i)%free()
446 end do
447 deallocate(this%pe_map)
448 end if
449
450 end subroutine cartesian_pe_finder_free
451
452 subroutine cartesian_pe_finder_find_batch(this, points, n_points, points_at_pe, n_points_pe)
453 class(cartesian_pe_finder_t), intent(inout) :: this
454 integer, intent(in) :: n_points
455 real(kind=rp), intent(in) :: points(3,n_points)
456 type(stack_i4_t), intent(inout) :: points_at_pe(0:(this%pe_size-1))
457 integer, intent(inout) :: n_points_pe(0:(this%pe_size-1))
458 integer :: i, j, k
459 integer(kind=i8) :: glob_id(3)
460 integer(kind=i8) :: pe_id
461 integer(kind=i8) :: lin_glob_id
462 integer(kind=i8) :: loc_id
463 type(stack_i8_t), allocatable :: work_pe_ids(:), work_pt_ids(:)
464 type(stack_i8_t), allocatable :: temp_pe_ids(:), temp_pt_ids(:)
465 integer, allocatable :: n_work_ids(:), n_temp_ids(:)
466 integer(i8), pointer :: ids(:)
467 integer(i8), pointer :: pt_id(:)
468 integer, pointer :: pe_cands(:)
469 integer(i8), pointer :: pe_cands8(:)
470 integer(i8), pointer :: pt_ids(:)
471 integer :: ierr
472
473 allocate(work_pe_ids(0:this%pe_size-1))
474 allocate(work_pt_ids(0:this%pe_size-1))
475 allocate(temp_pe_ids(0:this%pe_size-1))
476 allocate(temp_pt_ids(0:this%pe_size-1))
477 allocate(n_temp_ids(0:this%pe_size-1))
478 allocate(n_work_ids(0:this%pe_size-1))
479
480 do i = 0, this%pe_size-1
481 n_work_ids(i) = 0
482 n_temp_ids(i) = 0
483 call work_pt_ids(i)%init()
484 call work_pe_ids(i)%init()
485 call temp_pe_ids(i)%init()
486 call temp_pt_ids(i)%init()
487 end do
488
489 do i = 0, this%pe_size-1
490 call points_at_pe(i)%clear()
491 n_points_pe(i) = 0
492 end do
493
494 ! Compute global ids for the points
495 ! and the pe id for each point
496 n_work_ids = 0
497 do i = 1, n_points
498 glob_id = get_global_idx(this, points(1,i), points(2,i), points(3,i))
499 lin_glob_id = glob_id(1) + &
500 glob_id(2)*this%n_boxes + &
501 glob_id(3)*this%n_boxes**2
502 pe_id = get_pe_idx(this, lin_glob_id)
503 if (glob_id(1) .ge. 0 .and. glob_id(1) .lt. this%n_boxes .and. &
504 glob_id(2) .ge. 0 .and. glob_id(2) .lt. this%n_boxes .and. &
505 glob_id(3) .ge. 0 .and. glob_id(3) .lt. this%n_boxes) then
506 call work_pe_ids(pe_id)%push(lin_glob_id)
507 call work_pt_ids(pe_id)%push(int(i,i8))
508 n_work_ids(pe_id) = n_work_ids(pe_id) + 1
509 else
510 print *, 'Point found outside domain:', points(1,i), &
511 points(2,i), points(3,i), &
512 'Computed global id:', lin_glob_id, &
513 'Global box id (x,y,z):', glob_id(1), glob_id(2), glob_id(3)
514 end if
515 end do
516
517
518 ! Send the global ids to the correct pe
519 ! and get the global ids from the other pes with points I own
520 ! Also send point ids to the other pes
521 call send_recv_data(this, temp_pe_ids, n_temp_ids, work_pe_ids, n_work_ids)
522 call send_recv_data(this, temp_pt_ids, n_temp_ids, work_pt_ids, n_work_ids)
523 call mpi_barrier(this%comm, ierr)
524 ! Get the local ids for the points I own
525 ! and the pe candidates for the points I own
526 n_work_ids = 0
527 do i = 0, this%pe_size-1
528 call work_pe_ids(i)%clear()
529 call work_pt_ids(i)%clear()
530 end do
531
532 do i =0 , this%pe_size-1
533
534 if (n_temp_ids(i) .gt. 0) then
535 ids => temp_pe_ids(i)%array()
536 pt_ids => temp_pt_ids(i)%array()
537 do j = 1, n_temp_ids(i)
538 loc_id = ids(j) - int(this%pe_rank,i8) * &
539 int(this%n_boxes_per_pe,i8)
540 pe_cands => this%pe_map(int(loc_id))%array()
541 do k = 1, this%pe_map(int(loc_id))%size()
542 call work_pe_ids(i)%push(int(pe_cands(k),i8))
543 call work_pt_ids(i)%push(pt_ids(j))
544 n_work_ids(i) = n_work_ids(i) + 1
545 end do
546 if (this%pe_map(int(loc_id))%size() .lt. 1) then
547 print *, 'No PE candidates found for point:', &
548 points(1,pt_ids(j)), &
549 points(2,pt_ids(j)), points(3,pt_ids(j))
550 end if
551 end do
552 end if
553 end do
554 ! pe candidates found for the points I am responsible for
555 ! Now i need to send the data to the other pes
556 ! and get the candidates from the other pes for my own points
557 call send_recv_data(this, temp_pe_ids, n_temp_ids, work_pe_ids, n_work_ids)
558 call send_recv_data(this, temp_pt_ids, n_temp_ids, work_pt_ids, n_work_ids)
559 ! Gotten candidates for my points
560 ! Now I organize the candidates for the points I own
561 n_points_pe = 0
562 do i = 0, this%pe_size-1
563 if (n_temp_ids(i) .gt. 0) then
564 pe_cands8 => temp_pe_ids(i)%array()
565 pt_ids => temp_pt_ids(i)%array()
566 do j = 1, n_temp_ids(i)
567 pe_id = pe_cands8(j)
568 call points_at_pe(pe_id)%push(int(pt_ids(j)))
569 n_points_pe(pe_id) = n_points_pe(pe_id) + 1
570 end do
571 end if
572 end do
573
574 if (allocated(work_pe_ids)) then
575 do i = 0, this%pe_size-1
576 call work_pe_ids(i)%free()
577 end do
578 deallocate(work_pe_ids)
579 end if
580 if (allocated(temp_pe_ids)) then
581 do i = 0, this%pe_size-1
582 call temp_pe_ids(i)%free()
583 end do
584 deallocate(temp_pe_ids)
585 end if
586 if (allocated(temp_pt_ids)) then
587 do i = 0, this%pe_size-1
588 call temp_pt_ids(i)%free()
589 end do
590 deallocate(temp_pt_ids)
591 end if
592 if (allocated(work_pt_ids)) then
593 do i = 0, this%pe_size-1
594 call work_pt_ids(i)%free()
595 end do
596 deallocate(work_pt_ids)
597 end if
598
599 if (allocated(n_work_ids)) then
600 deallocate(n_work_ids)
601 end if
602
603 if (allocated(n_temp_ids)) then
604 deallocate(n_temp_ids)
605 end if
606 end subroutine cartesian_pe_finder_find_batch
607
608
609 subroutine send_recv_data(this, recv_values, n_recv_values, &
610 send_values, n_send_values)
611 class(cartesian_pe_finder_t), intent(inout) :: this
612 type(stack_i8_t), intent(inout) :: recv_values(0:this%pe_size-1)
613 type(stack_i8_t), intent(inout) :: send_values(0:this%pe_size-1)
614 integer, intent(inout) :: n_recv_values(0:this%pe_size-1)
615 integer, intent(inout) :: n_send_values(0:this%pe_size-1)
616 integer :: i, j, ierr
617 integer(i8) :: idx
618 integer(i8) , pointer :: sp(:)
619
620 call mpi_alltoall(n_send_values, 1, mpi_integer, &
621 n_recv_values, 1, mpi_integer, this%comm, ierr)
622
623 do i = 0, this%pe_size-1
624 if (n_recv_values(i) .gt. 0) then
625 if (this%recv_buf(i)%size .lt. n_recv_values(i)) then
626 if (allocated(this%recv_buf(i)%data)) deallocate(this%recv_buf(i)%data)
627 allocate(this%recv_buf(i)%data(n_recv_values(i)))
628 this%recv_buf(i)%size = n_recv_values(i)
629 end if
630 call mpi_irecv(this%recv_buf(i)%data, n_recv_values(i), mpi_integer8, &
631 i, 0, this%comm, this%recv_buf(i)%request, ierr)
632 end if
633 end do
634
635 do i = 0, this%pe_size-1
636 if (n_send_values(i) .gt. 0) then
637 if (this%send_buf(i)%size .lt. n_send_values(i)) then
638 if (allocated(this%send_buf(i)%data)) deallocate(this%send_buf(i)%data)
639 allocate(this%send_buf(i)%data(n_send_values(i)))
640 this%send_buf(i)%size = n_send_values(i)
641 end if
642 ! Copy the data to the send buffer
643 sp => send_values(i)%array()
644 do j = 1, n_send_values(i)
645 this%send_buf(i)%data(j) = sp(j)
646 end do
647 call mpi_isend(this%send_buf(i)%data, n_send_values(i), mpi_integer8, &
648 i, 0, this%comm, this%send_buf(i)%request, ierr)
649 end if
650 end do
651
652 do i = 0, this%pe_size-1
653 call recv_values(i)%clear()
654 end do
655
656 do i = 0, this%pe_size-1
657 if (n_recv_values(i) .gt. 0) then
658 call mpi_wait(this%recv_buf(i)%request, this%recv_buf(i)%status, ierr)
659 do j = 1, n_recv_values(i)
660 idx = this%recv_buf(i)%data(j)
661 call recv_values(i)%push(idx)
662 end do
663 end if
664 end do
665 do i = 0, this%pe_size-1
666 if (n_send_values(i) .gt. 0) then
667 call mpi_wait(this%send_buf(i)%request, this%send_buf(i)%status, ierr)
668 end if
669 end do
670
671
672 end subroutine send_recv_data
673end module cartesian_pe_finder
double real
Implements cartesian_pe_finder given a dofmap.
subroutine cartesian_pe_finder_find(this, my_point, pe_candidates)
integer, parameter, public glob_map_size
Minimum number of total boxes in the aabb tree.
subroutine cartesian_pe_finder_init(this, x, y, z, nelv, xh, comm, n_boxes, padding)
Initialize the global interpolation object on a set of coordinates.
subroutine send_recv_data(this, recv_values, n_recv_values, send_values, n_send_values)
subroutine cartesian_pe_finder_find_batch(this, points, n_points, points_at_pe, n_points_pe)
integer function get_pe_idx(this, global_idx)
subroutine cartesian_pe_finder_free(this)
Destructor.
integer(kind=i8) function, dimension(3) get_global_idx(this, x, y, z)
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:50
integer, public pe_size
MPI size of communicator.
Definition comm.F90:58
integer, public pe_rank
MPI rank.
Definition comm.F90:55
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:42
Implements a hash table ADT.
Definition htable.f90:36
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements pe_finder given a dofmap.
Definition pe_finder.f90:35
Implements a point.
Definition point.f90:35
Defines a function space.
Definition space.f90:34
Implements a dynamic stack ADT.
Definition stack.f90:35
Implements a n-tuple.
Definition tuple.f90:34
Utilities.
Definition utils.f90:35
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Implements global interpolation for arbitrary points in the domain.
Integer*8 based hash table.
Definition htable.f90:92
Implements global interpolation for arbitrary points in the domain.
Definition pe_finder.f90:45
A point in with coordinates .
Definition point.f90:43
The function space for the SEM solution fields.
Definition space.f90:63
Integer based stack.
Definition stack.f90:63
Integer*8 based stack.
Definition stack.f90:70
Integer based 2-tuple.
Definition tuple.f90:51
#define max(a, b)
Definition tensor.cu:40