Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cartesian_pe_finder.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
33
37 use num_types, only: rp, dp, xp, i8
39 use space, only: space_t
40 use pe_finder, only: pe_finder_t
41 use stack, only: stack_i4_t, stack_i8_t
43 use htable, only: htable_i8_t
44 use point, only: point_t
46 use mpi_f08, only: mpi_max, mpi_allreduce, mpi_comm, mpi_comm_rank, &
47 mpi_comm_size, mpi_wtime, mpi_integer, mpi_integer8, &
48 mpi_min, mpi_sum, mpi_irecv, mpi_isend, &
49 mpi_exscan, mpi_request, mpi_status, &
50 mpi_alltoall, mpi_in_place, mpi_barrier
51 implicit none
52 private
53
55 integer, public, parameter :: glob_map_size = 4096
56
57 type, private :: i8_mpi_t
59 type(mpi_status) :: status
61 type(mpi_request) :: request
64 logical :: flag
66 real(kind=dp), allocatable :: data(:)
67 integer :: size = 0
68 contains
69 procedure, pass(this) :: free => i8_mpi_free
70 end type i8_mpi_t
71
72
74 type, public, extends(pe_finder_t) :: cartesian_pe_finder_t
76 real(kind=dp) :: padding
78 integer(kind=i8) :: glob_n_boxes
79 integer(kind=i8) :: n_boxes
80 !Number of local boxes in x direction
81 integer :: n_boxes_per_pe
82 integer :: nelv
83 integer :: offset_el
84 real(kind=rp) :: max_x_global, max_y_global, max_z_global
85 real(kind=rp) :: min_x_global, min_y_global, min_z_global
86
87 ! Resolution of the boxes
88 real(kind=xp) :: res_x, res_y, res_z
89
90 type(i8_mpi_t), allocatable :: recv_buf(:)
91 type(i8_mpi_t), allocatable :: send_buf(:)
92 type(stack_i4_t), allocatable :: pe_map(:)
93
94 contains
95 procedure, pass(this) :: init => cartesian_pe_finder_init
96 procedure, pass(this) :: free => cartesian_pe_finder_free
97 procedure, pass(this) :: find => cartesian_pe_finder_find
98 procedure, pass(this) :: find_batch => cartesian_pe_finder_find_batch
99
100 end type cartesian_pe_finder_t
101
102contains
103
105 subroutine i8_mpi_free(this)
106 class(i8_mpi_t), intent(inout) :: this
107
108 if (allocated(this%data)) then
109 deallocate(this%data)
110 end if
111
112 end subroutine i8_mpi_free
113
123 subroutine cartesian_pe_finder_init(this, x, y, z, nelv, Xh, comm, n_boxes, padding)
124 class(cartesian_pe_finder_t), intent(inout) :: this
125 real(kind=rp), intent(in), target :: x(:)
126 real(kind=rp), intent(in), target :: y(:)
127 real(kind=rp), intent(in), target :: z(:)
128 integer, intent(in) :: nelv
129 type(mpi_comm), intent(in), optional :: comm
130 type(space_t), intent(in), target :: Xh
131 integer, intent(in) :: n_boxes
132 real(kind=dp), intent(in) :: padding
133 integer :: ierr
134 integer :: i, j, k, e, i2, j2, k2
135 integer :: pe_id, lin_idx
136 integer :: lxyz, lx2
137 integer(kind=i8) :: glob_id, loc_id
138 integer(kind=i8) :: min_id(3), max_id(3)
139 real(kind=rp) :: center_x, center_y, center_z
140 type(stack_i8_t), allocatable :: glob_ids(:), recv_ids(:)
141 integer(i8), pointer :: glb_ids(:)
142 integer(kind=i8) :: htable_data! We just use it as a set
143 integer, allocatable :: n_recv(:), n_send(:)
144 type(htable_i8_t) :: marked_box
145 real(kind=rp) :: min_bb_x, max_bb_x
146 real(kind=rp) :: min_bb_y, max_bb_y
147 real(kind=rp) :: min_bb_z, max_bb_z
148 real(kind=rp) :: el_x(xh%lxyz), el_y(xh%lxyz), el_z(xh%lxyz)
149
150 call this%free()
151 this%comm = comm
152 this%padding = padding
153
154 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
155 call mpi_comm_size(this%comm, this%pe_size, ierr)
156
157 this%glob_n_boxes = int(n_boxes, i8)**3
158 this%n_boxes = n_boxes
159 this%nelv = nelv
160 this%n_boxes_per_pe = (this%glob_n_boxes + int(this%pe_size - 1, i8))/ &
161 int(this%pe_size, i8)
162
163 allocate(this%send_buf(0:this%pe_size - 1))
164 allocate(this%recv_buf(0:this%pe_size - 1))
165 allocate(this%pe_map(0:this%n_boxes_per_pe - 1))
166 do i = 0, this%n_boxes_per_pe - 1
167 call this%pe_map(i)%init()
168 end do
169 call mpi_exscan(this%nelv, this%offset_el, 1, &
170 mpi_integer, mpi_sum, this%comm, ierr)
171 if (this%nelv .gt. 0) then
172 this%max_x_global = maxval(x(1:nelv*xh%lxyz))
173 this%max_y_global = maxval(y(1:nelv*xh%lxyz))
174 this%max_z_global = maxval(z(1:nelv*xh%lxyz))
175 this%min_x_global = minval(x(1:nelv*xh%lxyz))
176 this%min_y_global = minval(y(1:nelv*xh%lxyz))
177 this%min_z_global = minval(z(1:nelv*xh%lxyz))
178 else
179 this%max_x_global = -1e20
180 this%max_y_global = -1e20
181 this%max_z_global = -1e20
182 this%min_x_global = 1e20
183 this%min_y_global = 1e20
184 this%min_z_global = 1e20
185 end if
186
187
188 call mpi_allreduce(mpi_in_place, this%max_x_global, 1, mpi_real_precision, &
189 mpi_max, this%comm, ierr)
190 call mpi_allreduce(mpi_in_place, this%max_y_global, 1, mpi_real_precision, &
191 mpi_max, this%comm, ierr)
192 call mpi_allreduce(mpi_in_place, this%max_z_global, 1, mpi_real_precision, &
193 mpi_max, this%comm, ierr)
194 call mpi_allreduce(mpi_in_place, this%min_x_global, 1, mpi_real_precision, &
195 mpi_min, this%comm, ierr)
196 call mpi_allreduce(mpi_in_place, this%min_y_global, 1, mpi_real_precision, &
197 mpi_min, this%comm, ierr)
198 call mpi_allreduce(mpi_in_place, this%min_z_global, 1, mpi_real_precision, &
199 mpi_min, this%comm, ierr)
200
201 center_x = (this%max_x_global + this%min_x_global) / 2.0_dp
202 center_y = (this%max_y_global + this%min_y_global) / 2.0_dp
203 center_z = (this%max_z_global + this%min_z_global) / 2.0_dp
204
205 this%max_x_global = this%max_x_global - center_x
206 this%max_y_global = this%max_y_global - center_y
207 this%max_z_global = this%max_z_global - center_z
208 this%min_x_global = this%min_x_global - center_x
209 this%min_y_global = this%min_y_global - center_y
210 this%min_z_global = this%min_z_global - center_z
211 this%max_x_global = this%max_x_global*(1.0_xp+this%padding) + center_x
212 this%max_y_global = this%max_y_global*(1.0_xp+this%padding) + center_y
213 this%max_z_global = this%max_z_global*(1.0_xp+this%padding) + center_z
214 this%min_x_global = this%min_x_global*(1.0_xp+this%padding) + center_x
215 this%min_y_global = this%min_y_global*(1.0_xp+this%padding) + center_y
216 this%min_z_global = this%min_z_global*(1.0_xp+this%padding) + center_z
217
218
219 this%res_x = (this%max_x_global - this%min_x_global) / real(this%n_boxes, xp)
220 this%res_y = (this%max_y_global - this%min_y_global) / real(this%n_boxes, xp)
221 this%res_z = (this%max_z_global - this%min_z_global) / real(this%n_boxes, xp)
222 if (allocated(recv_ids)) then
223 do i = 0, this%pe_size-1
224 call recv_ids(i)%free()
225 end do
226 deallocate(recv_ids)
227 end if
228 if (allocated(glob_ids)) then
229 do i = 0, this%pe_size-1
230 call glob_ids(i)%free()
231 end do
232 deallocate(glob_ids)
233 end if
234 if (allocated(n_recv)) deallocate(n_recv)
235 if (allocated(n_send)) deallocate(n_send)
236 allocate(n_recv(0:this%pe_size-1))
237 allocate(n_send(0:this%pe_size-1))
238 allocate(recv_ids(0:this%pe_size-1))
239 allocate(glob_ids(0:this%pe_size-1))
240 do i = 0, this%pe_size-1
241 call recv_ids(i)%init()
242 call glob_ids(i)%init()
243 if (allocated(this%recv_buf(i)%data)) then
244 deallocate(this%recv_buf(i)%data)
245 end if
246 if (allocated(this%send_buf(i)%data)) then
247 deallocate(this%send_buf(i)%data)
248 end if
249 this%send_buf(i)%size = 0
250 this%recv_buf(i)%size = 0
251 end do
252 n_recv = 0
253 n_send = 0
254
255 lxyz = xh%lxyz
256 call marked_box%init(this%nelv*lxyz,htable_data)
257
258 do e = 1, this%nelv
259 !move it to do scaling
260 lx2 = xh%lx/2
261 if (mod(xh%lx,2) .eq. 0) then
262 lin_idx = linear_index(lx2, lx2, lx2, e, xh%lx, xh%lx, xh%lx)
263 center_x = x(lin_idx)
264 center_y = y(lin_idx)
265 center_z = z(lin_idx)
266 else
267 center_x = 0d0
268 center_y = 0d0
269 center_z = 0d0
270 do i = lx2, lx2+1
271 do j = lx2, lx2 + 1
272 do k = lx2, lx2 + 1
273 lin_idx = linear_index(i, j, k, e, xh%lx, xh%lx, xh%lx)
274 center_x = center_x + x(lin_idx)
275 center_y = center_y + y(lin_idx)
276 center_z = center_z + z(lin_idx)
277 end do
278 end do
279 end do
280 center_x = center_x / 8.0_xp
281 center_y = center_y / 8.0_xp
282 center_z = center_z / 8.0_xp
283 end if
284
285 !Maybe this is a stupid way to do it
286
287 el_x = x((e-1)*lxyz+1:e*lxyz) - center_x
288 el_y = y((e-1)*lxyz+1:e*lxyz) - center_y
289 el_z = z((e-1)*lxyz+1:e*lxyz) - center_z
290 el_x = el_x * (1.0_rp+padding) + center_x
291 el_y = el_y * (1.0_rp+padding) + center_y
292 el_z = el_z * (1.0_rp+padding) + center_z
293 !Padded and ready
294
295 !Now we go the bounding boxes of all subboxes in the element
296 do i = 1, xh%lx - 1
297 do j = 1, xh%ly - 1
298 do k = 1, xh%lz - 1
299 lin_idx = linear_index(i, j, k, 1, xh%lx, xh%lx, xh%lx)
300 max_bb_x = el_x(lin_idx)
301 min_bb_x = el_x(lin_idx)
302 max_bb_y = el_y(lin_idx)
303 min_bb_y = el_y(lin_idx)
304 max_bb_z = el_z(lin_idx)
305 min_bb_z = el_z(lin_idx)
306 do i2 = 0, 1
307 do j2 = 0, 1
308 do k2 = 0, 1
309 lin_idx = linear_index(i+i2,j+j2,k+k2, 1, xh%lx, xh%lx, xh%lx)
310 max_bb_x = max(max_bb_x, el_x(lin_idx))
311 min_bb_x = min(min_bb_x, el_x(lin_idx))
312 max_bb_y = max(max_bb_y, el_y(lin_idx))
313 min_bb_y = min(min_bb_y, el_y(lin_idx))
314 max_bb_z = max(max_bb_z, el_z(lin_idx))
315 min_bb_z = min(min_bb_z, el_z(lin_idx))
316 end do
317 end do
318 end do
319
320
321 min_id = get_global_idx(this, min_bb_x, min_bb_y, min_bb_z)
322 max_id = get_global_idx(this, max_bb_x, max_bb_y, max_bb_z)
323 do i2 = min_id(1), max_id(1)
324 do j2 = min_id(2), max_id(2)
325 do k2 = min_id(3), max_id(3)
326 if (i2 .ge. 0 .and. i2 .lt. this%n_boxes .and. &
327 j2 .ge. 0 .and. j2 .lt. this%n_boxes .and. &
328 k2 .ge. 0 .and. k2 .lt. this%n_boxes) then
329 glob_id = i2 + j2*this%n_boxes + k2*this%n_boxes**2
330 pe_id = get_pe_idx(this, glob_id)
331 if (marked_box%get(glob_id,htable_data) .ne. 0) then
332 call glob_ids(pe_id)%push(glob_id)
333 n_send(pe_id) = n_send(pe_id) + 1
334 call marked_box%set(glob_id, htable_data)
335 end if
336 end if
337 end do
338 end do
339 end do
340 end do
341 end do
342 end do
343 end do
344 !temp = 0
345 !do i = 0, this%pe_size-1
346 ! temp = temp + glob_ids(i)%size()
347 !end do
348 !print *, 'size of globids', temp, this%pe_rank
349 call send_recv_data(this, recv_ids, n_recv, glob_ids, n_send)
350
351 !temp = 0
352 !do i = 0, this%pe_size-1
353 ! temp = temp + recv_ids(i)%size()
354 !end do
355 !print *, 'size of globids', temp, this%pe_rank
356 !Buffers are likely way too large for other calls
357 do i = 0, this%pe_size-1
358 if (allocated(this%recv_buf(i)%data)) then
359 deallocate(this%recv_buf(i)%data)
360 end if
361 if (allocated(this%send_buf(i)%data)) then
362 deallocate(this%send_buf(i)%data)
363 end if
364 this%send_buf(i)%size = 0
365 this%recv_buf(i)%size = 0
366 end do
367
368
369 call mpi_barrier(this%comm, ierr)
370 do i = 0, this%pe_size-1
371 if (n_recv(i) .gt. 0) then
372 glb_ids => recv_ids(i)%array()
373 do j = 1, n_recv(i)
374 glob_id = glb_ids(j)
375 loc_id = glob_id - int(this%pe_rank,i8) * &
376 int(this%n_boxes_per_pe,i8)
377 if (loc_id .ge. this%n_boxes_per_pe .or. loc_id < 0) then
378 call neko_warning('loc_id out of bounds')
379 end if
380 call this%pe_map(int(loc_id))%push(i)
381 end do
382 end if
383 end do
384
385 if (allocated(recv_ids)) then
386 do i = 0, this%pe_size-1
387 call recv_ids(i)%free()
388 end do
389 deallocate(recv_ids)
390 end if
391 if (allocated(glob_ids)) then
392 do i = 0, this%pe_size-1
393 call glob_ids(i)%free()
394 end do
395 deallocate(glob_ids)
396 end if
397 if (allocated(n_recv)) deallocate(n_recv)
398 if (allocated(n_send)) deallocate(n_send)
399 end subroutine cartesian_pe_finder_init
400
401 subroutine cartesian_pe_finder_find(this, my_point, pe_candidates)
402 class(cartesian_pe_finder_t), intent(inout) :: this
403 type(point_t), intent(in) :: my_point
404 type(stack_i4_t), intent(inout) :: pe_candidates
405
406 call neko_error('cartesian_pe_finder_find not implemented')
407 end subroutine cartesian_pe_finder_find
408
409 function get_global_idx(this, x, y, z) result(global_box_id)
410 class(cartesian_pe_finder_t), intent(in) :: this
411 real(kind=rp), intent(in) :: x, y, z
412 integer(kind=i8) :: global_box_id(3)
413
414 global_box_id(1) = int((x - this%min_x_global) / this%res_x, i8)
415 global_box_id(2) = int((y - this%min_y_global) / this%res_y, i8)
416 global_box_id(3) = int((z - this%min_z_global) / this%res_z, i8)
417 end function get_global_idx
418
419 function get_pe_idx(this, global_idx) result(pe_id)
420 class(cartesian_pe_finder_t), intent(in) :: this
421 integer(kind=i8), intent(in) :: global_idx
422 integer :: pe_id
423 !Get x id and then divide by the number of x boxes per rank to get the correct pe id
424 pe_id = global_idx / int(this%n_boxes_per_pe, i8)
425 end function get_pe_idx
426
427
430 class(cartesian_pe_finder_t), intent(inout) :: this
431 integer :: i
432
433 if (allocated(this%send_buf)) then
434 do i = 0, this%pe_size-1
435 call this%send_buf(i)%free()
436 end do
437 deallocate(this%send_buf)
438 end if
439 if (allocated(this%recv_buf)) then
440 do i = 0, this%pe_size-1
441 call this%recv_buf(i)%free()
442 end do
443 deallocate(this%recv_buf)
444 end if
445 if (allocated(this%pe_map)) then
446 do i = 0, this%n_boxes_per_pe-1
447 call this%pe_map(i)%free()
448 end do
449 deallocate(this%pe_map)
450 end if
451
452 end subroutine cartesian_pe_finder_free
453
454 subroutine cartesian_pe_finder_find_batch(this, points, n_points, points_at_pe, n_points_pe)
455 class(cartesian_pe_finder_t), intent(inout) :: this
456 integer, intent(in) :: n_points
457 real(kind=rp), intent(in) :: points(3,n_points)
458 type(stack_i4_t), intent(inout) :: points_at_pe(0:(this%pe_size-1))
459 integer, intent(inout) :: n_points_pe(0:(this%pe_size-1))
460 integer :: i, j, k
461 integer(kind=i8) :: glob_id(3)
462 integer(kind=i8) :: pe_id
463 integer(kind=i8) :: lin_glob_id
464 integer(kind=i8) :: loc_id
465 type(stack_i8_t), allocatable :: work_pe_ids(:), work_pt_ids(:)
466 type(stack_i8_t), allocatable :: temp_pe_ids(:), temp_pt_ids(:)
467 integer, allocatable :: n_work_ids(:), n_temp_ids(:)
468 integer(i8), pointer :: ids(:)
469 integer(i8), pointer :: pt_id(:)
470 integer, pointer :: pe_cands(:)
471 integer(i8), pointer :: pe_cands8(:)
472 integer(i8), pointer :: pt_ids(:)
473 integer :: ierr
474
475 allocate(work_pe_ids(0:this%pe_size-1))
476 allocate(work_pt_ids(0:this%pe_size-1))
477 allocate(temp_pe_ids(0:this%pe_size-1))
478 allocate(temp_pt_ids(0:this%pe_size-1))
479 allocate(n_temp_ids(0:this%pe_size-1))
480 allocate(n_work_ids(0:this%pe_size-1))
481
482 do i = 0, this%pe_size-1
483 n_work_ids(i) = 0
484 n_temp_ids(i) = 0
485 call work_pt_ids(i)%init()
486 call work_pe_ids(i)%init()
487 call temp_pe_ids(i)%init()
488 call temp_pt_ids(i)%init()
489 end do
490
491 do i = 0, this%pe_size-1
492 call points_at_pe(i)%clear()
493 n_points_pe(i) = 0
494 end do
495
496 ! Compute global ids for the points
497 ! and the pe id for each point
498 n_work_ids = 0
499 do i = 1, n_points
500 glob_id = get_global_idx(this, points(1,i), points(2,i), points(3,i))
501 lin_glob_id = glob_id(1) + &
502 glob_id(2)*this%n_boxes + &
503 glob_id(3)*this%n_boxes**2
504 pe_id = get_pe_idx(this, lin_glob_id)
505 if (glob_id(1) .ge. 0 .and. glob_id(1) .lt. this%n_boxes .and. &
506 glob_id(2) .ge. 0 .and. glob_id(2) .lt. this%n_boxes .and. &
507 glob_id(3) .ge. 0 .and. glob_id(3) .lt. this%n_boxes) then
508 call work_pe_ids(pe_id)%push(lin_glob_id)
509 call work_pt_ids(pe_id)%push(int(i,i8))
510 n_work_ids(pe_id) = n_work_ids(pe_id) + 1
511 else
512 print *, 'Point found outside domain:', points(1,i), &
513 points(2,i), points(3,i), &
514 'Computed global id:', lin_glob_id, &
515 'Global box id (x,y,z):', glob_id(1), glob_id(2), glob_id(3)
516 end if
517 end do
518
519
520 ! Send the global ids to the correct pe
521 ! and get the global ids from the other pes with points I own
522 ! Also send point ids to the other pes
523 call send_recv_data(this, temp_pe_ids, n_temp_ids, work_pe_ids, n_work_ids)
524 call send_recv_data(this, temp_pt_ids, n_temp_ids, work_pt_ids, n_work_ids)
525 call mpi_barrier(this%comm, ierr)
526 ! Get the local ids for the points I own
527 ! and the pe candidates for the points I own
528 n_work_ids = 0
529 do i = 0, this%pe_size-1
530 call work_pe_ids(i)%clear()
531 call work_pt_ids(i)%clear()
532 end do
533
534 do i =0 , this%pe_size-1
535
536 if (n_temp_ids(i) .gt. 0) then
537 ids => temp_pe_ids(i)%array()
538 pt_ids => temp_pt_ids(i)%array()
539 do j = 1, n_temp_ids(i)
540 loc_id = ids(j) - int(this%pe_rank,i8) * &
541 int(this%n_boxes_per_pe,i8)
542 pe_cands => this%pe_map(int(loc_id))%array()
543 do k = 1, this%pe_map(int(loc_id))%size()
544 call work_pe_ids(i)%push(int(pe_cands(k),i8))
545 call work_pt_ids(i)%push(pt_ids(j))
546 n_work_ids(i) = n_work_ids(i) + 1
547 end do
548 if (this%pe_map(int(loc_id))%size() .lt. 1) then
549 print *, 'No PE candidates found for point:', &
550 points(1,pt_ids(j)), &
551 points(2,pt_ids(j)), points(3,pt_ids(j))
552 end if
553 end do
554 end if
555 end do
556 ! pe candidates found for the points I am responsible for
557 ! Now i need to send the data to the other pes
558 ! and get the candidates from the other pes for my own points
559 call send_recv_data(this, temp_pe_ids, n_temp_ids, work_pe_ids, n_work_ids)
560 call send_recv_data(this, temp_pt_ids, n_temp_ids, work_pt_ids, n_work_ids)
561 ! Gotten candidates for my points
562 ! Now I organize the candidates for the points I own
563 n_points_pe = 0
564 do i = 0, this%pe_size-1
565 if (n_temp_ids(i) .gt. 0) then
566 pe_cands8 => temp_pe_ids(i)%array()
567 pt_ids => temp_pt_ids(i)%array()
568 do j = 1, n_temp_ids(i)
569 pe_id = pe_cands8(j)
570 call points_at_pe(pe_id)%push(int(pt_ids(j)))
571 n_points_pe(pe_id) = n_points_pe(pe_id) + 1
572 end do
573 end if
574 end do
575
576 if (allocated(work_pe_ids)) then
577 do i = 0, this%pe_size-1
578 call work_pe_ids(i)%free()
579 end do
580 deallocate(work_pe_ids)
581 end if
582 if (allocated(temp_pe_ids)) then
583 do i = 0, this%pe_size-1
584 call temp_pe_ids(i)%free()
585 end do
586 deallocate(temp_pe_ids)
587 end if
588 if (allocated(temp_pt_ids)) then
589 do i = 0, this%pe_size-1
590 call temp_pt_ids(i)%free()
591 end do
592 deallocate(temp_pt_ids)
593 end if
594 if (allocated(work_pt_ids)) then
595 do i = 0, this%pe_size-1
596 call work_pt_ids(i)%free()
597 end do
598 deallocate(work_pt_ids)
599 end if
600
601 if (allocated(n_work_ids)) then
602 deallocate(n_work_ids)
603 end if
604
605 if (allocated(n_temp_ids)) then
606 deallocate(n_temp_ids)
607 end if
608 end subroutine cartesian_pe_finder_find_batch
609
610
611 subroutine send_recv_data(this, recv_values, n_recv_values, &
612 send_values, n_send_values)
613 class(cartesian_pe_finder_t), intent(inout) :: this
614 type(stack_i8_t), intent(inout) :: recv_values(0:this%pe_size-1)
615 type(stack_i8_t), intent(inout) :: send_values(0:this%pe_size-1)
616 integer, intent(inout) :: n_recv_values(0:this%pe_size-1)
617 integer, intent(inout) :: n_send_values(0:this%pe_size-1)
618 integer :: i, j, ierr
619 integer(i8) :: idx
620 integer(i8) , pointer :: sp(:)
621
622 call mpi_alltoall(n_send_values, 1, mpi_integer, &
623 n_recv_values, 1, mpi_integer, this%comm, ierr)
624
625 do i = 0, this%pe_size-1
626 if (n_recv_values(i) .gt. 0) then
627 if (this%recv_buf(i)%size .lt. n_recv_values(i)) then
628 if (allocated(this%recv_buf(i)%data)) deallocate(this%recv_buf(i)%data)
629 allocate(this%recv_buf(i)%data(n_recv_values(i)))
630 this%recv_buf(i)%size = n_recv_values(i)
631 end if
632 call mpi_irecv(this%recv_buf(i)%data, n_recv_values(i), mpi_integer8, &
633 i, 0, this%comm, this%recv_buf(i)%request, ierr)
634 end if
635 end do
636
637 do i = 0, this%pe_size-1
638 if (n_send_values(i) .gt. 0) then
639 if (this%send_buf(i)%size .lt. n_send_values(i)) then
640 if (allocated(this%send_buf(i)%data)) deallocate(this%send_buf(i)%data)
641 allocate(this%send_buf(i)%data(n_send_values(i)))
642 this%send_buf(i)%size = n_send_values(i)
643 end if
644 ! Copy the data to the send buffer
645 sp => send_values(i)%array()
646 do j = 1, n_send_values(i)
647 this%send_buf(i)%data(j) = sp(j)
648 end do
649 call mpi_isend(this%send_buf(i)%data, n_send_values(i), mpi_integer8, &
650 i, 0, this%comm, this%send_buf(i)%request, ierr)
651 end if
652 end do
653
654 do i = 0, this%pe_size-1
655 call recv_values(i)%clear()
656 end do
657
658 do i = 0, this%pe_size-1
659 if (n_recv_values(i) .gt. 0) then
660 call mpi_wait(this%recv_buf(i)%request, this%recv_buf(i)%status, ierr)
661 do j = 1, n_recv_values(i)
662 idx = this%recv_buf(i)%data(j)
663 call recv_values(i)%push(idx)
664 end do
665 end if
666 end do
667 do i = 0, this%pe_size-1
668 if (n_send_values(i) .gt. 0) then
669 call mpi_wait(this%send_buf(i)%request, this%send_buf(i)%status, ierr)
670 end if
671 end do
672
673
674 end subroutine send_recv_data
675end module cartesian_pe_finder
double real
Implements cartesian_pe_finder given a dofmap.
subroutine cartesian_pe_finder_find(this, my_point, pe_candidates)
integer, parameter, public glob_map_size
Minimum number of total boxes in the aabb tree.
subroutine cartesian_pe_finder_init(this, x, y, z, nelv, xh, comm, n_boxes, padding)
Initialize the global interpolation object on a set of coordinates.
subroutine i8_mpi_free(this)
Destructor for i8_mpi_t.
subroutine send_recv_data(this, recv_values, n_recv_values, send_values, n_send_values)
subroutine cartesian_pe_finder_find_batch(this, points, n_points, points_at_pe, n_points_pe)
integer function get_pe_idx(this, global_idx)
subroutine cartesian_pe_finder_free(this)
Destructor.
integer(kind=i8) function, dimension(3) get_global_idx(this, x, y, z)
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
integer, public pe_size
MPI size of communicator.
Definition comm.F90:59
integer, public pe_rank
MPI rank.
Definition comm.F90:56
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
Implements a hash table ADT.
Definition htable.f90:36
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements pe_finder given a dofmap.
Definition pe_finder.f90:35
Implements a point.
Definition point.f90:35
Defines a function space.
Definition space.f90:34
Implements a dynamic stack ADT.
Definition stack.f90:35
Utilities.
Definition utils.f90:35
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Implements global interpolation for arbitrary points in the domain.
Integer*8 based hash table.
Definition htable.f90:96
Implements global interpolation for arbitrary points in the domain.
Definition pe_finder.f90:44
A point in with coordinates .
Definition point.f90:43
The function space for the SEM solution fields.
Definition space.f90:63
Integer based stack.
Definition stack.f90:63
Integer*8 based stack.
Definition stack.f90:70
#define max(a, b)
Definition tensor.cu:40