222 real(kind=
rp),
intent(in) :: x(:)
223 real(kind=
rp),
intent(in) :: y(:)
224 real(kind=
rp),
intent(in) :: z(:)
225 integer,
intent(in) :: gdim
226 integer,
intent(in) :: nelv
227 type(mpi_comm),
intent(in),
optional :: comm
228 type(
space_t),
intent(in) :: Xh
229 real(kind=
rp),
intent(in),
optional :: tol
230 real(kind=
rp),
intent(in),
optional :: pad
231 integer :: lx, ly, lz, ierr, i, n
232 character(len=8000) :: log_buf
233 real(kind=
dp) :: padding
234 real(kind=
rp) :: time1, time_start
235 character(len=255) :: mode_str
236 integer :: boxdim, envvar_len
240 if (
present(
comm))
then
246 if (
present(pad))
then
254 time_start = mpi_wtime()
255 call mpi_barrier(this%comm)
257 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
258 call mpi_comm_size(this%comm, this%pe_size, ierr)
262 if (
present(tol))
then
268 call mpi_allreduce(nelv, this%glb_nelv, 1, mpi_integer, &
269 mpi_sum, this%comm, ierr)
277 call copy(this%x%x, x, n)
279 call copy(this%y%x, y, n)
281 call copy(this%z%x, z, n)
283 call this%Xh%init(xh%t, lx, ly, lz)
286 call neko_log%section(
'Global Interpolation')
288 call neko_log%message(
'Initializing global interpolation')
289 call get_environment_variable(
"NEKO_GLOBAL_INTERP_EL_FINDER", &
290 mode_str, envvar_len)
292 if (envvar_len .gt. 0)
then
293 if (mode_str(1:envvar_len) ==
'AABB')
then
297 call get_environment_variable(
"NEKO_GLOBAL_INTERP_PE_FINDER", &
298 mode_str, envvar_len)
300 if (envvar_len .gt. 0)
then
301 if (mode_str(1:envvar_len) ==
'AABB')
then
306 if (.not.
allocated(this%el_finder))
then
309 if (.not.
allocated(this%pe_finder))
then
312 select type(el_find => this%el_finder)
314 call neko_log%message(
'Using AABB element finder')
315 call el_find%init(x, y, z, nelv, xh, padding)
317 call neko_log%message(
'Using Cartesian element finder')
318 boxdim =
max(lx*int(
real(nelv,
xp)**(1.0_xp/3.0_xp)),2)
319 boxdim = min(boxdim, 300)
320 call el_find%init(x, y, z, nelv, xh, boxdim, padding)
322 call neko_error(
'Unknown element finder type')
325 select type(pe_find => this%pe_finder)
327 call neko_log%message(
'Using AABB PE finder')
328 call pe_find%init(this%x%x, this%y%x, this%z%x, &
329 nelv, xh, this%comm, padding)
331 call neko_log%message(
'Using Cartesian PE finder')
332 boxdim = lx*int(
real(this%glb_nelv,
xp)**(1.0_xp/3.0_xp))
333 boxdim =
max(boxdim,32)
334 boxdim = min(boxdim, &
335 int(8.0_xp*(30000.0_xp*this%pe_size)**(1.0_xp/3.0_xp)))
336 call pe_find%init(this%x%x, this%y%x, this%z%x, &
337 nelv, xh, this%comm, boxdim, padding)
342 call this%rst_finder%init(this%x%x, this%y%x, this%z%x, nelv, xh, this%tol)
343 if (
allocated(this%n_points_pe))
deallocate(this%n_points_pe)
344 if (
allocated(this%n_points_pe_local))
deallocate(this%n_points_pe_local)
345 if (
allocated(this%n_points_offset_pe_local)) &
346 deallocate(this%n_points_offset_pe_local)
347 if (
allocated(this%n_points_offset_pe))
deallocate(this%n_points_offset_pe)
348 allocate(this%n_points_pe(0:(this%pe_size-1)))
349 allocate(this%n_points_offset_pe(0:(this%pe_size-1)))
350 allocate(this%n_points_pe_local(0:(this%pe_size-1)))
351 allocate(this%n_points_offset_pe_local(0:(this%pe_size-1)))
352 allocate(this%points_at_pe(0:(this%pe_size-1)))
353 do i = 0, this%pe_size-1
354 call this%points_at_pe(i)%init()
356 call mpi_barrier(this%comm)
358 write(log_buf,
'(A,E15.7)') &
359 'Global interpolation initialized (s):', time1-time_start
451 character(len=8000) :: log_buf
459 type(c_ptr) :: el_cands_d
461 integer :: i, j, stupid_intent
463 integer,
allocatable :: n_el_cands(:)
464 integer,
contiguous,
pointer :: el_cands(:), point_ids(:), send_recv(:)
465 real(kind=
rp),
allocatable :: res_results(:,:)
466 real(kind=
rp),
allocatable :: rst_results(:,:)
467 integer,
allocatable :: el_owner_results(:)
468 integer :: ierr, ii, n_point_cand, n_glb_point_cand, point_id, rank
469 real(kind=
rp) :: time1, time2, time_start
473 type(
stack_i4_t) :: send_pe_find, recv_pe_find
475 el_cands_d = c_null_ptr
476 call neko_log%section(
'Global Interpolation')
477 call glb_intrp_find%init_dofs(this%pe_size)
478 call send_pe_find%init()
479 call recv_pe_find%init()
480 call mpi_barrier(this%comm)
481 time_start = mpi_wtime()
482 write(log_buf,
'(A)')
'Global interpolation, finding points'
488 call this%pe_finder%find_batch(this%xyz, this%n_points, &
489 this%points_at_pe, this%n_points_pe)
490 call mpi_barrier(this%comm)
492 write(log_buf,
'(A,E15.7)') &
493 'Found PE candidates time since start of findpts (s):', time1-time_start
499 this%n_points_pe_local = 0
500 this%n_points_local = 0
501 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, &
502 1, mpi_integer, mpi_sum, this%comm, ierr)
503 call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
504 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
507 this%n_points_offset_pe_local(0) = 0
508 this%n_points_offset_pe(0) = 0
509 do i = 1, (this%pe_size - 1)
510 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
511 + this%n_points_offset_pe_local(i-1)
512 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
513 + this%n_points_offset_pe(i-1)
515 do i = 0, (this%pe_size-1)
516 if (this%n_points_pe(i) .gt. 0)
then
517 call send_pe_find%push(i)
518 point_ids => this%points_at_pe(i)%array()
519 do j = 1, this%n_points_pe(i)
520 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
521 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
522 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
525 if (this%n_points_pe_local(i) .gt. 0)
then
526 call recv_pe_find%push(i)
527 do j = 1, this%n_points_pe_local(i)
528 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+1)
529 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+2)
530 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+3)
537 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
539 call glb_intrp_find_back%init_dofs(this%pe_size)
541 do i = 0, (this%pe_size-1)
542 send_recv => glb_intrp_find%recv_dof(i)%array()
543 do j = 1, glb_intrp_find%recv_dof(i)%size()
544 call glb_intrp_find_back%send_dof(i)%push(send_recv(j))
546 send_recv => glb_intrp_find%send_dof(i)%array()
547 do j = 1, glb_intrp_find%send_dof(i)%size()
549 call glb_intrp_find_back%recv_dof(i)%push(ii)
553 call glb_intrp_find_back%init(recv_pe_find, send_pe_find, this%comm)
556 if (
allocated(this%xyz_local))
then
557 deallocate(this%xyz_local)
559 allocate(this%xyz_local(3, this%n_points_local))
560 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
561 this%n_points_local*3)
563 call mpi_barrier(this%comm)
565 write(log_buf,
'(A,E15.7)') &
566 'Sent to points to PE candidates, time since start of find_points (s):', time1-time_start
569 call all_el_candidates%init()
571 if (
allocated(n_el_cands))
then
572 deallocate(n_el_cands)
575 allocate(n_el_cands(this%n_points_local))
577 call this%el_finder%find_batch(this%xyz_local, this%n_points_local, &
578 all_el_candidates, n_el_cands)
580 n_point_cand = all_el_candidates%size()
581 if (n_point_cand .gt. 1e8)
then
582 print *,
'Warning, many point candidates on rank', this%pe_rank,
'cands:', n_point_cand, &
583 'Consider increasing number of ranks'
585 call x_t%init(n_point_cand)
586 call y_t%init(n_point_cand)
587 call z_t%init(n_point_cand)
590 do i = 1 , this%n_points_local
591 do j = 1, n_el_cands(i)
593 x_t%x(ii) = this%xyz_local(1,i)
594 y_t%x(ii) = this%xyz_local(2,i)
595 z_t%x(ii) = this%xyz_local(3,i)
599 call mpi_barrier(this%comm)
601 write(log_buf,
'(A,E15.7)') &
602 'Element candidates found, now time for finding rst,time since start of find_points (s):', time1-time_start
604 call rst_local_cand%init(3,n_point_cand)
605 call resx%init(n_point_cand)
606 call resy%init(n_point_cand)
607 call resz%init(n_point_cand)
610 call mpi_barrier(this%comm)
612 el_cands => all_el_candidates%array()
619 call device_map(el_cands, el_cands_d,n_point_cand)
624 call this%rst_finder%find(rst_local_cand, &
626 el_cands, n_point_cand, &
636 call mpi_barrier(this%comm)
639 write(log_buf,
'(A,E15.7)') &
640 'Found rst with Newton iteration, time (s):', time2-time1
643 write(log_buf,
'(A,E15.7)') &
644 'Tolerance: ', this%tol
646 write(log_buf,
'(A)') &
647 'Checking validity of points and choosing best candidates.'
649 call mpi_barrier(this%comm,ierr)
651 if (
allocated(this%rst_local))
deallocate(this%rst_local)
652 if (
allocated(this%el_owner0_local))
deallocate(this%el_owner0_local)
653 allocate(this%rst_local(3,this%n_points_local))
654 allocate(this%el_owner0_local(this%n_points_local))
657 do i = 1 , this%n_points_local
658 this%xyz_local(1,i) = 10.0
659 this%xyz_local(2,i) = 10.0
660 this%xyz_local(3,i) = 10.0
661 this%rst_local(1,i) = 10.0
662 this%rst_local(2,i) = 10.0
663 this%rst_local(3,i) = 10.0
664 this%el_owner0_local(i) = -1
665 do j = 1, n_el_cands(i)
667 if (
rst_cmp(this%rst_local(:,i), rst_local_cand%x(:,ii),&
668 this%xyz_local(:,i), (/resx%x(ii),resy%x(ii),resz%x(ii)/), this%padding))
then
669 this%rst_local(1,i) = rst_local_cand%x(1,ii)
670 this%rst_local(2,i) = rst_local_cand%x(2,ii)
672 this%rst_local(3,i) = rst_local_cand%x(3,ii)
673 this%xyz_local(1,i) = resx%x(ii)
674 this%xyz_local(2,i) = resy%x(ii)
675 this%xyz_local(3,i) = resz%x(ii)
676 this%el_owner0_local(i) = el_cands(ii)
682 call res%init(3,this%n_points)
683 n_glb_point_cand = sum(this%n_points_pe)
684 if (
allocated(rst_results))
deallocate(rst_results)
685 if (
allocated(res_results))
deallocate(res_results)
686 if (
allocated(el_owner_results))
deallocate(el_owner_results)
687 allocate(rst_results(3,n_glb_point_cand))
688 allocate(res_results(3,n_glb_point_cand))
689 allocate(el_owner_results(n_glb_point_cand))
694 call glb_intrp_find_back%sendrecv(this%xyz_local, res_results, &
695 this%n_points_local*3, n_glb_point_cand*3)
696 call glb_intrp_find_back%sendrecv(this%rst_local, rst_results, &
697 this%n_points_local*3, n_glb_point_cand*3)
698 do i = 1,
size(glb_intrp_find_back%send_pe)
699 rank = glb_intrp_find_back%send_pe(i)
700 call mpi_isend(this%el_owner0_local(this%n_points_offset_pe_local(rank)+1),&
701 this%n_points_pe_local(rank), &
702 mpi_integer, rank, 0, &
703 this%comm, glb_intrp_find_back%send_buf(i)%request, ierr)
704 glb_intrp_find_back%send_buf(i)%flag = .false.
706 do i = 1,
size(glb_intrp_find_back%recv_pe)
707 rank = glb_intrp_find_back%recv_pe(i)
708 call mpi_irecv(el_owner_results(this%n_points_offset_pe(rank)+1),&
709 this%n_points_pe(rank), &
710 mpi_integer, rank, 0, &
711 this%comm, glb_intrp_find_back%recv_buf(i)%request, ierr)
712 glb_intrp_find_back%recv_buf(i)%flag = .false.
714 call glb_intrp_find_back%nbwait_no_op()
716 do i = 1,
size(glb_intrp_find_back%recv_pe)
717 point_ids => this%points_at_pe(glb_intrp_find_back%recv_pe(i))%array()
718 do j = 1, this%n_points_pe(glb_intrp_find_back%recv_pe(i))
719 point_id = point_ids(j)
721 if (
rst_cmp(this%rst(:,point_id), rst_results(:,ii), &
722 res%x(:,point_id), res_results(:,ii), this%padding) .or. &
723 this%pe_owner(point_ids(j)) .eq. -1 )
then
724 this%rst(:,point_ids(j)) = rst_results(:,ii)
725 res%x(:,point_ids(j)) = res_results(:,ii)
726 this%pe_owner(point_ids(j)) = glb_intrp_find_back%recv_pe(i)
727 this%el_owner0(point_ids(j)) = el_owner_results(ii)
737 do i = 0, this%pe_size-1
738 call this%points_at_pe(i)%clear()
739 this%n_points_pe(i) = 0
742 do i = 1, this%n_points
744 if (this%pe_owner(i) .eq. -1 .or. this%el_owner0(i) .eq. -1)
then
745 print *,
'No owning rank found for',&
746 ' point ', stupid_intent,
' with coords', this%xyz(:,i), &
747 ' Interpolation will always yield 0.0. Try increase padding.'
749 call this%points_at_pe(this%pe_owner(i))%push(stupid_intent)
750 this%n_points_pe(this%pe_owner(i)) = this%n_points_pe(this%pe_owner(i)) + 1
753 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, 1, mpi_integer, &
754 mpi_sum, this%comm, ierr)
755 call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
756 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
757 this%n_points_offset_pe_local(0) = 0
758 this%n_points_offset_pe(0) = 0
759 do i = 1, (this%pe_size - 1)
760 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
761 + this%n_points_offset_pe_local(i-1)
762 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
763 + this%n_points_offset_pe(i-1)
765 call send_pe_find%free()
766 call recv_pe_find%free()
767 call glb_intrp_find%free()
768 call send_pe_find%init()
769 call recv_pe_find%init()
770 call glb_intrp_find%init_dofs(this%pe_size)
772 do i = 0, (this%pe_size-1)
773 if (this%n_points_pe(i) .gt. 0)
then
774 call send_pe_find%push(i)
775 point_ids => this%points_at_pe(i)%array()
776 do j = 1, this%n_points_pe(i)
777 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
778 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
779 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
782 if (this%n_points_pe_local(i) .gt. 0)
then
783 call recv_pe_find%push(i)
784 do j = 1, this%n_points_pe_local(i)
785 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+1)
786 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+2)
787 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+3)
793 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
794 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
795 this%n_points_local*3)
796 call glb_intrp_find%sendrecv(this%rst, this%rst_local, this%n_points*3, &
797 this%n_points_local*3)
799 do i = 1,
size(glb_intrp_find%send_pe)
800 rank = glb_intrp_find%send_pe(i)
801 point_ids => this%points_at_pe(rank)%array()
802 do j = 1, this%n_points_pe(rank)
804 el_owner_results(ii) = this%el_owner0(point_ids(j))
806 call mpi_isend(el_owner_results(this%n_points_offset_pe(rank)+1),&
807 this%n_points_pe(rank), &
808 mpi_integer, rank, 0, &
809 this%comm, glb_intrp_find%send_buf(i)%request, ierr)
810 glb_intrp_find%send_buf(i)%flag = .false.
812 do i = 1,
size(glb_intrp_find%recv_pe)
813 rank = glb_intrp_find%recv_pe(i)
814 call mpi_irecv(this%el_owner0_local(this%n_points_offset_pe_local(rank)+1), &
815 this%n_points_pe_local(rank), &
816 mpi_integer, rank, 0, &
817 this%comm, glb_intrp_find%recv_buf(i)%request, ierr)
818 glb_intrp_find%recv_buf(i)%flag = .false.
820 call glb_intrp_find%nbwait_no_op()
822 call glb_intrp_find%free()
827 call this%glb_intrp_comm%init_dofs(this%pe_size)
828 do i = 0, (this%pe_size-1)
829 if (this%n_points_pe(i) .gt. 0)
then
831 point_ids => this%points_at_pe(i)%array()
832 do j = 1, this%n_points_pe(i)
833 call this%glb_intrp_comm%recv_dof(i)%push(point_ids(j))
836 if (this%n_points_pe_local(i) .gt. 0)
then
838 do j = 1, this%n_points_pe_local(i)
839 call this%glb_intrp_comm%send_dof(i)%push(j+this%n_points_offset_pe_local(i))
843 call this%glb_intrp_comm%init(send_pe, recv_pe,this%comm)
846 call this%temp_local%init(this%n_points_local)
847 call this%temp%init(this%n_points)
850 call this%local_interp%init(this%Xh, this%rst_local, &
857 call device_map(this%el_owner0_local, this%el_owner0_local_d, this%n_points_local)
858 call device_memcpy(this%el_owner0_local, this%el_owner0_local_d, &
862 call this%check_points(this%x%x,this%y%x, this%z%x)
867 call glb_intrp_find_back%free()
868 call send_pe_find%free()
869 call recv_pe_find%free()
873 call rst_local_cand%free()
878 call all_el_candidates%free()
880 if (
allocated(n_el_cands))
deallocate(n_el_cands)
881 if (
allocated(rst_results))
deallocate(rst_results)
882 if (
allocated(res_results))
deallocate(res_results)
883 if (
allocated(el_owner_results))
deallocate(el_owner_results)
884 call mpi_barrier(this%comm, ierr)
886 write(log_buf,
'(A,E15.7)')
'Global interpolation find points done, time (s):', &