330 real(kind=
rp),
intent(in) :: x(:)
331 real(kind=
rp),
intent(in) :: y(:)
332 real(kind=
rp),
intent(in) :: z(:)
333 integer,
intent(in) :: gdim
334 integer,
intent(in) :: nelv
335 type(
space_t),
intent(in) :: Xh
336 type(mpi_comm),
intent(in),
optional :: comm
337 real(kind=
dp),
intent(in),
optional :: tol
338 real(kind=
dp),
intent(in),
optional :: pad
340 integer :: lx, ly, lz, ierr, i, n
341 character(len=8000) :: log_buf
343 real(kind=
rp) :: time1, time_start
344 character(len=255) :: mode_str
345 integer :: boxdim, envvar_len
347 call neko_log%section(
'Global Interpolation')
348 call neko_log%message(
'Initializing global interpolation')
353 if (
present(
comm))
then
361 if (
present(pad)) this%padding = pad
364 if (
present(tol)) this%tolerance = tol
366 write(log_buf,
'(A,E15.7)') &
367 'Tolerance: ', this%tolerance
369 write(log_buf,
'(A,E15.7)') &
370 'Padding : ', this%padding
373 time_start = mpi_wtime()
374 call mpi_barrier(this%comm)
376 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
377 call mpi_comm_size(this%comm, this%pe_size, ierr)
382 call mpi_allreduce(nelv, this%glb_nelv, 1, mpi_integer, &
383 mpi_sum, this%comm, ierr)
391 call copy(this%x%x, x, n)
393 call copy(this%y%x, y, n)
395 call copy(this%z%x, z, n)
397 call this%Xh%init(xh%t, lx, ly, lz)
400 if (this%n_dof == -1)
then
405 call get_environment_variable(
"NEKO_GLOBAL_INTERP_EL_FINDER", &
406 mode_str, envvar_len)
408 if (envvar_len .gt. 0)
then
409 if (mode_str(1:envvar_len) ==
'AABB')
then
413 call get_environment_variable(
"NEKO_GLOBAL_INTERP_PE_FINDER", &
414 mode_str, envvar_len)
416 if (envvar_len .gt. 0)
then
417 if (mode_str(1:envvar_len) ==
'AABB')
then
422 if (.not.
allocated(this%el_finder))
then
425 if (.not.
allocated(this%pe_finder))
then
428 select type (el_find => this%el_finder)
430 call neko_log%message(
'Using AABB element finder')
431 call el_find%init(x, y, z, nelv, xh, this%padding)
433 call neko_log%message(
'Using Cartesian element finder')
434 boxdim =
max(lx*int(
real(nelv,
xp)**(1.0_xp / 3.0_xp)), 2)
435 boxdim = min(boxdim, 300)
436 call el_find%init(x, y, z, nelv, xh, boxdim, this%padding)
438 call neko_error(
'Unknown element finder type')
441 select type (pe_find => this%pe_finder)
443 call neko_log%message(
'Using AABB PE finder')
444 call pe_find%init(this%x%x, this%y%x, this%z%x, &
445 nelv, xh, this%comm, this%padding)
447 call neko_log%message(
'Using Cartesian PE finder')
448 boxdim = lx*int(
real(this%glb_nelv,
xp)**(1.0_xp / 3.0_xp))
449 boxdim =
max(boxdim, 32)
450 boxdim = min(boxdim, &
451 int(8.0_xp*(30000.0_xp * this%pe_size)**(1.0_xp / 3.0_xp)))
452 call pe_find%init(this%x%x, this%y%x, this%z%x, &
453 nelv, xh, this%comm, boxdim, this%padding)
458 call this%rst_finder%init(this%x%x, this%y%x, this%z%x, nelv, xh, &
460 if (
allocated(this%n_points_pe))
deallocate(this%n_points_pe)
461 if (
allocated(this%n_points_pe_local))
deallocate(this%n_points_pe_local)
462 if (
allocated(this%n_points_offset_pe_local)) &
463 deallocate(this%n_points_offset_pe_local)
464 if (
allocated(this%n_points_offset_pe))
deallocate(this%n_points_offset_pe)
465 allocate(this%n_points_pe(0:(this%pe_size-1)))
466 allocate(this%n_points_offset_pe(0:(this%pe_size-1)))
467 allocate(this%n_points_pe_local(0:(this%pe_size-1)))
468 allocate(this%n_points_offset_pe_local(0:(this%pe_size-1)))
469 allocate(this%points_at_pe(0:(this%pe_size-1)))
470 do i = 0, this%pe_size-1
471 call this%points_at_pe(i)%init()
473 call mpi_barrier(this%comm)
475 write(log_buf,
'(A,E15.7)') &
476 'Global interpolation initialized (s):', time1-time_start
570 character(len=8000) :: log_buf
578 type(c_ptr) :: el_cands_d
580 integer :: i, j, stupid_intent
582 integer,
allocatable :: n_el_cands(:)
583 integer,
contiguous,
pointer :: el_cands(:), point_ids(:), send_recv(:)
584 real(kind=
rp),
allocatable :: res_results(:,:)
585 real(kind=
rp),
allocatable :: rst_results(:,:)
586 integer,
allocatable :: el_owner_results(:)
587 integer :: ierr, ii, n_point_cand, n_glb_point_cand, point_id, rank
588 real(kind=
rp) :: time1, time2, time_start
592 type(
stack_i4_t) :: send_pe_find, recv_pe_find
594 el_cands_d = c_null_ptr
595 call neko_log%section(
'Global Interpolation')
596 call glb_intrp_find%init_dofs(this%pe_size)
597 call send_pe_find%init()
598 call recv_pe_find%init()
599 call mpi_barrier(this%comm)
600 time_start = mpi_wtime()
601 write(log_buf,
'(A)')
'Global interpolation, finding points'
607 call this%pe_finder%find_batch(this%xyz, this%n_points, &
608 this%points_at_pe, this%n_points_pe)
609 call mpi_barrier(this%comm)
611 write(log_buf,
'(A,E15.7)') &
612 'Found PE candidates time since start of findpts (s):', &
620 this%n_points_pe_local = 0
621 this%n_points_local = 0
622 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, &
623 1, mpi_integer, mpi_sum, this%comm, ierr)
624 call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
625 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
628 this%n_points_offset_pe_local(0) = 0
629 this%n_points_offset_pe(0) = 0
630 do i = 1, (this%pe_size - 1)
631 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
632 + this%n_points_offset_pe_local(i-1)
633 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
634 + this%n_points_offset_pe(i-1)
636 do i = 0, (this%pe_size-1)
637 if (this%n_points_pe(i) .gt. 0)
then
638 call send_pe_find%push(i)
639 point_ids => this%points_at_pe(i)%array()
640 do j = 1, this%n_points_pe(i)
641 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
642 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
643 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
646 if (this%n_points_pe_local(i) .gt. 0)
then
647 call recv_pe_find%push(i)
648 do j = 1, this%n_points_pe_local(i)
649 call glb_intrp_find%recv_dof(i)%push(3*(j + &
650 this%n_points_offset_pe_local(i) - 1) + 1)
651 call glb_intrp_find%recv_dof(i)%push(3*(j + &
652 this%n_points_offset_pe_local(i) - 1) + 2)
653 call glb_intrp_find%recv_dof(i)%push(3*(j + &
654 this%n_points_offset_pe_local(i) - 1) + 3)
661 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
663 call glb_intrp_find_back%init_dofs(this%pe_size)
665 do i = 0, (this%pe_size-1)
666 send_recv => glb_intrp_find%recv_dof(i)%array()
667 do j = 1, glb_intrp_find%recv_dof(i)%size()
668 call glb_intrp_find_back%send_dof(i)%push(send_recv(j))
670 send_recv => glb_intrp_find%send_dof(i)%array()
671 do j = 1, glb_intrp_find%send_dof(i)%size()
673 call glb_intrp_find_back%recv_dof(i)%push(ii)
677 call glb_intrp_find_back%init(recv_pe_find, send_pe_find, this%comm)
680 if (
allocated(this%xyz_local))
then
681 deallocate(this%xyz_local)
683 allocate(this%xyz_local(3, this%n_points_local))
684 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
685 this%n_points_local*3)
687 call mpi_barrier(this%comm)
689 write(log_buf,
'(A,E15.7)') &
690 'Sent to points to PE candidates, time since start of ' &
691 //
'find_points (s):', time1 - time_start
695 call all_el_candidates%init()
697 if (
allocated(n_el_cands))
then
698 deallocate(n_el_cands)
701 allocate(n_el_cands(this%n_points_local))
703 call this%el_finder%find_batch(this%xyz_local, this%n_points_local, &
704 all_el_candidates, n_el_cands)
706 n_point_cand = all_el_candidates%size()
707 if (n_point_cand .gt. 1e8)
then
708 print *,
'Warning, many point candidates on rank', this%pe_rank, &
709 'cands:', n_point_cand, &
710 'Consider increasing number of ranks'
712 call x_t%init(n_point_cand)
713 call y_t%init(n_point_cand)
714 call z_t%init(n_point_cand)
717 do i = 1 , this%n_points_local
718 do j = 1, n_el_cands(i)
720 x_t%x(ii) = this%xyz_local(1,i)
721 y_t%x(ii) = this%xyz_local(2,i)
722 z_t%x(ii) = this%xyz_local(3,i)
726 call mpi_barrier(this%comm)
728 write(log_buf,
'(A,E15.7)') &
729 'Element candidates found, now time for finding rst, time ' // &
730 'since start of find_points (s):', time1 - time_start
732 call rst_local_cand%init(3, n_point_cand)
733 call resx%init(n_point_cand)
734 call resy%init(n_point_cand)
735 call resz%init(n_point_cand)
738 call mpi_barrier(this%comm)
740 el_cands => all_el_candidates%array()
747 call device_map(el_cands, el_cands_d, n_point_cand)
752 call this%rst_finder%find(rst_local_cand, &
754 el_cands, n_point_cand, &
763 call mpi_barrier(this%comm)
766 write(log_buf,
'(A,E15.7)') &
767 'Found rst with Newton iteration, time (s):', time2-time1
770 write(log_buf,
'(A)') &
771 'Checking validity of points and choosing best candidates.'
773 call mpi_barrier(this%comm, ierr)
775 if (
allocated(this%rst_local))
deallocate(this%rst_local)
776 if (
allocated(this%el_owner0_local))
deallocate(this%el_owner0_local)
777 allocate(this%rst_local(3, this%n_points_local))
778 allocate(this%el_owner0_local(this%n_points_local))
781 do i = 1 , this%n_points_local
782 this%xyz_local(1,i) = 10.0
783 this%xyz_local(2,i) = 10.0
784 this%xyz_local(3,i) = 10.0
785 this%rst_local(1,i) = 10.0
786 this%rst_local(2,i) = 10.0
787 this%rst_local(3,i) = 10.0
788 this%el_owner0_local(i) = -1
789 do j = 1, n_el_cands(i)
791 if (
rst_cmp(this%rst_local(:, i), rst_local_cand%x(:, ii), &
792 this%xyz_local(:, i), [resx%x(ii), resy%x(ii), resz%x(ii)], &
794 this%rst_local(1, i) = rst_local_cand%x(1, ii)
795 this%rst_local(2, i) = rst_local_cand%x(2, ii)
797 this%rst_local(3, i) = rst_local_cand%x(3, ii)
798 this%xyz_local(1,i) = resx%x(ii)
799 this%xyz_local(2,i) = resy%x(ii)
800 this%xyz_local(3,i) = resz%x(ii)
801 this%el_owner0_local(i) = el_cands(ii)
807 call res%init(3, this%n_points)
808 n_glb_point_cand = sum(this%n_points_pe)
809 if (
allocated(rst_results))
deallocate(rst_results)
810 if (
allocated(res_results))
deallocate(res_results)
811 if (
allocated(el_owner_results))
deallocate(el_owner_results)
812 allocate(rst_results(3, n_glb_point_cand))
813 allocate(res_results(3, n_glb_point_cand))
814 allocate(el_owner_results(n_glb_point_cand))
819 call glb_intrp_find_back%sendrecv(this%xyz_local, res_results, &
820 this%n_points_local*3, n_glb_point_cand*3)
821 call glb_intrp_find_back%sendrecv(this%rst_local, rst_results, &
822 this%n_points_local*3, n_glb_point_cand*3)
823 do i = 1,
size(glb_intrp_find_back%send_pe)
824 rank = glb_intrp_find_back%send_pe(i)
825 call mpi_isend(this%el_owner0_local( &
826 this%n_points_offset_pe_local(rank) + 1), &
827 this%n_points_pe_local(rank), &
828 mpi_integer, rank, 0, &
829 this%comm, glb_intrp_find_back%send_buf(i)%request, ierr)
830 glb_intrp_find_back%send_buf(i)%flag = .false.
832 do i = 1,
size(glb_intrp_find_back%recv_pe)
833 rank = glb_intrp_find_back%recv_pe(i)
834 call mpi_irecv(el_owner_results(this%n_points_offset_pe(rank)+1),&
835 this%n_points_pe(rank), &
836 mpi_integer, rank, 0, &
837 this%comm, glb_intrp_find_back%recv_buf(i)%request, ierr)
838 glb_intrp_find_back%recv_buf(i)%flag = .false.
840 call glb_intrp_find_back%nbwait_no_op()
842 do i = 1,
size(glb_intrp_find_back%recv_pe)
843 point_ids => this%points_at_pe(glb_intrp_find_back%recv_pe(i))%array()
844 do j = 1, this%n_points_pe(glb_intrp_find_back%recv_pe(i))
845 point_id = point_ids(j)
847 if (
rst_cmp(this%rst(:, point_id), rst_results(:, ii), &
848 res%x(:, point_id), res_results(:, ii), this%padding) .or. &
849 this%pe_owner(point_ids(j)) .eq. -1 )
then
850 this%rst(:, point_ids(j)) = rst_results(:, ii)
851 res%x(:, point_ids(j)) = res_results(:, ii)
852 this%pe_owner(point_ids(j)) = glb_intrp_find_back%recv_pe(i)
853 this%el_owner0(point_ids(j)) = el_owner_results(ii)
864 do i = 0, this%pe_size-1
865 call this%points_at_pe(i)%clear()
866 this%n_points_pe(i) = 0
869 do i = 1, this%n_points
871 if (this%pe_owner(i) .eq. -1 .or. this%el_owner0(i) .eq. -1)
then
872 print *,
'No owning rank found for',&
873 ' point ', stupid_intent,
' with coords', this%xyz(:,i), &
874 ' Interpolation will always yield 0.0. Try increase padding.'
876 call this%points_at_pe(this%pe_owner(i))%push(stupid_intent)
877 this%n_points_pe(this%pe_owner(i)) = &
878 this%n_points_pe(this%pe_owner(i)) + 1
881 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, 1, &
882 mpi_integer, mpi_sum, this%comm, ierr)
883 call mpi_alltoall(this%n_points_pe, 1, mpi_integer, &
884 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
885 this%n_points_offset_pe_local(0) = 0
886 this%n_points_offset_pe(0) = 0
887 do i = 1, (this%pe_size - 1)
888 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
889 + this%n_points_offset_pe_local(i-1)
890 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
891 + this%n_points_offset_pe(i-1)
893 call send_pe_find%free()
894 call recv_pe_find%free()
895 call glb_intrp_find%free()
896 call send_pe_find%init()
897 call recv_pe_find%init()
898 call glb_intrp_find%init_dofs(this%pe_size)
900 do i = 0, (this%pe_size-1)
901 if (this%n_points_pe(i) .gt. 0)
then
902 call send_pe_find%push(i)
903 point_ids => this%points_at_pe(i)%array()
904 do j = 1, this%n_points_pe(i)
905 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 1)
906 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 2)
907 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 3)
910 if (this%n_points_pe_local(i) .gt. 0)
then
911 call recv_pe_find%push(i)
912 do j = 1, this%n_points_pe_local(i)
913 call glb_intrp_find%recv_dof(i)%push(3*(j + &
914 this%n_points_offset_pe_local(i) - 1) + 1)
915 call glb_intrp_find%recv_dof(i)%push(3*(j + &
916 this%n_points_offset_pe_local(i) - 1) + 2)
917 call glb_intrp_find%recv_dof(i)%push(3*(j + &
918 this%n_points_offset_pe_local(i) - 1) + 3)
924 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
925 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
926 this%n_points_local*3)
927 call glb_intrp_find%sendrecv(this%rst, this%rst_local, this%n_points*3, &
928 this%n_points_local*3)
930 do i = 1,
size(glb_intrp_find%send_pe)
931 rank = glb_intrp_find%send_pe(i)
932 point_ids => this%points_at_pe(rank)%array()
933 do j = 1, this%n_points_pe(rank)
935 el_owner_results(ii) = this%el_owner0(point_ids(j))
937 call mpi_isend(el_owner_results(this%n_points_offset_pe(rank) + 1),&
938 this%n_points_pe(rank), &
939 mpi_integer, rank, 0, &
940 this%comm, glb_intrp_find%send_buf(i)%request, ierr)
941 glb_intrp_find%send_buf(i)%flag = .false.
943 do i = 1,
size(glb_intrp_find%recv_pe)
944 rank = glb_intrp_find%recv_pe(i)
945 call mpi_irecv(this%el_owner0_local( &
946 this%n_points_offset_pe_local(rank) + 1), &
947 this%n_points_pe_local(rank), &
948 mpi_integer, rank, 0, &
949 this%comm, glb_intrp_find%recv_buf(i)%request, ierr)
950 glb_intrp_find%recv_buf(i)%flag = .false.
952 call glb_intrp_find%nbwait_no_op()
954 call glb_intrp_find%free()
959 call this%glb_intrp_comm%init_dofs(this%pe_size)
960 do i = 0, (this%pe_size-1)
961 if (this%n_points_pe(i) .gt. 0)
then
963 point_ids => this%points_at_pe(i)%array()
964 do j = 1, this%n_points_pe(i)
965 call this%glb_intrp_comm%recv_dof(i)%push(point_ids(j))
968 if (this%n_points_pe_local(i) .gt. 0)
then
970 do j = 1, this%n_points_pe_local(i)
971 call this%glb_intrp_comm%send_dof(i)%push(j + &
972 this%n_points_offset_pe_local(i))
976 call this%glb_intrp_comm%init(send_pe, recv_pe, this%comm)
979 call this%temp_local%init(this%n_points_local)
980 call this%temp%init(this%n_points)
983 call this%local_interp%init(this%Xh, this%rst_local, &
990 call device_map(this%el_owner0_local, this%el_owner0_local_d, &
992 call device_memcpy(this%el_owner0_local, this%el_owner0_local_d, &
996 call this%check_points(this%x%x, this%y%x, this%z%x)
1001 call glb_intrp_find_back%free()
1002 call send_pe_find%free()
1003 call recv_pe_find%free()
1007 call rst_local_cand%free()
1012 call all_el_candidates%free()
1014 if (
allocated(n_el_cands))
deallocate(n_el_cands)
1015 if (
allocated(rst_results))
deallocate(rst_results)
1016 if (
allocated(res_results))
deallocate(res_results)
1017 if (
allocated(el_owner_results))
deallocate(el_owner_results)
1018 call mpi_barrier(this%comm, ierr)
1020 write(log_buf,
'(A,E15.7)')
'Global interpolation find points ' // &
1021 'done, time (s):', time2-time_start