330 real(kind=
rp),
intent(in) :: x(:)
331 real(kind=
rp),
intent(in) :: y(:)
332 real(kind=
rp),
intent(in) :: z(:)
333 integer,
intent(in) :: gdim
334 integer,
intent(in) :: nelv
335 type(
space_t),
intent(in) :: Xh
336 type(mpi_comm),
intent(in),
optional :: comm
337 real(kind=
dp),
intent(in),
optional :: tol
338 real(kind=
dp),
intent(in),
optional :: pad
340 integer :: lx, ly, lz, ierr, i, n
341 character(len=8000) :: log_buf
343 real(kind=
rp) :: time1, time_start
344 character(len=255) :: mode_str
345 integer :: boxdim, envvar_len
347 call neko_log%section(
'Global Interpolation')
348 call neko_log%message(
'Initializing global interpolation')
353 if (
present(
comm))
then
361 if (
present(pad)) this%padding = pad
364 if (
present(tol)) this%tolerance = tol
366 write(log_buf,
'(A,E15.7)') &
367 'Tolerance: ', this%tolerance
369 write(log_buf,
'(A,E15.7)') &
370 'Padding : ', this%padding
373 time_start = mpi_wtime()
374 call mpi_barrier(this%comm)
376 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
377 call mpi_comm_size(this%comm, this%pe_size, ierr)
382 call mpi_allreduce(nelv, this%glb_nelv, 1, mpi_integer, &
383 mpi_sum, this%comm, ierr)
391 call copy(this%x%x, x, n)
393 call copy(this%y%x, y, n)
395 call copy(this%z%x, z, n)
397 call this%Xh%init(xh%t, lx, ly, lz)
400 if (this%n_dof == -1)
then
405 call get_environment_variable(
"NEKO_GLOBAL_INTERP_EL_FINDER", &
406 mode_str, envvar_len)
408 if (envvar_len .gt. 0)
then
409 if (mode_str(1:envvar_len) ==
'AABB')
then
413 call get_environment_variable(
"NEKO_GLOBAL_INTERP_PE_FINDER", &
414 mode_str, envvar_len)
416 if (envvar_len .gt. 0)
then
417 if (mode_str(1:envvar_len) ==
'AABB')
then
422 if (.not.
allocated(this%el_finder))
then
425 if (.not.
allocated(this%pe_finder))
then
428 select type (el_find => this%el_finder)
430 call neko_log%message(
'Using AABB element finder')
431 call el_find%init(x, y, z, nelv, xh, this%padding)
433 call neko_log%message(
'Using Cartesian element finder')
434 boxdim =
max(lx*int(
real(nelv,
xp)**(1.0_xp/3.0_xp)),2)
435 boxdim = min(boxdim, 300)
436 call el_find%init(x, y, z, nelv, xh, boxdim, this%padding)
438 call neko_error(
'Unknown element finder type')
441 select type (pe_find => this%pe_finder)
443 call neko_log%message(
'Using AABB PE finder')
444 call pe_find%init(this%x%x, this%y%x, this%z%x, &
445 nelv, xh, this%comm, this%padding)
447 call neko_log%message(
'Using Cartesian PE finder')
448 boxdim = lx*int(
real(this%glb_nelv,
xp)**(1.0_xp/3.0_xp))
449 boxdim =
max(boxdim,32)
450 boxdim = min(boxdim, &
451 int(8.0_xp*(30000.0_xp*this%pe_size)**(1.0_xp/3.0_xp)))
452 call pe_find%init(this%x%x, this%y%x, this%z%x, &
453 nelv, xh, this%comm, boxdim, this%padding)
458 call this%rst_finder%init(this%x%x, this%y%x, this%z%x, nelv, xh, &
460 if (
allocated(this%n_points_pe))
deallocate(this%n_points_pe)
461 if (
allocated(this%n_points_pe_local))
deallocate(this%n_points_pe_local)
462 if (
allocated(this%n_points_offset_pe_local)) &
463 deallocate(this%n_points_offset_pe_local)
464 if (
allocated(this%n_points_offset_pe))
deallocate(this%n_points_offset_pe)
465 allocate(this%n_points_pe(0:(this%pe_size-1)))
466 allocate(this%n_points_offset_pe(0:(this%pe_size-1)))
467 allocate(this%n_points_pe_local(0:(this%pe_size-1)))
468 allocate(this%n_points_offset_pe_local(0:(this%pe_size-1)))
469 allocate(this%points_at_pe(0:(this%pe_size-1)))
470 do i = 0, this%pe_size-1
471 call this%points_at_pe(i)%init()
473 call mpi_barrier(this%comm)
475 write(log_buf,
'(A,E15.7)') &
476 'Global interpolation initialized (s):', time1-time_start
568 character(len=8000) :: log_buf
576 type(c_ptr) :: el_cands_d
578 integer :: i, j, stupid_intent
580 integer,
allocatable :: n_el_cands(:)
581 integer,
contiguous,
pointer :: el_cands(:), point_ids(:), send_recv(:)
582 real(kind=
rp),
allocatable :: res_results(:,:)
583 real(kind=
rp),
allocatable :: rst_results(:,:)
584 integer,
allocatable :: el_owner_results(:)
585 integer :: ierr, ii, n_point_cand, n_glb_point_cand, point_id, rank
586 real(kind=
rp) :: time1, time2, time_start
590 type(
stack_i4_t) :: send_pe_find, recv_pe_find
592 el_cands_d = c_null_ptr
593 call neko_log%section(
'Global Interpolation')
594 call glb_intrp_find%init_dofs(this%pe_size)
595 call send_pe_find%init()
596 call recv_pe_find%init()
597 call mpi_barrier(this%comm)
598 time_start = mpi_wtime()
599 write(log_buf,
'(A)')
'Global interpolation, finding points'
605 call this%pe_finder%find_batch(this%xyz, this%n_points, &
606 this%points_at_pe, this%n_points_pe)
607 call mpi_barrier(this%comm)
609 write(log_buf,
'(A,E15.7)') &
610 'Found PE candidates time since start of findpts (s):', &
618 this%n_points_pe_local = 0
619 this%n_points_local = 0
620 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, &
621 1, mpi_integer, mpi_sum, this%comm, ierr)
622 call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
623 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
626 this%n_points_offset_pe_local(0) = 0
627 this%n_points_offset_pe(0) = 0
628 do i = 1, (this%pe_size - 1)
629 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
630 + this%n_points_offset_pe_local(i-1)
631 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
632 + this%n_points_offset_pe(i-1)
634 do i = 0, (this%pe_size-1)
635 if (this%n_points_pe(i) .gt. 0)
then
636 call send_pe_find%push(i)
637 point_ids => this%points_at_pe(i)%array()
638 do j = 1, this%n_points_pe(i)
639 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
640 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
641 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
644 if (this%n_points_pe_local(i) .gt. 0)
then
645 call recv_pe_find%push(i)
646 do j = 1, this%n_points_pe_local(i)
647 call glb_intrp_find%recv_dof(i)%push(3*(j + &
648 this%n_points_offset_pe_local(i) - 1) + 1)
649 call glb_intrp_find%recv_dof(i)%push(3*(j + &
650 this%n_points_offset_pe_local(i) - 1) + 2)
651 call glb_intrp_find%recv_dof(i)%push(3*(j + &
652 this%n_points_offset_pe_local(i) - 1) + 3)
659 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
661 call glb_intrp_find_back%init_dofs(this%pe_size)
663 do i = 0, (this%pe_size-1)
664 send_recv => glb_intrp_find%recv_dof(i)%array()
665 do j = 1, glb_intrp_find%recv_dof(i)%size()
666 call glb_intrp_find_back%send_dof(i)%push(send_recv(j))
668 send_recv => glb_intrp_find%send_dof(i)%array()
669 do j = 1, glb_intrp_find%send_dof(i)%size()
671 call glb_intrp_find_back%recv_dof(i)%push(ii)
675 call glb_intrp_find_back%init(recv_pe_find, send_pe_find, this%comm)
678 if (
allocated(this%xyz_local))
then
679 deallocate(this%xyz_local)
681 allocate(this%xyz_local(3, this%n_points_local))
682 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
683 this%n_points_local*3)
685 call mpi_barrier(this%comm)
687 write(log_buf,
'(A,E15.7)') &
688 'Sent to points to PE candidates, time since start of ' &
689 //
'find_points (s):', time1 - time_start
693 call all_el_candidates%init()
695 if (
allocated(n_el_cands))
then
696 deallocate(n_el_cands)
699 allocate(n_el_cands(this%n_points_local))
701 call this%el_finder%find_batch(this%xyz_local, this%n_points_local, &
702 all_el_candidates, n_el_cands)
704 n_point_cand = all_el_candidates%size()
705 if (n_point_cand .gt. 1e8)
then
706 print *,
'Warning, many point candidates on rank', this%pe_rank, &
707 'cands:', n_point_cand, &
708 'Consider increasing number of ranks'
710 call x_t%init(n_point_cand)
711 call y_t%init(n_point_cand)
712 call z_t%init(n_point_cand)
715 do i = 1 , this%n_points_local
716 do j = 1, n_el_cands(i)
718 x_t%x(ii) = this%xyz_local(1,i)
719 y_t%x(ii) = this%xyz_local(2,i)
720 z_t%x(ii) = this%xyz_local(3,i)
724 call mpi_barrier(this%comm)
726 write(log_buf,
'(A,E15.7)') &
727 'Element candidates found, now time for finding rst,time ' // &
728 'since start of find_points (s):', time1 - time_start
730 call rst_local_cand%init(3,n_point_cand)
731 call resx%init(n_point_cand)
732 call resy%init(n_point_cand)
733 call resz%init(n_point_cand)
736 call mpi_barrier(this%comm)
738 el_cands => all_el_candidates%array()
745 call device_map(el_cands, el_cands_d,n_point_cand)
750 call this%rst_finder%find(rst_local_cand, &
752 el_cands, n_point_cand, &
762 call mpi_barrier(this%comm)
765 write(log_buf,
'(A,E15.7)') &
766 'Found rst with Newton iteration, time (s):', time2-time1
769 write(log_buf,
'(A)') &
770 'Checking validity of points and choosing best candidates.'
772 call mpi_barrier(this%comm,ierr)
774 if (
allocated(this%rst_local))
deallocate(this%rst_local)
775 if (
allocated(this%el_owner0_local))
deallocate(this%el_owner0_local)
776 allocate(this%rst_local(3,this%n_points_local))
777 allocate(this%el_owner0_local(this%n_points_local))
780 do i = 1 , this%n_points_local
781 this%xyz_local(1,i) = 10.0
782 this%xyz_local(2,i) = 10.0
783 this%xyz_local(3,i) = 10.0
784 this%rst_local(1,i) = 10.0
785 this%rst_local(2,i) = 10.0
786 this%rst_local(3,i) = 10.0
787 this%el_owner0_local(i) = -1
788 do j = 1, n_el_cands(i)
790 if (
rst_cmp(this%rst_local(:,i), rst_local_cand%x(:,ii),&
791 this%xyz_local(:,i), [resx%x(ii),resy%x(ii),resz%x(ii)], &
793 this%rst_local(1,i) = rst_local_cand%x(1,ii)
794 this%rst_local(2,i) = rst_local_cand%x(2,ii)
796 this%rst_local(3,i) = rst_local_cand%x(3,ii)
797 this%xyz_local(1,i) = resx%x(ii)
798 this%xyz_local(2,i) = resy%x(ii)
799 this%xyz_local(3,i) = resz%x(ii)
800 this%el_owner0_local(i) = el_cands(ii)
806 call res%init(3,this%n_points)
807 n_glb_point_cand = sum(this%n_points_pe)
808 if (
allocated(rst_results))
deallocate(rst_results)
809 if (
allocated(res_results))
deallocate(res_results)
810 if (
allocated(el_owner_results))
deallocate(el_owner_results)
811 allocate(rst_results(3,n_glb_point_cand))
812 allocate(res_results(3,n_glb_point_cand))
813 allocate(el_owner_results(n_glb_point_cand))
818 call glb_intrp_find_back%sendrecv(this%xyz_local, res_results, &
819 this%n_points_local*3, n_glb_point_cand*3)
820 call glb_intrp_find_back%sendrecv(this%rst_local, rst_results, &
821 this%n_points_local*3, n_glb_point_cand*3)
822 do i = 1,
size(glb_intrp_find_back%send_pe)
823 rank = glb_intrp_find_back%send_pe(i)
824 call mpi_isend(this%el_owner0_local( &
825 this%n_points_offset_pe_local(rank) + 1), &
826 this%n_points_pe_local(rank), &
827 mpi_integer, rank, 0, &
828 this%comm, glb_intrp_find_back%send_buf(i)%request, ierr)
829 glb_intrp_find_back%send_buf(i)%flag = .false.
831 do i = 1,
size(glb_intrp_find_back%recv_pe)
832 rank = glb_intrp_find_back%recv_pe(i)
833 call mpi_irecv(el_owner_results(this%n_points_offset_pe(rank)+1),&
834 this%n_points_pe(rank), &
835 mpi_integer, rank, 0, &
836 this%comm, glb_intrp_find_back%recv_buf(i)%request, ierr)
837 glb_intrp_find_back%recv_buf(i)%flag = .false.
839 call glb_intrp_find_back%nbwait_no_op()
841 do i = 1,
size(glb_intrp_find_back%recv_pe)
842 point_ids => this%points_at_pe(glb_intrp_find_back%recv_pe(i))%array()
843 do j = 1, this%n_points_pe(glb_intrp_find_back%recv_pe(i))
844 point_id = point_ids(j)
846 if (
rst_cmp(this%rst(:,point_id), rst_results(:,ii), &
847 res%x(:,point_id), res_results(:,ii), this%padding) .or. &
848 this%pe_owner(point_ids(j)) .eq. -1 )
then
849 this%rst(:,point_ids(j)) = rst_results(:,ii)
850 res%x(:,point_ids(j)) = res_results(:,ii)
851 this%pe_owner(point_ids(j)) = glb_intrp_find_back%recv_pe(i)
852 this%el_owner0(point_ids(j)) = el_owner_results(ii)
863 do i = 0, this%pe_size-1
864 call this%points_at_pe(i)%clear()
865 this%n_points_pe(i) = 0
868 do i = 1, this%n_points
870 if (this%pe_owner(i) .eq. -1 .or. this%el_owner0(i) .eq. -1)
then
871 print *,
'No owning rank found for',&
872 ' point ', stupid_intent,
' with coords', this%xyz(:,i), &
873 ' Interpolation will always yield 0.0. Try increase padding.'
875 call this%points_at_pe(this%pe_owner(i))%push(stupid_intent)
876 this%n_points_pe(this%pe_owner(i)) = &
877 this%n_points_pe(this%pe_owner(i)) + 1
880 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, 1, &
881 mpi_integer, mpi_sum, this%comm, ierr)
882 call mpi_alltoall(this%n_points_pe, 1, mpi_integer, &
883 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
884 this%n_points_offset_pe_local(0) = 0
885 this%n_points_offset_pe(0) = 0
886 do i = 1, (this%pe_size - 1)
887 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
888 + this%n_points_offset_pe_local(i-1)
889 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
890 + this%n_points_offset_pe(i-1)
892 call send_pe_find%free()
893 call recv_pe_find%free()
894 call glb_intrp_find%free()
895 call send_pe_find%init()
896 call recv_pe_find%init()
897 call glb_intrp_find%init_dofs(this%pe_size)
899 do i = 0, (this%pe_size-1)
900 if (this%n_points_pe(i) .gt. 0)
then
901 call send_pe_find%push(i)
902 point_ids => this%points_at_pe(i)%array()
903 do j = 1, this%n_points_pe(i)
904 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 1)
905 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 2)
906 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 3)
909 if (this%n_points_pe_local(i) .gt. 0)
then
910 call recv_pe_find%push(i)
911 do j = 1, this%n_points_pe_local(i)
912 call glb_intrp_find%recv_dof(i)%push(3*(j + &
913 this%n_points_offset_pe_local(i) - 1) + 1)
914 call glb_intrp_find%recv_dof(i)%push(3*(j + &
915 this%n_points_offset_pe_local(i) - 1) + 2)
916 call glb_intrp_find%recv_dof(i)%push(3*(j + &
917 this%n_points_offset_pe_local(i) - 1) + 3)
923 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
924 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
925 this%n_points_local*3)
926 call glb_intrp_find%sendrecv(this%rst, this%rst_local, this%n_points*3, &
927 this%n_points_local*3)
929 do i = 1,
size(glb_intrp_find%send_pe)
930 rank = glb_intrp_find%send_pe(i)
931 point_ids => this%points_at_pe(rank)%array()
932 do j = 1, this%n_points_pe(rank)
934 el_owner_results(ii) = this%el_owner0(point_ids(j))
936 call mpi_isend(el_owner_results(this%n_points_offset_pe(rank) + 1),&
937 this%n_points_pe(rank), &
938 mpi_integer, rank, 0, &
939 this%comm, glb_intrp_find%send_buf(i)%request, ierr)
940 glb_intrp_find%send_buf(i)%flag = .false.
942 do i = 1,
size(glb_intrp_find%recv_pe)
943 rank = glb_intrp_find%recv_pe(i)
944 call mpi_irecv(this%el_owner0_local( &
945 this%n_points_offset_pe_local(rank) + 1), &
946 this%n_points_pe_local(rank), &
947 mpi_integer, rank, 0, &
948 this%comm, glb_intrp_find%recv_buf(i)%request, ierr)
949 glb_intrp_find%recv_buf(i)%flag = .false.
951 call glb_intrp_find%nbwait_no_op()
953 call glb_intrp_find%free()
958 call this%glb_intrp_comm%init_dofs(this%pe_size)
959 do i = 0, (this%pe_size-1)
960 if (this%n_points_pe(i) .gt. 0)
then
962 point_ids => this%points_at_pe(i)%array()
963 do j = 1, this%n_points_pe(i)
964 call this%glb_intrp_comm%recv_dof(i)%push(point_ids(j))
967 if (this%n_points_pe_local(i) .gt. 0)
then
969 do j = 1, this%n_points_pe_local(i)
970 call this%glb_intrp_comm%send_dof(i)%push(j + &
971 this%n_points_offset_pe_local(i))
975 call this%glb_intrp_comm%init(send_pe, recv_pe,this%comm)
978 call this%temp_local%init(this%n_points_local)
979 call this%temp%init(this%n_points)
982 call this%local_interp%init(this%Xh, this%rst_local, &
989 call device_map(this%el_owner0_local, this%el_owner0_local_d, &
991 call device_memcpy(this%el_owner0_local, this%el_owner0_local_d, &
995 call this%check_points(this%x%x,this%y%x, this%z%x)
1000 call glb_intrp_find_back%free()
1001 call send_pe_find%free()
1002 call recv_pe_find%free()
1006 call rst_local_cand%free()
1011 call all_el_candidates%free()
1013 if (
allocated(n_el_cands))
deallocate(n_el_cands)
1014 if (
allocated(rst_results))
deallocate(rst_results)
1015 if (
allocated(res_results))
deallocate(res_results)
1016 if (
allocated(el_owner_results))
deallocate(el_owner_results)
1017 call mpi_barrier(this%comm, ierr)
1019 write(log_buf,
'(A,E15.7)')
'Global interpolation find points ' // &
1020 'done, time (s):', time2-time_start