320 real(kind=
rp),
intent(in) :: x(:)
321 real(kind=
rp),
intent(in) :: y(:)
322 real(kind=
rp),
intent(in) :: z(:)
323 integer,
intent(in) :: gdim
324 integer,
intent(in) :: nelv
325 type(
space_t),
intent(in) :: Xh
326 type(mpi_comm),
intent(in),
optional :: comm
327 real(kind=
dp),
intent(in),
optional :: tol
328 real(kind=
dp),
intent(in),
optional :: pad
330 integer :: lx, ly, lz, ierr, i, n
331 character(len=8000) :: log_buf
333 real(kind=
rp) :: time1, time_start
334 character(len=255) :: mode_str
335 integer :: boxdim, envvar_len
337 call neko_log%section(
'Global Interpolation')
338 call neko_log%message(
'Initializing global interpolation')
343 if (
present(
comm))
then
351 if (
present(pad)) this%padding = pad
354 if (
present(tol)) this%tolerance = tol
356 write(log_buf,
'(A,E15.7)') &
357 'Tolerance: ', this%tolerance
359 write(log_buf,
'(A,E15.7)') &
360 'Padding : ', this%padding
363 time_start = mpi_wtime()
364 call mpi_barrier(this%comm)
366 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
367 call mpi_comm_size(this%comm, this%pe_size, ierr)
372 call mpi_allreduce(nelv, this%glb_nelv, 1, mpi_integer, &
373 mpi_sum, this%comm, ierr)
381 call copy(this%x%x, x, n)
383 call copy(this%y%x, y, n)
385 call copy(this%z%x, z, n)
387 call this%Xh%init(xh%t, lx, ly, lz)
390 if (this%n_dof == -1)
then
395 call get_environment_variable(
"NEKO_GLOBAL_INTERP_EL_FINDER", &
396 mode_str, envvar_len)
398 if (envvar_len .gt. 0)
then
399 if (mode_str(1:envvar_len) ==
'AABB')
then
403 call get_environment_variable(
"NEKO_GLOBAL_INTERP_PE_FINDER", &
404 mode_str, envvar_len)
406 if (envvar_len .gt. 0)
then
407 if (mode_str(1:envvar_len) ==
'AABB')
then
412 if (.not.
allocated(this%el_finder))
then
415 if (.not.
allocated(this%pe_finder))
then
418 select type (el_find => this%el_finder)
420 call neko_log%message(
'Using AABB element finder')
421 call el_find%init(x, y, z, nelv, xh, this%padding)
423 call neko_log%message(
'Using Cartesian element finder')
424 boxdim =
max(lx*int(
real(nelv,
xp)**(1.0_xp/3.0_xp)),2)
425 boxdim = min(boxdim, 300)
426 call el_find%init(x, y, z, nelv, xh, boxdim, this%padding)
428 call neko_error(
'Unknown element finder type')
431 select type (pe_find => this%pe_finder)
433 call neko_log%message(
'Using AABB PE finder')
434 call pe_find%init(this%x%x, this%y%x, this%z%x, &
435 nelv, xh, this%comm, this%padding)
437 call neko_log%message(
'Using Cartesian PE finder')
438 boxdim = lx*int(
real(this%glb_nelv,
xp)**(1.0_xp/3.0_xp))
439 boxdim =
max(boxdim,32)
440 boxdim = min(boxdim, &
441 int(8.0_xp*(30000.0_xp*this%pe_size)**(1.0_xp/3.0_xp)))
442 call pe_find%init(this%x%x, this%y%x, this%z%x, &
443 nelv, xh, this%comm, boxdim, this%padding)
448 call this%rst_finder%init(this%x%x, this%y%x, this%z%x, nelv, xh, &
450 if (
allocated(this%n_points_pe))
deallocate(this%n_points_pe)
451 if (
allocated(this%n_points_pe_local))
deallocate(this%n_points_pe_local)
452 if (
allocated(this%n_points_offset_pe_local)) &
453 deallocate(this%n_points_offset_pe_local)
454 if (
allocated(this%n_points_offset_pe))
deallocate(this%n_points_offset_pe)
455 allocate(this%n_points_pe(0:(this%pe_size-1)))
456 allocate(this%n_points_offset_pe(0:(this%pe_size-1)))
457 allocate(this%n_points_pe_local(0:(this%pe_size-1)))
458 allocate(this%n_points_offset_pe_local(0:(this%pe_size-1)))
459 allocate(this%points_at_pe(0:(this%pe_size-1)))
460 do i = 0, this%pe_size-1
461 call this%points_at_pe(i)%init()
463 call mpi_barrier(this%comm)
465 write(log_buf,
'(A,E15.7)') &
466 'Global interpolation initialized (s):', time1-time_start
558 character(len=8000) :: log_buf
566 type(c_ptr) :: el_cands_d
568 integer :: i, j, stupid_intent
570 integer,
allocatable :: n_el_cands(:)
571 integer,
contiguous,
pointer :: el_cands(:), point_ids(:), send_recv(:)
572 real(kind=
rp),
allocatable :: res_results(:,:)
573 real(kind=
rp),
allocatable :: rst_results(:,:)
574 integer,
allocatable :: el_owner_results(:)
575 integer :: ierr, ii, n_point_cand, n_glb_point_cand, point_id, rank
576 real(kind=
rp) :: time1, time2, time_start
580 type(
stack_i4_t) :: send_pe_find, recv_pe_find
582 el_cands_d = c_null_ptr
583 call neko_log%section(
'Global Interpolation')
584 call glb_intrp_find%init_dofs(this%pe_size)
585 call send_pe_find%init()
586 call recv_pe_find%init()
587 call mpi_barrier(this%comm)
588 time_start = mpi_wtime()
589 write(log_buf,
'(A)')
'Global interpolation, finding points'
595 call this%pe_finder%find_batch(this%xyz, this%n_points, &
596 this%points_at_pe, this%n_points_pe)
597 call mpi_barrier(this%comm)
599 write(log_buf,
'(A,E15.7)') &
600 'Found PE candidates time since start of findpts (s):', &
608 this%n_points_pe_local = 0
609 this%n_points_local = 0
610 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, &
611 1, mpi_integer, mpi_sum, this%comm, ierr)
612 call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
613 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
616 this%n_points_offset_pe_local(0) = 0
617 this%n_points_offset_pe(0) = 0
618 do i = 1, (this%pe_size - 1)
619 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
620 + this%n_points_offset_pe_local(i-1)
621 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
622 + this%n_points_offset_pe(i-1)
624 do i = 0, (this%pe_size-1)
625 if (this%n_points_pe(i) .gt. 0)
then
626 call send_pe_find%push(i)
627 point_ids => this%points_at_pe(i)%array()
628 do j = 1, this%n_points_pe(i)
629 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
630 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
631 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
634 if (this%n_points_pe_local(i) .gt. 0)
then
635 call recv_pe_find%push(i)
636 do j = 1, this%n_points_pe_local(i)
637 call glb_intrp_find%recv_dof(i)%push(3*(j + &
638 this%n_points_offset_pe_local(i) - 1) + 1)
639 call glb_intrp_find%recv_dof(i)%push(3*(j + &
640 this%n_points_offset_pe_local(i) - 1) + 2)
641 call glb_intrp_find%recv_dof(i)%push(3*(j + &
642 this%n_points_offset_pe_local(i) - 1) + 3)
649 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
651 call glb_intrp_find_back%init_dofs(this%pe_size)
653 do i = 0, (this%pe_size-1)
654 send_recv => glb_intrp_find%recv_dof(i)%array()
655 do j = 1, glb_intrp_find%recv_dof(i)%size()
656 call glb_intrp_find_back%send_dof(i)%push(send_recv(j))
658 send_recv => glb_intrp_find%send_dof(i)%array()
659 do j = 1, glb_intrp_find%send_dof(i)%size()
661 call glb_intrp_find_back%recv_dof(i)%push(ii)
665 call glb_intrp_find_back%init(recv_pe_find, send_pe_find, this%comm)
668 if (
allocated(this%xyz_local))
then
669 deallocate(this%xyz_local)
671 allocate(this%xyz_local(3, this%n_points_local))
672 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
673 this%n_points_local*3)
675 call mpi_barrier(this%comm)
677 write(log_buf,
'(A,E15.7)') &
678 'Sent to points to PE candidates, time since start of ' &
679 //
'find_points (s):', time1 - time_start
683 call all_el_candidates%init()
685 if (
allocated(n_el_cands))
then
686 deallocate(n_el_cands)
689 allocate(n_el_cands(this%n_points_local))
691 call this%el_finder%find_batch(this%xyz_local, this%n_points_local, &
692 all_el_candidates, n_el_cands)
694 n_point_cand = all_el_candidates%size()
695 if (n_point_cand .gt. 1e8)
then
696 print *,
'Warning, many point candidates on rank', this%pe_rank, &
697 'cands:', n_point_cand, &
698 'Consider increasing number of ranks'
700 call x_t%init(n_point_cand)
701 call y_t%init(n_point_cand)
702 call z_t%init(n_point_cand)
705 do i = 1 , this%n_points_local
706 do j = 1, n_el_cands(i)
708 x_t%x(ii) = this%xyz_local(1,i)
709 y_t%x(ii) = this%xyz_local(2,i)
710 z_t%x(ii) = this%xyz_local(3,i)
714 call mpi_barrier(this%comm)
716 write(log_buf,
'(A,E15.7)') &
717 'Element candidates found, now time for finding rst,time ' // &
718 'since start of find_points (s):', time1 - time_start
720 call rst_local_cand%init(3,n_point_cand)
721 call resx%init(n_point_cand)
722 call resy%init(n_point_cand)
723 call resz%init(n_point_cand)
726 call mpi_barrier(this%comm)
728 el_cands => all_el_candidates%array()
735 call device_map(el_cands, el_cands_d,n_point_cand)
740 call this%rst_finder%find(rst_local_cand, &
742 el_cands, n_point_cand, &
752 call mpi_barrier(this%comm)
755 write(log_buf,
'(A,E15.7)') &
756 'Found rst with Newton iteration, time (s):', time2-time1
759 write(log_buf,
'(A)') &
760 'Checking validity of points and choosing best candidates.'
762 call mpi_barrier(this%comm,ierr)
764 if (
allocated(this%rst_local))
deallocate(this%rst_local)
765 if (
allocated(this%el_owner0_local))
deallocate(this%el_owner0_local)
766 allocate(this%rst_local(3,this%n_points_local))
767 allocate(this%el_owner0_local(this%n_points_local))
770 do i = 1 , this%n_points_local
771 this%xyz_local(1,i) = 10.0
772 this%xyz_local(2,i) = 10.0
773 this%xyz_local(3,i) = 10.0
774 this%rst_local(1,i) = 10.0
775 this%rst_local(2,i) = 10.0
776 this%rst_local(3,i) = 10.0
777 this%el_owner0_local(i) = -1
778 do j = 1, n_el_cands(i)
780 if (
rst_cmp(this%rst_local(:,i), rst_local_cand%x(:,ii),&
781 this%xyz_local(:,i), [resx%x(ii),resy%x(ii),resz%x(ii)], &
783 this%rst_local(1,i) = rst_local_cand%x(1,ii)
784 this%rst_local(2,i) = rst_local_cand%x(2,ii)
786 this%rst_local(3,i) = rst_local_cand%x(3,ii)
787 this%xyz_local(1,i) = resx%x(ii)
788 this%xyz_local(2,i) = resy%x(ii)
789 this%xyz_local(3,i) = resz%x(ii)
790 this%el_owner0_local(i) = el_cands(ii)
796 call res%init(3,this%n_points)
797 n_glb_point_cand = sum(this%n_points_pe)
798 if (
allocated(rst_results))
deallocate(rst_results)
799 if (
allocated(res_results))
deallocate(res_results)
800 if (
allocated(el_owner_results))
deallocate(el_owner_results)
801 allocate(rst_results(3,n_glb_point_cand))
802 allocate(res_results(3,n_glb_point_cand))
803 allocate(el_owner_results(n_glb_point_cand))
808 call glb_intrp_find_back%sendrecv(this%xyz_local, res_results, &
809 this%n_points_local*3, n_glb_point_cand*3)
810 call glb_intrp_find_back%sendrecv(this%rst_local, rst_results, &
811 this%n_points_local*3, n_glb_point_cand*3)
812 do i = 1,
size(glb_intrp_find_back%send_pe)
813 rank = glb_intrp_find_back%send_pe(i)
814 call mpi_isend(this%el_owner0_local( &
815 this%n_points_offset_pe_local(rank) + 1), &
816 this%n_points_pe_local(rank), &
817 mpi_integer, rank, 0, &
818 this%comm, glb_intrp_find_back%send_buf(i)%request, ierr)
819 glb_intrp_find_back%send_buf(i)%flag = .false.
821 do i = 1,
size(glb_intrp_find_back%recv_pe)
822 rank = glb_intrp_find_back%recv_pe(i)
823 call mpi_irecv(el_owner_results(this%n_points_offset_pe(rank)+1),&
824 this%n_points_pe(rank), &
825 mpi_integer, rank, 0, &
826 this%comm, glb_intrp_find_back%recv_buf(i)%request, ierr)
827 glb_intrp_find_back%recv_buf(i)%flag = .false.
829 call glb_intrp_find_back%nbwait_no_op()
831 do i = 1,
size(glb_intrp_find_back%recv_pe)
832 point_ids => this%points_at_pe(glb_intrp_find_back%recv_pe(i))%array()
833 do j = 1, this%n_points_pe(glb_intrp_find_back%recv_pe(i))
834 point_id = point_ids(j)
836 if (
rst_cmp(this%rst(:,point_id), rst_results(:,ii), &
837 res%x(:,point_id), res_results(:,ii), this%padding) .or. &
838 this%pe_owner(point_ids(j)) .eq. -1 )
then
839 this%rst(:,point_ids(j)) = rst_results(:,ii)
840 res%x(:,point_ids(j)) = res_results(:,ii)
841 this%pe_owner(point_ids(j)) = glb_intrp_find_back%recv_pe(i)
842 this%el_owner0(point_ids(j)) = el_owner_results(ii)
853 do i = 0, this%pe_size-1
854 call this%points_at_pe(i)%clear()
855 this%n_points_pe(i) = 0
858 do i = 1, this%n_points
860 if (this%pe_owner(i) .eq. -1 .or. this%el_owner0(i) .eq. -1)
then
861 print *,
'No owning rank found for',&
862 ' point ', stupid_intent,
' with coords', this%xyz(:,i), &
863 ' Interpolation will always yield 0.0. Try increase padding.'
865 call this%points_at_pe(this%pe_owner(i))%push(stupid_intent)
866 this%n_points_pe(this%pe_owner(i)) = &
867 this%n_points_pe(this%pe_owner(i)) + 1
870 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, 1, &
871 mpi_integer, mpi_sum, this%comm, ierr)
872 call mpi_alltoall(this%n_points_pe, 1, mpi_integer, &
873 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
874 this%n_points_offset_pe_local(0) = 0
875 this%n_points_offset_pe(0) = 0
876 do i = 1, (this%pe_size - 1)
877 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
878 + this%n_points_offset_pe_local(i-1)
879 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
880 + this%n_points_offset_pe(i-1)
882 call send_pe_find%free()
883 call recv_pe_find%free()
884 call glb_intrp_find%free()
885 call send_pe_find%init()
886 call recv_pe_find%init()
887 call glb_intrp_find%init_dofs(this%pe_size)
889 do i = 0, (this%pe_size-1)
890 if (this%n_points_pe(i) .gt. 0)
then
891 call send_pe_find%push(i)
892 point_ids => this%points_at_pe(i)%array()
893 do j = 1, this%n_points_pe(i)
894 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 1)
895 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 2)
896 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j) - 1) + 3)
899 if (this%n_points_pe_local(i) .gt. 0)
then
900 call recv_pe_find%push(i)
901 do j = 1, this%n_points_pe_local(i)
902 call glb_intrp_find%recv_dof(i)%push(3*(j + &
903 this%n_points_offset_pe_local(i) - 1) + 1)
904 call glb_intrp_find%recv_dof(i)%push(3*(j + &
905 this%n_points_offset_pe_local(i) - 1) + 2)
906 call glb_intrp_find%recv_dof(i)%push(3*(j + &
907 this%n_points_offset_pe_local(i) - 1) + 3)
913 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
914 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
915 this%n_points_local*3)
916 call glb_intrp_find%sendrecv(this%rst, this%rst_local, this%n_points*3, &
917 this%n_points_local*3)
919 do i = 1,
size(glb_intrp_find%send_pe)
920 rank = glb_intrp_find%send_pe(i)
921 point_ids => this%points_at_pe(rank)%array()
922 do j = 1, this%n_points_pe(rank)
924 el_owner_results(ii) = this%el_owner0(point_ids(j))
926 call mpi_isend(el_owner_results(this%n_points_offset_pe(rank) + 1),&
927 this%n_points_pe(rank), &
928 mpi_integer, rank, 0, &
929 this%comm, glb_intrp_find%send_buf(i)%request, ierr)
930 glb_intrp_find%send_buf(i)%flag = .false.
932 do i = 1,
size(glb_intrp_find%recv_pe)
933 rank = glb_intrp_find%recv_pe(i)
934 call mpi_irecv(this%el_owner0_local( &
935 this%n_points_offset_pe_local(rank) + 1), &
936 this%n_points_pe_local(rank), &
937 mpi_integer, rank, 0, &
938 this%comm, glb_intrp_find%recv_buf(i)%request, ierr)
939 glb_intrp_find%recv_buf(i)%flag = .false.
941 call glb_intrp_find%nbwait_no_op()
943 call glb_intrp_find%free()
948 call this%glb_intrp_comm%init_dofs(this%pe_size)
949 do i = 0, (this%pe_size-1)
950 if (this%n_points_pe(i) .gt. 0)
then
952 point_ids => this%points_at_pe(i)%array()
953 do j = 1, this%n_points_pe(i)
954 call this%glb_intrp_comm%recv_dof(i)%push(point_ids(j))
957 if (this%n_points_pe_local(i) .gt. 0)
then
959 do j = 1, this%n_points_pe_local(i)
960 call this%glb_intrp_comm%send_dof(i)%push(j + &
961 this%n_points_offset_pe_local(i))
965 call this%glb_intrp_comm%init(send_pe, recv_pe,this%comm)
968 call this%temp_local%init(this%n_points_local)
969 call this%temp%init(this%n_points)
972 call this%local_interp%init(this%Xh, this%rst_local, &
979 call device_map(this%el_owner0_local, this%el_owner0_local_d, &
981 call device_memcpy(this%el_owner0_local, this%el_owner0_local_d, &
985 call this%check_points(this%x%x,this%y%x, this%z%x)
990 call glb_intrp_find_back%free()
991 call send_pe_find%free()
992 call recv_pe_find%free()
996 call rst_local_cand%free()
1001 call all_el_candidates%free()
1003 if (
allocated(n_el_cands))
deallocate(n_el_cands)
1004 if (
allocated(rst_results))
deallocate(rst_results)
1005 if (
allocated(res_results))
deallocate(res_results)
1006 if (
allocated(el_owner_results))
deallocate(el_owner_results)
1007 call mpi_barrier(this%comm, ierr)
1009 write(log_buf,
'(A,E15.7)')
'Global interpolation find points ' // &
1010 'done, time (s):', time2-time_start