248 real(kind=
rp),
intent(in) :: x(:)
249 real(kind=
rp),
intent(in) :: y(:)
250 real(kind=
rp),
intent(in) :: z(:)
251 integer,
intent(in) :: gdim
252 integer,
intent(in) :: nelv
253 type(mpi_comm),
intent(in),
optional :: comm
254 type(
space_t),
intent(in) :: Xh
255 real(kind=
rp),
intent(in),
optional :: tol
256 real(kind=
rp),
intent(in),
optional :: pad
257 integer :: lx, ly, lz, ierr, i, n
258 character(len=8000) :: log_buf
259 real(kind=
dp) :: padding
260 real(kind=
rp) :: time1, time_start
261 character(len=255) :: mode_str
262 integer :: boxdim, envvar_len
266 if (
present(
comm))
then
272 if (
present(pad))
then
280 time_start = mpi_wtime()
281 call mpi_barrier(this%comm)
283 call mpi_comm_rank(this%comm, this%pe_rank, ierr)
284 call mpi_comm_size(this%comm, this%pe_size, ierr)
288 if (
present(tol))
then
294 call mpi_allreduce(nelv, this%glb_nelv, 1, mpi_integer, &
295 mpi_sum, this%comm, ierr)
303 call copy(this%x%x, x, n)
305 call copy(this%y%x, y, n)
307 call copy(this%z%x, z, n)
309 call this%Xh%init(xh%t, lx, ly, lz)
312 if (this%n_dof == -1)
then
316 call neko_log%section(
'Global Interpolation')
318 call neko_log%message(
'Initializing global interpolation')
319 call get_environment_variable(
"NEKO_GLOBAL_INTERP_EL_FINDER", &
320 mode_str, envvar_len)
322 if (envvar_len .gt. 0)
then
323 if (mode_str(1:envvar_len) ==
'AABB')
then
327 call get_environment_variable(
"NEKO_GLOBAL_INTERP_PE_FINDER", &
328 mode_str, envvar_len)
330 if (envvar_len .gt. 0)
then
331 if (mode_str(1:envvar_len) ==
'AABB')
then
336 if (.not.
allocated(this%el_finder))
then
339 if (.not.
allocated(this%pe_finder))
then
342 select type(el_find => this%el_finder)
344 call neko_log%message(
'Using AABB element finder')
345 call el_find%init(x, y, z, nelv, xh, padding)
347 call neko_log%message(
'Using Cartesian element finder')
348 boxdim =
max(lx*int(
real(nelv,
xp)**(1.0_xp/3.0_xp)),2)
349 boxdim = min(boxdim, 300)
350 call el_find%init(x, y, z, nelv, xh, boxdim, padding)
352 call neko_error(
'Unknown element finder type')
355 select type(pe_find => this%pe_finder)
357 call neko_log%message(
'Using AABB PE finder')
358 call pe_find%init(this%x%x, this%y%x, this%z%x, &
359 nelv, xh, this%comm, padding)
361 call neko_log%message(
'Using Cartesian PE finder')
362 boxdim = lx*int(
real(this%glb_nelv,
xp)**(1.0_xp/3.0_xp))
363 boxdim =
max(boxdim,32)
364 boxdim = min(boxdim, &
365 int(8.0_xp*(30000.0_xp*this%pe_size)**(1.0_xp/3.0_xp)))
366 call pe_find%init(this%x%x, this%y%x, this%z%x, &
367 nelv, xh, this%comm, boxdim, padding)
372 call this%rst_finder%init(this%x%x, this%y%x, this%z%x, nelv, xh, this%tol)
373 if (
allocated(this%n_points_pe))
deallocate(this%n_points_pe)
374 if (
allocated(this%n_points_pe_local))
deallocate(this%n_points_pe_local)
375 if (
allocated(this%n_points_offset_pe_local)) &
376 deallocate(this%n_points_offset_pe_local)
377 if (
allocated(this%n_points_offset_pe))
deallocate(this%n_points_offset_pe)
378 allocate(this%n_points_pe(0:(this%pe_size-1)))
379 allocate(this%n_points_offset_pe(0:(this%pe_size-1)))
380 allocate(this%n_points_pe_local(0:(this%pe_size-1)))
381 allocate(this%n_points_offset_pe_local(0:(this%pe_size-1)))
382 allocate(this%points_at_pe(0:(this%pe_size-1)))
383 do i = 0, this%pe_size-1
384 call this%points_at_pe(i)%init()
386 call mpi_barrier(this%comm)
388 write(log_buf,
'(A,E15.7)') &
389 'Global interpolation initialized (s):', time1-time_start
481 character(len=8000) :: log_buf
489 type(c_ptr) :: el_cands_d
491 integer :: i, j, stupid_intent
493 integer,
allocatable :: n_el_cands(:)
494 integer,
contiguous,
pointer :: el_cands(:), point_ids(:), send_recv(:)
495 real(kind=
rp),
allocatable :: res_results(:,:)
496 real(kind=
rp),
allocatable :: rst_results(:,:)
497 integer,
allocatable :: el_owner_results(:)
498 integer :: ierr, ii, n_point_cand, n_glb_point_cand, point_id, rank
499 real(kind=
rp) :: time1, time2, time_start
503 type(
stack_i4_t) :: send_pe_find, recv_pe_find
505 el_cands_d = c_null_ptr
506 call neko_log%section(
'Global Interpolation')
507 call glb_intrp_find%init_dofs(this%pe_size)
508 call send_pe_find%init()
509 call recv_pe_find%init()
510 call mpi_barrier(this%comm)
511 time_start = mpi_wtime()
512 write(log_buf,
'(A)')
'Global interpolation, finding points'
518 call this%pe_finder%find_batch(this%xyz, this%n_points, &
519 this%points_at_pe, this%n_points_pe)
520 call mpi_barrier(this%comm)
522 write(log_buf,
'(A,E15.7)') &
523 'Found PE candidates time since start of findpts (s):', time1-time_start
529 this%n_points_pe_local = 0
530 this%n_points_local = 0
531 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, &
532 1, mpi_integer, mpi_sum, this%comm, ierr)
533 call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
534 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
537 this%n_points_offset_pe_local(0) = 0
538 this%n_points_offset_pe(0) = 0
539 do i = 1, (this%pe_size - 1)
540 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
541 + this%n_points_offset_pe_local(i-1)
542 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
543 + this%n_points_offset_pe(i-1)
545 do i = 0, (this%pe_size-1)
546 if (this%n_points_pe(i) .gt. 0)
then
547 call send_pe_find%push(i)
548 point_ids => this%points_at_pe(i)%array()
549 do j = 1, this%n_points_pe(i)
550 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
551 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
552 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
555 if (this%n_points_pe_local(i) .gt. 0)
then
556 call recv_pe_find%push(i)
557 do j = 1, this%n_points_pe_local(i)
558 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+1)
559 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+2)
560 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+3)
567 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
569 call glb_intrp_find_back%init_dofs(this%pe_size)
571 do i = 0, (this%pe_size-1)
572 send_recv => glb_intrp_find%recv_dof(i)%array()
573 do j = 1, glb_intrp_find%recv_dof(i)%size()
574 call glb_intrp_find_back%send_dof(i)%push(send_recv(j))
576 send_recv => glb_intrp_find%send_dof(i)%array()
577 do j = 1, glb_intrp_find%send_dof(i)%size()
579 call glb_intrp_find_back%recv_dof(i)%push(ii)
583 call glb_intrp_find_back%init(recv_pe_find, send_pe_find, this%comm)
586 if (
allocated(this%xyz_local))
then
587 deallocate(this%xyz_local)
589 allocate(this%xyz_local(3, this%n_points_local))
590 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
591 this%n_points_local*3)
593 call mpi_barrier(this%comm)
595 write(log_buf,
'(A,E15.7)') &
596 'Sent to points to PE candidates, time since start of find_points (s):', time1-time_start
599 call all_el_candidates%init()
601 if (
allocated(n_el_cands))
then
602 deallocate(n_el_cands)
605 allocate(n_el_cands(this%n_points_local))
607 call this%el_finder%find_batch(this%xyz_local, this%n_points_local, &
608 all_el_candidates, n_el_cands)
610 n_point_cand = all_el_candidates%size()
611 if (n_point_cand .gt. 1e8)
then
612 print *,
'Warning, many point candidates on rank', this%pe_rank,
'cands:', n_point_cand, &
613 'Consider increasing number of ranks'
615 call x_t%init(n_point_cand)
616 call y_t%init(n_point_cand)
617 call z_t%init(n_point_cand)
620 do i = 1 , this%n_points_local
621 do j = 1, n_el_cands(i)
623 x_t%x(ii) = this%xyz_local(1,i)
624 y_t%x(ii) = this%xyz_local(2,i)
625 z_t%x(ii) = this%xyz_local(3,i)
629 call mpi_barrier(this%comm)
631 write(log_buf,
'(A,E15.7)') &
632 'Element candidates found, now time for finding rst,time since start of find_points (s):', time1-time_start
634 call rst_local_cand%init(3,n_point_cand)
635 call resx%init(n_point_cand)
636 call resy%init(n_point_cand)
637 call resz%init(n_point_cand)
640 call mpi_barrier(this%comm)
642 el_cands => all_el_candidates%array()
649 call device_map(el_cands, el_cands_d,n_point_cand)
654 call this%rst_finder%find(rst_local_cand, &
656 el_cands, n_point_cand, &
666 call mpi_barrier(this%comm)
669 write(log_buf,
'(A,E15.7)') &
670 'Found rst with Newton iteration, time (s):', time2-time1
673 write(log_buf,
'(A,E15.7)') &
674 'Tolerance: ', this%tol
676 write(log_buf,
'(A)') &
677 'Checking validity of points and choosing best candidates.'
679 call mpi_barrier(this%comm,ierr)
681 if (
allocated(this%rst_local))
deallocate(this%rst_local)
682 if (
allocated(this%el_owner0_local))
deallocate(this%el_owner0_local)
683 allocate(this%rst_local(3,this%n_points_local))
684 allocate(this%el_owner0_local(this%n_points_local))
687 do i = 1 , this%n_points_local
688 this%xyz_local(1,i) = 10.0
689 this%xyz_local(2,i) = 10.0
690 this%xyz_local(3,i) = 10.0
691 this%rst_local(1,i) = 10.0
692 this%rst_local(2,i) = 10.0
693 this%rst_local(3,i) = 10.0
694 this%el_owner0_local(i) = -1
695 do j = 1, n_el_cands(i)
697 if (
rst_cmp(this%rst_local(:,i), rst_local_cand%x(:,ii),&
698 this%xyz_local(:,i), (/resx%x(ii),resy%x(ii),resz%x(ii)/), this%padding))
then
699 this%rst_local(1,i) = rst_local_cand%x(1,ii)
700 this%rst_local(2,i) = rst_local_cand%x(2,ii)
702 this%rst_local(3,i) = rst_local_cand%x(3,ii)
703 this%xyz_local(1,i) = resx%x(ii)
704 this%xyz_local(2,i) = resy%x(ii)
705 this%xyz_local(3,i) = resz%x(ii)
706 this%el_owner0_local(i) = el_cands(ii)
712 call res%init(3,this%n_points)
713 n_glb_point_cand = sum(this%n_points_pe)
714 if (
allocated(rst_results))
deallocate(rst_results)
715 if (
allocated(res_results))
deallocate(res_results)
716 if (
allocated(el_owner_results))
deallocate(el_owner_results)
717 allocate(rst_results(3,n_glb_point_cand))
718 allocate(res_results(3,n_glb_point_cand))
719 allocate(el_owner_results(n_glb_point_cand))
724 call glb_intrp_find_back%sendrecv(this%xyz_local, res_results, &
725 this%n_points_local*3, n_glb_point_cand*3)
726 call glb_intrp_find_back%sendrecv(this%rst_local, rst_results, &
727 this%n_points_local*3, n_glb_point_cand*3)
728 do i = 1,
size(glb_intrp_find_back%send_pe)
729 rank = glb_intrp_find_back%send_pe(i)
730 call mpi_isend(this%el_owner0_local(this%n_points_offset_pe_local(rank)+1),&
731 this%n_points_pe_local(rank), &
732 mpi_integer, rank, 0, &
733 this%comm, glb_intrp_find_back%send_buf(i)%request, ierr)
734 glb_intrp_find_back%send_buf(i)%flag = .false.
736 do i = 1,
size(glb_intrp_find_back%recv_pe)
737 rank = glb_intrp_find_back%recv_pe(i)
738 call mpi_irecv(el_owner_results(this%n_points_offset_pe(rank)+1),&
739 this%n_points_pe(rank), &
740 mpi_integer, rank, 0, &
741 this%comm, glb_intrp_find_back%recv_buf(i)%request, ierr)
742 glb_intrp_find_back%recv_buf(i)%flag = .false.
744 call glb_intrp_find_back%nbwait_no_op()
746 do i = 1,
size(glb_intrp_find_back%recv_pe)
747 point_ids => this%points_at_pe(glb_intrp_find_back%recv_pe(i))%array()
748 do j = 1, this%n_points_pe(glb_intrp_find_back%recv_pe(i))
749 point_id = point_ids(j)
751 if (
rst_cmp(this%rst(:,point_id), rst_results(:,ii), &
752 res%x(:,point_id), res_results(:,ii), this%padding) .or. &
753 this%pe_owner(point_ids(j)) .eq. -1 )
then
754 this%rst(:,point_ids(j)) = rst_results(:,ii)
755 res%x(:,point_ids(j)) = res_results(:,ii)
756 this%pe_owner(point_ids(j)) = glb_intrp_find_back%recv_pe(i)
757 this%el_owner0(point_ids(j)) = el_owner_results(ii)
767 do i = 0, this%pe_size-1
768 call this%points_at_pe(i)%clear()
769 this%n_points_pe(i) = 0
772 do i = 1, this%n_points
774 if (this%pe_owner(i) .eq. -1 .or. this%el_owner0(i) .eq. -1)
then
775 print *,
'No owning rank found for',&
776 ' point ', stupid_intent,
' with coords', this%xyz(:,i), &
777 ' Interpolation will always yield 0.0. Try increase padding.'
779 call this%points_at_pe(this%pe_owner(i))%push(stupid_intent)
780 this%n_points_pe(this%pe_owner(i)) = this%n_points_pe(this%pe_owner(i)) + 1
783 call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, 1, mpi_integer, &
784 mpi_sum, this%comm, ierr)
785 call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
786 this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
787 this%n_points_offset_pe_local(0) = 0
788 this%n_points_offset_pe(0) = 0
789 do i = 1, (this%pe_size - 1)
790 this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
791 + this%n_points_offset_pe_local(i-1)
792 this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
793 + this%n_points_offset_pe(i-1)
795 call send_pe_find%free()
796 call recv_pe_find%free()
797 call glb_intrp_find%free()
798 call send_pe_find%init()
799 call recv_pe_find%init()
800 call glb_intrp_find%init_dofs(this%pe_size)
802 do i = 0, (this%pe_size-1)
803 if (this%n_points_pe(i) .gt. 0)
then
804 call send_pe_find%push(i)
805 point_ids => this%points_at_pe(i)%array()
806 do j = 1, this%n_points_pe(i)
807 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
808 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
809 call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
812 if (this%n_points_pe_local(i) .gt. 0)
then
813 call recv_pe_find%push(i)
814 do j = 1, this%n_points_pe_local(i)
815 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+1)
816 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+2)
817 call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+3)
823 call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
824 call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
825 this%n_points_local*3)
826 call glb_intrp_find%sendrecv(this%rst, this%rst_local, this%n_points*3, &
827 this%n_points_local*3)
829 do i = 1,
size(glb_intrp_find%send_pe)
830 rank = glb_intrp_find%send_pe(i)
831 point_ids => this%points_at_pe(rank)%array()
832 do j = 1, this%n_points_pe(rank)
834 el_owner_results(ii) = this%el_owner0(point_ids(j))
836 call mpi_isend(el_owner_results(this%n_points_offset_pe(rank)+1),&
837 this%n_points_pe(rank), &
838 mpi_integer, rank, 0, &
839 this%comm, glb_intrp_find%send_buf(i)%request, ierr)
840 glb_intrp_find%send_buf(i)%flag = .false.
842 do i = 1,
size(glb_intrp_find%recv_pe)
843 rank = glb_intrp_find%recv_pe(i)
844 call mpi_irecv(this%el_owner0_local(this%n_points_offset_pe_local(rank)+1), &
845 this%n_points_pe_local(rank), &
846 mpi_integer, rank, 0, &
847 this%comm, glb_intrp_find%recv_buf(i)%request, ierr)
848 glb_intrp_find%recv_buf(i)%flag = .false.
850 call glb_intrp_find%nbwait_no_op()
852 call glb_intrp_find%free()
857 call this%glb_intrp_comm%init_dofs(this%pe_size)
858 do i = 0, (this%pe_size-1)
859 if (this%n_points_pe(i) .gt. 0)
then
861 point_ids => this%points_at_pe(i)%array()
862 do j = 1, this%n_points_pe(i)
863 call this%glb_intrp_comm%recv_dof(i)%push(point_ids(j))
866 if (this%n_points_pe_local(i) .gt. 0)
then
868 do j = 1, this%n_points_pe_local(i)
869 call this%glb_intrp_comm%send_dof(i)%push(j+this%n_points_offset_pe_local(i))
873 call this%glb_intrp_comm%init(send_pe, recv_pe,this%comm)
876 call this%temp_local%init(this%n_points_local)
877 call this%temp%init(this%n_points)
880 call this%local_interp%init(this%Xh, this%rst_local, &
887 call device_map(this%el_owner0_local, this%el_owner0_local_d, this%n_points_local)
888 call device_memcpy(this%el_owner0_local, this%el_owner0_local_d, &
892 call this%check_points(this%x%x,this%y%x, this%z%x)
897 call glb_intrp_find_back%free()
898 call send_pe_find%free()
899 call recv_pe_find%free()
903 call rst_local_cand%free()
908 call all_el_candidates%free()
910 if (
allocated(n_el_cands))
deallocate(n_el_cands)
911 if (
allocated(rst_results))
deallocate(rst_results)
912 if (
allocated(res_results))
deallocate(res_results)
913 if (
allocated(el_owner_results))
deallocate(el_owner_results)
914 call mpi_barrier(this%comm, ierr)
916 write(log_buf,
'(A,E15.7)')
'Global interpolation find points done, time (s):', &