222    real(kind=
rp), 
intent(in) :: x(:)
 
  223    real(kind=
rp), 
intent(in) :: y(:)
 
  224    real(kind=
rp), 
intent(in) :: z(:)
 
  225    integer, 
intent(in) :: gdim
 
  226    integer, 
intent(in) :: nelv
 
  227    type(mpi_comm), 
intent(in), 
optional :: comm
 
  228    type(
space_t), 
intent(in) :: Xh
 
  229    real(kind=
rp), 
intent(in), 
optional :: tol
 
  230    real(kind=
rp), 
intent(in), 
optional :: pad
 
  231    integer :: lx, ly, lz, ierr, i, n
 
  232    character(len=8000) :: log_buf
 
  233    real(kind=
dp) :: padding
 
  234    real(kind=
rp) :: time1, time_start
 
  235    character(len=255) :: mode_str
 
  236    integer :: boxdim, envvar_len
 
  240    if (
present(
comm)) 
then 
  246    if (
present(pad)) 
then 
  254    time_start = mpi_wtime()
 
  255    call mpi_barrier(this%comm)
 
  257    call mpi_comm_rank(this%comm, this%pe_rank, ierr)
 
  258    call mpi_comm_size(this%comm, this%pe_size, ierr)
 
  262    if (
present(tol)) 
then 
  268    call mpi_allreduce(nelv, this%glb_nelv, 1, mpi_integer, &
 
  269         mpi_sum, this%comm, ierr)
 
  277    call copy(this%x%x, x, n)
 
  279    call copy(this%y%x, y, n)
 
  281    call copy(this%z%x, z, n)
 
  283    call this%Xh%init(xh%t, lx, ly, lz)
 
  286    call neko_log%section(
'Global Interpolation')
 
  288    call neko_log%message(
'Initializing global interpolation')
 
  289    call get_environment_variable(
"NEKO_GLOBAL_INTERP_EL_FINDER", &
 
  290         mode_str, envvar_len)
 
  292    if (envvar_len .gt. 0) 
then 
  293       if (mode_str(1:envvar_len) == 
'AABB') 
then 
  297    call get_environment_variable(
"NEKO_GLOBAL_INTERP_PE_FINDER", &
 
  298         mode_str, envvar_len)
 
  300    if (envvar_len .gt. 0) 
then 
  301       if (mode_str(1:envvar_len) == 
'AABB') 
then 
  306    if (.not. 
allocated(this%el_finder)) 
then 
  309    if (.not. 
allocated(this%pe_finder)) 
then 
  312    select type(el_find => this%el_finder)
 
  314       call neko_log%message(
'Using AABB element finder')
 
  315       call el_find%init(x, y, z, nelv, xh, padding)
 
  317       call neko_log%message(
'Using Cartesian element finder')
 
  318       boxdim = 
max(lx*int(
real(nelv,
xp)**(1.0_xp/3.0_xp)),2)
 
  319       boxdim = min(boxdim, 300)
 
  320       call el_find%init(x, y, z, nelv, xh, boxdim, padding)
 
  322       call neko_error(
'Unknown element finder type')
 
  325    select type(pe_find => this%pe_finder)
 
  327       call neko_log%message(
'Using AABB PE finder')
 
  328       call pe_find%init(this%x%x, this%y%x, this%z%x, &
 
  329            nelv, xh, this%comm, padding)
 
  331       call neko_log%message(
'Using Cartesian PE finder')
 
  332       boxdim = lx*int(
real(this%glb_nelv,
xp)**(1.0_xp/3.0_xp))
 
  333       boxdim = 
max(boxdim,32)
 
  334       boxdim = min(boxdim, &
 
  335            int(8.0_xp*(30000.0_xp*this%pe_size)**(1.0_xp/3.0_xp)))
 
  336       call pe_find%init(this%x%x, this%y%x, this%z%x, &
 
  337            nelv, xh, this%comm, boxdim, padding)
 
  342    call this%rst_finder%init(this%x%x, this%y%x, this%z%x, nelv, xh, this%tol)
 
  343    if (
allocated(this%n_points_pe)) 
deallocate(this%n_points_pe)
 
  344    if (
allocated(this%n_points_pe_local)) 
deallocate(this%n_points_pe_local)
 
  345    if (
allocated(this%n_points_offset_pe_local)) &
 
  346         deallocate(this%n_points_offset_pe_local)
 
  347    if (
allocated(this%n_points_offset_pe)) 
deallocate(this%n_points_offset_pe)
 
  348    allocate(this%n_points_pe(0:(this%pe_size-1)))
 
  349    allocate(this%n_points_offset_pe(0:(this%pe_size-1)))
 
  350    allocate(this%n_points_pe_local(0:(this%pe_size-1)))
 
  351    allocate(this%n_points_offset_pe_local(0:(this%pe_size-1)))
 
  352    allocate(this%points_at_pe(0:(this%pe_size-1)))
 
  353    do i = 0, this%pe_size-1
 
  354       call this%points_at_pe(i)%init()
 
  356    call mpi_barrier(this%comm)
 
  358    write(log_buf, 
'(A,E15.7)') &
 
  359         'Global interpolation initialized (s):', time1-time_start
 
 
  451    character(len=8000) :: log_buf
 
  459    type(c_ptr) :: el_cands_d
 
  461    integer :: i, j, stupid_intent
 
  463    integer, 
allocatable :: n_el_cands(:)
 
  464    integer, 
contiguous, 
pointer :: el_cands(:), point_ids(:), send_recv(:)
 
  465    real(kind=
rp), 
allocatable :: res_results(:,:)
 
  466    real(kind=
rp), 
allocatable :: rst_results(:,:)
 
  467    integer, 
allocatable :: el_owner_results(:)
 
  468    integer :: ierr, ii, n_point_cand, n_glb_point_cand, point_id, rank
 
  469    real(kind=
rp) :: time1, time2, time_start
 
  473    type(
stack_i4_t) :: send_pe_find, recv_pe_find
 
  475    el_cands_d = c_null_ptr
 
  476    call neko_log%section(
'Global Interpolation')
 
  477    call glb_intrp_find%init_dofs(this%pe_size)
 
  478    call send_pe_find%init()
 
  479    call recv_pe_find%init()
 
  480    call mpi_barrier(this%comm)
 
  481    time_start = mpi_wtime()
 
  482    write(log_buf,
'(A)') 
'Global interpolation, finding points' 
  488    call this%pe_finder%find_batch(this%xyz, this%n_points, &
 
  489         this%points_at_pe, this%n_points_pe)
 
  490    call mpi_barrier(this%comm)
 
  492    write(log_buf, 
'(A,E15.7)') &
 
  493         'Found PE candidates time since start of findpts (s):', time1-time_start
 
  499    this%n_points_pe_local = 0
 
  500    this%n_points_local = 0
 
  501    call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, &
 
  502         1, mpi_integer, mpi_sum, this%comm, ierr)
 
  503    call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
 
  504         this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
 
  507    this%n_points_offset_pe_local(0) = 0
 
  508    this%n_points_offset_pe(0) = 0
 
  509    do i = 1, (this%pe_size - 1)
 
  510       this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
 
  511            + this%n_points_offset_pe_local(i-1)
 
  512       this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
 
  513            + this%n_points_offset_pe(i-1)
 
  515    do i = 0, (this%pe_size-1)
 
  516       if (this%n_points_pe(i) .gt. 0) 
then 
  517          call send_pe_find%push(i)
 
  518          point_ids => this%points_at_pe(i)%array()
 
  519          do j = 1, this%n_points_pe(i)
 
  520             call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
 
  521             call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
 
  522             call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
 
  525       if (this%n_points_pe_local(i) .gt. 0) 
then 
  526          call recv_pe_find%push(i)
 
  527          do j = 1, this%n_points_pe_local(i)
 
  528             call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+1)
 
  529             call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+2)
 
  530             call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+3)
 
  537    call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
 
  539    call glb_intrp_find_back%init_dofs(this%pe_size)
 
  541    do i = 0, (this%pe_size-1)
 
  542       send_recv => glb_intrp_find%recv_dof(i)%array()
 
  543       do j = 1, glb_intrp_find%recv_dof(i)%size()
 
  544          call glb_intrp_find_back%send_dof(i)%push(send_recv(j))
 
  546       send_recv => glb_intrp_find%send_dof(i)%array()
 
  547       do j = 1, glb_intrp_find%send_dof(i)%size()
 
  549          call glb_intrp_find_back%recv_dof(i)%push(ii)
 
  553    call glb_intrp_find_back%init(recv_pe_find, send_pe_find, this%comm)
 
  556    if (
allocated(this%xyz_local)) 
then 
  557       deallocate(this%xyz_local)
 
  559    allocate(this%xyz_local(3, this%n_points_local))
 
  560    call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
 
  561         this%n_points_local*3)
 
  563    call mpi_barrier(this%comm)
 
  565    write(log_buf, 
'(A,E15.7)') &
 
  566         'Sent to points to PE candidates, time since start of find_points (s):', time1-time_start
 
  569    call all_el_candidates%init()
 
  571    if (
allocated(n_el_cands)) 
then 
  572       deallocate(n_el_cands)
 
  575    allocate(n_el_cands(this%n_points_local))
 
  577    call this%el_finder%find_batch(this%xyz_local, this%n_points_local, &
 
  578         all_el_candidates, n_el_cands)
 
  580    n_point_cand = all_el_candidates%size()
 
  581    if (n_point_cand .gt. 1e8) 
then 
  582       print *,
'Warning, many point candidates on rank', this%pe_rank,
'cands:', n_point_cand, &
 
  583            'Consider increasing number of ranks' 
  585    call x_t%init(n_point_cand)
 
  586    call y_t%init(n_point_cand)
 
  587    call z_t%init(n_point_cand)
 
  590    do i = 1 , this%n_points_local
 
  591       do j = 1, n_el_cands(i)
 
  593          x_t%x(ii) = this%xyz_local(1,i)
 
  594          y_t%x(ii) = this%xyz_local(2,i)
 
  595          z_t%x(ii) = this%xyz_local(3,i)
 
  599    call mpi_barrier(this%comm)
 
  601    write(log_buf, 
'(A,E15.7)') &
 
  602         'Element candidates found, now time for finding rst,time since start of find_points (s):', time1-time_start
 
  604    call rst_local_cand%init(3,n_point_cand)
 
  605    call resx%init(n_point_cand)
 
  606    call resy%init(n_point_cand)
 
  607    call resz%init(n_point_cand)
 
  610    call mpi_barrier(this%comm)
 
  612    el_cands => all_el_candidates%array()
 
  619       call device_map(el_cands, el_cands_d,n_point_cand)
 
  624    call this%rst_finder%find(rst_local_cand, &
 
  626         el_cands, n_point_cand, &
 
  636    call mpi_barrier(this%comm)
 
  639    write(log_buf, 
'(A,E15.7)') &
 
  640         'Found rst with Newton iteration, time (s):', time2-time1
 
  643    write(log_buf,
'(A,E15.7)') &
 
  644         'Tolerance: ', this%tol
 
  646    write(log_buf,
'(A)') &
 
  647         'Checking validity of points and choosing best candidates.' 
  649    call mpi_barrier(this%comm,ierr)
 
  651    if (
allocated(this%rst_local)) 
deallocate(this%rst_local)
 
  652    if (
allocated(this%el_owner0_local)) 
deallocate(this%el_owner0_local)
 
  653    allocate(this%rst_local(3,this%n_points_local))
 
  654    allocate(this%el_owner0_local(this%n_points_local))
 
  657    do i = 1 , this%n_points_local
 
  658       this%xyz_local(1,i) = 10.0
 
  659       this%xyz_local(2,i) = 10.0
 
  660       this%xyz_local(3,i) = 10.0
 
  661       this%rst_local(1,i) = 10.0
 
  662       this%rst_local(2,i) = 10.0
 
  663       this%rst_local(3,i) = 10.0
 
  664       this%el_owner0_local(i) = -1
 
  665       do j = 1, n_el_cands(i)
 
  667          if (
rst_cmp(this%rst_local(:,i), rst_local_cand%x(:,ii),&
 
  668               this%xyz_local(:,i), (/resx%x(ii),resy%x(ii),resz%x(ii)/), this%padding)) 
then 
  669             this%rst_local(1,i) = rst_local_cand%x(1,ii)
 
  670             this%rst_local(2,i) = rst_local_cand%x(2,ii)
 
  672             this%rst_local(3,i) = rst_local_cand%x(3,ii)
 
  673             this%xyz_local(1,i) = resx%x(ii)
 
  674             this%xyz_local(2,i) = resy%x(ii)
 
  675             this%xyz_local(3,i) = resz%x(ii)
 
  676             this%el_owner0_local(i) = el_cands(ii)
 
  682    call res%init(3,this%n_points)
 
  683    n_glb_point_cand = sum(this%n_points_pe)
 
  684    if (
allocated(rst_results)) 
deallocate(rst_results)
 
  685    if (
allocated(res_results)) 
deallocate(res_results)
 
  686    if (
allocated(el_owner_results)) 
deallocate(el_owner_results)
 
  687    allocate(rst_results(3,n_glb_point_cand))
 
  688    allocate(res_results(3,n_glb_point_cand))
 
  689    allocate(el_owner_results(n_glb_point_cand))
 
  694    call glb_intrp_find_back%sendrecv(this%xyz_local, res_results, &
 
  695         this%n_points_local*3, n_glb_point_cand*3)
 
  696    call glb_intrp_find_back%sendrecv(this%rst_local, rst_results, &
 
  697         this%n_points_local*3, n_glb_point_cand*3)
 
  698    do i = 1, 
size(glb_intrp_find_back%send_pe)
 
  699       rank = glb_intrp_find_back%send_pe(i)
 
  700       call mpi_isend(this%el_owner0_local(this%n_points_offset_pe_local(rank)+1),&
 
  701            this%n_points_pe_local(rank), &
 
  702            mpi_integer, rank, 0, &
 
  703            this%comm, glb_intrp_find_back%send_buf(i)%request, ierr)
 
  704       glb_intrp_find_back%send_buf(i)%flag = .false.
 
  706    do i = 1, 
size(glb_intrp_find_back%recv_pe)
 
  707       rank = glb_intrp_find_back%recv_pe(i)
 
  708       call mpi_irecv(el_owner_results(this%n_points_offset_pe(rank)+1),&
 
  709            this%n_points_pe(rank), &
 
  710            mpi_integer, rank, 0, &
 
  711            this%comm, glb_intrp_find_back%recv_buf(i)%request, ierr)
 
  712       glb_intrp_find_back%recv_buf(i)%flag = .false.
 
  714    call glb_intrp_find_back%nbwait_no_op()
 
  716    do i = 1, 
size(glb_intrp_find_back%recv_pe)
 
  717       point_ids => this%points_at_pe(glb_intrp_find_back%recv_pe(i))%array()
 
  718       do j = 1, this%n_points_pe(glb_intrp_find_back%recv_pe(i))
 
  719          point_id = point_ids(j)
 
  721          if (
rst_cmp(this%rst(:,point_id), rst_results(:,ii), &
 
  722               res%x(:,point_id), res_results(:,ii), this%padding) .or. &
 
  723               this%pe_owner(point_ids(j)) .eq. -1 ) 
then 
  724             this%rst(:,point_ids(j)) = rst_results(:,ii)
 
  725             res%x(:,point_ids(j)) = res_results(:,ii)
 
  726             this%pe_owner(point_ids(j)) = glb_intrp_find_back%recv_pe(i)
 
  727             this%el_owner0(point_ids(j)) = el_owner_results(ii)
 
  737    do i = 0, this%pe_size-1
 
  738       call this%points_at_pe(i)%clear()
 
  739       this%n_points_pe(i) = 0
 
  742    do i = 1, this%n_points
 
  744       if (this%pe_owner(i) .eq. -1 .or. this%el_owner0(i) .eq. -1) 
then 
  745          print *, 
'No owning rank found for',&
 
  746               ' point ', stupid_intent, 
' with coords', this%xyz(:,i), &
 
  747               ' Interpolation will always yield 0.0. Try increase padding.' 
  749          call this%points_at_pe(this%pe_owner(i))%push(stupid_intent)
 
  750          this%n_points_pe(this%pe_owner(i)) = this%n_points_pe(this%pe_owner(i)) + 1
 
  753    call mpi_reduce_scatter_block(this%n_points_pe, this%n_points_local, 1, mpi_integer, &
 
  754         mpi_sum, this%comm, ierr)
 
  755    call mpi_alltoall(this%n_points_pe, 1, mpi_integer,&
 
  756         this%n_points_pe_local, 1, mpi_integer, this%comm, ierr)
 
  757    this%n_points_offset_pe_local(0) = 0
 
  758    this%n_points_offset_pe(0) = 0
 
  759    do i = 1, (this%pe_size - 1)
 
  760       this%n_points_offset_pe_local(i) = this%n_points_pe_local(i-1)&
 
  761            + this%n_points_offset_pe_local(i-1)
 
  762       this%n_points_offset_pe(i) = this%n_points_pe(i-1)&
 
  763            + this%n_points_offset_pe(i-1)
 
  765    call send_pe_find%free()
 
  766    call recv_pe_find%free()
 
  767    call glb_intrp_find%free()
 
  768    call send_pe_find%init()
 
  769    call recv_pe_find%init()
 
  770    call glb_intrp_find%init_dofs(this%pe_size)
 
  772    do i = 0, (this%pe_size-1)
 
  773       if (this%n_points_pe(i) .gt. 0) 
then 
  774          call send_pe_find%push(i)
 
  775          point_ids => this%points_at_pe(i)%array()
 
  776          do j = 1, this%n_points_pe(i)
 
  777             call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+1)
 
  778             call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+2)
 
  779             call glb_intrp_find%send_dof(i)%push(3*(point_ids(j)-1)+3)
 
  782       if (this%n_points_pe_local(i) .gt. 0) 
then 
  783          call recv_pe_find%push(i)
 
  784          do j = 1, this%n_points_pe_local(i)
 
  785             call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+1)
 
  786             call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+2)
 
  787             call glb_intrp_find%recv_dof(i)%push(3*(j+this%n_points_offset_pe_local(i)-1)+3)
 
  793    call glb_intrp_find%init(send_pe_find, recv_pe_find, this%comm)
 
  794    call glb_intrp_find%sendrecv(this%xyz, this%xyz_local, this%n_points*3, &
 
  795         this%n_points_local*3)
 
  796    call glb_intrp_find%sendrecv(this%rst, this%rst_local, this%n_points*3, &
 
  797         this%n_points_local*3)
 
  799    do i = 1, 
size(glb_intrp_find%send_pe)
 
  800       rank = glb_intrp_find%send_pe(i)
 
  801       point_ids => this%points_at_pe(rank)%array()
 
  802       do j = 1, this%n_points_pe(rank)
 
  804          el_owner_results(ii) = this%el_owner0(point_ids(j))
 
  806       call mpi_isend(el_owner_results(this%n_points_offset_pe(rank)+1),&
 
  807            this%n_points_pe(rank), &
 
  808            mpi_integer, rank, 0, &
 
  809            this%comm, glb_intrp_find%send_buf(i)%request, ierr)
 
  810       glb_intrp_find%send_buf(i)%flag = .false.
 
  812    do i = 1, 
size(glb_intrp_find%recv_pe)
 
  813       rank = glb_intrp_find%recv_pe(i)
 
  814       call mpi_irecv(this%el_owner0_local(this%n_points_offset_pe_local(rank)+1), &
 
  815            this%n_points_pe_local(rank), &
 
  816            mpi_integer, rank, 0, &
 
  817            this%comm, glb_intrp_find%recv_buf(i)%request, ierr)
 
  818       glb_intrp_find%recv_buf(i)%flag = .false.
 
  820    call glb_intrp_find%nbwait_no_op()
 
  822    call glb_intrp_find%free()
 
  827    call this%glb_intrp_comm%init_dofs(this%pe_size)
 
  828    do i = 0, (this%pe_size-1)
 
  829       if (this%n_points_pe(i) .gt. 0) 
then 
  831          point_ids => this%points_at_pe(i)%array()
 
  832          do j = 1, this%n_points_pe(i)
 
  833             call this%glb_intrp_comm%recv_dof(i)%push(point_ids(j))
 
  836       if (this%n_points_pe_local(i) .gt. 0) 
then 
  838          do j = 1, this%n_points_pe_local(i)
 
  839             call this%glb_intrp_comm%send_dof(i)%push(j+this%n_points_offset_pe_local(i))
 
  843    call this%glb_intrp_comm%init(send_pe, recv_pe,this%comm)
 
  846    call this%temp_local%init(this%n_points_local)
 
  847    call this%temp%init(this%n_points)
 
  850    call this%local_interp%init(this%Xh, this%rst_local, &
 
  857       call device_map(this%el_owner0_local, this%el_owner0_local_d, this%n_points_local)
 
  858       call device_memcpy(this%el_owner0_local, this%el_owner0_local_d, &
 
  862    call this%check_points(this%x%x,this%y%x, this%z%x)
 
  867    call glb_intrp_find_back%free()
 
  868    call send_pe_find%free()
 
  869    call recv_pe_find%free()
 
  873    call rst_local_cand%free()
 
  878    call all_el_candidates%free()
 
  880    if (
allocated(n_el_cands)) 
deallocate(n_el_cands)
 
  881    if (
allocated(rst_results)) 
deallocate(rst_results)
 
  882    if (
allocated(res_results)) 
deallocate(res_results)
 
  883    if (
allocated(el_owner_results)) 
deallocate(el_owner_results)
 
  884    call mpi_barrier(this%comm, ierr)
 
  886    write(log_buf, 
'(A,E15.7)') 
'Global interpolation find points done, time (s):', &