84  use mpi_f08, 
only : mpi_allreduce, mpi_in_place, mpi_sum
 
   85  use, 
intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
 
   86       c_sizeof, c_null_ptr, c_loc, c_associated
 
   92     real(kind=
rp), 
allocatable :: xx(:,:)
 
   93     real(kind=
rp), 
allocatable :: bb(:,:)
 
   94     real(kind=
rp), 
allocatable :: xbar(:)
 
   95     type(c_ptr), 
allocatable :: xx_d(:)
 
   96     type(c_ptr), 
allocatable :: bb_d(:)
 
   97     type(c_ptr) :: xbar_d = c_null_ptr
 
   98     type(c_ptr) :: alpha_d = c_null_ptr
 
   99     type(c_ptr) :: xx_d_d = c_null_ptr
 
  100     type(c_ptr) :: bb_d_d = c_null_ptr
 
  102     real(kind=
rp) :: tol = 1e-7_rp
 
  104     real(kind=
rp) :: proj_res
 
  105     integer :: proj_m = 0
 
  106     integer :: activ_step 
 
 
  122    integer, 
intent(in) :: n
 
  123    integer, 
intent(in) :: L
 
  124    integer, 
optional, 
intent(in) :: activ_step
 
  126    integer(c_size_t) :: ptr_size
 
  134    if (
present(activ_step)) 
then 
  135       this%activ_step = activ_step
 
  144    if (this%L .le. 0) 
then 
  148    allocate(this%xx(n, this%L))
 
  149    allocate(this%bb(n, this%L))
 
  150    allocate(this%xbar(n))
 
  151    allocate(this%xx_d(this%L))
 
  152    allocate(this%bb_d(this%L))
 
  153    call rzero(this%xbar, n)
 
  155       call rzero(this%xx(1, i), n)
 
  156       call rzero(this%bb(1, i), n)
 
  161       call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
 
  167          this%xx_d(i) = c_null_ptr
 
  168          call device_map(this%xx(:, i), this%xx_d(i), n)
 
  170          this%bb_d(i) = c_null_ptr
 
  171          call device_map(this%bb(:, i), this%bb_d(i), n)
 
  175       ptr_size = c_sizeof(c_null_ptr) * this%L
 
  177       ptr = c_loc(this%xx_d)
 
  181       ptr = c_loc(this%bb_d)
 
 
  192    if (
allocated(this%xx)) 
then 
  195    if (
allocated(this%bb)) 
then 
  198    if (
allocated(this%xbar)) 
then 
  199       deallocate(this%xbar)
 
  201    if (
allocated(this%xx_d)) 
then 
  203          if (c_associated(this%xx_d(i))) 
then 
  208    if (c_associated(this%xx_d_d)) 
then 
  211    if (c_associated(this%xbar_d)) 
then 
  214    if (c_associated(this%alpha_d)) 
then 
  217    if (
allocated(this%bb_d)) 
then 
  219          if (c_associated(this%bb_d(i))) 
then 
  224    if (c_associated(this%bb_d_d)) 
then 
 
  233    integer, 
intent(inout) :: n
 
  234    real(kind=
rp), 
intent(inout), 
dimension(n) :: b
 
  235    integer, 
intent(in) :: tstep
 
  236    class(
coef_t), 
intent(inout) :: coef
 
  238    character(len=*), 
optional :: string
 
  240    if (tstep .gt. this%activ_step .and. this%L .gt. 0) 
then 
  241       if (dt_controller%is_variable_dt) 
then 
  243          if (dt_controller%dt_last_change .eq. 0) 
then 
  245          else if (dt_controller%dt_last_change .gt. this%activ_step - 1) 
then 
  248             call this%project_on(b, coef, n)
 
  249             if (
present(string)) 
then 
  250                call this%log_info(string, tstep)
 
  254          call this%project_on(b, coef, n)
 
  255          if (
present(string)) 
then 
  256             call this%log_info(string, tstep)
 
 
  266    integer, 
intent(inout) :: n
 
  267    class(
ax_t), 
intent(inout) :: Ax
 
  268    class(
coef_t), 
intent(inout) :: coef
 
  270    type(
gs_t), 
intent(inout) :: gs_h
 
  271    real(kind=
rp), 
intent(inout), 
dimension(n) :: x
 
  272    integer, 
intent(in) :: tstep
 
  275    if (tstep .gt. this%activ_step .and. this%L .gt. 0) 
then 
  276       if (.not.(dt_controller%is_variable_dt) .or. &
 
  277            (dt_controller%dt_last_change .gt. this%activ_step - 1)) 
then 
  278          call this%project_back(x, ax, coef, bclst, gs_h, n)
 
 
  286    integer, 
intent(inout) :: n
 
  287    class(
coef_t), 
intent(inout) :: coef
 
  288    real(kind=
rp), 
intent(inout), 
dimension(n) :: b
 
 
  300    integer, 
intent(inout) :: n
 
  301    class(
ax_t), 
intent(inout) :: Ax
 
  302    class(
coef_t), 
intent(inout) :: coef
 
  304    type(
gs_t), 
intent(inout) :: gs_h
 
  305    real(kind=
rp), 
intent(inout), 
dimension(n) :: x
 
  313       if (this%m .gt. 0) 
call device_add2(x_d, this%xbar_d, n)
 
  314       if (this%m .eq. this%L) 
then 
  317          this%m = min(this%m+1, this%L)
 
  323       if (this%m .gt. 0) 
call add2(x, this%xbar, n) 
 
  324       if (this%m .eq. this%L) 
then 
  327          this%m = min(this%m+1, this%L)
 
  330       call copy(this%xx(1, this%m), x, n) 
 
  333    call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
 
  334    call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
 
  335    call bclst%apply_scalar(this%bb(1, this%m), n)
 
 
  345    integer, 
intent(inout) :: n
 
  346    class(
coef_t), 
intent(inout) :: coef
 
  347    real(kind=
rp), 
intent(inout), 
dimension(n) :: b
 
  348    integer :: i, j, k, l, ierr
 
  349    real(kind=
rp) :: work(this%L), alpha(this%L), s
 
  351    associate(xbar => this%xbar, xx => this%xx, &
 
  354      if (this%m .le. 0) 
return 
  357      call rzero(alpha, this%m)
 
  358      this%proj_res = sqrt(
glsc3(b, b, coef%mult, n) / coef%volume)
 
  365               s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
 
  367            alpha(k) = alpha(k) + s
 
  372      call mpi_allreduce(mpi_in_place, alpha, this%m, &
 
  375      call rzero(work, this%m)
 
  380            xbar(i+l) = alpha(1) * xx(i+l,1)
 
  381            b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
 
  385               xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
 
  386               b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
 
  393               s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
 
  395            work(k) = work(k) + s
 
  399      call mpi_allreduce(work, alpha, this%m, &
 
  406               xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
 
  407               b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
 
 
  416    integer, 
intent(inout) :: n
 
  417    class(coef_t), 
intent(inout) :: coef
 
  418    real(kind=rp), 
intent(inout), 
dimension(n) :: b
 
  419    real(kind=rp) :: alpha(this%L)
 
  422    b_d = device_get_ptr(b)
 
  424    associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
 
  425         bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
 
  427      if (this%m .le. 0) 
return 
  431      this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
 
  433      if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) 
then 
  434         call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
 
  435              coef%mult_d, xbar_d, this%m, n)
 
  437         if (neko_bcknd_opencl .eq. 1) 
then 
  439               alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
 
  442            call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
 
  443            call device_memcpy(alpha, alpha_d, this%m, &
 
  444                 host_to_device, sync = .false.)
 
  446         call device_rzero(xbar_d, n)
 
  447         if (neko_bcknd_opencl .eq. 1) 
then 
  449               call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
 
  451            call cmult(alpha, -1.0_rp, this%m)
 
  453            call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
 
  454            call device_cmult(alpha_d, -1.0_rp, this%m)
 
  457         if (neko_bcknd_opencl .eq. 1) 
then 
  459               call device_add2s2(b_d, bb_d(i), alpha(i), n)
 
  460               alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
 
  463            call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
 
  464            call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
 
  465            call device_memcpy(alpha, alpha_d, this%m, &
 
  466                 host_to_device, sync = .false.)
 
  469         if (neko_bcknd_opencl .eq. 1) 
then 
  471               call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
 
  472               call cmult(alpha, -1.0_rp, this%m)
 
  473               call device_add2s2(b_d, bb_d(i), alpha(i), n)
 
  476            call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
 
  477            call device_cmult(alpha_d, -1.0_rp, this%m)
 
  478            call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
 
 
  488    type(coef_t), 
intent(in) :: coef
 
  489    integer, 
intent(in) :: n
 
  491    if (neko_bcknd_device .eq. 1) 
then 
 
  501    integer, 
intent(in) :: n
 
  502    type(c_ptr), 
dimension(this%L) :: xx_d, bb_d
 
  503    type(c_ptr), 
intent(in) :: w_d
 
  504    real(kind=rp) :: nrm, scl
 
  505    real(kind=rp) :: alpha(this%L)
 
  508    associate(m => this%m, xx_d_d => this%xx_d_d, &
 
  509         bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
 
  513      if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) 
then 
  514         call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
 
  515              w_d, xx_d(m), this%m, n, nrm)
 
  517         if (neko_bcknd_opencl .eq. 1)
then 
  519               alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
 
  522            call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
 
  525         call cmult(alpha, -1.0_rp,m)
 
  526         if (neko_bcknd_opencl .eq. 1)
then 
  528               call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
 
  529               call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
 
  531               alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
 
  534            call device_memcpy(alpha, alpha_d, this%m, &
 
  535                 host_to_device, sync = .false.)
 
  536            call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
 
  537            call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
 
  539            call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
 
  541         call cmult(alpha, -1.0_rp,m)
 
  542         if (neko_bcknd_opencl .eq. 1)
then 
  544               call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
 
  545               call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
 
  546               alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
 
  549            call device_memcpy(alpha, alpha_d, m, &
 
  550                 host_to_device, sync = .false.)
 
  551            call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
 
  552            call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
 
  553            call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
 
  557      alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
 
  558      alpha(m) = sqrt(alpha(m))
 
  560      if (alpha(m) .gt. this%tol*nrm) 
then  
  561         scl = 1.0_rp / alpha(m)
 
  562         call device_cmult(xx_d(m), scl, n)
 
  563         call device_cmult(bb_d(m), scl, n)
 
  567         if (pe_rank .eq. 0) 
then 
  568            call neko_warning(
'New vector not linearly indepependent!')
 
 
  580    integer, 
intent(in) :: n
 
  581    real(kind=rp), 
dimension(n, this%L), 
intent(inout) :: xx, bb
 
  582    real(kind=rp), 
dimension(n), 
intent(in) :: w
 
  583    real(kind=rp) :: nrm, scl1, scl2, c, s
 
  584    real(kind=rp) :: alpha(this%L), beta(this%L)
 
  585    integer :: i, j, k, l, h, ierr
 
  587    associate(m => this%m)
 
  594      do i = 1, n, neko_blk_size
 
  595         j = min(neko_blk_size, n-i+1)
 
  600               s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
 
  601               c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
 
  603            alpha(k) = alpha(k) + 0.5_rp * (s + c)
 
  607      call mpi_allreduce(mpi_in_place, alpha, this%m, &
 
  608           mpi_real_precision, mpi_sum, neko_comm, ierr)
 
  613      do i = 1, n, neko_blk_size
 
  614         j = min(neko_blk_size, n-i+1)
 
  617               xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
 
  618               bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
 
  624      do i = 1, n, neko_blk_size
 
  625         j = min(neko_blk_size, n-i+1)
 
  630               s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
 
  631               c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
 
  633            beta(k) = beta(k) + 0.5_rp * (s + c)
 
  637      call mpi_allreduce(mpi_in_place, beta, this%m-1, &
 
  638           mpi_real_precision, mpi_sum, neko_comm, ierr)
 
  642      do i = 1, n, neko_blk_size
 
  643         j = min(neko_blk_size,n-i+1)
 
  646               xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
 
  647               bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
 
  652            s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
 
  654         alpha(m) = alpha(m) + s
 
  657         alpha(k) = alpha(k) + beta(k)
 
  661      call mpi_allreduce(mpi_in_place, alpha(m), 1, &
 
  662           mpi_real_precision, mpi_sum, neko_comm, ierr)
 
  663      alpha(m) = sqrt(alpha(m))
 
  666      if (alpha(m) .gt. this%tol*nrm) 
then  
  668         scl1 = 1.0_rp / alpha(m)
 
  670            xx(1+i,m) = scl1 * xx(1+i,m)
 
  671            bb(1+i,m) = scl1 * bb(1+i,m)
 
  676         if (pe_rank .eq. 0) 
then 
  677            call neko_warning(
'New vector not linearly indepependent!')
 
 
  688    character(len=*), 
intent(in) :: string
 
  689    integer, 
intent(in) :: tstep
 
  690    character(len=LOG_SIZE) :: log_buf
 
  691    character(len=12) :: tstep_str
 
  693    if (this%proj_m .gt. 0) 
then 
  694       write(tstep_str, 
'(I12)') tstep
 
  695       write(log_buf, 
'(A12,A14,1X,A8,A10,1X,I3,A16,1X,E10.4)') &
 
  696            adjustl(tstep_str), 
' | Projection:', string, &
 
  697            ', Vectors:', this%proj_m, &
 
  698            ', Original res.:', this%proj_res
 
  699       call neko_log%message(log_buf)
 
  700       call neko_log%newline()
 
 
  707    integer, 
intent(in) :: n
 
  714       if (neko_bcknd_device .eq. 1) 
then 
  715          call device_rzero(this%xx_d(i), n)
 
  716          call device_rzero(this%xx_d(i), n)
 
  719             this%xx(j,i) = 0.0_rp
 
  720             this%bb(j,i) = 0.0_rp
 
 
Return the device pointer for an associated Fortran array.
 
Map a Fortran array to a device (allocate and associate)
 
Copy data between host and device (or device and device)
 
Defines a Matrix-vector product.
 
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
 
integer, public pe_rank
MPI rank.
 
type(mpi_comm), public neko_comm
MPI communicator.
 
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
 
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication  (multiplication on first argument)
 
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
 
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
 
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
 
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
 
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
 
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
 
Interface for device projection.
 
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
 
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
 
Device abstraction, common interface for various accelerators.
 
integer, parameter, public host_to_device
 
subroutine, public device_free(x_d)
Deallocate memory on the device.
 
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
 
type(log_t), public neko_log
Global log stream.
 
integer, parameter, public log_size
 
subroutine, public cmult(a, c, n)
Multiplication by constant c .
 
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
 
subroutine, public add2(a, b, n)
Vector addition .
 
subroutine, public copy(a, b, n)
Copy a vector .
 
subroutine, public rzero(a, n)
Zero a real vector.
 
integer, parameter neko_blk_size
 
integer, parameter neko_bcknd_device
 
integer, parameter neko_bcknd_opencl
 
logical, parameter neko_device_mpi
 
integer, parameter, public c_rp
 
integer, parameter, public rp
Global precision used in computations.
 
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
 
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
 
Project x onto X, the space of old solutions and back again.
 
subroutine device_project_on(this, b, coef, n)
 
subroutine projection_free(this)
 
subroutine bcknd_clear(this, n)
 
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
 
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
 
subroutine cpu_proj_ortho(this, xx, bb, w, n)
 
subroutine, public proj_ortho(this, coef, n)
 
subroutine bcknd_project_on(this, b, coef, n)
 
subroutine projection_init(this, n, l, activ_step)
 
subroutine cpu_project_on(this, b, coef, n)
 
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
 
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
 
subroutine print_proj_info(this, string, tstep)
 
Implements type time_step_controller.
 
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
 
Base type for a matrix-vector product providing .
 
A list of allocatable `bc_t`. Follows the standard interface of lists.
 
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
 
Provides a tool to set time step dt.