84 use mpi_f08,
only : mpi_allreduce, mpi_in_place, mpi_sum, mpi_wtime
85 use,
intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
92 real(kind=
rp),
allocatable :: xx(:,:)
93 real(kind=
rp),
allocatable :: bb(:,:)
94 real(kind=
rp),
allocatable :: xbar(:)
95 type(c_ptr),
allocatable :: xx_d(:)
96 type(c_ptr),
allocatable :: bb_d(:)
97 type(c_ptr) :: xbar_d = c_null_ptr
98 type(c_ptr) :: alpha_d = c_null_ptr
99 type(c_ptr) :: xx_d_d = c_null_ptr
100 type(c_ptr) :: bb_d_d = c_null_ptr
102 real(kind=
rp) :: tol = 1e-7_rp
104 real(kind=
rp) :: proj_res
105 integer :: proj_m = 0
106 integer :: activ_step
107 logical :: prj_reorthogonalize_basis = .false.
124 integer,
intent(in) :: n
125 integer,
intent(in) :: L
126 integer,
optional,
intent(in) :: activ_step
127 logical,
optional,
intent(in) :: reorthogonalize_basis
129 integer(c_size_t) :: ptr_size
137 if (
present(activ_step))
then
138 this%activ_step = activ_step
143 if (
present(reorthogonalize_basis))
then
144 this%prj_reorthogonalize_basis = reorthogonalize_basis
151 if (this%L .le. 0)
then
155 allocate(this%xx(n, this%L))
156 allocate(this%bb(n, this%L))
157 allocate(this%xbar(n))
158 call rzero(this%xbar, n)
160 call rzero(this%xx(1, i), n)
161 call rzero(this%bb(1, i), n)
165 allocate(this%xx_d(this%L))
166 allocate(this%bb_d(this%L))
169 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
175 this%xx_d(i) = c_null_ptr
176 call device_map(this%xx(:, i), this%xx_d(i), n)
178 this%bb_d(i) = c_null_ptr
179 call device_map(this%bb(:, i), this%bb_d(i), n)
183 ptr_size = c_sizeof(c_null_ptr) * this%L
185 ptr = c_loc(this%xx_d)
189 ptr = c_loc(this%bb_d)
200 if (c_associated(this%xx_d_d))
then
203 if (c_associated(this%bb_d_d))
then
206 if (c_associated(this%alpha_d))
then
209 if (
allocated(this%xx))
then
214 deallocate(this%xx_d)
218 if (
allocated(this%xbar))
then
222 deallocate(this%xbar)
224 if (
allocated(this%bb))
then
229 deallocate(this%bb_d)
237 string, Ax, gs_h, bclst)
239 integer,
intent(inout) :: n
240 real(kind=
rp),
intent(inout),
dimension(n) :: b
241 integer,
intent(in) :: tstep
242 class(
coef_t),
intent(inout) :: coef
244 class(
bc_list_t),
optional,
intent(inout) :: bclst
245 type(
gs_t),
optional,
intent(inout) :: gs_h
246 class(
ax_t),
optional,
intent(in) :: Ax
247 character(len=*),
optional :: string
249 if (tstep .gt. this%activ_step .and. this%L .gt. 0)
then
250 if (dt_controller%is_variable_dt)
then
252 if (dt_controller%dt_last_change .eq. 0)
then
254 else if (dt_controller%dt_last_change .gt. this%activ_step - 1)
then
256 if (this%prj_reorthogonalize_basis .and.
present(gs_h) &
257 .and.
present(ax) .and.
present(bclst))
then
258 call this%reortho_basis(ax, coef, gs_h, bclst, n)
262 call this%project_on(b, coef, n)
263 if (
present(string))
then
264 call this%log_info(string, tstep)
269 if (this%prj_reorthogonalize_basis .and.
present(gs_h) &
270 .and.
present(ax) .and.
present(bclst))
then
271 call this%reortho_basis(ax, coef, gs_h, bclst, n)
273 call this%project_on(b, coef, n)
274 if (
present(string))
then
275 call this%log_info(string, tstep)
285 integer,
intent(inout) :: n
286 class(
ax_t),
intent(inout) :: Ax
287 class(
coef_t),
intent(inout) :: coef
289 type(
gs_t),
intent(inout) :: gs_h
290 real(kind=
rp),
intent(inout),
dimension(n) :: x
291 integer,
intent(in) :: tstep
294 if (tstep .gt. this%activ_step .and. this%L .gt. 0)
then
295 if (.not.(dt_controller%is_variable_dt) .or. &
296 (dt_controller%dt_last_change .gt. this%activ_step - 1))
then
297 call this%project_back(x, ax, coef, bclst, gs_h, n)
305 integer,
intent(inout) :: n
306 class(
coef_t),
intent(inout) :: coef
307 real(kind=
rp),
intent(inout),
dimension(n) :: b
319 integer,
intent(inout) :: n
320 class(
ax_t),
intent(inout) :: Ax
321 class(
coef_t),
intent(inout) :: coef
323 type(
gs_t),
intent(inout) :: gs_h
324 real(kind=
rp),
intent(inout),
dimension(n) :: x
332 if (this%m .gt. 0)
call device_add2(x_d, this%xbar_d, n)
333 if (this%m .eq. this%L)
then
336 this%m = min(this%m+1, this%L)
342 if (this%m .gt. 0)
call add2(x, this%xbar, n)
343 if (this%m .eq. this%L)
then
346 this%m = min(this%m+1, this%L)
349 call copy(this%xx(1, this%m), x, n)
352 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
353 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
354 call bclst%apply_scalar(this%bb(1, this%m), n)
362 class(
ax_t),
intent(in) :: Ax
363 class(
coef_t),
intent(in) :: coef
364 type(
gs_t),
intent(inout) :: gs_h
366 integer,
intent(in) :: n
379 class(
ax_t),
intent(in) :: Ax
380 class(
coef_t),
intent(in) :: coef
381 type(
gs_t),
intent(inout) :: gs_h
383 integer,
intent(in) :: n
384 character(len=1000) :: msg
387 real(kind=
rp) :: alpha, s, norm_fac
388 real(kind=
rp) :: start_time, end_time, time
390 if (this%m .le. 0)
return
392 associate(xx => this%xx, bb => this%bb)
393 start_time = mpi_wtime()
397 call ax%compute(bb(1,i), xx(1,i), coef, coef%msh, coef%Xh)
398 call gs_h%gs_op_vector(bb(1,i), n, gs_op_add)
399 call blst%apply_scalar(bb(1,i), n)
407 alpha =
glsc3(xx(1,i), bb(1,j), coef%mult, n)
408 call add2s2(xx(1,i), xx(1,j), -alpha, n)
409 call add2s2(bb(1,i), bb(1,j), -alpha, n)
412 s =
glsc3(xx(1,i), bb(1,i), coef%mult, n)
413 norm_fac = 1.0_rp / sqrt(s)
414 call cmult(xx(1,i), norm_fac, n)
415 call cmult(bb(1,i), norm_fac, n)
418 end_time = mpi_wtime()
419 time = end_time - start_time
420 write(msg,
'(A, E15.7)')
"Projection basis reorthogonalization (s): ", time
427 class(ax_t),
intent(in) :: Ax
428 class(coef_t),
intent(in) :: coef
429 type(gs_t),
intent(inout) :: gs_h
430 type(bc_list_t),
intent(inout) :: blst
431 integer,
intent(in) :: n
432 character(len=1000) :: msg
435 real(kind=rp) :: alpha, s, norm_fac
436 real(kind=rp) :: start_time, end_time, time
438 if (this%m .le. 0)
return
440 associate(xx_d => this%xx_d, bb_d => this%bb_d)
441 start_time = mpi_wtime()
445 call ax%compute(this%bb(1,i), this%xx(1,i), coef, coef%msh, coef%Xh)
446 call gs_h%gs_op_vector(this%bb(1,i), n, gs_op_add)
447 call blst%apply_scalar(this%bb(1,i), n)
455 alpha = device_glsc3(xx_d(i), bb_d(j), coef%mult_d, n)
456 call device_add2s2(xx_d(i), xx_d(j), -alpha, n)
457 call device_add2s2(bb_d(i), bb_d(j), -alpha, n)
460 s = device_glsc3(xx_d(i), bb_d(i), coef%mult_d, n)
461 norm_fac = 1.0_rp / sqrt(s)
462 call device_cmult(xx_d(i), norm_fac, n)
463 call device_cmult(bb_d(i), norm_fac, n)
466 end_time = mpi_wtime()
467 time = end_time - start_time
468 write(msg,
'(A, E15.7)')
"Projection basis reorthogonalization (s): ", time
469 call neko_log%message(trim(msg))
476 integer,
intent(inout) :: n
477 class(coef_t),
intent(inout) :: coef
478 real(kind=rp),
intent(inout),
dimension(n) :: b
479 integer :: i, j, k, l, ierr
480 real(kind=rp) :: work(this%L), alpha(this%L), s
482 associate(xbar => this%xbar, xx => this%xx, &
485 if (this%m .le. 0)
return
488 call rzero(alpha, this%m)
489 this%proj_res = sqrt(glsc3(b, b, coef%mult, n) / coef%volume)
491 do i = 1, n, neko_blk_size
492 j = min(neko_blk_size, n-i+1)
496 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
498 alpha(k) = alpha(k) + s
503 call mpi_allreduce(mpi_in_place, alpha, this%m, &
504 mpi_real_precision, mpi_sum, neko_comm, ierr)
506 call rzero(work, this%m)
508 do i = 1, n, neko_blk_size
509 j = min(neko_blk_size, n-i+1)
511 xbar(i+l) = alpha(1) * xx(i+l,1)
512 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
516 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
517 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
524 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
526 work(k) = work(k) + s
530 call mpi_allreduce(work, alpha, this%m, &
531 mpi_real_precision, mpi_sum, neko_comm, ierr)
533 do i = 1, n, neko_blk_size
534 j = min(neko_blk_size, n-i+1)
537 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
538 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
547 integer,
intent(inout) :: n
548 class(coef_t),
intent(inout) :: coef
549 real(kind=rp),
intent(inout),
dimension(n) :: b
550 real(kind=rp) :: alpha(this%L)
553 b_d = device_get_ptr(b)
555 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
556 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
558 if (this%m .le. 0)
return
562 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
564 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1))
then
565 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
566 coef%mult_d, xbar_d, this%m, n)
568 if (neko_bcknd_opencl .eq. 1)
then
570 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
573 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
574 call device_memcpy(alpha, alpha_d, this%m, &
575 host_to_device, sync = .false.)
577 call device_rzero(xbar_d, n)
578 if (neko_bcknd_opencl .eq. 1)
then
580 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
582 call cmult(alpha, -1.0_rp, this%m)
584 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
585 call device_cmult(alpha_d, -1.0_rp, this%m)
588 if (neko_bcknd_opencl .eq. 1)
then
590 call device_add2s2(b_d, bb_d(i), alpha(i), n)
591 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
594 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
595 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
596 call device_memcpy(alpha, alpha_d, this%m, &
597 host_to_device, sync = .false.)
600 if (neko_bcknd_opencl .eq. 1)
then
602 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
603 call cmult(alpha, -1.0_rp, this%m)
604 call device_add2s2(b_d, bb_d(i), alpha(i), n)
607 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
608 call device_cmult(alpha_d, -1.0_rp, this%m)
609 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
619 type(coef_t),
intent(in) :: coef
620 integer,
intent(in) :: n
622 if (neko_bcknd_device .eq. 1)
then
632 integer,
intent(in) :: n
633 type(c_ptr),
dimension(this%L) :: xx_d, bb_d
634 type(c_ptr),
intent(in) :: w_d
635 real(kind=rp) :: nrm, scl
636 real(kind=rp) :: alpha(this%L)
639 associate(m => this%m, xx_d_d => this%xx_d_d, &
640 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
644 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1))
then
645 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
646 w_d, xx_d(m), this%m, n, nrm)
648 if (neko_bcknd_opencl .eq. 1)
then
650 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
653 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
656 call cmult(alpha, -1.0_rp,m)
657 if (neko_bcknd_opencl .eq. 1)
then
659 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
660 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
662 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
665 call device_memcpy(alpha, alpha_d, this%m, &
666 host_to_device, sync = .false.)
667 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
668 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
670 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
672 call cmult(alpha, -1.0_rp,m)
673 if (neko_bcknd_opencl .eq. 1)
then
675 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
676 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
677 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
680 call device_memcpy(alpha, alpha_d, m, &
681 host_to_device, sync = .false.)
682 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
683 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
684 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
688 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
689 alpha(m) = sqrt(alpha(m))
691 if (alpha(m) .gt. this%tol*nrm)
then
692 scl = 1.0_rp / alpha(m)
693 call device_cmult(xx_d(m), scl, n)
694 call device_cmult(bb_d(m), scl, n)
698 if (pe_rank .eq. 0)
then
699 call neko_warning(
'New vector not linearly indepependent!')
711 integer,
intent(in) :: n
712 real(kind=rp),
dimension(n, this%L),
intent(inout) :: xx, bb
713 real(kind=rp),
dimension(n),
intent(in) :: w
714 real(kind=rp) :: nrm, scl1, scl2, c, s
715 real(kind=rp) :: alpha(this%L), beta(this%L)
716 integer :: i, j, k, l, h, ierr
718 associate(m => this%m)
725 do i = 1, n, neko_blk_size
726 j = min(neko_blk_size, n-i+1)
731 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
732 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
734 alpha(k) = alpha(k) + 0.5_rp * (s + c)
738 call mpi_allreduce(mpi_in_place, alpha, this%m, &
739 mpi_real_precision, mpi_sum, neko_comm, ierr)
744 do i = 1, n, neko_blk_size
745 j = min(neko_blk_size, n-i+1)
748 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
749 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
755 do i = 1, n, neko_blk_size
756 j = min(neko_blk_size, n-i+1)
761 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
762 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
764 beta(k) = beta(k) + 0.5_rp * (s + c)
768 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
769 mpi_real_precision, mpi_sum, neko_comm, ierr)
773 do i = 1, n, neko_blk_size
774 j = min(neko_blk_size,n-i+1)
777 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
778 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
783 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
785 alpha(m) = alpha(m) + s
788 alpha(k) = alpha(k) + beta(k)
792 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
793 mpi_real_precision, mpi_sum, neko_comm, ierr)
794 alpha(m) = sqrt(alpha(m))
797 if (alpha(m) .gt. this%tol*nrm)
then
799 scl1 = 1.0_rp / alpha(m)
801 xx(1+i,m) = scl1 * xx(1+i,m)
802 bb(1+i,m) = scl1 * bb(1+i,m)
807 if (pe_rank .eq. 0)
then
808 call neko_warning(
'New vector not linearly indepependent!')
819 character(len=*),
intent(in) :: string
820 integer,
intent(in) :: tstep
821 character(len=LOG_SIZE) :: log_buf
822 character(len=12) :: tstep_str
824 if (this%proj_m .gt. 0)
then
825 write(tstep_str,
'(I12)') tstep
826 write(log_buf,
'(A12,A14,1X,A8,A10,1X,I3,A16,1X,E10.4)') &
827 adjustl(tstep_str),
' | Projection:', string, &
828 ', Vectors:', this%proj_m, &
829 ', Original res.:', this%proj_res
830 call neko_log%message(log_buf)
831 call neko_log%newline()
838 integer,
intent(in) :: n
845 if (neko_bcknd_device .eq. 1)
then
846 call device_rzero(this%xx_d(i), n)
847 call device_rzero(this%bb_d(i), n)
850 this%xx(j,i) = 0.0_rp
851 this%bb(j,i) = 0.0_rp
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Unmap a Fortran array from a device (deassociate and free)
Defines a Matrix-vector product.
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
integer, public pe_rank
MPI rank.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
subroutine, public cmult(a, c, n)
Multiplication by constant c .
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine projection_init(this, n, l, activ_step, reorthogonalize_basis)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string, ax, gs_h, bclst)
subroutine bcknd_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine device_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine, public proj_ortho(this, coef, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine cpu_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string, tstep)
Implements type time_step_controller.
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Provides a tool to set time step dt.