85 use,
intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
91 real(kind=
rp),
allocatable :: xx(:,:)
92 real(kind=
rp),
allocatable :: bb(:,:)
93 real(kind=
rp),
allocatable :: xbar(:)
94 type(c_ptr),
allocatable :: xx_d(:)
95 type(c_ptr),
allocatable :: bb_d(:)
96 type(c_ptr) :: xbar_d = c_null_ptr
97 type(c_ptr) :: alpha_d = c_null_ptr
98 type(c_ptr) :: xx_d_d = c_null_ptr
99 type(c_ptr) :: bb_d_d = c_null_ptr
101 real(kind=
rp) :: tol = 1e-7_rp
103 real(kind=
rp) :: proj_res
104 integer :: proj_m = 0
105 integer :: activ_step
121 integer,
intent(in) :: n
122 integer,
optional,
intent(in) :: L, activ_step
124 integer(c_size_t) :: ptr_size
136 if (
present(activ_step))
then
137 this%activ_step = activ_step
144 allocate(this%xx(n, this%L))
145 allocate(this%bb(n, this%L))
146 allocate(this%xbar(n))
147 allocate(this%xx_d(this%L))
148 allocate(this%bb_d(this%L))
149 call rzero(this%xbar, n)
151 call rzero(this%xx(1, i), n)
152 call rzero(this%bb(1, i), n)
157 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
163 this%xx_d(i) = c_null_ptr
164 call device_map(this%xx(:, i), this%xx_d(i), n)
166 this%bb_d(i) = c_null_ptr
167 call device_map(this%bb(:, i), this%bb_d(i), n)
171 ptr_size = c_sizeof(c_null_ptr) * this%L
173 ptr = c_loc(this%xx_d)
177 ptr = c_loc(this%bb_d)
188 if (
allocated(this%xx))
then
191 if (
allocated(this%bb))
then
194 if (
allocated(this%xbar))
then
195 deallocate(this%xbar)
197 if (
allocated(this%xx_d))
then
199 if (c_associated(this%xx_d(i)))
then
204 if (c_associated(this%xx_d_d))
then
207 if (c_associated(this%xbar_d))
then
210 if (c_associated(this%alpha_d))
then
213 if (
allocated(this%bb_d))
then
215 if (c_associated(this%bb_d(i)))
then
220 if (c_associated(this%bb_d_d))
then
229 integer,
intent(inout) :: n
230 real(kind=
rp),
intent(inout),
dimension(n) :: b
231 integer,
intent(in) :: tstep
232 class(
coef_t),
intent(inout) :: coef
234 character(len=*),
optional :: string
236 if (tstep .gt. this%activ_step .and. this%L .gt. 0)
then
237 if (dt_controller%if_variable_dt)
then
239 if (dt_controller%dt_last_change .eq. 0)
then
241 else if (dt_controller%dt_last_change .gt. this%activ_step - 1)
then
244 call this%project_on(b, coef, n)
245 if (
present(string))
then
246 call this%log_info(string)
250 call this%project_on(b, coef, n)
251 if (
present(string))
then
252 call this%log_info(string)
262 integer,
intent(inout) :: n
263 class(
ax_t),
intent(inout) :: Ax
264 class(
coef_t),
intent(inout) :: coef
266 type(
gs_t),
intent(inout) :: gs_h
267 real(kind=
rp),
intent(inout),
dimension(n) :: x
268 integer,
intent(in) :: tstep
271 if (tstep .gt. this%activ_step .and. this%L .gt. 0)
then
272 if (.not.(dt_controller%if_variable_dt) .or. &
273 (dt_controller%dt_last_change .gt. this%activ_step - 1))
then
274 call this%project_back(x, ax, coef, bclst, gs_h, n)
282 integer,
intent(inout) :: n
283 class(
coef_t),
intent(inout) :: coef
284 real(kind=
rp),
intent(inout),
dimension(n) :: b
296 integer,
intent(inout) :: n
297 class(
ax_t),
intent(inout) :: Ax
298 class(
coef_t),
intent(inout) :: coef
300 type(
gs_t),
intent(inout) :: gs_h
301 real(kind=
rp),
intent(inout),
dimension(n) :: x
309 if (this%m .gt. 0)
call device_add2(x_d, this%xbar_d, n)
310 if (this%m .eq. this%L)
then
313 this%m = min(this%m+1, this%L)
319 if (this%m .gt. 0)
call add2(x, this%xbar, n)
320 if (this%m .eq. this%L)
then
323 this%m = min(this%m+1, this%L)
326 call copy(this%xx(1, this%m), x, n)
329 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
330 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
331 call bclst%apply_scalar(this%bb(1, this%m), n)
345 integer,
intent(inout) :: n
346 class(
coef_t),
intent(inout) :: coef
347 real(kind=
rp),
intent(inout),
dimension(n) :: b
348 integer :: i, j, k, l, ierr
349 real(kind=
rp) :: work(this%L), alpha(this%L), s
351 associate(xbar => this%xbar, xx => this%xx, &
354 if (this%m .le. 0)
return
357 call rzero(alpha, this%m)
358 this%proj_res = sqrt(
glsc3(b, b, coef%mult, n) / coef%volume)
365 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
367 alpha(k) = alpha(k) + s
372 call mpi_allreduce(mpi_in_place, alpha, this%m, &
375 call rzero(work, this%m)
380 xbar(i+l) = alpha(1) * xx(i+l,1)
381 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
385 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
386 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
393 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
395 work(k) = work(k) + s
399 call mpi_allreduce(work, alpha, this%m, &
406 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
407 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
416 integer,
intent(inout) :: n
417 class(coef_t),
intent(inout) :: coef
418 real(kind=rp),
intent(inout),
dimension(n) :: b
419 real(kind=rp) :: alpha(this%L)
422 b_d = device_get_ptr(b)
424 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
425 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
427 if (this%m .le. 0)
return
431 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
433 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1))
then
434 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
435 coef%mult_d, xbar_d, this%m, n)
437 if (neko_bcknd_opencl .eq. 1)
then
439 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
442 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
443 call device_memcpy(alpha, alpha_d, this%m, &
444 host_to_device, sync = .false.)
446 call device_rzero(xbar_d, n)
447 if (neko_bcknd_opencl .eq. 1)
then
449 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
451 call cmult(alpha, -1.0_rp, this%m)
453 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
454 call device_cmult(alpha_d, -1.0_rp, this%m)
457 if (neko_bcknd_opencl .eq. 1)
then
459 call device_add2s2(b_d, bb_d(i), alpha(i), n)
460 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
463 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
464 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
465 call device_memcpy(alpha, alpha_d, this%m, &
466 host_to_device, sync = .false.)
469 if (neko_bcknd_opencl .eq. 1)
then
471 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
472 call cmult(alpha, -1.0_rp, this%m)
473 call device_add2s2(b_d, bb_d(i), alpha(i), n)
476 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
477 call device_cmult(alpha_d, -1.0_rp, this%m)
478 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
488 integer,
intent(inout) :: n
489 type(c_ptr),
dimension(this%L) :: xx_d, bb_d
491 real(kind=rp) :: nrm, scl
492 real(kind=rp) :: alpha(this%L)
495 associate(m => this%m, xx_d_d => this%xx_d_d, &
496 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
500 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1))
then
501 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
502 w_d, xx_d(m), this%m, n, nrm)
504 if (neko_bcknd_opencl .eq. 1)
then
506 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
509 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
512 call cmult(alpha, -1.0_rp,m)
513 if (neko_bcknd_opencl .eq. 1)
then
515 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
516 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
518 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
521 call device_memcpy(alpha, alpha_d, this%m, &
522 host_to_device, sync = .false.)
523 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
524 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
526 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
528 call cmult(alpha, -1.0_rp,m)
529 if (neko_bcknd_opencl .eq. 1)
then
531 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
532 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
533 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
536 call device_memcpy(alpha, alpha_d, m, &
537 host_to_device, sync = .false.)
538 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
539 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
540 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
544 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
545 alpha(m) = sqrt(alpha(m))
547 if (alpha(m) .gt. this%tol*nrm)
then
548 scl = 1.0_rp / alpha(m)
549 call device_cmult(xx_d(m), scl, n)
550 call device_cmult(bb_d(m), scl, n)
554 if (pe_rank .eq. 0)
then
555 call neko_warning(
'New vector not linearly indepependent!')
567 integer,
intent(inout) :: n
568 real(kind=rp),
dimension(n, this%L),
intent(inout) :: xx, bb
569 real(kind=rp),
dimension(n),
intent(inout) :: w
570 real(kind=rp) :: nrm, scl1, scl2, c, s
571 real(kind=rp) :: alpha(this%L), beta(this%L)
572 integer :: i, j, k, l, h, ierr
574 associate(m => this%m)
581 do i = 1, n, neko_blk_size
582 j = min(neko_blk_size, n-i+1)
587 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
588 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
590 alpha(k) = alpha(k) + 0.5_rp * (s + c)
594 call mpi_allreduce(mpi_in_place, alpha, this%m, &
595 mpi_real_precision, mpi_sum, neko_comm, ierr)
600 do i = 1, n, neko_blk_size
601 j = min(neko_blk_size, n-i+1)
604 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
605 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
611 do i = 1, n, neko_blk_size
612 j = min(neko_blk_size, n-i+1)
617 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
618 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
620 beta(k) = beta(k) + 0.5_rp * (s + c)
624 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
625 mpi_real_precision, mpi_sum, neko_comm, ierr)
629 do i = 1, n, neko_blk_size
630 j = min(neko_blk_size,n-i+1)
633 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
634 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
639 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
641 alpha(m) = alpha(m) + s
644 alpha(k) = alpha(k) + beta(k)
648 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
649 mpi_real_precision, mpi_sum, neko_comm, ierr)
650 alpha(m) = sqrt(alpha(m))
653 if (alpha(m) .gt. this%tol*nrm)
then
655 scl1 = 1.0_rp / alpha(m)
657 xx(1+i,m) = scl1 * xx(1+i,m)
658 bb(1+i,m) = scl1 * bb(1+i,m)
663 if (pe_rank .eq. 0)
then
664 call neko_warning(
'New vector not linearly indepependent!')
675 character(len=*) :: string
676 character(len=LOG_SIZE) :: log_buf
678 if (this%proj_m .gt. 0)
then
679 write(log_buf,
'(A,A)')
'Projection ', string
680 call neko_log%message(log_buf)
681 write(log_buf,
'(A,A)')
'Proj. vec.:',
' Orig. residual:'
682 call neko_log%message(log_buf)
683 write(log_buf,
'(I11,3x, E15.7,5x)') this%proj_m, this%proj_res
684 call neko_log%message(log_buf)
691 integer,
intent(in) :: n
698 if (neko_bcknd_device .eq. 1)
then
699 call device_rzero(this%xx_d(i), n)
700 call device_rzero(this%xx_d(i), n)
703 this%xx(j,i) = 0.0_rp
704 this%bb(j,i) = 0.0_rp
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
type(mpi_comm) neko_comm
MPI communicator.
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
subroutine, public cmult(a, c, n)
Multiplication by constant c .
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine projection_init(this, n, l, activ_step)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string)
Implements type time_step_controller.
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Provides a tool to set time step dt.