80 use,
intrinsic :: iso_c_binding
87 real(kind=
rp),
allocatable :: xx(:,:)
88 real(kind=
rp),
allocatable :: bb(:,:)
89 real(kind=
rp),
allocatable :: xbar(:)
90 type(c_ptr),
allocatable :: xx_d(:)
91 type(c_ptr),
allocatable :: bb_d(:)
92 type(c_ptr) :: xbar_d = c_null_ptr
93 type(c_ptr) :: alpha_d = c_null_ptr
94 type(c_ptr) :: xx_d_d = c_null_ptr
95 type(c_ptr) :: bb_d_d = c_null_ptr
97 real(kind=
rp) :: tol = 1e-7_rp
99 real(kind=
rp) :: proj_res
100 integer :: proj_m = 0
101 integer :: activ_step
117 integer,
intent(in) :: n
118 integer,
optional,
intent(in) :: L, activ_step
120 integer(c_size_t) :: ptr_size
132 if (
present(activ_step))
then
133 this%activ_step = activ_step
140 allocate(this%xx(n,this%L))
141 allocate(this%bb(n,this%L))
142 allocate(this%xbar(n))
143 allocate(this%xx_d(this%L))
144 allocate(this%bb_d(this%L))
145 call rzero(this%xbar,n)
147 call rzero(this%xx(1,i),n)
148 call rzero(this%bb(1,i),n)
153 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L,c_size_t))
159 this%xx_d(i) = c_null_ptr
160 call device_map(this%xx(:,i), this%xx_d(i), n)
162 this%bb_d(i) = c_null_ptr
163 call device_map(this%bb(:,i), this%bb_d(i), n)
167 ptr_size = c_sizeof(c_null_ptr) * this%L
169 ptr = c_loc(this%xx_d)
173 ptr = c_loc(this%bb_d)
184 if (
allocated(this%xx))
then
187 if (
allocated(this%bb))
then
190 if (
allocated(this%xbar))
then
191 deallocate(this%xbar)
193 if (
allocated(this%xx_d))
then
195 if (c_associated(this%xx_d(i)))
then
200 if (c_associated(this%xx_d_d))
then
203 if (c_associated(this%xbar_d))
then
206 if (c_associated(this%alpha_d))
then
209 if (
allocated(this%bb_d))
then
211 if (c_associated(this%bb_d(i)))
then
216 if (c_associated(this%bb_d_d))
then
224 integer,
intent(inout) :: n
225 real(kind=
rp),
intent(inout),
dimension(n) :: b
226 integer,
intent(in) :: tstep
227 class(
coef_t),
intent(inout) :: coef
229 character(len=*),
optional :: string
231 if( tstep .gt. this%activ_step .and. this%L .gt. 0)
then
232 if (dt_controller%if_variable_dt)
then
233 if (dt_controller%dt_last_change .eq. 0)
then
235 else if (dt_controller%dt_last_change .gt. this%activ_step - 1)
then
238 call this%project_on(b, coef, n)
239 if (
present(string))
then
240 call this%log_info(string)
244 call this%project_on(b, coef, n)
245 if (
present(string))
then
246 call this%log_info(string)
255 integer,
intent(inout) :: n
256 class(
ax_t),
intent(inout) :: Ax
257 class(
coef_t),
intent(inout) :: coef
259 type(
gs_t),
intent(inout) :: gs_h
260 real(kind=
rp),
intent(inout),
dimension(n) :: x
261 integer,
intent(in) :: tstep
264 if (tstep .gt. this%activ_step .and. this%L .gt. 0)
then
265 if (.not.(dt_controller%if_variable_dt) .or. &
266 (dt_controller%dt_last_change .gt. this%activ_step - 1))
then
267 call this%project_back(x, ax, coef, bclst, gs_h, n)
275 integer,
intent(inout) :: n
276 class(
coef_t),
intent(inout) :: coef
277 real(kind=
rp),
intent(inout),
dimension(n) :: b
289 integer,
intent(inout) :: n
290 class(
ax_t),
intent(inout) :: Ax
291 class(
coef_t),
intent(inout) :: coef
293 type(
gs_t),
intent(inout) :: gs_h
294 real(kind=
rp),
intent(inout),
dimension(n) :: x
301 if (this%m .gt. 0)
call device_add2(x_d,this%xbar_d,n)
302 if (this%m .eq. this%L)
then
305 this%m = min(this%m+1,this%L)
311 if (this%m.gt.0)
call add2(x,this%xbar,n)
312 if (this%m .eq. this%L)
then
315 this%m = min(this%m+1,this%L)
318 call copy (this%xx(1,this%m),x,n)
321 call ax%compute(this%bb(1,this%m), x, coef, coef%msh, coef%Xh)
322 call gs_h%gs_op_vector(this%bb(1,this%m), n, gs_op_add)
323 call bclst%apply_scalar(this%bb(1,this%m), n)
337 integer,
intent(inout) :: n
338 class(
coef_t),
intent(inout) :: coef
339 real(kind=
rp),
intent(inout),
dimension(n) :: b
340 integer :: i, j, k, l, ierr
341 real(kind=
rp) :: work(this%L), alpha(this%L), s
343 associate(xbar => this%xbar, xx => this%xx, &
346 if (this%m .le. 0)
return
349 call rzero(alpha, this%m)
350 this%proj_res = sqrt(
glsc3(b,b,coef%mult,n)/coef%volume)
357 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
359 alpha(k) = alpha(k) + s
364 call mpi_allreduce(mpi_in_place, alpha, this%m, &
367 call rzero(work, this%m)
372 xbar(i+l) = alpha(1) * xx(i+l,1)
373 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
377 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
378 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
385 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
387 work(k) = work(k) + s
391 call mpi_allreduce(work, alpha, this%m, &
398 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
399 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
408 integer,
intent(inout) :: n
409 class(coef_t),
intent(inout) :: coef
410 real(kind=rp),
intent(inout),
dimension(n) :: b
411 real(kind=rp) :: alpha(this%L)
414 b_d = device_get_ptr(b)
416 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
417 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
419 if (this%m .le. 0)
return
423 this%proj_res = sqrt(device_glsc3(b_d,b_d,coef%mult_d,n)/coef%volume)
425 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1))
then
426 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
427 coef%mult_d, xbar_d, this%m, n)
429 if (neko_bcknd_opencl .eq. 1)
then
431 alpha(i) = device_glsc3(b_d,xx_d(i),coef%mult_d,n)
434 call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
435 call device_memcpy(alpha, alpha_d, this%m, &
436 host_to_device, sync=.false.)
438 call device_rzero(xbar_d, n)
439 if (neko_bcknd_opencl .eq. 1)
then
441 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
443 call cmult(alpha, -1.0_rp, this%m)
445 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
446 call device_cmult(alpha_d, -1.0_rp, this%m)
449 if (neko_bcknd_opencl .eq. 1)
then
451 call device_add2s2(b_d, bb_d(i), alpha(i), n)
452 alpha(i) = device_glsc3(b_d,xx_d(i),coef%mult_d,n)
455 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
456 call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
457 call device_memcpy(alpha, alpha_d, this%m, &
458 host_to_device, sync=.false.)
461 if (neko_bcknd_opencl .eq. 1)
then
463 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
464 call cmult(alpha, -1.0_rp, this%m)
465 call device_add2s2(b_d, bb_d(i), alpha(i), n)
468 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
469 call device_cmult(alpha_d, -1.0_rp, this%m)
470 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
480 integer,
intent(inout) :: n
481 type(c_ptr),
dimension(this%L) :: xx_d, bb_d
483 real(kind=rp) :: nrm, scl
484 real(kind=rp) :: alpha(this%L)
487 associate(m => this%m, xx_d_d => this%xx_d_d, &
488 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
492 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1))
then
493 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
494 w_d, xx_d(m), this%m, n, nrm)
496 if (neko_bcknd_opencl .eq. 1)
then
498 alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
501 call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
504 call cmult(alpha, -1.0_rp,m)
505 if (neko_bcknd_opencl .eq. 1)
then
507 call device_add2s2(xx_d(m),xx_d(i),alpha(i), n)
508 call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
510 alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
513 call device_memcpy(alpha, alpha_d, this%m, &
514 host_to_device, sync=.false.)
515 call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
516 call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
518 call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
520 call cmult(alpha, -1.0_rp,m)
521 if (neko_bcknd_opencl .eq. 1)
then
523 call device_add2s2(xx_d(m),xx_d(i),alpha(i),n)
524 call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
525 alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
528 call device_memcpy(alpha, alpha_d, m, &
529 host_to_device, sync=.false.)
530 call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
531 call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
532 call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
536 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
537 alpha(m) = sqrt(alpha(m))
539 if(alpha(m) .gt. this%tol*nrm)
then
540 scl = 1.0_rp / alpha(m)
541 call device_cmult(xx_d(m), scl, n)
542 call device_cmult(bb_d(m), scl, n)
546 if(pe_rank .eq. 0)
then
547 call neko_warning(
'New vector not linearly indepependent!')
559 integer,
intent(inout) :: n
560 real(kind=rp),
dimension(n, this%L),
intent(inout) :: xx, bb
561 real(kind=rp),
dimension(n),
intent(inout) :: w
562 real(kind=rp) :: nrm, scl1, scl2, c, s
563 real(kind=rp) :: alpha(this%L), beta(this%L)
564 integer :: i, j, k, l, h, ierr
566 associate(m => this%m)
573 do i = 1, n, neko_blk_size
574 j = min(neko_blk_size, n-i+1)
579 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
580 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
582 alpha(k) = alpha(k) + 0.5_rp * (s + c)
586 call mpi_allreduce(mpi_in_place, alpha, this%m, &
587 mpi_real_precision, mpi_sum, neko_comm, ierr)
592 do i = 1, n, neko_blk_size
593 j = min(neko_blk_size, n-i+1)
596 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
597 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
603 do i = 1, n, neko_blk_size
604 j = min(neko_blk_size, n-i+1)
609 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
610 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
612 beta(k) = beta(k) + 0.5_rp * (s + c)
616 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
617 mpi_real_precision, mpi_sum, neko_comm, ierr)
621 do i = 1, n, neko_blk_size
622 j = min(neko_blk_size,n-i+1)
625 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
626 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
631 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
633 alpha(m) = alpha(m) + s
636 alpha(k) = alpha(k) + beta(k)
640 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
641 mpi_real_precision, mpi_sum, neko_comm, ierr)
642 alpha(m) = sqrt(alpha(m))
645 if(alpha(m) .gt. this%tol*nrm)
then
647 scl1 = 1.0_rp / alpha(m)
649 xx(1+i,m) = scl1 * xx(1+i,m)
650 bb(1+i,m) = scl1 * bb(1+i,m)
655 if(pe_rank .eq. 0)
then
656 call neko_warning(
'New vector not linearly indepependent!')
667 character(len=*) :: string
668 character(len=LOG_SIZE) :: log_buf
670 if (this%proj_m .gt. 0)
then
671 write(log_buf,
'(A,A)')
'Projection ', string
672 call neko_log%message(log_buf)
673 write(log_buf,
'(A,A)')
'Proj. vec.:',
' Orig. residual:'
674 call neko_log%message(log_buf)
675 write(log_buf,
'(I11,3x, E15.7,5x)') this%proj_m, this%proj_res
676 call neko_log%message(log_buf)
683 integer,
intent(in) :: n
690 if (neko_bcknd_device .eq. 1)
then
691 call device_rzero(this%xx_d(i), n)
692 call device_rzero(this%xx_d(i), n)
695 this%xx(j,i) = 0.0_rp
696 this%bb(j,i) = 0.0_rp
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
type(mpi_comm) neko_comm
MPI communicator.
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
subroutine, public cmult(a, c, n)
Multiplication by constant c .
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine projection_init(this, n, l, activ_step)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string)
Implements type time_step_controller.
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Provides a tool to set time step dt.