Neko  0.8.1
A portable framework for high-order spectral element flow simulations
projection.f90
Go to the documentation of this file.
1 ! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2 !
3 ! The UChicago Argonne, LLC as Operator of Argonne National
4 ! Laboratory holds copyright in the Software. The copyright holder
5 ! reserves all rights except those expressly granted to licensees,
6 ! and U.S. Government license rights.
7 !
8 ! Redistribution and use in source and binary forms, with or without
9 ! modification, are permitted provided that the following conditions
10 ! are met:
11 !
12 ! 1. Redistributions of source code must retain the above copyright
13 ! notice, this list of conditions and the disclaimer below.
14 !
15 ! 2. Redistributions in binary form must reproduce the above copyright
16 ! notice, this list of conditions and the disclaimer (as noted below)
17 ! in the documentation and/or other materials provided with the
18 ! distribution.
19 !
20 ! 3. Neither the name of ANL nor the names of its contributors
21 ! may be used to endorse or promote products derived from this software
22 ! without specific prior written permission.
23 !
24 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28 ! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29 ! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 ! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31 ! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 ! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 ! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 ! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 !
37 ! Additional BSD Notice
38 ! ---------------------
39 ! 1. This notice is required to be provided under our contract with
40 ! the U.S. Department of Energy (DOE). This work was produced at
41 ! Argonne National Laboratory under Contract
42 ! No. DE-AC02-06CH11357 with the DOE.
43 !
44 ! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45 ! LLC nor any of their employees, makes any warranty,
46 ! express or implied, or assumes any liability or responsibility for the
47 ! accuracy, completeness, or usefulness of any information, apparatus,
48 ! product, or process disclosed, or represents that its use would not
49 ! infringe privately-owned rights.
50 !
51 ! 3. Also, reference herein to any specific commercial products, process,
52 ! or services by trade name, trademark, manufacturer or otherwise does
53 ! not necessarily constitute or imply its endorsement, recommendation,
54 ! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55 ! The views and opinions of authors expressed
56 ! herein do not necessarily state or reflect those of the United States
57 ! Government or UCHICAGO ARGONNE, LLC, and shall
58 ! not be used for advertising or product endorsement purposes.
59 !
63 module projection
64  use num_types
65  use math
66  use coefs
67  use ax_product
68  use bc
69  use comm
70  use gather_scatter
71  use neko_config
72  use device
73  use device_math
75  use profiler
76  use logger
77  use, intrinsic :: iso_c_binding
79 
80  implicit none
81  private
82 
83  type, public :: projection_t
84  real(kind=rp), allocatable :: xx(:,:)
85  real(kind=rp), allocatable :: bb(:,:)
86  real(kind=rp), allocatable :: xbar(:)
87  type(c_ptr), allocatable :: xx_d(:)
88  type(c_ptr), allocatable :: bb_d(:)
89  type(c_ptr) :: xbar_d = c_null_ptr
90  type(c_ptr) :: alpha_d = c_null_ptr
91  type(c_ptr) :: xx_d_d = c_null_ptr
92  type(c_ptr) :: bb_d_d = c_null_ptr
93  integer :: m, l
94  real(kind=rp) :: tol = 1e-7_rp
95  !logging variables
96  real(kind=rp) :: proj_res
97  integer :: proj_m = 0
98  integer :: activ_step ! steps to activate projection
99  contains
100  procedure, pass(this) :: clear => bcknd_clear
101  procedure, pass(this) :: project_on => bcknd_project_on
102  procedure, pass(this) :: project_back => bcknd_project_back
103  procedure, pass(this) :: log_info => print_proj_info
104  procedure, pass(this) :: init => projection_init
105  procedure, pass(this) :: free => projection_free
106  procedure, pass(this) :: pre_solving => projection_pre_solving
107  procedure, pass(this) :: post_solving => projection_post_solving
108  end type projection_t
109 
110 contains
111 
112  subroutine projection_init(this, n, L, activ_step)
113  class(projection_t), target, intent(inout) :: this
114  integer, intent(in) :: n
115  integer, optional, intent(in) :: L, activ_step
116  integer :: i
117  integer(c_size_t) :: ptr_size
118  type(c_ptr) :: ptr
119  real(c_rp) :: dummy
120 
121  call this%free()
122 
123  if (present(l)) then
124  this%L = l
125  else
126  this%L = 20
127  end if
128 
129  if (present(activ_step)) then
130  this%activ_step = activ_step
131  else
132  this%activ_step = 5
133  end if
134 
135  this%m = 0
136 
137  allocate(this%xx(n,this%L))
138  allocate(this%bb(n,this%L))
139  allocate(this%xbar(n))
140  allocate(this%xx_d(this%L))
141  allocate(this%bb_d(this%L))
142  call rzero(this%xbar,n)
143  do i = 1, this%L
144  call rzero(this%xx(1,i),n)
145  call rzero(this%bb(1,i),n)
146  end do
147  if (neko_bcknd_device .eq. 1) then
148 
149  call device_map(this%xbar, this%xbar_d,n)
150  call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L,c_size_t))
151 
152  call device_rzero(this%xbar_d, n)
153  call device_rzero(this%alpha_d, this%L)
154 
155  do i = 1, this%L
156  this%xx_d(i) = c_null_ptr
157  call device_map(this%xx(:,i), this%xx_d(i), n)
158  call device_rzero(this%xx_d(i), n)
159  this%bb_d(i) = c_null_ptr
160  call device_map(this%bb(:,i), this%bb_d(i), n)
161  call device_rzero(this%bb_d(i), n)
162  end do
163 
164  ptr_size = c_sizeof(c_null_ptr) * this%L
165  call device_alloc(this%xx_d_d, ptr_size)
166  ptr = c_loc(this%xx_d)
167  call device_memcpy(ptr,this%xx_d_d, ptr_size, &
168  host_to_device, sync=.false.)
169  call device_alloc(this%bb_d_d, ptr_size)
170  ptr = c_loc(this%bb_d)
171  call device_memcpy(ptr,this%bb_d_d, ptr_size, &
172  host_to_device, sync=.false.)
173  end if
174 
175 
176  end subroutine projection_init
177 
178  subroutine projection_free(this)
179  class(projection_t), intent(inout) :: this
180  integer :: i
181  if (allocated(this%xx)) then
182  deallocate(this%xx)
183  end if
184  if (allocated(this%bb)) then
185  deallocate(this%bb)
186  end if
187  if (allocated(this%xbar)) then
188  deallocate(this%xbar)
189  end if
190  if (allocated(this%xx_d)) then
191  do i = 1, this%L
192  if (c_associated(this%xx_d(i))) then
193  call device_free(this%xx_d(i))
194  end if
195  end do
196  end if
197  if (c_associated(this%xx_d_d)) then
198  call device_free(this%xx_d_d)
199  end if
200  if (c_associated(this%xbar_d)) then
201  call device_free(this%xbar_d)
202  end if
203  if (c_associated(this%alpha_d)) then
204  call device_free(this%alpha_d)
205  end if
206  if (allocated(this%bb_d)) then
207  do i = 1, this%L
208  if (c_associated(this%bb_d(i))) then
209  call device_free(this%bb_d(i))
210  end if
211  end do
212  end if
213  if (c_associated(this%bb_d_d)) then
214  call device_free(this%bb_d_d)
215  end if
216 
217  end subroutine projection_free
218 
219  subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
220  class(projection_t), intent(inout) :: this
221  integer, intent(inout) :: n
222  real(kind=rp), intent(inout), dimension(n) :: b
223  integer, intent(in) :: tstep
224  class(coef_t), intent(inout) :: coef
225  type(time_step_controller_t), intent(in) :: dt_controller
226  character(len=*), optional :: string
227 
228  if( tstep .gt. this%activ_step .and. this%L .gt. 0) then
229  if (dt_controller%if_variable_dt) then
230  if (dt_controller%dt_last_change .eq. 0) then ! the time step at which dt is changed
231  call this%clear(n)
232  else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
233  ! activate projection some steps after dt is changed
234  ! note that dt_last_change start from 0
235  call this%project_on(b, coef, n)
236  if (present(string)) then
237  call this%log_info(string)
238  end if
239  end if
240  else
241  call this%project_on(b, coef, n)
242  if (present(string)) then
243  call this%log_info(string)
244  end if
245  end if
246  end if
247 
248  end subroutine projection_pre_solving
249 
250  subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, dt_controller)
251  class(projection_t), intent(inout) :: this
252  integer, intent(inout) :: n
253  class(ax_t), intent(inout) :: Ax
254  class(coef_t), intent(inout) :: coef
255  class(bc_list_t), intent(inout) :: bclst
256  type(gs_t), intent(inout) :: gs_h
257  real(kind=rp), intent(inout), dimension(n) :: x
258  integer, intent(in) :: tstep
259  type(time_step_controller_t), intent(in) :: dt_controller
260 
261  if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
262  if (.not.(dt_controller%if_variable_dt) .or. &
263  (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
264  call this%project_back(x, ax, coef, bclst, gs_h, n)
265  end if
266  end if
267 
268  end subroutine projection_post_solving
269 
270  subroutine bcknd_project_on(this, b, coef, n)
271  class(projection_t), intent(inout) :: this
272  integer, intent(inout) :: n
273  class(coef_t), intent(inout) :: coef
274  real(kind=rp), intent(inout), dimension(n) :: b
275  call profiler_start_region('Project on', 16)
276  if (neko_bcknd_device .eq. 1) then
277  call device_project_on(this, b, coef, n)
278  else
279  call cpu_project_on(this, b, coef, n)
280  end if
282  end subroutine bcknd_project_on
283 
284  subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n)
285  class(projection_t) :: this
286  integer, intent(inout) :: n
287  class(ax_t), intent(inout) :: Ax
288  class(coef_t), intent(inout) :: coef
289  class(bc_list_t), intent(inout) :: bclst
290  type(gs_t), intent(inout) :: gs_h
291  real(kind=rp), intent(inout), dimension(n) :: x
292  type(c_ptr) :: x_d
293 
294  call profiler_start_region('Project back', 17)
295 
296  if (neko_bcknd_device .eq. 1) then
297  x_d = device_get_ptr(x)
298  if (this%m .gt. 0) call device_add2(x_d,this%xbar_d,n) ! Restore desired solution
299  if (this%m .eq. this%L) then
300  this%m = 1
301  else
302  this%m = min(this%m+1,this%L)
303  end if
304 
305  call device_copy(this%xx_d(this%m),x_d,n) ! Update (X,B)
306 
307  else
308  if (this%m.gt.0) call add2(x,this%xbar,n) ! Restore desired solution
309  if (this%m .eq. this%L) then
310  this%m = 1
311  else
312  this%m = min(this%m+1,this%L)
313  end if
314 
315  call copy (this%xx(1,this%m),x,n) ! Update (X,B)
316  end if
317 
318  call ax%compute(this%bb(1,this%m), x, coef, coef%msh, coef%Xh)
319  call gs_h%gs_op_vector(this%bb(1,this%m), n, gs_op_add)
320  call bc_list_apply_scalar(bclst, this%bb(1,this%m), n)
321 
322  if (neko_bcknd_device .eq. 1) then
323  call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
324  else
325  call cpu_proj_ortho (this,this%xx,this%bb,coef%mult,n)
326  end if
328  end subroutine bcknd_project_back
329 
330 
331 
332  subroutine cpu_project_on(this, b, coef, n)
333  class(projection_t), intent(inout) :: this
334  integer, intent(inout) :: n
335  class(coef_t), intent(inout) :: coef
336  real(kind=rp), intent(inout), dimension(n) :: b
337  integer :: i, j, k, ierr
338  real(kind=rp) :: work(this%L), alpha(this%L)
339 
340  associate(xbar => this%xbar, xx => this%xx, &
341  bb => this%bb)
342 
343  if (this%m .le. 0) return
344 
345  !First round of CGS
346  call rzero(alpha, this%m)
347  this%proj_res = sqrt(glsc3(b,b,coef%mult,n)/coef%volume)
348  this%proj_m = this%m
349  do i = 1, n, neko_blk_size
350  j = min(neko_blk_size, n-i+1)
351  do k = 1, this%m
352  alpha(k) = alpha(k) + vlsc3(xx(i,k), coef%mult(i,1,1,1), b(i), j)
353  end do
354  end do
355 
356  !First one outside loop to avoid zeroing xbar and bbar
357  call mpi_allreduce(mpi_in_place, alpha, this%m, &
358  mpi_real_precision, mpi_sum, neko_comm, ierr)
359 
360  call rzero(work, this%m)
361 
362  do i = 1, n, neko_blk_size
363  j = min(neko_blk_size, n-i+1)
364  call cmult2(xbar(i), xx(i,1), alpha(1), j)
365  call add2s2(b(i), bb(i,1), -alpha(1), j)
366  do k = 2,this%m
367  call add2s2(xbar(i), xx(i,k), alpha(k), j)
368  call add2s2(b(i), bb(i,k), -alpha(k), j)
369  end do
370  !Second round of CGS
371  do k = 1, this%m
372  work(k) = work(k) + vlsc3(xx(i,k), coef%mult(i,1,1,1), b(i), j)
373  end do
374  end do
375 
376  call mpi_allreduce(work, alpha, this%m, &
377  mpi_real_precision, mpi_sum, neko_comm, ierr)
378 
379  do i = 1, n, neko_blk_size
380  j = min(neko_blk_size, n-i+1)
381  do k = 1,this%m
382  call add2s2(xbar(i), xx(i,k), alpha(k), j)
383  call add2s2(b(i), bb(i,k), -alpha(k), j)
384  end do
385  end do
386  end associate
387  end subroutine cpu_project_on
388 
389  subroutine device_project_on(this, b, coef, n)
390  class(projection_t), intent(inout) :: this
391  integer, intent(inout) :: n
392  class(coef_t), intent(inout) :: coef
393  real(kind=rp), intent(inout), dimension(n) :: b
394  real(kind=rp) :: alpha(this%L)
395  type(c_ptr) :: b_d
396  integer :: i
397  b_d = device_get_ptr(b)
398 
399  associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
400  bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
401 
402  if (this%m .le. 0) return
403 
404 
405 
406  this%proj_res = sqrt(device_glsc3(b_d,b_d,coef%mult_d,n)/coef%volume)
407  this%proj_m = this%m
408  if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
409  call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
410  coef%mult_d, xbar_d, this%m, n)
411  else
412  if (neko_bcknd_opencl .eq. 1) then
413  do i = 1, this%m
414  alpha(i) = device_glsc3(b_d,xx_d(i),coef%mult_d,n)
415  end do
416  else
417  call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
418  call device_memcpy(alpha, alpha_d, this%m, &
419  host_to_device, sync=.false.)
420  end if
421  call device_rzero(xbar_d, n)
422  if (neko_bcknd_opencl .eq. 1) then
423  do i = 1, this%m
424  call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
425  end do
426  call cmult(alpha, -1.0_rp, this%m)
427  else
428  call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
429  call device_cmult(alpha_d, -1.0_rp, this%m)
430  end if
431 
432  if (neko_bcknd_opencl .eq. 1) then
433  do i = 1, this%m
434  call device_add2s2(b_d, bb_d(i), alpha(i), n)
435  alpha(i) = device_glsc3(b_d,xx_d(i),coef%mult_d,n)
436  end do
437  else
438  call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
439  call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
440  call device_memcpy(alpha, alpha_d, this%m, &
441  host_to_device, sync=.false.)
442  end if
443 
444  if (neko_bcknd_opencl .eq. 1) then
445  do i = 1, this%m
446  call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
447  call cmult(alpha, -1.0_rp, this%m)
448  call device_add2s2(b_d, bb_d(i), alpha(i), n)
449  end do
450  else
451  call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
452  call device_cmult(alpha_d, -1.0_rp, this%m)
453  call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
454  end if
455  end if
456 
457  end associate
458  end subroutine device_project_on
459 
460  !This is a lot more primitive than on the CPU
461  subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
462  type(projection_t) :: this
463  integer, intent(inout) :: n
464  type(c_ptr), dimension(this%L) :: xx_d, bb_d
465  type(c_ptr) :: w_d
466  real(kind=rp) :: nrm, scl
467  real(kind=rp) :: alpha(this%L)
468  integer :: i
469 
470  associate(m => this%m, xx_d_d => this%xx_d_d, &
471  bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
472 
473  if(m .le. 0) return
474 
475  if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
476  call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
477  w_d, xx_d(m), this%m, n, nrm)
478  else
479  if (neko_bcknd_opencl .eq. 1)then
480  do i = 1, m
481  alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
482  end do
483  else
484  call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
485  end if
486  nrm = sqrt(alpha(m))
487  call cmult(alpha, -1.0_rp,m)
488  if (neko_bcknd_opencl .eq. 1)then
489  do i = 1, m - 1
490  call device_add2s2(xx_d(m),xx_d(i),alpha(i), n)
491  call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
492 
493  alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
494  end do
495  else
496  call device_memcpy(alpha, alpha_d, this%m, &
497  host_to_device, sync=.false.)
498  call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
499  call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
500 
501  call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
502  end if
503  call cmult(alpha, -1.0_rp,m)
504  if (neko_bcknd_opencl .eq. 1)then
505  do i = 1, m - 1
506  call device_add2s2(xx_d(m),xx_d(i),alpha(i),n)
507  call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
508  alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
509  end do
510  else
511  call device_memcpy(alpha, alpha_d, m, &
512  host_to_device, sync=.false.)
513  call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
514  call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
515  call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
516  end if
517  end if
518 
519  alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
520  alpha(m) = sqrt(alpha(m))
521 
522  if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
523  scl = 1.0_rp / alpha(m)
524  call device_cmult(xx_d(m), scl, n)
525  call device_cmult(bb_d(m), scl, n)
526 
527 
528  else !New vector is not linearly independent, forget about it
529  if(pe_rank .eq. 0) then
530  call neko_warning('New vector not linearly indepependent!')
531  end if
532  m = m - 1 !Remove column
533  endif
534 
535  end associate
536 
537  end subroutine device_proj_ortho
538 
539 
540  subroutine cpu_proj_ortho(this, xx, bb, w, n)
541  type(projection_t) :: this
542  integer, intent(inout) :: n
543  real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
544  real(kind=rp), dimension(n), intent(inout) :: w
545  real(kind=rp) :: nrm, scl1, scl2, c, s
546  real(kind=rp) :: alpha(this%L), beta(this%L)
547  integer :: i, j, k, h, ierr
548 
549  associate(m => this%m)
550 
551  if(m .le. 0) return !No vectors to ortho-normalize
552 
553  ! AX = B
554  ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
555  call rzero(alpha, m)
556  do i = 1, n, neko_blk_size
557  j = min(neko_blk_size, n-i+1)
558  do k = 1, m !First round CGS
559  alpha(k) = alpha(k) + 0.5_rp * (vlsc3(xx(i,k), w(i), bb(i,m), j) &
560  + vlsc3(bb(i,k), w(i), xx(i,m), j))
561  end do
562  end do
563 
564  call mpi_allreduce(mpi_in_place, alpha, this%m, &
565  mpi_real_precision, mpi_sum, neko_comm, ierr)
566 
567  nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
568 
569 
570  do i = 1, n, neko_blk_size
571  j = min(neko_blk_size, n-i+1)
572  do k = 1,m-1
573  call add2s2(xx(i,m), xx(i,k), -alpha(k), j)
574  call add2s2(bb(i,m), bb(i,k), -alpha(k), j)
575  end do
576  end do
577  call rzero(beta,m)
578 
579  do i = 1, n, neko_blk_size
580  j = min(neko_blk_size, n-i+1)
581  do k = 1,m-1
582  beta(k) = beta(k) + 0.5_rp * (vlsc3(xx(i,k), w(i), bb(i,m), j) &
583  + vlsc3(bb(i,k), w(i), xx(i,m), j))
584  end do
585  end do
586 
587  call mpi_allreduce(mpi_in_place, beta, this%m-1, &
588  mpi_real_precision, mpi_sum, neko_comm, ierr)
589 
590  alpha(m) = 0.0_rp
591 
592  do i = 1, n, neko_blk_size
593  j = min(neko_blk_size,n-i+1)
594  do k = 1, m-1
595  call add2s2(xx(i,m), xx(i,k), -beta(k), j)
596  call add2s2(bb(i,m), bb(i,k), -beta(k), j)
597  end do
598  alpha(m) = alpha(m) + vlsc3(xx(i,m), w(i), bb(i,m), j)
599  end do
600  do k = 1, m-1
601  alpha(k) = alpha(k) + beta(k)
602  end do
603 
604  !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
605  call mpi_allreduce(mpi_in_place, alpha(m), 1, &
606  mpi_real_precision, mpi_sum, neko_comm, ierr)
607  alpha(m) = sqrt(alpha(m))
608  !dx and db now stored in last column of xx and bb
609 
610  if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
611  !Normalize dx and db
612  scl1 = 1.0_rp / alpha(m)
613  call cmult(xx(1,m), scl1, n)
614  call cmult(bb(1,m), scl1, n)
615 
616  else !New vector is not linearly independent, forget about it
617  k = m !location of rank deficient column
618  if(pe_rank .eq. 0) then
619  call neko_warning('New vector not linearly indepependent!')
620  end if
621  m = m - 1 !Remove column
622  endif
623 
624  end associate
625 
626  end subroutine cpu_proj_ortho
627 
628  subroutine print_proj_info(this,string)
629  class(projection_t) :: this
630  character(len=*) :: string
631  character(len=LOG_SIZE) :: log_buf
632 
633  if (this%proj_m .gt. 0) then
634  write(log_buf, '(A,A)') 'Projection ', string
635  call neko_log%message(log_buf)
636  write(log_buf, '(A,A)') 'Proj. vec.:',' Orig. residual:'
637  call neko_log%message(log_buf)
638  write(log_buf, '(I11,3x, E15.7,5x)') this%proj_m, this%proj_res
639  call neko_log%message(log_buf)
640  end if
641 
642  end subroutine print_proj_info
643 
644  subroutine bcknd_clear(this, n)
645  class(projection_t) :: this
646  integer, intent(in) :: n
647  integer :: i
648 
649  this%m = 0
650  this%proj_m = 0
651 
652  do i = 1, this%L
653  if (neko_bcknd_device .eq. 1) then
654  call device_rzero(this%xx_d(i), n)
655  call device_rzero(this%xx_d(i), n)
656  else
657  call rzero(this%xx(1,i),n)
658  call rzero(this%bb(1,i),n)
659  end if
660  end do
661 
662  end subroutine bcknd_clear
663 
664 end module projection
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Copy data between host and device (or device and device)
Definition: device.F90:51
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
subroutine, public bc_list_apply_scalar(bclst, x, n, t, tstep)
Apply a list of boundary conditions to a scalar field.
Definition: bc.f90:515
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition: comm.F90:22
subroutine, public device_add2(a_d, b_d, n)
subroutine, public device_rzero(a_d, n)
subroutine, public device_copy(a_d, b_d, n)
Interface for device projection.
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
integer, parameter, public host_to_device
Definition: device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:172
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition: device.F90:151
Gather-scatter.
Logging routines.
Definition: log.f90:34
Definition: math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition: math.f90:617
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:810
subroutine, public add2(a, b, n)
Vector addition .
Definition: math.f90:503
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:211
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:167
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition: math.f90:589
real(kind=rp) function, public vlsc3(u, v, w, n)
Compute multiplication sum .
Definition: math.f90:475
Build configurations.
Definition: neko_config.f90:34
integer, parameter neko_blk_size
Definition: neko_config.f90:48
integer, parameter neko_bcknd_device
Definition: neko_config.f90:44
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Profiling interface.
Definition: profiler.F90:34
subroutine, public profiler_end_region
End the most recently started profiler region.
Definition: profiler.F90:95
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition: profiler.F90:76
Project x onto X, the space of old solutions and back again.
Definition: projection.f90:63
subroutine projection_init(this, n, L, activ_step)
Definition: projection.f90:113
subroutine device_project_on(this, b, coef, n)
Definition: projection.f90:390
subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, dt_controller)
Definition: projection.f90:251
subroutine projection_free(this)
Definition: projection.f90:179
subroutine bcknd_clear(this, n)
Definition: projection.f90:645
subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
Definition: projection.f90:285
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
Definition: projection.f90:462
subroutine cpu_proj_ortho(this, xx, bb, w, n)
Definition: projection.f90:541
subroutine bcknd_project_on(this, b, coef, n)
Definition: projection.f90:271
subroutine cpu_project_on(this, b, coef, n)
Definition: projection.f90:333
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
Definition: projection.f90:220
subroutine print_proj_info(this, string)
Definition: projection.f90:629
Implements type time_step_controller.
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:102
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:54
Provides a tool to set time step dt.