Neko  0.9.0
A portable framework for high-order spectral element flow simulations
projection.f90
Go to the documentation of this file.
1 ! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2 !
3 ! The UChicago Argonne, LLC as Operator of Argonne National
4 ! Laboratory holds copyright in the Software. The copyright holder
5 ! reserves all rights except those expressly granted to licensees,
6 ! and U.S. Government license rights.
7 !
8 ! Redistribution and use in source and binary forms, with or without
9 ! modification, are permitted provided that the following conditions
10 ! are met:
11 !
12 ! 1. Redistributions of source code must retain the above copyright
13 ! notice, this list of conditions and the disclaimer below.
14 !
15 ! 2. Redistributions in binary form must reproduce the above copyright
16 ! notice, this list of conditions and the disclaimer (as noted below)
17 ! in the documentation and/or other materials provided with the
18 ! distribution.
19 !
20 ! 3. Neither the name of ANL nor the names of its contributors
21 ! may be used to endorse or promote products derived from this software
22 ! without specific prior written permission.
23 !
24 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28 ! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29 ! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 ! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31 ! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 ! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 ! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 ! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 !
37 ! Additional BSD Notice
38 ! ---------------------
39 ! 1. This notice is required to be provided under our contract with
40 ! the U.S. Department of Energy (DOE). This work was produced at
41 ! Argonne National Laboratory under Contract
42 ! No. DE-AC02-06CH11357 with the DOE.
43 !
44 ! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45 ! LLC nor any of their employees, makes any warranty,
46 ! express or implied, or assumes any liability or responsibility for the
47 ! accuracy, completeness, or usefulness of any information, apparatus,
48 ! product, or process disclosed, or represents that its use would not
49 ! infringe privately-owned rights.
50 !
51 ! 3. Also, reference herein to any specific commercial products, process,
52 ! or services by trade name, trademark, manufacturer or otherwise does
53 ! not necessarily constitute or imply its endorsement, recommendation,
54 ! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55 ! The views and opinions of authors expressed
56 ! herein do not necessarily state or reflect those of the United States
57 ! Government or UCHICAGO ARGONNE, LLC, and shall
58 ! not be used for advertising or product endorsement purposes.
59 !
63 module projection
64  use num_types, only : rp, c_rp
65  use math, only : rzero, glsc3, add2, copy, cmult
66  use coefs, only : coef_t
67  use ax_product, only : ax_t
68  use bc, only : bc_list_t, bc_list_apply_scalar
69  use comm
70  use gather_scatter, only : gs_t, gs_op_add
71  use neko_config, only : neko_bcknd_device
72  use device
78  use logger, only : log_size, neko_log
79  use utils, only : neko_warning
80  use, intrinsic :: iso_c_binding
82 
83  implicit none
84  private
85 
86  type, public :: projection_t
87  real(kind=rp), allocatable :: xx(:,:)
88  real(kind=rp), allocatable :: bb(:,:)
89  real(kind=rp), allocatable :: xbar(:)
90  type(c_ptr), allocatable :: xx_d(:)
91  type(c_ptr), allocatable :: bb_d(:)
92  type(c_ptr) :: xbar_d = c_null_ptr
93  type(c_ptr) :: alpha_d = c_null_ptr
94  type(c_ptr) :: xx_d_d = c_null_ptr
95  type(c_ptr) :: bb_d_d = c_null_ptr
96  integer :: m, l
97  real(kind=rp) :: tol = 1e-7_rp
98  !logging variables
99  real(kind=rp) :: proj_res
100  integer :: proj_m = 0
101  integer :: activ_step ! steps to activate projection
102  contains
103  procedure, pass(this) :: clear => bcknd_clear
104  procedure, pass(this) :: project_on => bcknd_project_on
105  procedure, pass(this) :: project_back => bcknd_project_back
106  procedure, pass(this) :: log_info => print_proj_info
107  procedure, pass(this) :: init => projection_init
108  procedure, pass(this) :: free => projection_free
109  procedure, pass(this) :: pre_solving => projection_pre_solving
110  procedure, pass(this) :: post_solving => projection_post_solving
111  end type projection_t
112 
113 contains
114 
115  subroutine projection_init(this, n, L, activ_step)
116  class(projection_t), target, intent(inout) :: this
117  integer, intent(in) :: n
118  integer, optional, intent(in) :: L, activ_step
119  integer :: i
120  integer(c_size_t) :: ptr_size
121  type(c_ptr) :: ptr
122  real(c_rp) :: dummy
123 
124  call this%free()
125 
126  if (present(l)) then
127  this%L = l
128  else
129  this%L = 20
130  end if
131 
132  if (present(activ_step)) then
133  this%activ_step = activ_step
134  else
135  this%activ_step = 5
136  end if
137 
138  this%m = 0
139 
140  allocate(this%xx(n,this%L))
141  allocate(this%bb(n,this%L))
142  allocate(this%xbar(n))
143  allocate(this%xx_d(this%L))
144  allocate(this%bb_d(this%L))
145  call rzero(this%xbar,n)
146  do i = 1, this%L
147  call rzero(this%xx(1,i),n)
148  call rzero(this%bb(1,i),n)
149  end do
150  if (neko_bcknd_device .eq. 1) then
151 
152  call device_map(this%xbar, this%xbar_d,n)
153  call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L,c_size_t))
154 
155  call device_rzero(this%xbar_d, n)
156  call device_rzero(this%alpha_d, this%L)
157 
158  do i = 1, this%L
159  this%xx_d(i) = c_null_ptr
160  call device_map(this%xx(:,i), this%xx_d(i), n)
161  call device_rzero(this%xx_d(i), n)
162  this%bb_d(i) = c_null_ptr
163  call device_map(this%bb(:,i), this%bb_d(i), n)
164  call device_rzero(this%bb_d(i), n)
165  end do
166 
167  ptr_size = c_sizeof(c_null_ptr) * this%L
168  call device_alloc(this%xx_d_d, ptr_size)
169  ptr = c_loc(this%xx_d)
170  call device_memcpy(ptr,this%xx_d_d, ptr_size, &
171  host_to_device, sync=.false.)
172  call device_alloc(this%bb_d_d, ptr_size)
173  ptr = c_loc(this%bb_d)
174  call device_memcpy(ptr,this%bb_d_d, ptr_size, &
175  host_to_device, sync=.false.)
176  end if
177 
178 
179  end subroutine projection_init
180 
181  subroutine projection_free(this)
182  class(projection_t), intent(inout) :: this
183  integer :: i
184  if (allocated(this%xx)) then
185  deallocate(this%xx)
186  end if
187  if (allocated(this%bb)) then
188  deallocate(this%bb)
189  end if
190  if (allocated(this%xbar)) then
191  deallocate(this%xbar)
192  end if
193  if (allocated(this%xx_d)) then
194  do i = 1, this%L
195  if (c_associated(this%xx_d(i))) then
196  call device_free(this%xx_d(i))
197  end if
198  end do
199  end if
200  if (c_associated(this%xx_d_d)) then
201  call device_free(this%xx_d_d)
202  end if
203  if (c_associated(this%xbar_d)) then
204  call device_free(this%xbar_d)
205  end if
206  if (c_associated(this%alpha_d)) then
207  call device_free(this%alpha_d)
208  end if
209  if (allocated(this%bb_d)) then
210  do i = 1, this%L
211  if (c_associated(this%bb_d(i))) then
212  call device_free(this%bb_d(i))
213  end if
214  end do
215  end if
216  if (c_associated(this%bb_d_d)) then
217  call device_free(this%bb_d_d)
218  end if
219 
220  end subroutine projection_free
221 
222  subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
223  class(projection_t), intent(inout) :: this
224  integer, intent(inout) :: n
225  real(kind=rp), intent(inout), dimension(n) :: b
226  integer, intent(in) :: tstep
227  class(coef_t), intent(inout) :: coef
228  type(time_step_controller_t), intent(in) :: dt_controller
229  character(len=*), optional :: string
230 
231  if( tstep .gt. this%activ_step .and. this%L .gt. 0) then
232  if (dt_controller%if_variable_dt) then
233  if (dt_controller%dt_last_change .eq. 0) then ! the time step at which dt is changed
234  call this%clear(n)
235  else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
236  ! activate projection some steps after dt is changed
237  ! note that dt_last_change start from 0
238  call this%project_on(b, coef, n)
239  if (present(string)) then
240  call this%log_info(string)
241  end if
242  end if
243  else
244  call this%project_on(b, coef, n)
245  if (present(string)) then
246  call this%log_info(string)
247  end if
248  end if
249  end if
250 
251  end subroutine projection_pre_solving
252 
253  subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, dt_controller)
254  class(projection_t), intent(inout) :: this
255  integer, intent(inout) :: n
256  class(ax_t), intent(inout) :: Ax
257  class(coef_t), intent(inout) :: coef
258  class(bc_list_t), intent(inout) :: bclst
259  type(gs_t), intent(inout) :: gs_h
260  real(kind=rp), intent(inout), dimension(n) :: x
261  integer, intent(in) :: tstep
262  type(time_step_controller_t), intent(in) :: dt_controller
263 
264  if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
265  if (.not.(dt_controller%if_variable_dt) .or. &
266  (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
267  call this%project_back(x, ax, coef, bclst, gs_h, n)
268  end if
269  end if
270 
271  end subroutine projection_post_solving
272 
273  subroutine bcknd_project_on(this, b, coef, n)
274  class(projection_t), intent(inout) :: this
275  integer, intent(inout) :: n
276  class(coef_t), intent(inout) :: coef
277  real(kind=rp), intent(inout), dimension(n) :: b
278  call profiler_start_region('Project on', 16)
279  if (neko_bcknd_device .eq. 1) then
280  call device_project_on(this, b, coef, n)
281  else
282  call cpu_project_on(this, b, coef, n)
283  end if
284  call profiler_end_region('Project on', 16)
285  end subroutine bcknd_project_on
286 
287  subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n)
288  class(projection_t) :: this
289  integer, intent(inout) :: n
290  class(ax_t), intent(inout) :: Ax
291  class(coef_t), intent(inout) :: coef
292  class(bc_list_t), intent(inout) :: bclst
293  type(gs_t), intent(inout) :: gs_h
294  real(kind=rp), intent(inout), dimension(n) :: x
295  type(c_ptr) :: x_d
296 
297  call profiler_start_region('Project back', 17)
298 
299  if (neko_bcknd_device .eq. 1) then
300  x_d = device_get_ptr(x)
301  if (this%m .gt. 0) call device_add2(x_d,this%xbar_d,n) ! Restore desired solution
302  if (this%m .eq. this%L) then
303  this%m = 1
304  else
305  this%m = min(this%m+1,this%L)
306  end if
307 
308  call device_copy(this%xx_d(this%m),x_d,n) ! Update (X,B)
309 
310  else
311  if (this%m.gt.0) call add2(x,this%xbar,n) ! Restore desired solution
312  if (this%m .eq. this%L) then
313  this%m = 1
314  else
315  this%m = min(this%m+1,this%L)
316  end if
317 
318  call copy (this%xx(1,this%m),x,n) ! Update (X,B)
319  end if
320 
321  call ax%compute(this%bb(1,this%m), x, coef, coef%msh, coef%Xh)
322  call gs_h%gs_op_vector(this%bb(1,this%m), n, gs_op_add)
323  call bc_list_apply_scalar(bclst, this%bb(1,this%m), n)
324 
325  if (neko_bcknd_device .eq. 1) then
326  call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
327  else
328  call cpu_proj_ortho (this,this%xx,this%bb,coef%mult,n)
329  end if
330  call profiler_end_region('Project back', 17)
331  end subroutine bcknd_project_back
332 
333 
334 
335  subroutine cpu_project_on(this, b, coef, n)
336  class(projection_t), intent(inout) :: this
337  integer, intent(inout) :: n
338  class(coef_t), intent(inout) :: coef
339  real(kind=rp), intent(inout), dimension(n) :: b
340  integer :: i, j, k, l, ierr
341  real(kind=rp) :: work(this%L), alpha(this%L), s
342 
343  associate(xbar => this%xbar, xx => this%xx, &
344  bb => this%bb)
345 
346  if (this%m .le. 0) return
347 
348  !First round of CGS
349  call rzero(alpha, this%m)
350  this%proj_res = sqrt(glsc3(b,b,coef%mult,n)/coef%volume)
351  this%proj_m = this%m
352  do i = 1, n, neko_blk_size
353  j = min(neko_blk_size, n-i+1)
354  do k = 1, this%m
355  s = 0.0_rp
356  do l = 0, (j-1)
357  s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
358  end do
359  alpha(k) = alpha(k) + s
360  end do
361  end do
362 
363  !First one outside loop to avoid zeroing xbar and bbar
364  call mpi_allreduce(mpi_in_place, alpha, this%m, &
365  mpi_real_precision, mpi_sum, neko_comm, ierr)
366 
367  call rzero(work, this%m)
368 
369  do i = 1, n, neko_blk_size
370  j = min(neko_blk_size, n-i+1)
371  do l = 0, (j-1)
372  xbar(i+l) = alpha(1) * xx(i+l,1)
373  b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
374  end do
375  do k = 2,this%m
376  do l = 0, (j-1)
377  xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
378  b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
379  end do
380  end do
381  !Second round of CGS
382  do k = 1, this%m
383  s = 0.0_rp
384  do l = 0, (j-1)
385  s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
386  end do
387  work(k) = work(k) + s
388  end do
389  end do
390 
391  call mpi_allreduce(work, alpha, this%m, &
392  mpi_real_precision, mpi_sum, neko_comm, ierr)
393 
394  do i = 1, n, neko_blk_size
395  j = min(neko_blk_size, n-i+1)
396  do k = 1,this%m
397  do l = 0, (j-1)
398  xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
399  b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
400  end do
401  end do
402  end do
403  end associate
404  end subroutine cpu_project_on
405 
406  subroutine device_project_on(this, b, coef, n)
407  class(projection_t), intent(inout) :: this
408  integer, intent(inout) :: n
409  class(coef_t), intent(inout) :: coef
410  real(kind=rp), intent(inout), dimension(n) :: b
411  real(kind=rp) :: alpha(this%L)
412  type(c_ptr) :: b_d
413  integer :: i
414  b_d = device_get_ptr(b)
415 
416  associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
417  bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
418 
419  if (this%m .le. 0) return
420 
421 
422 
423  this%proj_res = sqrt(device_glsc3(b_d,b_d,coef%mult_d,n)/coef%volume)
424  this%proj_m = this%m
425  if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
426  call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
427  coef%mult_d, xbar_d, this%m, n)
428  else
429  if (neko_bcknd_opencl .eq. 1) then
430  do i = 1, this%m
431  alpha(i) = device_glsc3(b_d,xx_d(i),coef%mult_d,n)
432  end do
433  else
434  call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
435  call device_memcpy(alpha, alpha_d, this%m, &
436  host_to_device, sync=.false.)
437  end if
438  call device_rzero(xbar_d, n)
439  if (neko_bcknd_opencl .eq. 1) then
440  do i = 1, this%m
441  call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
442  end do
443  call cmult(alpha, -1.0_rp, this%m)
444  else
445  call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
446  call device_cmult(alpha_d, -1.0_rp, this%m)
447  end if
448 
449  if (neko_bcknd_opencl .eq. 1) then
450  do i = 1, this%m
451  call device_add2s2(b_d, bb_d(i), alpha(i), n)
452  alpha(i) = device_glsc3(b_d,xx_d(i),coef%mult_d,n)
453  end do
454  else
455  call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
456  call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
457  call device_memcpy(alpha, alpha_d, this%m, &
458  host_to_device, sync=.false.)
459  end if
460 
461  if (neko_bcknd_opencl .eq. 1) then
462  do i = 1, this%m
463  call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
464  call cmult(alpha, -1.0_rp, this%m)
465  call device_add2s2(b_d, bb_d(i), alpha(i), n)
466  end do
467  else
468  call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
469  call device_cmult(alpha_d, -1.0_rp, this%m)
470  call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
471  end if
472  end if
473 
474  end associate
475  end subroutine device_project_on
476 
477  !This is a lot more primitive than on the CPU
478  subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
479  type(projection_t) :: this
480  integer, intent(inout) :: n
481  type(c_ptr), dimension(this%L) :: xx_d, bb_d
482  type(c_ptr) :: w_d
483  real(kind=rp) :: nrm, scl
484  real(kind=rp) :: alpha(this%L)
485  integer :: i
486 
487  associate(m => this%m, xx_d_d => this%xx_d_d, &
488  bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
489 
490  if(m .le. 0) return
491 
492  if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
493  call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
494  w_d, xx_d(m), this%m, n, nrm)
495  else
496  if (neko_bcknd_opencl .eq. 1)then
497  do i = 1, m
498  alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
499  end do
500  else
501  call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
502  end if
503  nrm = sqrt(alpha(m))
504  call cmult(alpha, -1.0_rp,m)
505  if (neko_bcknd_opencl .eq. 1)then
506  do i = 1, m - 1
507  call device_add2s2(xx_d(m),xx_d(i),alpha(i), n)
508  call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
509 
510  alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
511  end do
512  else
513  call device_memcpy(alpha, alpha_d, this%m, &
514  host_to_device, sync=.false.)
515  call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
516  call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
517 
518  call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
519  end if
520  call cmult(alpha, -1.0_rp,m)
521  if (neko_bcknd_opencl .eq. 1)then
522  do i = 1, m - 1
523  call device_add2s2(xx_d(m),xx_d(i),alpha(i),n)
524  call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
525  alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
526  end do
527  else
528  call device_memcpy(alpha, alpha_d, m, &
529  host_to_device, sync=.false.)
530  call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
531  call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
532  call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
533  end if
534  end if
535 
536  alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
537  alpha(m) = sqrt(alpha(m))
538 
539  if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
540  scl = 1.0_rp / alpha(m)
541  call device_cmult(xx_d(m), scl, n)
542  call device_cmult(bb_d(m), scl, n)
543 
544 
545  else !New vector is not linearly independent, forget about it
546  if(pe_rank .eq. 0) then
547  call neko_warning('New vector not linearly indepependent!')
548  end if
549  m = m - 1 !Remove column
550  endif
551 
552  end associate
553 
554  end subroutine device_proj_ortho
555 
556 
557  subroutine cpu_proj_ortho(this, xx, bb, w, n)
558  type(projection_t) :: this
559  integer, intent(inout) :: n
560  real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
561  real(kind=rp), dimension(n), intent(inout) :: w
562  real(kind=rp) :: nrm, scl1, scl2, c, s
563  real(kind=rp) :: alpha(this%L), beta(this%L)
564  integer :: i, j, k, l, h, ierr
565 
566  associate(m => this%m)
567 
568  if(m .le. 0) return !No vectors to ortho-normalize
569 
570  ! AX = B
571  ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
572  call rzero(alpha, m)
573  do i = 1, n, neko_blk_size
574  j = min(neko_blk_size, n-i+1)
575  do k = 1, m !First round CGS
576  s = 0.0_rp
577  c = 0.0_rp
578  do l = 0, (j-1)
579  s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
580  c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
581  end do
582  alpha(k) = alpha(k) + 0.5_rp * (s + c)
583  end do
584  end do
585 
586  call mpi_allreduce(mpi_in_place, alpha, this%m, &
587  mpi_real_precision, mpi_sum, neko_comm, ierr)
588 
589  nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
590 
591 
592  do i = 1, n, neko_blk_size
593  j = min(neko_blk_size, n-i+1)
594  do k = 1,m-1
595  do l = 0, (j-1)
596  xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
597  bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
598  end do
599  end do
600  end do
601  call rzero(beta,m)
602 
603  do i = 1, n, neko_blk_size
604  j = min(neko_blk_size, n-i+1)
605  do k = 1,m-1
606  s = 0.0_rp
607  c = 0.0_rp
608  do l = 0, (j-1)
609  s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
610  c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
611  end do
612  beta(k) = beta(k) + 0.5_rp * (s + c)
613  end do
614  end do
615 
616  call mpi_allreduce(mpi_in_place, beta, this%m-1, &
617  mpi_real_precision, mpi_sum, neko_comm, ierr)
618 
619  alpha(m) = 0.0_rp
620 
621  do i = 1, n, neko_blk_size
622  j = min(neko_blk_size,n-i+1)
623  do k = 1, m-1
624  do l = 0, (j-1)
625  xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
626  bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
627  end do
628  end do
629  s = 0.0_rp
630  do l = 0, (j-1)
631  s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
632  end do
633  alpha(m) = alpha(m) + s
634  end do
635  do k = 1, m-1
636  alpha(k) = alpha(k) + beta(k)
637  end do
638 
639  !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
640  call mpi_allreduce(mpi_in_place, alpha(m), 1, &
641  mpi_real_precision, mpi_sum, neko_comm, ierr)
642  alpha(m) = sqrt(alpha(m))
643  !dx and db now stored in last column of xx and bb
644 
645  if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
646  !Normalize dx and db
647  scl1 = 1.0_rp / alpha(m)
648  do i = 0, (n - 1)
649  xx(1+i,m) = scl1 * xx(1+i,m)
650  bb(1+i,m) = scl1 * bb(1+i,m)
651  end do
652 
653  else !New vector is not linearly independent, forget about it
654  k = m !location of rank deficient column
655  if(pe_rank .eq. 0) then
656  call neko_warning('New vector not linearly indepependent!')
657  end if
658  m = m - 1 !Remove column
659  endif
660 
661  end associate
662 
663  end subroutine cpu_proj_ortho
664 
665  subroutine print_proj_info(this,string)
666  class(projection_t) :: this
667  character(len=*) :: string
668  character(len=LOG_SIZE) :: log_buf
669 
670  if (this%proj_m .gt. 0) then
671  write(log_buf, '(A,A)') 'Projection ', string
672  call neko_log%message(log_buf)
673  write(log_buf, '(A,A)') 'Proj. vec.:',' Orig. residual:'
674  call neko_log%message(log_buf)
675  write(log_buf, '(I11,3x, E15.7,5x)') this%proj_m, this%proj_res
676  call neko_log%message(log_buf)
677  end if
678 
679  end subroutine print_proj_info
680 
681  subroutine bcknd_clear(this, n)
682  class(projection_t) :: this
683  integer, intent(in) :: n
684  integer :: i, j
685 
686  this%m = 0
687  this%proj_m = 0
688 
689  do i = 1, this%L
690  if (neko_bcknd_device .eq. 1) then
691  call device_rzero(this%xx_d(i), n)
692  call device_rzero(this%xx_d(i), n)
693  else
694  do j = 1, n
695  this%xx(j,i) = 0.0_rp
696  this%bb(j,i) = 0.0_rp
697  end do
698  end if
699  end do
700 
701  end subroutine bcknd_clear
702 
703 end module projection
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Copy data between host and device (or device and device)
Definition: device.F90:51
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
subroutine, public bc_list_apply_scalar(bclst, x, n, t, tstep)
Apply a list of boundary conditions to a scalar field.
Definition: bc.f90:530
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition: comm.F90:23
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Definition: device_math.F90:76
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
integer, parameter, public host_to_device
Definition: device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:185
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition: device.F90:164
Gather-scatter.
Logging routines.
Definition: log.f90:34
type(log_t), public neko_log
Global log stream.
Definition: log.f90:65
integer, parameter, public log_size
Definition: log.f90:42
Definition: math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition: math.f90:311
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:895
subroutine, public add2(a, b, n)
Vector addition .
Definition: math.f90:587
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:239
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:195
Build configurations.
Definition: neko_config.f90:34
integer, parameter neko_bcknd_device
Definition: neko_config.f90:44
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Profiling interface.
Definition: profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition: profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition: profiler.F90:109
Project x onto X, the space of old solutions and back again.
Definition: projection.f90:63
subroutine projection_init(this, n, L, activ_step)
Definition: projection.f90:116
subroutine device_project_on(this, b, coef, n)
Definition: projection.f90:407
subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, dt_controller)
Definition: projection.f90:254
subroutine projection_free(this)
Definition: projection.f90:182
subroutine bcknd_clear(this, n)
Definition: projection.f90:682
subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
Definition: projection.f90:288
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
Definition: projection.f90:479
subroutine cpu_proj_ortho(this, xx, bb, w, n)
Definition: projection.f90:558
subroutine bcknd_project_on(this, b, coef, n)
Definition: projection.f90:274
subroutine cpu_project_on(this, b, coef, n)
Definition: projection.f90:336
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
Definition: projection.f90:223
subroutine print_proj_info(this, string)
Definition: projection.f90:666
Implements type time_step_controller.
Utilities.
Definition: utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition: utils.f90:245
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:104
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Provides a tool to set time step dt.