Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
projection.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
64 use num_types, only : rp, c_rp
65 use math, only : rzero, glsc3, add2, copy, cmult
66 use coefs, only : coef_t
67 use ax_product, only : ax_t
68 use bc_list, only : bc_list_t
69 use comm
70 use gather_scatter, only : gs_t, gs_op_add
72 use device
78 use logger, only : log_size, neko_log
79 use utils, only : neko_warning
80 use, intrinsic :: iso_c_binding
82
83 implicit none
84 private
85
86 type, public :: projection_t
87 real(kind=rp), allocatable :: xx(:,:)
88 real(kind=rp), allocatable :: bb(:,:)
89 real(kind=rp), allocatable :: xbar(:)
90 type(c_ptr), allocatable :: xx_d(:)
91 type(c_ptr), allocatable :: bb_d(:)
92 type(c_ptr) :: xbar_d = c_null_ptr
93 type(c_ptr) :: alpha_d = c_null_ptr
94 type(c_ptr) :: xx_d_d = c_null_ptr
95 type(c_ptr) :: bb_d_d = c_null_ptr
96 integer :: m, l
97 real(kind=rp) :: tol = 1e-7_rp
98 !logging variables
99 real(kind=rp) :: proj_res
100 integer :: proj_m = 0
101 integer :: activ_step ! steps to activate projection
102 contains
103 procedure, pass(this) :: clear => bcknd_clear
104 procedure, pass(this) :: project_on => bcknd_project_on
105 procedure, pass(this) :: project_back => bcknd_project_back
106 procedure, pass(this) :: log_info => print_proj_info
107 procedure, pass(this) :: init => projection_init
108 procedure, pass(this) :: free => projection_free
109 procedure, pass(this) :: pre_solving => projection_pre_solving
110 procedure, pass(this) :: post_solving => projection_post_solving
111 end type projection_t
112
113contains
114
115 subroutine projection_init(this, n, L, activ_step)
116 class(projection_t), target, intent(inout) :: this
117 integer, intent(in) :: n
118 integer, optional, intent(in) :: L, activ_step
119 integer :: i
120 integer(c_size_t) :: ptr_size
121 type(c_ptr) :: ptr
122 real(c_rp) :: dummy
123
124 call this%free()
125
126 if (present(l)) then
127 this%L = l
128 else
129 this%L = 20
130 end if
131
132 if (present(activ_step)) then
133 this%activ_step = activ_step
134 else
135 this%activ_step = 5
136 end if
137
138 this%m = 0
139
140 allocate(this%xx(n,this%L))
141 allocate(this%bb(n,this%L))
142 allocate(this%xbar(n))
143 allocate(this%xx_d(this%L))
144 allocate(this%bb_d(this%L))
145 call rzero(this%xbar,n)
146 do i = 1, this%L
147 call rzero(this%xx(1,i),n)
148 call rzero(this%bb(1,i),n)
149 end do
150 if (neko_bcknd_device .eq. 1) then
151
152 call device_map(this%xbar, this%xbar_d,n)
153 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L,c_size_t))
154
155 call device_rzero(this%xbar_d, n)
156 call device_rzero(this%alpha_d, this%L)
157
158 do i = 1, this%L
159 this%xx_d(i) = c_null_ptr
160 call device_map(this%xx(:,i), this%xx_d(i), n)
161 call device_rzero(this%xx_d(i), n)
162 this%bb_d(i) = c_null_ptr
163 call device_map(this%bb(:,i), this%bb_d(i), n)
164 call device_rzero(this%bb_d(i), n)
165 end do
166
167 ptr_size = c_sizeof(c_null_ptr) * this%L
168 call device_alloc(this%xx_d_d, ptr_size)
169 ptr = c_loc(this%xx_d)
170 call device_memcpy(ptr,this%xx_d_d, ptr_size, &
171 host_to_device, sync=.false.)
172 call device_alloc(this%bb_d_d, ptr_size)
173 ptr = c_loc(this%bb_d)
174 call device_memcpy(ptr,this%bb_d_d, ptr_size, &
175 host_to_device, sync=.false.)
176 end if
177
178
179 end subroutine projection_init
180
181 subroutine projection_free(this)
182 class(projection_t), intent(inout) :: this
183 integer :: i
184 if (allocated(this%xx)) then
185 deallocate(this%xx)
186 end if
187 if (allocated(this%bb)) then
188 deallocate(this%bb)
189 end if
190 if (allocated(this%xbar)) then
191 deallocate(this%xbar)
192 end if
193 if (allocated(this%xx_d)) then
194 do i = 1, this%L
195 if (c_associated(this%xx_d(i))) then
196 call device_free(this%xx_d(i))
197 end if
198 end do
199 end if
200 if (c_associated(this%xx_d_d)) then
201 call device_free(this%xx_d_d)
202 end if
203 if (c_associated(this%xbar_d)) then
204 call device_free(this%xbar_d)
205 end if
206 if (c_associated(this%alpha_d)) then
207 call device_free(this%alpha_d)
208 end if
209 if (allocated(this%bb_d)) then
210 do i = 1, this%L
211 if (c_associated(this%bb_d(i))) then
212 call device_free(this%bb_d(i))
213 end if
214 end do
215 end if
216 if (c_associated(this%bb_d_d)) then
217 call device_free(this%bb_d_d)
218 end if
219
220 end subroutine projection_free
221
222 subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
223 class(projection_t), intent(inout) :: this
224 integer, intent(inout) :: n
225 real(kind=rp), intent(inout), dimension(n) :: b
226 integer, intent(in) :: tstep
227 class(coef_t), intent(inout) :: coef
228 type(time_step_controller_t), intent(in) :: dt_controller
229 character(len=*), optional :: string
230
231 if( tstep .gt. this%activ_step .and. this%L .gt. 0) then
232 if (dt_controller%if_variable_dt) then
233 if (dt_controller%dt_last_change .eq. 0) then ! the time step at which dt is changed
234 call this%clear(n)
235 else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
236 ! activate projection some steps after dt is changed
237 ! note that dt_last_change start from 0
238 call this%project_on(b, coef, n)
239 if (present(string)) then
240 call this%log_info(string)
241 end if
242 end if
243 else
244 call this%project_on(b, coef, n)
245 if (present(string)) then
246 call this%log_info(string)
247 end if
248 end if
249 end if
250
251 end subroutine projection_pre_solving
252
253 subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, dt_controller)
254 class(projection_t), intent(inout) :: this
255 integer, intent(inout) :: n
256 class(ax_t), intent(inout) :: Ax
257 class(coef_t), intent(inout) :: coef
258 class(bc_list_t), intent(inout) :: bclst
259 type(gs_t), intent(inout) :: gs_h
260 real(kind=rp), intent(inout), dimension(n) :: x
261 integer, intent(in) :: tstep
262 type(time_step_controller_t), intent(in) :: dt_controller
263
264 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
265 if (.not.(dt_controller%if_variable_dt) .or. &
266 (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
267 call this%project_back(x, ax, coef, bclst, gs_h, n)
268 end if
269 end if
270
271 end subroutine projection_post_solving
272
273 subroutine bcknd_project_on(this, b, coef, n)
274 class(projection_t), intent(inout) :: this
275 integer, intent(inout) :: n
276 class(coef_t), intent(inout) :: coef
277 real(kind=rp), intent(inout), dimension(n) :: b
278 call profiler_start_region('Project on', 16)
279 if (neko_bcknd_device .eq. 1) then
280 call device_project_on(this, b, coef, n)
281 else
282 call cpu_project_on(this, b, coef, n)
283 end if
284 call profiler_end_region('Project on', 16)
285 end subroutine bcknd_project_on
286
287 subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n)
288 class(projection_t) :: this
289 integer, intent(inout) :: n
290 class(ax_t), intent(inout) :: Ax
291 class(coef_t), intent(inout) :: coef
292 class(bc_list_t), intent(inout) :: bclst
293 type(gs_t), intent(inout) :: gs_h
294 real(kind=rp), intent(inout), dimension(n) :: x
295 type(c_ptr) :: x_d
296
297 call profiler_start_region('Project back', 17)
298
299 if (neko_bcknd_device .eq. 1) then
300 x_d = device_get_ptr(x)
301 if (this%m .gt. 0) call device_add2(x_d,this%xbar_d,n) ! Restore desired solution
302 if (this%m .eq. this%L) then
303 this%m = 1
304 else
305 this%m = min(this%m+1,this%L)
306 end if
307
308 call device_copy(this%xx_d(this%m),x_d,n) ! Update (X,B)
309
310 else
311 if (this%m.gt.0) call add2(x,this%xbar,n) ! Restore desired solution
312 if (this%m .eq. this%L) then
313 this%m = 1
314 else
315 this%m = min(this%m+1,this%L)
316 end if
317
318 call copy (this%xx(1,this%m),x,n) ! Update (X,B)
319 end if
320
321 call ax%compute(this%bb(1,this%m), x, coef, coef%msh, coef%Xh)
322 call gs_h%gs_op_vector(this%bb(1,this%m), n, gs_op_add)
323 call bclst%apply_scalar(this%bb(1,this%m), n)
324
325 if (neko_bcknd_device .eq. 1) then
326 call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
327 else
328 call cpu_proj_ortho (this,this%xx,this%bb,coef%mult,n)
329 end if
330 call profiler_end_region('Project back', 17)
331 end subroutine bcknd_project_back
332
333
334
335 subroutine cpu_project_on(this, b, coef, n)
336 class(projection_t), intent(inout) :: this
337 integer, intent(inout) :: n
338 class(coef_t), intent(inout) :: coef
339 real(kind=rp), intent(inout), dimension(n) :: b
340 integer :: i, j, k, l, ierr
341 real(kind=rp) :: work(this%L), alpha(this%L), s
342
343 associate(xbar => this%xbar, xx => this%xx, &
344 bb => this%bb)
345
346 if (this%m .le. 0) return
347
348 !First round of CGS
349 call rzero(alpha, this%m)
350 this%proj_res = sqrt(glsc3(b,b,coef%mult,n)/coef%volume)
351 this%proj_m = this%m
352 do i = 1, n, neko_blk_size
353 j = min(neko_blk_size, n-i+1)
354 do k = 1, this%m
355 s = 0.0_rp
356 do l = 0, (j-1)
357 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
358 end do
359 alpha(k) = alpha(k) + s
360 end do
361 end do
362
363 !First one outside loop to avoid zeroing xbar and bbar
364 call mpi_allreduce(mpi_in_place, alpha, this%m, &
365 mpi_real_precision, mpi_sum, neko_comm, ierr)
366
367 call rzero(work, this%m)
368
369 do i = 1, n, neko_blk_size
370 j = min(neko_blk_size, n-i+1)
371 do l = 0, (j-1)
372 xbar(i+l) = alpha(1) * xx(i+l,1)
373 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
374 end do
375 do k = 2,this%m
376 do l = 0, (j-1)
377 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
378 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
379 end do
380 end do
381 !Second round of CGS
382 do k = 1, this%m
383 s = 0.0_rp
384 do l = 0, (j-1)
385 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
386 end do
387 work(k) = work(k) + s
388 end do
389 end do
390
391 call mpi_allreduce(work, alpha, this%m, &
392 mpi_real_precision, mpi_sum, neko_comm, ierr)
393
394 do i = 1, n, neko_blk_size
395 j = min(neko_blk_size, n-i+1)
396 do k = 1,this%m
397 do l = 0, (j-1)
398 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
399 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
400 end do
401 end do
402 end do
403 end associate
404 end subroutine cpu_project_on
405
406 subroutine device_project_on(this, b, coef, n)
407 class(projection_t), intent(inout) :: this
408 integer, intent(inout) :: n
409 class(coef_t), intent(inout) :: coef
410 real(kind=rp), intent(inout), dimension(n) :: b
411 real(kind=rp) :: alpha(this%L)
412 type(c_ptr) :: b_d
413 integer :: i
414 b_d = device_get_ptr(b)
415
416 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
417 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
418
419 if (this%m .le. 0) return
420
421
422
423 this%proj_res = sqrt(device_glsc3(b_d,b_d,coef%mult_d,n)/coef%volume)
424 this%proj_m = this%m
425 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
426 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
427 coef%mult_d, xbar_d, this%m, n)
428 else
429 if (neko_bcknd_opencl .eq. 1) then
430 do i = 1, this%m
431 alpha(i) = device_glsc3(b_d,xx_d(i),coef%mult_d,n)
432 end do
433 else
434 call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
435 call device_memcpy(alpha, alpha_d, this%m, &
436 host_to_device, sync=.false.)
437 end if
438 call device_rzero(xbar_d, n)
439 if (neko_bcknd_opencl .eq. 1) then
440 do i = 1, this%m
441 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
442 end do
443 call cmult(alpha, -1.0_rp, this%m)
444 else
445 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
446 call device_cmult(alpha_d, -1.0_rp, this%m)
447 end if
448
449 if (neko_bcknd_opencl .eq. 1) then
450 do i = 1, this%m
451 call device_add2s2(b_d, bb_d(i), alpha(i), n)
452 alpha(i) = device_glsc3(b_d,xx_d(i),coef%mult_d,n)
453 end do
454 else
455 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
456 call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
457 call device_memcpy(alpha, alpha_d, this%m, &
458 host_to_device, sync=.false.)
459 end if
460
461 if (neko_bcknd_opencl .eq. 1) then
462 do i = 1, this%m
463 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
464 call cmult(alpha, -1.0_rp, this%m)
465 call device_add2s2(b_d, bb_d(i), alpha(i), n)
466 end do
467 else
468 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
469 call device_cmult(alpha_d, -1.0_rp, this%m)
470 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
471 end if
472 end if
473
474 end associate
475 end subroutine device_project_on
476
477 !This is a lot more primitive than on the CPU
478 subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
479 type(projection_t) :: this
480 integer, intent(inout) :: n
481 type(c_ptr), dimension(this%L) :: xx_d, bb_d
482 type(c_ptr) :: w_d
483 real(kind=rp) :: nrm, scl
484 real(kind=rp) :: alpha(this%L)
485 integer :: i
486
487 associate(m => this%m, xx_d_d => this%xx_d_d, &
488 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
489
490 if(m .le. 0) return
491
492 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
493 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
494 w_d, xx_d(m), this%m, n, nrm)
495 else
496 if (neko_bcknd_opencl .eq. 1)then
497 do i = 1, m
498 alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
499 end do
500 else
501 call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
502 end if
503 nrm = sqrt(alpha(m))
504 call cmult(alpha, -1.0_rp,m)
505 if (neko_bcknd_opencl .eq. 1)then
506 do i = 1, m - 1
507 call device_add2s2(xx_d(m),xx_d(i),alpha(i), n)
508 call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
509
510 alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
511 end do
512 else
513 call device_memcpy(alpha, alpha_d, this%m, &
514 host_to_device, sync=.false.)
515 call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
516 call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
517
518 call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
519 end if
520 call cmult(alpha, -1.0_rp,m)
521 if (neko_bcknd_opencl .eq. 1)then
522 do i = 1, m - 1
523 call device_add2s2(xx_d(m),xx_d(i),alpha(i),n)
524 call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
525 alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
526 end do
527 else
528 call device_memcpy(alpha, alpha_d, m, &
529 host_to_device, sync=.false.)
530 call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
531 call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
532 call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
533 end if
534 end if
535
536 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
537 alpha(m) = sqrt(alpha(m))
538
539 if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
540 scl = 1.0_rp / alpha(m)
541 call device_cmult(xx_d(m), scl, n)
542 call device_cmult(bb_d(m), scl, n)
543
544
545 else !New vector is not linearly independent, forget about it
546 if(pe_rank .eq. 0) then
547 call neko_warning('New vector not linearly indepependent!')
548 end if
549 m = m - 1 !Remove column
550 endif
551
552 end associate
553
554 end subroutine device_proj_ortho
555
556
557 subroutine cpu_proj_ortho(this, xx, bb, w, n)
558 type(projection_t) :: this
559 integer, intent(inout) :: n
560 real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
561 real(kind=rp), dimension(n), intent(inout) :: w
562 real(kind=rp) :: nrm, scl1, scl2, c, s
563 real(kind=rp) :: alpha(this%L), beta(this%L)
564 integer :: i, j, k, l, h, ierr
565
566 associate(m => this%m)
567
568 if(m .le. 0) return !No vectors to ortho-normalize
569
570 ! AX = B
571 ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
572 call rzero(alpha, m)
573 do i = 1, n, neko_blk_size
574 j = min(neko_blk_size, n-i+1)
575 do k = 1, m !First round CGS
576 s = 0.0_rp
577 c = 0.0_rp
578 do l = 0, (j-1)
579 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
580 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
581 end do
582 alpha(k) = alpha(k) + 0.5_rp * (s + c)
583 end do
584 end do
585
586 call mpi_allreduce(mpi_in_place, alpha, this%m, &
587 mpi_real_precision, mpi_sum, neko_comm, ierr)
588
589 nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
590
591
592 do i = 1, n, neko_blk_size
593 j = min(neko_blk_size, n-i+1)
594 do k = 1,m-1
595 do l = 0, (j-1)
596 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
597 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
598 end do
599 end do
600 end do
601 call rzero(beta,m)
602
603 do i = 1, n, neko_blk_size
604 j = min(neko_blk_size, n-i+1)
605 do k = 1,m-1
606 s = 0.0_rp
607 c = 0.0_rp
608 do l = 0, (j-1)
609 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
610 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
611 end do
612 beta(k) = beta(k) + 0.5_rp * (s + c)
613 end do
614 end do
615
616 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
617 mpi_real_precision, mpi_sum, neko_comm, ierr)
618
619 alpha(m) = 0.0_rp
620
621 do i = 1, n, neko_blk_size
622 j = min(neko_blk_size,n-i+1)
623 do k = 1, m-1
624 do l = 0, (j-1)
625 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
626 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
627 end do
628 end do
629 s = 0.0_rp
630 do l = 0, (j-1)
631 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
632 end do
633 alpha(m) = alpha(m) + s
634 end do
635 do k = 1, m-1
636 alpha(k) = alpha(k) + beta(k)
637 end do
638
639 !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
640 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
641 mpi_real_precision, mpi_sum, neko_comm, ierr)
642 alpha(m) = sqrt(alpha(m))
643 !dx and db now stored in last column of xx and bb
644
645 if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
646 !Normalize dx and db
647 scl1 = 1.0_rp / alpha(m)
648 do i = 0, (n - 1)
649 xx(1+i,m) = scl1 * xx(1+i,m)
650 bb(1+i,m) = scl1 * bb(1+i,m)
651 end do
652
653 else !New vector is not linearly independent, forget about it
654 k = m !location of rank deficient column
655 if(pe_rank .eq. 0) then
656 call neko_warning('New vector not linearly indepependent!')
657 end if
658 m = m - 1 !Remove column
659 endif
660
661 end associate
662
663 end subroutine cpu_proj_ortho
664
665 subroutine print_proj_info(this,string)
666 class(projection_t) :: this
667 character(len=*) :: string
668 character(len=LOG_SIZE) :: log_buf
669
670 if (this%proj_m .gt. 0) then
671 write(log_buf, '(A,A)') 'Projection ', string
672 call neko_log%message(log_buf)
673 write(log_buf, '(A,A)') 'Proj. vec.:',' Orig. residual:'
674 call neko_log%message(log_buf)
675 write(log_buf, '(I11,3x, E15.7,5x)') this%proj_m, this%proj_res
676 call neko_log%message(log_buf)
677 end if
678
679 end subroutine print_proj_info
680
681 subroutine bcknd_clear(this, n)
682 class(projection_t) :: this
683 integer, intent(in) :: n
684 integer :: i, j
685
686 this%m = 0
687 this%proj_m = 0
688
689 do i = 1, this%L
690 if (neko_bcknd_device .eq. 1) then
691 call device_rzero(this%xx_d(i), n)
692 call device_rzero(this%xx_d(i), n)
693 else
694 do j = 1, n
695 this%xx(j,i) = 0.0_rp
696 this%bb(j,i) = 0.0_rp
697 end do
698 end if
699 end do
700
701 end subroutine bcknd_clear
702
703end module projection
Return the device pointer for an associated Fortran array.
Definition device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:23
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:164
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:65
integer, parameter, public log_size
Definition log.f90:42
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:310
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:894
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:586
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
Build configurations.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:109
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine projection_init(this, n, l, activ_step)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string)
Implements type time_step_controller.
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:266
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:46
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55