Loading [MathJax]/jax/output/HTML-CSS/config.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
projection.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
64 use num_types, only : rp, c_rp
65 use math, only : rzero, glsc3, add2, copy, cmult
66 use coefs, only : coef_t
67 use ax_product, only : ax_t
68 use bc_list, only : bc_list_t
69 use gather_scatter, only : gs_t, gs_op_add
79 use logger, only : log_size, neko_log
80 use utils, only : neko_warning
81 use bc_list, only : bc_list_t
83 use comm, only : neko_comm, pe_rank, mpi_allreduce, mpi_in_place, &
84 mpi_sum, mpi_real_precision
85 use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
87 implicit none
88 private
89 public :: proj_ortho
90
91 type, public :: projection_t
92 real(kind=rp), allocatable :: xx(:,:)
93 real(kind=rp), allocatable :: bb(:,:)
94 real(kind=rp), allocatable :: xbar(:)
95 type(c_ptr), allocatable :: xx_d(:)
96 type(c_ptr), allocatable :: bb_d(:)
97 type(c_ptr) :: xbar_d = c_null_ptr
98 type(c_ptr) :: alpha_d = c_null_ptr
99 type(c_ptr) :: xx_d_d = c_null_ptr
100 type(c_ptr) :: bb_d_d = c_null_ptr
101 integer :: m, l
102 real(kind=rp) :: tol = 1e-7_rp
103 !logging variables
104 real(kind=rp) :: proj_res
105 integer :: proj_m = 0
106 integer :: activ_step ! steps to activate projection
107 contains
108 procedure, pass(this) :: clear => bcknd_clear
109 procedure, pass(this) :: project_on => bcknd_project_on
110 procedure, pass(this) :: project_back => bcknd_project_back
111 procedure, pass(this) :: log_info => print_proj_info
112 procedure, pass(this) :: init => projection_init
113 procedure, pass(this) :: free => projection_free
114 procedure, pass(this) :: pre_solving => projection_pre_solving
115 procedure, pass(this) :: post_solving => projection_post_solving
116 end type projection_t
117
118contains
119
120 subroutine projection_init(this, n, L, activ_step)
121 class(projection_t), target, intent(inout) :: this
122 integer, intent(in) :: n
123 integer, optional, intent(in) :: L, activ_step
124 integer :: i
125 integer(c_size_t) :: ptr_size
126 type(c_ptr) :: ptr
127 real(c_rp) :: dummy
128
129 call this%free()
130
131 if (present(l)) then
132 this%L = l
133 else
134 this%L = 20
135 end if
136
137 if (present(activ_step)) then
138 this%activ_step = activ_step
139 else
140 this%activ_step = 5
141 end if
142
143 this%m = 0
144
145 allocate(this%xx(n, this%L))
146 allocate(this%bb(n, this%L))
147 allocate(this%xbar(n))
148 allocate(this%xx_d(this%L))
149 allocate(this%bb_d(this%L))
150 call rzero(this%xbar, n)
151 do i = 1, this%L
152 call rzero(this%xx(1, i), n)
153 call rzero(this%bb(1, i), n)
154 end do
155 if (neko_bcknd_device .eq. 1) then
156
157 call device_map(this%xbar, this%xbar_d, n)
158 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
159
160 call device_rzero(this%xbar_d, n)
161 call device_rzero(this%alpha_d, this%L)
162
163 do i = 1, this%L
164 this%xx_d(i) = c_null_ptr
165 call device_map(this%xx(:, i), this%xx_d(i), n)
166 call device_rzero(this%xx_d(i), n)
167 this%bb_d(i) = c_null_ptr
168 call device_map(this%bb(:, i), this%bb_d(i), n)
169 call device_rzero(this%bb_d(i), n)
170 end do
171
172 ptr_size = c_sizeof(c_null_ptr) * this%L
173 call device_alloc(this%xx_d_d, ptr_size)
174 ptr = c_loc(this%xx_d)
175 call device_memcpy(ptr, this%xx_d_d, ptr_size, &
176 host_to_device, sync = .false.)
177 call device_alloc(this%bb_d_d, ptr_size)
178 ptr = c_loc(this%bb_d)
179 call device_memcpy(ptr, this%bb_d_d, ptr_size, &
180 host_to_device, sync = .false.)
181 end if
182
183
184 end subroutine projection_init
185
186 subroutine projection_free(this)
187 class(projection_t), intent(inout) :: this
188 integer :: i
189 if (allocated(this%xx)) then
190 deallocate(this%xx)
191 end if
192 if (allocated(this%bb)) then
193 deallocate(this%bb)
194 end if
195 if (allocated(this%xbar)) then
196 deallocate(this%xbar)
197 end if
198 if (allocated(this%xx_d)) then
199 do i = 1, this%L
200 if (c_associated(this%xx_d(i))) then
201 call device_free(this%xx_d(i))
202 end if
203 end do
204 end if
205 if (c_associated(this%xx_d_d)) then
206 call device_free(this%xx_d_d)
207 end if
208 if (c_associated(this%xbar_d)) then
209 call device_free(this%xbar_d)
210 end if
211 if (c_associated(this%alpha_d)) then
212 call device_free(this%alpha_d)
213 end if
214 if (allocated(this%bb_d)) then
215 do i = 1, this%L
216 if (c_associated(this%bb_d(i))) then
217 call device_free(this%bb_d(i))
218 end if
219 end do
220 end if
221 if (c_associated(this%bb_d_d)) then
222 call device_free(this%bb_d_d)
223 end if
224
225 end subroutine projection_free
226
227 subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, &
228 string)
229 class(projection_t), intent(inout) :: this
230 integer, intent(inout) :: n
231 real(kind=rp), intent(inout), dimension(n) :: b
232 integer, intent(in) :: tstep
233 class(coef_t), intent(inout) :: coef
234 type(time_step_controller_t), intent(in) :: dt_controller
235 character(len=*), optional :: string
236
237 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
238 if (dt_controller%if_variable_dt) then
239 ! the time step at which dt is changed
240 if (dt_controller%dt_last_change .eq. 0) then
241 call this%clear(n)
242 else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
243 ! activate projection some steps after dt is changed
244 ! note that dt_last_change start from 0
245 call this%project_on(b, coef, n)
246 if (present(string)) then
247 call this%log_info(string)
248 end if
249 end if
250 else
251 call this%project_on(b, coef, n)
252 if (present(string)) then
253 call this%log_info(string)
254 end if
255 end if
256 end if
257
258 end subroutine projection_pre_solving
259
260 subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, &
261 dt_controller)
262 class(projection_t), intent(inout) :: this
263 integer, intent(inout) :: n
264 class(ax_t), intent(inout) :: Ax
265 class(coef_t), intent(inout) :: coef
266 class(bc_list_t), intent(inout) :: bclst
267 type(gs_t), intent(inout) :: gs_h
268 real(kind=rp), intent(inout), dimension(n) :: x
269 integer, intent(in) :: tstep
270 type(time_step_controller_t), intent(in) :: dt_controller
271
272 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
273 if (.not.(dt_controller%if_variable_dt) .or. &
274 (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
275 call this%project_back(x, ax, coef, bclst, gs_h, n)
276 end if
277 end if
278
279 end subroutine projection_post_solving
280
281 subroutine bcknd_project_on(this, b, coef, n)
282 class(projection_t), intent(inout) :: this
283 integer, intent(inout) :: n
284 class(coef_t), intent(inout) :: coef
285 real(kind=rp), intent(inout), dimension(n) :: b
286 call profiler_start_region('Project on', 16)
287 if (neko_bcknd_device .eq. 1) then
288 call device_project_on(this, b, coef, n)
289 else
290 call cpu_project_on(this, b, coef, n)
291 end if
292 call profiler_end_region('Project on', 16)
293 end subroutine bcknd_project_on
294
295 subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
296 class(projection_t), intent(inout) :: this
297 integer, intent(inout) :: n
298 class(ax_t), intent(inout) :: Ax
299 class(coef_t), intent(inout) :: coef
300 class(bc_list_t), intent(inout) :: bclst
301 type(gs_t), intent(inout) :: gs_h
302 real(kind=rp), intent(inout), dimension(n) :: x
303 type(c_ptr) :: x_d
304
305 call profiler_start_region('Project back', 17)
306
307 if (neko_bcknd_device .eq. 1) then
308 x_d = device_get_ptr(x)
309 ! Restore desired solution
310 if (this%m .gt. 0) call device_add2(x_d, this%xbar_d, n)
311 if (this%m .eq. this%L) then
312 this%m = 1
313 else
314 this%m = min(this%m+1, this%L)
315 end if
316
317 call device_copy(this%xx_d(this%m), x_d, n) ! Update (X,B)
318
319 else
320 if (this%m .gt. 0) call add2(x, this%xbar, n) ! Restore desired solution
321 if (this%m .eq. this%L) then
322 this%m = 1
323 else
324 this%m = min(this%m+1, this%L)
325 end if
326
327 call copy(this%xx(1, this%m), x, n) ! Update (X,B)
328 end if
329
330 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
331 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
332 call bclst%apply_scalar(this%bb(1, this%m), n)
333
334 call proj_ortho(this, coef, n)
335 call profiler_end_region('Project back', 17)
336 end subroutine bcknd_project_back
337
338
339
340 subroutine cpu_project_on(this, b, coef, n)
341 class(projection_t), intent(inout) :: this
342 integer, intent(inout) :: n
343 class(coef_t), intent(inout) :: coef
344 real(kind=rp), intent(inout), dimension(n) :: b
345 integer :: i, j, k, l, ierr
346 real(kind=rp) :: work(this%L), alpha(this%L), s
347
348 associate(xbar => this%xbar, xx => this%xx, &
349 bb => this%bb)
350
351 if (this%m .le. 0) return
352
353 !First round of CGS
354 call rzero(alpha, this%m)
355 this%proj_res = sqrt(glsc3(b, b, coef%mult, n) / coef%volume)
356 this%proj_m = this%m
357 do i = 1, n, neko_blk_size
358 j = min(neko_blk_size, n-i+1)
359 do k = 1, this%m
360 s = 0.0_rp
361 do l = 0, (j-1)
362 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
363 end do
364 alpha(k) = alpha(k) + s
365 end do
366 end do
367
368 !First one outside loop to avoid zeroing xbar and bbar
369 call mpi_allreduce(mpi_in_place, alpha, this%m, &
370 mpi_real_precision, mpi_sum, neko_comm, ierr)
371
372 call rzero(work, this%m)
373
374 do i = 1, n, neko_blk_size
375 j = min(neko_blk_size, n-i+1)
376 do l = 0, (j-1)
377 xbar(i+l) = alpha(1) * xx(i+l,1)
378 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
379 end do
380 do k = 2, this%m
381 do l = 0, (j-1)
382 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
383 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
384 end do
385 end do
386 !Second round of CGS
387 do k = 1, this%m
388 s = 0.0_rp
389 do l = 0, (j-1)
390 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
391 end do
392 work(k) = work(k) + s
393 end do
394 end do
395
396 call mpi_allreduce(work, alpha, this%m, &
397 mpi_real_precision, mpi_sum, neko_comm, ierr)
398
399 do i = 1, n, neko_blk_size
400 j = min(neko_blk_size, n-i+1)
401 do k = 1, this%m
402 do l = 0, (j-1)
403 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
404 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
405 end do
406 end do
407 end do
408 end associate
409 end subroutine cpu_project_on
410
411 subroutine device_project_on(this, b, coef, n)
412 class(projection_t), intent(inout) :: this
413 integer, intent(inout) :: n
414 class(coef_t), intent(inout) :: coef
415 real(kind=rp), intent(inout), dimension(n) :: b
416 real(kind=rp) :: alpha(this%L)
417 type(c_ptr) :: b_d
418 integer :: i
419 b_d = device_get_ptr(b)
420
421 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
422 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
423
424 if (this%m .le. 0) return
425
426
427
428 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
429 this%proj_m = this%m
430 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
431 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
432 coef%mult_d, xbar_d, this%m, n)
433 else
434 if (neko_bcknd_opencl .eq. 1) then
435 do i = 1, this%m
436 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
437 end do
438 else
439 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
440 call device_memcpy(alpha, alpha_d, this%m, &
441 host_to_device, sync = .false.)
442 end if
443 call device_rzero(xbar_d, n)
444 if (neko_bcknd_opencl .eq. 1) then
445 do i = 1, this%m
446 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
447 end do
448 call cmult(alpha, -1.0_rp, this%m)
449 else
450 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
451 call device_cmult(alpha_d, -1.0_rp, this%m)
452 end if
453
454 if (neko_bcknd_opencl .eq. 1) then
455 do i = 1, this%m
456 call device_add2s2(b_d, bb_d(i), alpha(i), n)
457 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
458 end do
459 else
460 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
461 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
462 call device_memcpy(alpha, alpha_d, this%m, &
463 host_to_device, sync = .false.)
464 end if
465
466 if (neko_bcknd_opencl .eq. 1) then
467 do i = 1, this%m
468 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
469 call cmult(alpha, -1.0_rp, this%m)
470 call device_add2s2(b_d, bb_d(i), alpha(i), n)
471 end do
472 else
473 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
474 call device_cmult(alpha_d, -1.0_rp, this%m)
475 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
476 end if
477 end if
478
479 end associate
480 end subroutine device_project_on
481
482 !Choose between CPU or device for proj_ortho
483 subroutine proj_ortho(this, coef, n)
484 class(projection_t), intent(inout) :: this
485 type(coef_t), intent(in) :: coef
486 integer, intent(in) :: n
487
488 if (neko_bcknd_device .eq. 1) then
489 call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
490 else
491 call cpu_proj_ortho (this, this%xx, this%bb, coef%mult, n)
492 end if
493 end subroutine proj_ortho
494
495 !This is a lot more primitive than on the CPU
496 subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
497 type(projection_t), intent(inout) :: this
498 integer, intent(in) :: n
499 type(c_ptr), dimension(this%L) :: xx_d, bb_d
500 type(c_ptr), intent(in) :: w_d
501 real(kind=rp) :: nrm, scl
502 real(kind=rp) :: alpha(this%L)
503 integer :: i
504
505 associate(m => this%m, xx_d_d => this%xx_d_d, &
506 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
507
508 if (m .le. 0) return
509
510 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
511 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
512 w_d, xx_d(m), this%m, n, nrm)
513 else
514 if (neko_bcknd_opencl .eq. 1)then
515 do i = 1, m
516 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
517 end do
518 else
519 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
520 end if
521 nrm = sqrt(alpha(m))
522 call cmult(alpha, -1.0_rp,m)
523 if (neko_bcknd_opencl .eq. 1)then
524 do i = 1, m - 1
525 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
526 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
527
528 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
529 end do
530 else
531 call device_memcpy(alpha, alpha_d, this%m, &
532 host_to_device, sync = .false.)
533 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
534 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
535
536 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
537 end if
538 call cmult(alpha, -1.0_rp,m)
539 if (neko_bcknd_opencl .eq. 1)then
540 do i = 1, m - 1
541 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
542 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
543 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
544 end do
545 else
546 call device_memcpy(alpha, alpha_d, m, &
547 host_to_device, sync = .false.)
548 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
549 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
550 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
551 end if
552 end if
553
554 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
555 alpha(m) = sqrt(alpha(m))
556
557 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
558 scl = 1.0_rp / alpha(m)
559 call device_cmult(xx_d(m), scl, n)
560 call device_cmult(bb_d(m), scl, n)
561
562
563 else !New vector is not linearly independent, forget about it
564 if (pe_rank .eq. 0) then
565 call neko_warning('New vector not linearly indepependent!')
566 end if
567 m = m - 1 !Remove column
568 end if
569
570 end associate
571
572 end subroutine device_proj_ortho
573
574
575 subroutine cpu_proj_ortho(this, xx, bb, w, n)
576 type(projection_t), intent(inout) :: this
577 integer, intent(in) :: n
578 real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
579 real(kind=rp), dimension(n), intent(in) :: w
580 real(kind=rp) :: nrm, scl1, scl2, c, s
581 real(kind=rp) :: alpha(this%L), beta(this%L)
582 integer :: i, j, k, l, h, ierr
583
584 associate(m => this%m)
585
586 if (m .le. 0) return !No vectors to ortho-normalize
587
588 ! AX = B
589 ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
590 call rzero(alpha, m)
591 do i = 1, n, neko_blk_size
592 j = min(neko_blk_size, n-i+1)
593 do k = 1, m !First round CGS
594 s = 0.0_rp
595 c = 0.0_rp
596 do l = 0, (j-1)
597 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
598 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
599 end do
600 alpha(k) = alpha(k) + 0.5_rp * (s + c)
601 end do
602 end do
603
604 call mpi_allreduce(mpi_in_place, alpha, this%m, &
605 mpi_real_precision, mpi_sum, neko_comm, ierr)
606
607 nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
608
609
610 do i = 1, n, neko_blk_size
611 j = min(neko_blk_size, n-i+1)
612 do k = 1, m-1
613 do l = 0, (j-1)
614 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
615 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
616 end do
617 end do
618 end do
619 call rzero(beta,m)
620
621 do i = 1, n, neko_blk_size
622 j = min(neko_blk_size, n-i+1)
623 do k = 1, m-1
624 s = 0.0_rp
625 c = 0.0_rp
626 do l = 0, (j-1)
627 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
628 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
629 end do
630 beta(k) = beta(k) + 0.5_rp * (s + c)
631 end do
632 end do
633
634 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
635 mpi_real_precision, mpi_sum, neko_comm, ierr)
636
637 alpha(m) = 0.0_rp
638
639 do i = 1, n, neko_blk_size
640 j = min(neko_blk_size,n-i+1)
641 do k = 1, m-1
642 do l = 0, (j-1)
643 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
644 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
645 end do
646 end do
647 s = 0.0_rp
648 do l = 0, (j-1)
649 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
650 end do
651 alpha(m) = alpha(m) + s
652 end do
653 do k = 1, m-1
654 alpha(k) = alpha(k) + beta(k)
655 end do
656
657 !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
658 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
659 mpi_real_precision, mpi_sum, neko_comm, ierr)
660 alpha(m) = sqrt(alpha(m))
661 !dx and db now stored in last column of xx and bb
662
663 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
664 !Normalize dx and db
665 scl1 = 1.0_rp / alpha(m)
666 do i = 0, (n - 1)
667 xx(1+i,m) = scl1 * xx(1+i,m)
668 bb(1+i,m) = scl1 * bb(1+i,m)
669 end do
670
671 else !New vector is not linearly independent, forget about it
672 k = m !location of rank deficient column
673 if (pe_rank .eq. 0) then
674 call neko_warning('New vector not linearly indepependent!')
675 end if
676 m = m - 1 !Remove column
677 end if
678
679 end associate
680
681 end subroutine cpu_proj_ortho
682
683 subroutine print_proj_info(this, string)
684 class(projection_t), intent(in) :: this
685 character(len=*), intent(in) :: string
686 character(len=LOG_SIZE) :: log_buf
687
688 if (this%proj_m .gt. 0) then
689 write(log_buf, '(A,A)') 'Projection ', string
690 call neko_log%message(log_buf)
691 write(log_buf, '(A,A)') 'Proj. vec.:', ' Orig. residual:'
692 call neko_log%message(log_buf)
693 write(log_buf, '(I11,3x, E15.7,5x)') this%proj_m, this%proj_res
694 call neko_log%message(log_buf)
695 end if
696
697 end subroutine print_proj_info
698
699 subroutine bcknd_clear(this, n)
700 class(projection_t), intent(inout) :: this
701 integer, intent(in) :: n
702 integer :: i, j
703
704 this%m = 0
705 this%proj_m = 0
706
707 do i = 1, this%L
708 if (neko_bcknd_device .eq. 1) then
709 call device_rzero(this%xx_d(i), n)
710 call device_rzero(this%xx_d(i), n)
711 else
712 do j = 1, n
713 this%xx(j,i) = 0.0_rp
714 this%bb(j,i) = 0.0_rp
715 end do
716 end if
717 end do
718
719 end subroutine bcknd_clear
720
721end module projection
Return the device pointer for an associated Fortran array.
Definition device.F90:95
Map a Fortran array to a device (allocate and associate)
Definition device.F90:71
Copy data between host and device (or device and device)
Definition device.F90:65
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
integer pe_rank
MPI rank.
Definition comm.F90:51
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:38
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:46
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:46
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:200
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:179
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:65
integer, parameter, public log_size
Definition log.f90:42
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:310
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:894
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:586
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
Build configurations.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:109
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine, public proj_ortho(this, coef, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine projection_init(this, n, l, activ_step)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string)
Implements type time_step_controller.
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:266
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:47
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55