Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
projection.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
64 use num_types, only : rp, c_rp
65 use math, only : rzero, glsc3, add2, copy, cmult
66 use coefs, only : coef_t
67 use ax_product, only : ax_t
68 use bc_list, only : bc_list_t
69 use gather_scatter, only : gs_t, gs_op_add
79 use logger, only : log_size, neko_log
80 use utils, only : neko_warning
81 use bc_list, only : bc_list_t
83 use comm, only : neko_comm, pe_rank, mpi_allreduce, mpi_in_place, &
84 mpi_sum, mpi_real_precision
85 use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
87 implicit none
88 private
89
90 type, public :: projection_t
91 real(kind=rp), allocatable :: xx(:,:)
92 real(kind=rp), allocatable :: bb(:,:)
93 real(kind=rp), allocatable :: xbar(:)
94 type(c_ptr), allocatable :: xx_d(:)
95 type(c_ptr), allocatable :: bb_d(:)
96 type(c_ptr) :: xbar_d = c_null_ptr
97 type(c_ptr) :: alpha_d = c_null_ptr
98 type(c_ptr) :: xx_d_d = c_null_ptr
99 type(c_ptr) :: bb_d_d = c_null_ptr
100 integer :: m, l
101 real(kind=rp) :: tol = 1e-7_rp
102 !logging variables
103 real(kind=rp) :: proj_res
104 integer :: proj_m = 0
105 integer :: activ_step ! steps to activate projection
106 contains
107 procedure, pass(this) :: clear => bcknd_clear
108 procedure, pass(this) :: project_on => bcknd_project_on
109 procedure, pass(this) :: project_back => bcknd_project_back
110 procedure, pass(this) :: log_info => print_proj_info
111 procedure, pass(this) :: init => projection_init
112 procedure, pass(this) :: free => projection_free
113 procedure, pass(this) :: pre_solving => projection_pre_solving
114 procedure, pass(this) :: post_solving => projection_post_solving
115 end type projection_t
116
117contains
118
119 subroutine projection_init(this, n, L, activ_step)
120 class(projection_t), target, intent(inout) :: this
121 integer, intent(in) :: n
122 integer, optional, intent(in) :: L, activ_step
123 integer :: i
124 integer(c_size_t) :: ptr_size
125 type(c_ptr) :: ptr
126 real(c_rp) :: dummy
127
128 call this%free()
129
130 if (present(l)) then
131 this%L = l
132 else
133 this%L = 20
134 end if
135
136 if (present(activ_step)) then
137 this%activ_step = activ_step
138 else
139 this%activ_step = 5
140 end if
141
142 this%m = 0
143
144 allocate(this%xx(n, this%L))
145 allocate(this%bb(n, this%L))
146 allocate(this%xbar(n))
147 allocate(this%xx_d(this%L))
148 allocate(this%bb_d(this%L))
149 call rzero(this%xbar, n)
150 do i = 1, this%L
151 call rzero(this%xx(1, i), n)
152 call rzero(this%bb(1, i), n)
153 end do
154 if (neko_bcknd_device .eq. 1) then
155
156 call device_map(this%xbar, this%xbar_d, n)
157 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
158
159 call device_rzero(this%xbar_d, n)
160 call device_rzero(this%alpha_d, this%L)
161
162 do i = 1, this%L
163 this%xx_d(i) = c_null_ptr
164 call device_map(this%xx(:, i), this%xx_d(i), n)
165 call device_rzero(this%xx_d(i), n)
166 this%bb_d(i) = c_null_ptr
167 call device_map(this%bb(:, i), this%bb_d(i), n)
168 call device_rzero(this%bb_d(i), n)
169 end do
170
171 ptr_size = c_sizeof(c_null_ptr) * this%L
172 call device_alloc(this%xx_d_d, ptr_size)
173 ptr = c_loc(this%xx_d)
174 call device_memcpy(ptr, this%xx_d_d, ptr_size, &
175 host_to_device, sync = .false.)
176 call device_alloc(this%bb_d_d, ptr_size)
177 ptr = c_loc(this%bb_d)
178 call device_memcpy(ptr, this%bb_d_d, ptr_size, &
179 host_to_device, sync = .false.)
180 end if
181
182
183 end subroutine projection_init
184
185 subroutine projection_free(this)
186 class(projection_t), intent(inout) :: this
187 integer :: i
188 if (allocated(this%xx)) then
189 deallocate(this%xx)
190 end if
191 if (allocated(this%bb)) then
192 deallocate(this%bb)
193 end if
194 if (allocated(this%xbar)) then
195 deallocate(this%xbar)
196 end if
197 if (allocated(this%xx_d)) then
198 do i = 1, this%L
199 if (c_associated(this%xx_d(i))) then
200 call device_free(this%xx_d(i))
201 end if
202 end do
203 end if
204 if (c_associated(this%xx_d_d)) then
205 call device_free(this%xx_d_d)
206 end if
207 if (c_associated(this%xbar_d)) then
208 call device_free(this%xbar_d)
209 end if
210 if (c_associated(this%alpha_d)) then
211 call device_free(this%alpha_d)
212 end if
213 if (allocated(this%bb_d)) then
214 do i = 1, this%L
215 if (c_associated(this%bb_d(i))) then
216 call device_free(this%bb_d(i))
217 end if
218 end do
219 end if
220 if (c_associated(this%bb_d_d)) then
221 call device_free(this%bb_d_d)
222 end if
223
224 end subroutine projection_free
225
226 subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, &
227 string)
228 class(projection_t), intent(inout) :: this
229 integer, intent(inout) :: n
230 real(kind=rp), intent(inout), dimension(n) :: b
231 integer, intent(in) :: tstep
232 class(coef_t), intent(inout) :: coef
233 type(time_step_controller_t), intent(in) :: dt_controller
234 character(len=*), optional :: string
235
236 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
237 if (dt_controller%if_variable_dt) then
238 ! the time step at which dt is changed
239 if (dt_controller%dt_last_change .eq. 0) then
240 call this%clear(n)
241 else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
242 ! activate projection some steps after dt is changed
243 ! note that dt_last_change start from 0
244 call this%project_on(b, coef, n)
245 if (present(string)) then
246 call this%log_info(string)
247 end if
248 end if
249 else
250 call this%project_on(b, coef, n)
251 if (present(string)) then
252 call this%log_info(string)
253 end if
254 end if
255 end if
256
257 end subroutine projection_pre_solving
258
259 subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, &
260 dt_controller)
261 class(projection_t), intent(inout) :: this
262 integer, intent(inout) :: n
263 class(ax_t), intent(inout) :: Ax
264 class(coef_t), intent(inout) :: coef
265 class(bc_list_t), intent(inout) :: bclst
266 type(gs_t), intent(inout) :: gs_h
267 real(kind=rp), intent(inout), dimension(n) :: x
268 integer, intent(in) :: tstep
269 type(time_step_controller_t), intent(in) :: dt_controller
270
271 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
272 if (.not.(dt_controller%if_variable_dt) .or. &
273 (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
274 call this%project_back(x, ax, coef, bclst, gs_h, n)
275 end if
276 end if
277
278 end subroutine projection_post_solving
279
280 subroutine bcknd_project_on(this, b, coef, n)
281 class(projection_t), intent(inout) :: this
282 integer, intent(inout) :: n
283 class(coef_t), intent(inout) :: coef
284 real(kind=rp), intent(inout), dimension(n) :: b
285 call profiler_start_region('Project on', 16)
286 if (neko_bcknd_device .eq. 1) then
287 call device_project_on(this, b, coef, n)
288 else
289 call cpu_project_on(this, b, coef, n)
290 end if
291 call profiler_end_region('Project on', 16)
292 end subroutine bcknd_project_on
293
294 subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
295 class(projection_t) :: this
296 integer, intent(inout) :: n
297 class(ax_t), intent(inout) :: Ax
298 class(coef_t), intent(inout) :: coef
299 class(bc_list_t), intent(inout) :: bclst
300 type(gs_t), intent(inout) :: gs_h
301 real(kind=rp), intent(inout), dimension(n) :: x
302 type(c_ptr) :: x_d
303
304 call profiler_start_region('Project back', 17)
305
306 if (neko_bcknd_device .eq. 1) then
307 x_d = device_get_ptr(x)
308 ! Restore desired solution
309 if (this%m .gt. 0) call device_add2(x_d, this%xbar_d, n)
310 if (this%m .eq. this%L) then
311 this%m = 1
312 else
313 this%m = min(this%m+1, this%L)
314 end if
315
316 call device_copy(this%xx_d(this%m), x_d, n) ! Update (X,B)
317
318 else
319 if (this%m .gt. 0) call add2(x, this%xbar, n) ! Restore desired solution
320 if (this%m .eq. this%L) then
321 this%m = 1
322 else
323 this%m = min(this%m+1, this%L)
324 end if
325
326 call copy(this%xx(1, this%m), x, n) ! Update (X,B)
327 end if
328
329 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
330 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
331 call bclst%apply_scalar(this%bb(1, this%m), n)
332
333 if (neko_bcknd_device .eq. 1) then
334 call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
335 else
336 call cpu_proj_ortho (this, this%xx, this%bb, coef%mult, n)
337 end if
338 call profiler_end_region('Project back', 17)
339 end subroutine bcknd_project_back
340
341
342
343 subroutine cpu_project_on(this, b, coef, n)
344 class(projection_t), intent(inout) :: this
345 integer, intent(inout) :: n
346 class(coef_t), intent(inout) :: coef
347 real(kind=rp), intent(inout), dimension(n) :: b
348 integer :: i, j, k, l, ierr
349 real(kind=rp) :: work(this%L), alpha(this%L), s
350
351 associate(xbar => this%xbar, xx => this%xx, &
352 bb => this%bb)
353
354 if (this%m .le. 0) return
355
356 !First round of CGS
357 call rzero(alpha, this%m)
358 this%proj_res = sqrt(glsc3(b, b, coef%mult, n) / coef%volume)
359 this%proj_m = this%m
360 do i = 1, n, neko_blk_size
361 j = min(neko_blk_size, n-i+1)
362 do k = 1, this%m
363 s = 0.0_rp
364 do l = 0, (j-1)
365 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
366 end do
367 alpha(k) = alpha(k) + s
368 end do
369 end do
370
371 !First one outside loop to avoid zeroing xbar and bbar
372 call mpi_allreduce(mpi_in_place, alpha, this%m, &
373 mpi_real_precision, mpi_sum, neko_comm, ierr)
374
375 call rzero(work, this%m)
376
377 do i = 1, n, neko_blk_size
378 j = min(neko_blk_size, n-i+1)
379 do l = 0, (j-1)
380 xbar(i+l) = alpha(1) * xx(i+l,1)
381 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
382 end do
383 do k = 2, this%m
384 do l = 0, (j-1)
385 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
386 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
387 end do
388 end do
389 !Second round of CGS
390 do k = 1, this%m
391 s = 0.0_rp
392 do l = 0, (j-1)
393 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
394 end do
395 work(k) = work(k) + s
396 end do
397 end do
398
399 call mpi_allreduce(work, alpha, this%m, &
400 mpi_real_precision, mpi_sum, neko_comm, ierr)
401
402 do i = 1, n, neko_blk_size
403 j = min(neko_blk_size, n-i+1)
404 do k = 1, this%m
405 do l = 0, (j-1)
406 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
407 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
408 end do
409 end do
410 end do
411 end associate
412 end subroutine cpu_project_on
413
414 subroutine device_project_on(this, b, coef, n)
415 class(projection_t), intent(inout) :: this
416 integer, intent(inout) :: n
417 class(coef_t), intent(inout) :: coef
418 real(kind=rp), intent(inout), dimension(n) :: b
419 real(kind=rp) :: alpha(this%L)
420 type(c_ptr) :: b_d
421 integer :: i
422 b_d = device_get_ptr(b)
423
424 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
425 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
426
427 if (this%m .le. 0) return
428
429
430
431 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
432 this%proj_m = this%m
433 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
434 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
435 coef%mult_d, xbar_d, this%m, n)
436 else
437 if (neko_bcknd_opencl .eq. 1) then
438 do i = 1, this%m
439 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
440 end do
441 else
442 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
443 call device_memcpy(alpha, alpha_d, this%m, &
444 host_to_device, sync = .false.)
445 end if
446 call device_rzero(xbar_d, n)
447 if (neko_bcknd_opencl .eq. 1) then
448 do i = 1, this%m
449 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
450 end do
451 call cmult(alpha, -1.0_rp, this%m)
452 else
453 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
454 call device_cmult(alpha_d, -1.0_rp, this%m)
455 end if
456
457 if (neko_bcknd_opencl .eq. 1) then
458 do i = 1, this%m
459 call device_add2s2(b_d, bb_d(i), alpha(i), n)
460 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
461 end do
462 else
463 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
464 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
465 call device_memcpy(alpha, alpha_d, this%m, &
466 host_to_device, sync = .false.)
467 end if
468
469 if (neko_bcknd_opencl .eq. 1) then
470 do i = 1, this%m
471 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
472 call cmult(alpha, -1.0_rp, this%m)
473 call device_add2s2(b_d, bb_d(i), alpha(i), n)
474 end do
475 else
476 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
477 call device_cmult(alpha_d, -1.0_rp, this%m)
478 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
479 end if
480 end if
481
482 end associate
483 end subroutine device_project_on
484
485 !This is a lot more primitive than on the CPU
486 subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
487 type(projection_t) :: this
488 integer, intent(inout) :: n
489 type(c_ptr), dimension(this%L) :: xx_d, bb_d
490 type(c_ptr) :: w_d
491 real(kind=rp) :: nrm, scl
492 real(kind=rp) :: alpha(this%L)
493 integer :: i
494
495 associate(m => this%m, xx_d_d => this%xx_d_d, &
496 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
497
498 if (m .le. 0) return
499
500 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
501 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
502 w_d, xx_d(m), this%m, n, nrm)
503 else
504 if (neko_bcknd_opencl .eq. 1)then
505 do i = 1, m
506 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
507 end do
508 else
509 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
510 end if
511 nrm = sqrt(alpha(m))
512 call cmult(alpha, -1.0_rp,m)
513 if (neko_bcknd_opencl .eq. 1)then
514 do i = 1, m - 1
515 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
516 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
517
518 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
519 end do
520 else
521 call device_memcpy(alpha, alpha_d, this%m, &
522 host_to_device, sync = .false.)
523 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
524 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
525
526 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
527 end if
528 call cmult(alpha, -1.0_rp,m)
529 if (neko_bcknd_opencl .eq. 1)then
530 do i = 1, m - 1
531 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
532 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
533 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
534 end do
535 else
536 call device_memcpy(alpha, alpha_d, m, &
537 host_to_device, sync = .false.)
538 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
539 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
540 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
541 end if
542 end if
543
544 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
545 alpha(m) = sqrt(alpha(m))
546
547 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
548 scl = 1.0_rp / alpha(m)
549 call device_cmult(xx_d(m), scl, n)
550 call device_cmult(bb_d(m), scl, n)
551
552
553 else !New vector is not linearly independent, forget about it
554 if (pe_rank .eq. 0) then
555 call neko_warning('New vector not linearly indepependent!')
556 end if
557 m = m - 1 !Remove column
558 end if
559
560 end associate
561
562 end subroutine device_proj_ortho
563
564
565 subroutine cpu_proj_ortho(this, xx, bb, w, n)
566 type(projection_t) :: this
567 integer, intent(inout) :: n
568 real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
569 real(kind=rp), dimension(n), intent(inout) :: w
570 real(kind=rp) :: nrm, scl1, scl2, c, s
571 real(kind=rp) :: alpha(this%L), beta(this%L)
572 integer :: i, j, k, l, h, ierr
573
574 associate(m => this%m)
575
576 if (m .le. 0) return !No vectors to ortho-normalize
577
578 ! AX = B
579 ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
580 call rzero(alpha, m)
581 do i = 1, n, neko_blk_size
582 j = min(neko_blk_size, n-i+1)
583 do k = 1, m !First round CGS
584 s = 0.0_rp
585 c = 0.0_rp
586 do l = 0, (j-1)
587 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
588 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
589 end do
590 alpha(k) = alpha(k) + 0.5_rp * (s + c)
591 end do
592 end do
593
594 call mpi_allreduce(mpi_in_place, alpha, this%m, &
595 mpi_real_precision, mpi_sum, neko_comm, ierr)
596
597 nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
598
599
600 do i = 1, n, neko_blk_size
601 j = min(neko_blk_size, n-i+1)
602 do k = 1, m-1
603 do l = 0, (j-1)
604 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
605 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
606 end do
607 end do
608 end do
609 call rzero(beta,m)
610
611 do i = 1, n, neko_blk_size
612 j = min(neko_blk_size, n-i+1)
613 do k = 1, m-1
614 s = 0.0_rp
615 c = 0.0_rp
616 do l = 0, (j-1)
617 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
618 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
619 end do
620 beta(k) = beta(k) + 0.5_rp * (s + c)
621 end do
622 end do
623
624 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
625 mpi_real_precision, mpi_sum, neko_comm, ierr)
626
627 alpha(m) = 0.0_rp
628
629 do i = 1, n, neko_blk_size
630 j = min(neko_blk_size,n-i+1)
631 do k = 1, m-1
632 do l = 0, (j-1)
633 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
634 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
635 end do
636 end do
637 s = 0.0_rp
638 do l = 0, (j-1)
639 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
640 end do
641 alpha(m) = alpha(m) + s
642 end do
643 do k = 1, m-1
644 alpha(k) = alpha(k) + beta(k)
645 end do
646
647 !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
648 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
649 mpi_real_precision, mpi_sum, neko_comm, ierr)
650 alpha(m) = sqrt(alpha(m))
651 !dx and db now stored in last column of xx and bb
652
653 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
654 !Normalize dx and db
655 scl1 = 1.0_rp / alpha(m)
656 do i = 0, (n - 1)
657 xx(1+i,m) = scl1 * xx(1+i,m)
658 bb(1+i,m) = scl1 * bb(1+i,m)
659 end do
660
661 else !New vector is not linearly independent, forget about it
662 k = m !location of rank deficient column
663 if (pe_rank .eq. 0) then
664 call neko_warning('New vector not linearly indepependent!')
665 end if
666 m = m - 1 !Remove column
667 end if
668
669 end associate
670
671 end subroutine cpu_proj_ortho
672
673 subroutine print_proj_info(this, string)
674 class(projection_t) :: this
675 character(len=*) :: string
676 character(len=LOG_SIZE) :: log_buf
677
678 if (this%proj_m .gt. 0) then
679 write(log_buf, '(A,A)') 'Projection ', string
680 call neko_log%message(log_buf)
681 write(log_buf, '(A,A)') 'Proj. vec.:', ' Orig. residual:'
682 call neko_log%message(log_buf)
683 write(log_buf, '(I11,3x, E15.7,5x)') this%proj_m, this%proj_res
684 call neko_log%message(log_buf)
685 end if
686
687 end subroutine print_proj_info
688
689 subroutine bcknd_clear(this, n)
690 class(projection_t) :: this
691 integer, intent(in) :: n
692 integer :: i, j
693
694 this%m = 0
695 this%proj_m = 0
696
697 do i = 1, this%L
698 if (neko_bcknd_device .eq. 1) then
699 call device_rzero(this%xx_d(i), n)
700 call device_rzero(this%xx_d(i), n)
701 else
702 do j = 1, n
703 this%xx(j,i) = 0.0_rp
704 this%bb(j,i) = 0.0_rp
705 end do
706 end if
707 end do
708
709 end subroutine bcknd_clear
710
711end module projection
Return the device pointer for an associated Fortran array.
Definition device.F90:92
Map a Fortran array to a device (allocate and associate)
Definition device.F90:68
Copy data between host and device (or device and device)
Definition device.F90:62
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
integer pe_rank
MPI rank.
Definition comm.F90:28
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:23
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:46
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:197
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:176
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:65
integer, parameter, public log_size
Definition log.f90:42
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:310
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:894
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:586
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
Build configurations.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:109
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine projection_init(this, n, l, activ_step)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string)
Implements type time_step_controller.
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:266
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:47
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55