Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
projection.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
64 use num_types, only : rp, c_rp
65 use math, only : rzero, glsc3, add2, copy, cmult
66 use coefs, only : coef_t
67 use ax_product, only : ax_t
68 use bc_list, only : bc_list_t
69 use gather_scatter, only : gs_t, gs_op_add
79 use logger, only : log_size, neko_log
80 use utils, only : neko_warning
81 use bc_list, only : bc_list_t
84 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
85 use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
87 implicit none
88 private
89 public :: proj_ortho
90
91 type, public :: projection_t
92 real(kind=rp), allocatable :: xx(:,:)
93 real(kind=rp), allocatable :: bb(:,:)
94 real(kind=rp), allocatable :: xbar(:)
95 type(c_ptr), allocatable :: xx_d(:)
96 type(c_ptr), allocatable :: bb_d(:)
97 type(c_ptr) :: xbar_d = c_null_ptr
98 type(c_ptr) :: alpha_d = c_null_ptr
99 type(c_ptr) :: xx_d_d = c_null_ptr
100 type(c_ptr) :: bb_d_d = c_null_ptr
101 integer :: m, l
102 real(kind=rp) :: tol = 1e-7_rp
103 !logging variables
104 real(kind=rp) :: proj_res
105 integer :: proj_m = 0
106 integer :: activ_step ! steps to activate projection
107 contains
108 procedure, pass(this) :: clear => bcknd_clear
109 procedure, pass(this) :: project_on => bcknd_project_on
110 procedure, pass(this) :: project_back => bcknd_project_back
111 procedure, pass(this) :: log_info => print_proj_info
112 procedure, pass(this) :: init => projection_init
113 procedure, pass(this) :: free => projection_free
114 procedure, pass(this) :: pre_solving => projection_pre_solving
115 procedure, pass(this) :: post_solving => projection_post_solving
116 end type projection_t
117
118contains
119
120 subroutine projection_init(this, n, L, activ_step)
121 class(projection_t), target, intent(inout) :: this
122 integer, intent(in) :: n
123 integer, intent(in) :: L
124 integer, optional, intent(in) :: activ_step
125 integer :: i
126 integer(c_size_t) :: ptr_size
127 type(c_ptr) :: ptr
128 real(c_rp) :: dummy
129
130 call this%free()
131
132 this%L = l
133
134 if (present(activ_step)) then
135 this%activ_step = activ_step
136 else
137 this%activ_step = 5
138 end if
139
140 this%m = 0
141
142 ! Return if the space is 0, to avoid allocating zero sized
143 ! arrays, which are not supported for all backends
144 if (this%L .le. 0) then
145 return
146 end if
147
148 allocate(this%xx(n, this%L))
149 allocate(this%bb(n, this%L))
150 allocate(this%xbar(n))
151 allocate(this%xx_d(this%L))
152 allocate(this%bb_d(this%L))
153 call rzero(this%xbar, n)
154 do i = 1, this%L
155 call rzero(this%xx(1, i), n)
156 call rzero(this%bb(1, i), n)
157 end do
158 if (neko_bcknd_device .eq. 1) then
159
160 call device_map(this%xbar, this%xbar_d, n)
161 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
162
163 call device_rzero(this%xbar_d, n)
164 call device_rzero(this%alpha_d, this%L)
165
166 do i = 1, this%L
167 this%xx_d(i) = c_null_ptr
168 call device_map(this%xx(:, i), this%xx_d(i), n)
169 call device_rzero(this%xx_d(i), n)
170 this%bb_d(i) = c_null_ptr
171 call device_map(this%bb(:, i), this%bb_d(i), n)
172 call device_rzero(this%bb_d(i), n)
173 end do
174
175 ptr_size = c_sizeof(c_null_ptr) * this%L
176 call device_alloc(this%xx_d_d, ptr_size)
177 ptr = c_loc(this%xx_d)
178 call device_memcpy(ptr, this%xx_d_d, ptr_size, &
179 host_to_device, sync = .false.)
180 call device_alloc(this%bb_d_d, ptr_size)
181 ptr = c_loc(this%bb_d)
182 call device_memcpy(ptr, this%bb_d_d, ptr_size, &
183 host_to_device, sync = .false.)
184 end if
185
186
187 end subroutine projection_init
188
189 subroutine projection_free(this)
190 class(projection_t), intent(inout) :: this
191 integer :: i
192 if (allocated(this%xx)) then
193 deallocate(this%xx)
194 end if
195 if (allocated(this%bb)) then
196 deallocate(this%bb)
197 end if
198 if (allocated(this%xbar)) then
199 deallocate(this%xbar)
200 end if
201 if (allocated(this%xx_d)) then
202 do i = 1, this%L
203 if (c_associated(this%xx_d(i))) then
204 call device_free(this%xx_d(i))
205 end if
206 end do
207 deallocate(this%xx_d)
208 end if
209 if (c_associated(this%xx_d_d)) then
210 call device_free(this%xx_d_d)
211 end if
212 if (c_associated(this%xbar_d)) then
213 call device_free(this%xbar_d)
214 end if
215 if (c_associated(this%alpha_d)) then
216 call device_free(this%alpha_d)
217 end if
218 if (allocated(this%bb_d)) then
219 do i = 1, this%L
220 if (c_associated(this%bb_d(i))) then
221 call device_free(this%bb_d(i))
222 end if
223 end do
224 deallocate(this%bb_d)
225 end if
226 if (c_associated(this%bb_d_d)) then
227 call device_free(this%bb_d_d)
228 end if
229
230 end subroutine projection_free
231
232 subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, &
233 string)
234 class(projection_t), intent(inout) :: this
235 integer, intent(inout) :: n
236 real(kind=rp), intent(inout), dimension(n) :: b
237 integer, intent(in) :: tstep
238 class(coef_t), intent(inout) :: coef
239 type(time_step_controller_t), intent(in) :: dt_controller
240 character(len=*), optional :: string
241
242 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
243 if (dt_controller%is_variable_dt) then
244 ! the time step at which dt is changed
245 if (dt_controller%dt_last_change .eq. 0) then
246 call this%clear(n)
247 else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
248 ! activate projection some steps after dt is changed
249 ! note that dt_last_change start from 0
250 call this%project_on(b, coef, n)
251 if (present(string)) then
252 call this%log_info(string, tstep)
253 end if
254 end if
255 else
256 call this%project_on(b, coef, n)
257 if (present(string)) then
258 call this%log_info(string, tstep)
259 end if
260 end if
261 end if
262
263 end subroutine projection_pre_solving
264
265 subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, &
266 dt_controller)
267 class(projection_t), intent(inout) :: this
268 integer, intent(inout) :: n
269 class(ax_t), intent(inout) :: Ax
270 class(coef_t), intent(inout) :: coef
271 class(bc_list_t), intent(inout) :: bclst
272 type(gs_t), intent(inout) :: gs_h
273 real(kind=rp), intent(inout), dimension(n) :: x
274 integer, intent(in) :: tstep
275 type(time_step_controller_t), intent(in) :: dt_controller
276
277 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
278 if (.not.(dt_controller%is_variable_dt) .or. &
279 (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
280 call this%project_back(x, ax, coef, bclst, gs_h, n)
281 end if
282 end if
283
284 end subroutine projection_post_solving
285
286 subroutine bcknd_project_on(this, b, coef, n)
287 class(projection_t), intent(inout) :: this
288 integer, intent(inout) :: n
289 class(coef_t), intent(inout) :: coef
290 real(kind=rp), intent(inout), dimension(n) :: b
291 call profiler_start_region('Project on', 16)
292 if (neko_bcknd_device .eq. 1) then
293 call device_project_on(this, b, coef, n)
294 else
295 call cpu_project_on(this, b, coef, n)
296 end if
297 call profiler_end_region('Project on', 16)
298 end subroutine bcknd_project_on
299
300 subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
301 class(projection_t), intent(inout) :: this
302 integer, intent(inout) :: n
303 class(ax_t), intent(inout) :: Ax
304 class(coef_t), intent(inout) :: coef
305 class(bc_list_t), intent(inout) :: bclst
306 type(gs_t), intent(inout) :: gs_h
307 real(kind=rp), intent(inout), dimension(n) :: x
308 type(c_ptr) :: x_d
309
310 call profiler_start_region('Project back', 17)
311
312 if (neko_bcknd_device .eq. 1) then
313 x_d = device_get_ptr(x)
314 ! Restore desired solution
315 if (this%m .gt. 0) call device_add2(x_d, this%xbar_d, n)
316 if (this%m .eq. this%L) then
317 this%m = 1
318 else
319 this%m = min(this%m+1, this%L)
320 end if
321
322 call device_copy(this%xx_d(this%m), x_d, n) ! Update (X,B)
323
324 else
325 if (this%m .gt. 0) call add2(x, this%xbar, n) ! Restore desired solution
326 if (this%m .eq. this%L) then
327 this%m = 1
328 else
329 this%m = min(this%m+1, this%L)
330 end if
331
332 call copy(this%xx(1, this%m), x, n) ! Update (X,B)
333 end if
334
335 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
336 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
337 call bclst%apply_scalar(this%bb(1, this%m), n)
338
339 call proj_ortho(this, coef, n)
340 call profiler_end_region('Project back', 17)
341 end subroutine bcknd_project_back
342
343
344
345 subroutine cpu_project_on(this, b, coef, n)
346 class(projection_t), intent(inout) :: this
347 integer, intent(inout) :: n
348 class(coef_t), intent(inout) :: coef
349 real(kind=rp), intent(inout), dimension(n) :: b
350 integer :: i, j, k, l, ierr
351 real(kind=rp) :: work(this%L), alpha(this%L), s
352
353 associate(xbar => this%xbar, xx => this%xx, &
354 bb => this%bb)
355
356 if (this%m .le. 0) return
357
358 !First round of CGS
359 call rzero(alpha, this%m)
360 this%proj_res = sqrt(glsc3(b, b, coef%mult, n) / coef%volume)
361 this%proj_m = this%m
362 do i = 1, n, neko_blk_size
363 j = min(neko_blk_size, n-i+1)
364 do k = 1, this%m
365 s = 0.0_rp
366 do l = 0, (j-1)
367 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
368 end do
369 alpha(k) = alpha(k) + s
370 end do
371 end do
372
373 !First one outside loop to avoid zeroing xbar and bbar
374 call mpi_allreduce(mpi_in_place, alpha, this%m, &
375 mpi_real_precision, mpi_sum, neko_comm, ierr)
376
377 call rzero(work, this%m)
378
379 do i = 1, n, neko_blk_size
380 j = min(neko_blk_size, n-i+1)
381 do l = 0, (j-1)
382 xbar(i+l) = alpha(1) * xx(i+l,1)
383 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
384 end do
385 do k = 2, this%m
386 do l = 0, (j-1)
387 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
388 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
389 end do
390 end do
391 !Second round of CGS
392 do k = 1, this%m
393 s = 0.0_rp
394 do l = 0, (j-1)
395 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
396 end do
397 work(k) = work(k) + s
398 end do
399 end do
400
401 call mpi_allreduce(work, alpha, this%m, &
402 mpi_real_precision, mpi_sum, neko_comm, ierr)
403
404 do i = 1, n, neko_blk_size
405 j = min(neko_blk_size, n-i+1)
406 do k = 1, this%m
407 do l = 0, (j-1)
408 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
409 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
410 end do
411 end do
412 end do
413 end associate
414 end subroutine cpu_project_on
415
416 subroutine device_project_on(this, b, coef, n)
417 class(projection_t), intent(inout) :: this
418 integer, intent(inout) :: n
419 class(coef_t), intent(inout) :: coef
420 real(kind=rp), intent(inout), dimension(n) :: b
421 real(kind=rp) :: alpha(this%L)
422 type(c_ptr) :: b_d
423 integer :: i
424 b_d = device_get_ptr(b)
425
426 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
427 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
428
429 if (this%m .le. 0) return
430
431
432
433 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
434 this%proj_m = this%m
435 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
436 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
437 coef%mult_d, xbar_d, this%m, n)
438 else
439 if (neko_bcknd_opencl .eq. 1) then
440 do i = 1, this%m
441 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
442 end do
443 else
444 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
445 call device_memcpy(alpha, alpha_d, this%m, &
446 host_to_device, sync = .false.)
447 end if
448 call device_rzero(xbar_d, n)
449 if (neko_bcknd_opencl .eq. 1) then
450 do i = 1, this%m
451 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
452 end do
453 call cmult(alpha, -1.0_rp, this%m)
454 else
455 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
456 call device_cmult(alpha_d, -1.0_rp, this%m)
457 end if
458
459 if (neko_bcknd_opencl .eq. 1) then
460 do i = 1, this%m
461 call device_add2s2(b_d, bb_d(i), alpha(i), n)
462 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
463 end do
464 else
465 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
466 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
467 call device_memcpy(alpha, alpha_d, this%m, &
468 host_to_device, sync = .false.)
469 end if
470
471 if (neko_bcknd_opencl .eq. 1) then
472 do i = 1, this%m
473 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
474 call cmult(alpha, -1.0_rp, this%m)
475 call device_add2s2(b_d, bb_d(i), alpha(i), n)
476 end do
477 else
478 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
479 call device_cmult(alpha_d, -1.0_rp, this%m)
480 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
481 end if
482 end if
483
484 end associate
485 end subroutine device_project_on
486
487 !Choose between CPU or device for proj_ortho
488 subroutine proj_ortho(this, coef, n)
489 class(projection_t), intent(inout) :: this
490 type(coef_t), intent(in) :: coef
491 integer, intent(in) :: n
492
493 if (neko_bcknd_device .eq. 1) then
494 call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
495 else
496 call cpu_proj_ortho (this, this%xx, this%bb, coef%mult, n)
497 end if
498 end subroutine proj_ortho
499
500 !This is a lot more primitive than on the CPU
501 subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
502 type(projection_t), intent(inout) :: this
503 integer, intent(in) :: n
504 type(c_ptr), dimension(this%L) :: xx_d, bb_d
505 type(c_ptr), intent(in) :: w_d
506 real(kind=rp) :: nrm, scl
507 real(kind=rp) :: alpha(this%L)
508 integer :: i
509
510 associate(m => this%m, xx_d_d => this%xx_d_d, &
511 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
512
513 if (m .le. 0) return
514
515 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
516 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
517 w_d, xx_d(m), this%m, n, nrm)
518 else
519 if (neko_bcknd_opencl .eq. 1)then
520 do i = 1, m
521 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
522 end do
523 else
524 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
525 end if
526 nrm = sqrt(alpha(m))
527 call cmult(alpha, -1.0_rp,m)
528 if (neko_bcknd_opencl .eq. 1)then
529 do i = 1, m - 1
530 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
531 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
532
533 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
534 end do
535 else
536 call device_memcpy(alpha, alpha_d, this%m, &
537 host_to_device, sync = .false.)
538 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
539 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
540
541 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
542 end if
543 call cmult(alpha, -1.0_rp,m)
544 if (neko_bcknd_opencl .eq. 1)then
545 do i = 1, m - 1
546 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
547 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
548 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
549 end do
550 else
551 call device_memcpy(alpha, alpha_d, m, &
552 host_to_device, sync = .false.)
553 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
554 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
555 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
556 end if
557 end if
558
559 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
560 alpha(m) = sqrt(alpha(m))
561
562 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
563 scl = 1.0_rp / alpha(m)
564 call device_cmult(xx_d(m), scl, n)
565 call device_cmult(bb_d(m), scl, n)
566
567
568 else !New vector is not linearly independent, forget about it
569 if (pe_rank .eq. 0) then
570 call neko_warning('New vector not linearly indepependent!')
571 end if
572 m = m - 1 !Remove column
573 end if
574
575 end associate
576
577 end subroutine device_proj_ortho
578
579
580 subroutine cpu_proj_ortho(this, xx, bb, w, n)
581 type(projection_t), intent(inout) :: this
582 integer, intent(in) :: n
583 real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
584 real(kind=rp), dimension(n), intent(in) :: w
585 real(kind=rp) :: nrm, scl1, scl2, c, s
586 real(kind=rp) :: alpha(this%L), beta(this%L)
587 integer :: i, j, k, l, h, ierr
588
589 associate(m => this%m)
590
591 if (m .le. 0) return !No vectors to ortho-normalize
592
593 ! AX = B
594 ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
595 call rzero(alpha, m)
596 do i = 1, n, neko_blk_size
597 j = min(neko_blk_size, n-i+1)
598 do k = 1, m !First round CGS
599 s = 0.0_rp
600 c = 0.0_rp
601 do l = 0, (j-1)
602 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
603 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
604 end do
605 alpha(k) = alpha(k) + 0.5_rp * (s + c)
606 end do
607 end do
608
609 call mpi_allreduce(mpi_in_place, alpha, this%m, &
610 mpi_real_precision, mpi_sum, neko_comm, ierr)
611
612 nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
613
614
615 do i = 1, n, neko_blk_size
616 j = min(neko_blk_size, n-i+1)
617 do k = 1, m-1
618 do l = 0, (j-1)
619 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
620 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
621 end do
622 end do
623 end do
624 call rzero(beta,m)
625
626 do i = 1, n, neko_blk_size
627 j = min(neko_blk_size, n-i+1)
628 do k = 1, m-1
629 s = 0.0_rp
630 c = 0.0_rp
631 do l = 0, (j-1)
632 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
633 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
634 end do
635 beta(k) = beta(k) + 0.5_rp * (s + c)
636 end do
637 end do
638
639 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
640 mpi_real_precision, mpi_sum, neko_comm, ierr)
641
642 alpha(m) = 0.0_rp
643
644 do i = 1, n, neko_blk_size
645 j = min(neko_blk_size,n-i+1)
646 do k = 1, m-1
647 do l = 0, (j-1)
648 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
649 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
650 end do
651 end do
652 s = 0.0_rp
653 do l = 0, (j-1)
654 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
655 end do
656 alpha(m) = alpha(m) + s
657 end do
658 do k = 1, m-1
659 alpha(k) = alpha(k) + beta(k)
660 end do
661
662 !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
663 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
664 mpi_real_precision, mpi_sum, neko_comm, ierr)
665 alpha(m) = sqrt(alpha(m))
666 !dx and db now stored in last column of xx and bb
667
668 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
669 !Normalize dx and db
670 scl1 = 1.0_rp / alpha(m)
671 do i = 0, (n - 1)
672 xx(1+i,m) = scl1 * xx(1+i,m)
673 bb(1+i,m) = scl1 * bb(1+i,m)
674 end do
675
676 else !New vector is not linearly independent, forget about it
677 k = m !location of rank deficient column
678 if (pe_rank .eq. 0) then
679 call neko_warning('New vector not linearly indepependent!')
680 end if
681 m = m - 1 !Remove column
682 end if
683
684 end associate
685
686 end subroutine cpu_proj_ortho
687
688 subroutine print_proj_info(this, string, tstep)
689 class(projection_t), intent(in) :: this
690 character(len=*), intent(in) :: string
691 integer, intent(in) :: tstep
692 character(len=LOG_SIZE) :: log_buf
693 character(len=12) :: tstep_str
694
695 if (this%proj_m .gt. 0) then
696 write(tstep_str, '(I12)') tstep
697 write(log_buf, '(A12,A14,1X,A8,A10,1X,I3,A16,1X,E10.4)') &
698 adjustl(tstep_str), ' | Projection:', string, &
699 ', Vectors:', this%proj_m, &
700 ', Original res.:', this%proj_res
701 call neko_log%message(log_buf)
702 call neko_log%newline()
703 end if
704
705 end subroutine print_proj_info
706
707 subroutine bcknd_clear(this, n)
708 class(projection_t), intent(inout) :: this
709 integer, intent(in) :: n
710 integer :: i, j
711
712 this%m = 0
713 this%proj_m = 0
714
715 do i = 1, this%L
716 if (neko_bcknd_device .eq. 1) then
717 call device_rzero(this%xx_d(i), n)
718 call device_rzero(this%xx_d(i), n)
719 else
720 do j = 1, n
721 this%xx(j,i) = 0.0_rp
722 this%bb(j,i) = 0.0_rp
723 end do
724 end if
725 end do
726
727 end subroutine bcknd_clear
728
729end module projection
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
integer, public pe_rank
MPI rank.
Definition comm.F90:56
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:76
integer, parameter, public log_size
Definition log.f90:46
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1067
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Build configurations.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:107
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine, public proj_ortho(this, coef, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine projection_init(this, n, l, activ_step)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string, tstep)
Implements type time_step_controller.
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56