Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
projection.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
64 use num_types, only : rp, c_rp
65 use math, only : rzero, glsc3, add2, copy, cmult
66 use coefs, only : coef_t
67 use ax_product, only : ax_t
68 use bc_list, only : bc_list_t
69 use gather_scatter, only : gs_t, gs_op_add
79 use logger, only : log_size, neko_log
80 use utils, only : neko_warning
81 use bc_list, only : bc_list_t
84 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
85 use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
87 implicit none
88 private
89 public :: proj_ortho
90
91 type, public :: projection_t
92 real(kind=rp), allocatable :: xx(:,:)
93 real(kind=rp), allocatable :: bb(:,:)
94 real(kind=rp), allocatable :: xbar(:)
95 type(c_ptr), allocatable :: xx_d(:)
96 type(c_ptr), allocatable :: bb_d(:)
97 type(c_ptr) :: xbar_d = c_null_ptr
98 type(c_ptr) :: alpha_d = c_null_ptr
99 type(c_ptr) :: xx_d_d = c_null_ptr
100 type(c_ptr) :: bb_d_d = c_null_ptr
101 integer :: m, l
102 real(kind=rp) :: tol = 1e-7_rp
103 !logging variables
104 real(kind=rp) :: proj_res
105 integer :: proj_m = 0
106 integer :: activ_step ! steps to activate projection
107 contains
108 procedure, pass(this) :: clear => bcknd_clear
109 procedure, pass(this) :: project_on => bcknd_project_on
110 procedure, pass(this) :: project_back => bcknd_project_back
111 procedure, pass(this) :: log_info => print_proj_info
112 procedure, pass(this) :: init => projection_init
113 procedure, pass(this) :: free => projection_free
114 procedure, pass(this) :: pre_solving => projection_pre_solving
115 procedure, pass(this) :: post_solving => projection_post_solving
116 end type projection_t
117
118contains
119
120 subroutine projection_init(this, n, L, activ_step)
121 class(projection_t), target, intent(inout) :: this
122 integer, intent(in) :: n
123 integer, intent(in) :: L
124 integer, optional, intent(in) :: activ_step
125 integer :: i
126 integer(c_size_t) :: ptr_size
127 type(c_ptr) :: ptr
128 real(c_rp) :: dummy
129
130 call this%free()
131
132 this%L = l
133
134 if (present(activ_step)) then
135 this%activ_step = activ_step
136 else
137 this%activ_step = 5
138 end if
139
140 this%m = 0
141
142 ! Return if the space is 0, to avoid allocating zero sized
143 ! arrays, which are not supported for all backends
144 if (this%L .le. 0) then
145 return
146 end if
147
148 allocate(this%xx(n, this%L))
149 allocate(this%bb(n, this%L))
150 allocate(this%xbar(n))
151 call rzero(this%xbar, n)
152 do i = 1, this%L
153 call rzero(this%xx(1, i), n)
154 call rzero(this%bb(1, i), n)
155 end do
156 if (neko_bcknd_device .eq. 1) then
157
158 allocate(this%xx_d(this%L))
159 allocate(this%bb_d(this%L))
160
161 call device_map(this%xbar, this%xbar_d, n)
162 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
163
164 call device_rzero(this%xbar_d, n)
165 call device_rzero(this%alpha_d, this%L)
166
167 do i = 1, this%L
168 this%xx_d(i) = c_null_ptr
169 call device_map(this%xx(:, i), this%xx_d(i), n)
170 call device_rzero(this%xx_d(i), n)
171 this%bb_d(i) = c_null_ptr
172 call device_map(this%bb(:, i), this%bb_d(i), n)
173 call device_rzero(this%bb_d(i), n)
174 end do
175
176 ptr_size = c_sizeof(c_null_ptr) * this%L
177 call device_alloc(this%xx_d_d, ptr_size)
178 ptr = c_loc(this%xx_d)
179 call device_memcpy(ptr, this%xx_d_d, ptr_size, &
180 host_to_device, sync = .false.)
181 call device_alloc(this%bb_d_d, ptr_size)
182 ptr = c_loc(this%bb_d)
183 call device_memcpy(ptr, this%bb_d_d, ptr_size, &
184 host_to_device, sync = .false.)
185 end if
186
187
188 end subroutine projection_init
189
190 subroutine projection_free(this)
191 class(projection_t), intent(inout) :: this
192 integer :: i
193 if (allocated(this%xx)) then
194 deallocate(this%xx)
195 end if
196 if (allocated(this%bb)) then
197 deallocate(this%bb)
198 end if
199 if (allocated(this%xbar)) then
200 deallocate(this%xbar)
201 end if
202 if (allocated(this%xx_d)) then
203 do i = 1, this%L
204 if (c_associated(this%xx_d(i))) then
205 call device_free(this%xx_d(i))
206 end if
207 end do
208 deallocate(this%xx_d)
209 end if
210 if (c_associated(this%xx_d_d)) then
211 call device_free(this%xx_d_d)
212 end if
213 if (c_associated(this%xbar_d)) then
214 call device_free(this%xbar_d)
215 end if
216 if (c_associated(this%alpha_d)) then
217 call device_free(this%alpha_d)
218 end if
219 if (allocated(this%bb_d)) then
220 do i = 1, this%L
221 if (c_associated(this%bb_d(i))) then
222 call device_free(this%bb_d(i))
223 end if
224 end do
225 deallocate(this%bb_d)
226 end if
227 if (c_associated(this%bb_d_d)) then
228 call device_free(this%bb_d_d)
229 end if
230
231 end subroutine projection_free
232
233 subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, &
234 string)
235 class(projection_t), intent(inout) :: this
236 integer, intent(inout) :: n
237 real(kind=rp), intent(inout), dimension(n) :: b
238 integer, intent(in) :: tstep
239 class(coef_t), intent(inout) :: coef
240 type(time_step_controller_t), intent(in) :: dt_controller
241 character(len=*), optional :: string
242
243 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
244 if (dt_controller%is_variable_dt) then
245 ! the time step at which dt is changed
246 if (dt_controller%dt_last_change .eq. 0) then
247 call this%clear(n)
248 else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
249 ! activate projection some steps after dt is changed
250 ! note that dt_last_change start from 0
251 call this%project_on(b, coef, n)
252 if (present(string)) then
253 call this%log_info(string, tstep)
254 end if
255 end if
256 else
257 call this%project_on(b, coef, n)
258 if (present(string)) then
259 call this%log_info(string, tstep)
260 end if
261 end if
262 end if
263
264 end subroutine projection_pre_solving
265
266 subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, &
267 dt_controller)
268 class(projection_t), intent(inout) :: this
269 integer, intent(inout) :: n
270 class(ax_t), intent(inout) :: Ax
271 class(coef_t), intent(inout) :: coef
272 class(bc_list_t), intent(inout) :: bclst
273 type(gs_t), intent(inout) :: gs_h
274 real(kind=rp), intent(inout), dimension(n) :: x
275 integer, intent(in) :: tstep
276 type(time_step_controller_t), intent(in) :: dt_controller
277
278 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
279 if (.not.(dt_controller%is_variable_dt) .or. &
280 (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
281 call this%project_back(x, ax, coef, bclst, gs_h, n)
282 end if
283 end if
284
285 end subroutine projection_post_solving
286
287 subroutine bcknd_project_on(this, b, coef, n)
288 class(projection_t), intent(inout) :: this
289 integer, intent(inout) :: n
290 class(coef_t), intent(inout) :: coef
291 real(kind=rp), intent(inout), dimension(n) :: b
292 call profiler_start_region('Project on', 16)
293 if (neko_bcknd_device .eq. 1) then
294 call device_project_on(this, b, coef, n)
295 else
296 call cpu_project_on(this, b, coef, n)
297 end if
298 call profiler_end_region('Project on', 16)
299 end subroutine bcknd_project_on
300
301 subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
302 class(projection_t), intent(inout) :: this
303 integer, intent(inout) :: n
304 class(ax_t), intent(inout) :: Ax
305 class(coef_t), intent(inout) :: coef
306 class(bc_list_t), intent(inout) :: bclst
307 type(gs_t), intent(inout) :: gs_h
308 real(kind=rp), intent(inout), dimension(n) :: x
309 type(c_ptr) :: x_d
310
311 call profiler_start_region('Project back', 17)
312
313 if (neko_bcknd_device .eq. 1) then
314 x_d = device_get_ptr(x)
315 ! Restore desired solution
316 if (this%m .gt. 0) call device_add2(x_d, this%xbar_d, n)
317 if (this%m .eq. this%L) then
318 this%m = 1
319 else
320 this%m = min(this%m+1, this%L)
321 end if
322
323 call device_copy(this%xx_d(this%m), x_d, n) ! Update (X,B)
324
325 else
326 if (this%m .gt. 0) call add2(x, this%xbar, n) ! Restore desired solution
327 if (this%m .eq. this%L) then
328 this%m = 1
329 else
330 this%m = min(this%m+1, this%L)
331 end if
332
333 call copy(this%xx(1, this%m), x, n) ! Update (X,B)
334 end if
335
336 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
337 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
338 call bclst%apply_scalar(this%bb(1, this%m), n)
339
340 call proj_ortho(this, coef, n)
341 call profiler_end_region('Project back', 17)
342 end subroutine bcknd_project_back
343
344
345
346 subroutine cpu_project_on(this, b, coef, n)
347 class(projection_t), intent(inout) :: this
348 integer, intent(inout) :: n
349 class(coef_t), intent(inout) :: coef
350 real(kind=rp), intent(inout), dimension(n) :: b
351 integer :: i, j, k, l, ierr
352 real(kind=rp) :: work(this%L), alpha(this%L), s
353
354 associate(xbar => this%xbar, xx => this%xx, &
355 bb => this%bb)
356
357 if (this%m .le. 0) return
358
359 !First round of CGS
360 call rzero(alpha, this%m)
361 this%proj_res = sqrt(glsc3(b, b, coef%mult, n) / coef%volume)
362 this%proj_m = this%m
363 do i = 1, n, neko_blk_size
364 j = min(neko_blk_size, n-i+1)
365 do k = 1, this%m
366 s = 0.0_rp
367 do l = 0, (j-1)
368 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
369 end do
370 alpha(k) = alpha(k) + s
371 end do
372 end do
373
374 !First one outside loop to avoid zeroing xbar and bbar
375 call mpi_allreduce(mpi_in_place, alpha, this%m, &
376 mpi_real_precision, mpi_sum, neko_comm, ierr)
377
378 call rzero(work, this%m)
379
380 do i = 1, n, neko_blk_size
381 j = min(neko_blk_size, n-i+1)
382 do l = 0, (j-1)
383 xbar(i+l) = alpha(1) * xx(i+l,1)
384 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
385 end do
386 do k = 2, this%m
387 do l = 0, (j-1)
388 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
389 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
390 end do
391 end do
392 !Second round of CGS
393 do k = 1, this%m
394 s = 0.0_rp
395 do l = 0, (j-1)
396 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
397 end do
398 work(k) = work(k) + s
399 end do
400 end do
401
402 call mpi_allreduce(work, alpha, this%m, &
403 mpi_real_precision, mpi_sum, neko_comm, ierr)
404
405 do i = 1, n, neko_blk_size
406 j = min(neko_blk_size, n-i+1)
407 do k = 1, this%m
408 do l = 0, (j-1)
409 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
410 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
411 end do
412 end do
413 end do
414 end associate
415 end subroutine cpu_project_on
416
417 subroutine device_project_on(this, b, coef, n)
418 class(projection_t), intent(inout) :: this
419 integer, intent(inout) :: n
420 class(coef_t), intent(inout) :: coef
421 real(kind=rp), intent(inout), dimension(n) :: b
422 real(kind=rp) :: alpha(this%L)
423 type(c_ptr) :: b_d
424 integer :: i
425 b_d = device_get_ptr(b)
426
427 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
428 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
429
430 if (this%m .le. 0) return
431
432
433
434 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
435 this%proj_m = this%m
436 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
437 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
438 coef%mult_d, xbar_d, this%m, n)
439 else
440 if (neko_bcknd_opencl .eq. 1) then
441 do i = 1, this%m
442 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
443 end do
444 else
445 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
446 call device_memcpy(alpha, alpha_d, this%m, &
447 host_to_device, sync = .false.)
448 end if
449 call device_rzero(xbar_d, n)
450 if (neko_bcknd_opencl .eq. 1) then
451 do i = 1, this%m
452 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
453 end do
454 call cmult(alpha, -1.0_rp, this%m)
455 else
456 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
457 call device_cmult(alpha_d, -1.0_rp, this%m)
458 end if
459
460 if (neko_bcknd_opencl .eq. 1) then
461 do i = 1, this%m
462 call device_add2s2(b_d, bb_d(i), alpha(i), n)
463 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
464 end do
465 else
466 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
467 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
468 call device_memcpy(alpha, alpha_d, this%m, &
469 host_to_device, sync = .false.)
470 end if
471
472 if (neko_bcknd_opencl .eq. 1) then
473 do i = 1, this%m
474 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
475 call cmult(alpha, -1.0_rp, this%m)
476 call device_add2s2(b_d, bb_d(i), alpha(i), n)
477 end do
478 else
479 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
480 call device_cmult(alpha_d, -1.0_rp, this%m)
481 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
482 end if
483 end if
484
485 end associate
486 end subroutine device_project_on
487
488 !Choose between CPU or device for proj_ortho
489 subroutine proj_ortho(this, coef, n)
490 class(projection_t), intent(inout) :: this
491 type(coef_t), intent(in) :: coef
492 integer, intent(in) :: n
493
494 if (neko_bcknd_device .eq. 1) then
495 call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
496 else
497 call cpu_proj_ortho (this, this%xx, this%bb, coef%mult, n)
498 end if
499 end subroutine proj_ortho
500
501 !This is a lot more primitive than on the CPU
502 subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
503 type(projection_t), intent(inout) :: this
504 integer, intent(in) :: n
505 type(c_ptr), dimension(this%L) :: xx_d, bb_d
506 type(c_ptr), intent(in) :: w_d
507 real(kind=rp) :: nrm, scl
508 real(kind=rp) :: alpha(this%L)
509 integer :: i
510
511 associate(m => this%m, xx_d_d => this%xx_d_d, &
512 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
513
514 if (m .le. 0) return
515
516 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
517 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
518 w_d, xx_d(m), this%m, n, nrm)
519 else
520 if (neko_bcknd_opencl .eq. 1)then
521 do i = 1, m
522 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
523 end do
524 else
525 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
526 end if
527 nrm = sqrt(alpha(m))
528 call cmult(alpha, -1.0_rp,m)
529 if (neko_bcknd_opencl .eq. 1)then
530 do i = 1, m - 1
531 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
532 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
533
534 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
535 end do
536 else
537 call device_memcpy(alpha, alpha_d, this%m, &
538 host_to_device, sync = .false.)
539 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
540 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
541
542 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
543 end if
544 call cmult(alpha, -1.0_rp,m)
545 if (neko_bcknd_opencl .eq. 1)then
546 do i = 1, m - 1
547 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
548 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
549 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
550 end do
551 else
552 call device_memcpy(alpha, alpha_d, m, &
553 host_to_device, sync = .false.)
554 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
555 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
556 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
557 end if
558 end if
559
560 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
561 alpha(m) = sqrt(alpha(m))
562
563 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
564 scl = 1.0_rp / alpha(m)
565 call device_cmult(xx_d(m), scl, n)
566 call device_cmult(bb_d(m), scl, n)
567
568
569 else !New vector is not linearly independent, forget about it
570 if (pe_rank .eq. 0) then
571 call neko_warning('New vector not linearly indepependent!')
572 end if
573 m = m - 1 !Remove column
574 end if
575
576 end associate
577
578 end subroutine device_proj_ortho
579
580
581 subroutine cpu_proj_ortho(this, xx, bb, w, n)
582 type(projection_t), intent(inout) :: this
583 integer, intent(in) :: n
584 real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
585 real(kind=rp), dimension(n), intent(in) :: w
586 real(kind=rp) :: nrm, scl1, scl2, c, s
587 real(kind=rp) :: alpha(this%L), beta(this%L)
588 integer :: i, j, k, l, h, ierr
589
590 associate(m => this%m)
591
592 if (m .le. 0) return !No vectors to ortho-normalize
593
594 ! AX = B
595 ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
596 call rzero(alpha, m)
597 do i = 1, n, neko_blk_size
598 j = min(neko_blk_size, n-i+1)
599 do k = 1, m !First round CGS
600 s = 0.0_rp
601 c = 0.0_rp
602 do l = 0, (j-1)
603 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
604 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
605 end do
606 alpha(k) = alpha(k) + 0.5_rp * (s + c)
607 end do
608 end do
609
610 call mpi_allreduce(mpi_in_place, alpha, this%m, &
611 mpi_real_precision, mpi_sum, neko_comm, ierr)
612
613 nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
614
615
616 do i = 1, n, neko_blk_size
617 j = min(neko_blk_size, n-i+1)
618 do k = 1, m-1
619 do l = 0, (j-1)
620 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
621 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
622 end do
623 end do
624 end do
625 call rzero(beta,m)
626
627 do i = 1, n, neko_blk_size
628 j = min(neko_blk_size, n-i+1)
629 do k = 1, m-1
630 s = 0.0_rp
631 c = 0.0_rp
632 do l = 0, (j-1)
633 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
634 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
635 end do
636 beta(k) = beta(k) + 0.5_rp * (s + c)
637 end do
638 end do
639
640 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
641 mpi_real_precision, mpi_sum, neko_comm, ierr)
642
643 alpha(m) = 0.0_rp
644
645 do i = 1, n, neko_blk_size
646 j = min(neko_blk_size,n-i+1)
647 do k = 1, m-1
648 do l = 0, (j-1)
649 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
650 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
651 end do
652 end do
653 s = 0.0_rp
654 do l = 0, (j-1)
655 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
656 end do
657 alpha(m) = alpha(m) + s
658 end do
659 do k = 1, m-1
660 alpha(k) = alpha(k) + beta(k)
661 end do
662
663 !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
664 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
665 mpi_real_precision, mpi_sum, neko_comm, ierr)
666 alpha(m) = sqrt(alpha(m))
667 !dx and db now stored in last column of xx and bb
668
669 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
670 !Normalize dx and db
671 scl1 = 1.0_rp / alpha(m)
672 do i = 0, (n - 1)
673 xx(1+i,m) = scl1 * xx(1+i,m)
674 bb(1+i,m) = scl1 * bb(1+i,m)
675 end do
676
677 else !New vector is not linearly independent, forget about it
678 k = m !location of rank deficient column
679 if (pe_rank .eq. 0) then
680 call neko_warning('New vector not linearly indepependent!')
681 end if
682 m = m - 1 !Remove column
683 end if
684
685 end associate
686
687 end subroutine cpu_proj_ortho
688
689 subroutine print_proj_info(this, string, tstep)
690 class(projection_t), intent(in) :: this
691 character(len=*), intent(in) :: string
692 integer, intent(in) :: tstep
693 character(len=LOG_SIZE) :: log_buf
694 character(len=12) :: tstep_str
695
696 if (this%proj_m .gt. 0) then
697 write(tstep_str, '(I12)') tstep
698 write(log_buf, '(A12,A14,1X,A8,A10,1X,I3,A16,1X,E10.4)') &
699 adjustl(tstep_str), ' | Projection:', string, &
700 ', Vectors:', this%proj_m, &
701 ', Original res.:', this%proj_res
702 call neko_log%message(log_buf)
703 call neko_log%newline()
704 end if
705
706 end subroutine print_proj_info
707
708 subroutine bcknd_clear(this, n)
709 class(projection_t), intent(inout) :: this
710 integer, intent(in) :: n
711 integer :: i, j
712
713 this%m = 0
714 this%proj_m = 0
715
716 do i = 1, this%L
717 if (neko_bcknd_device .eq. 1) then
718 call device_rzero(this%xx_d(i), n)
719 call device_rzero(this%xx_d(i), n)
720 else
721 do j = 1, n
722 this%xx(j,i) = 0.0_rp
723 this%bb(j,i) = 0.0_rp
724 end do
725 end if
726 end do
727
728 end subroutine bcknd_clear
729
730end module projection
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
integer, public pe_rank
MPI rank.
Definition comm.F90:56
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:76
integer, parameter, public log_size
Definition log.f90:46
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1067
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Build configurations.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:107
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine, public proj_ortho(this, coef, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine projection_init(this, n, l, activ_step)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string, tstep)
Implements type time_step_controller.
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56