Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
projection.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
64 use num_types, only : rp, c_rp
65 use math, only : rzero, glsc3, add2, copy, cmult
66 use coefs, only : coef_t
67 use ax_product, only : ax_t
68 use bc_list, only : bc_list_t
69 use gather_scatter, only : gs_t, gs_op_add
79 use logger, only : log_size, neko_log
80 use utils, only : neko_warning
81 use bc_list, only : bc_list_t
84 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
85 use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
87 implicit none
88 private
89 public :: proj_ortho
90
91 type, public :: projection_t
92 real(kind=rp), allocatable :: xx(:,:)
93 real(kind=rp), allocatable :: bb(:,:)
94 real(kind=rp), allocatable :: xbar(:)
95 type(c_ptr), allocatable :: xx_d(:)
96 type(c_ptr), allocatable :: bb_d(:)
97 type(c_ptr) :: xbar_d = c_null_ptr
98 type(c_ptr) :: alpha_d = c_null_ptr
99 type(c_ptr) :: xx_d_d = c_null_ptr
100 type(c_ptr) :: bb_d_d = c_null_ptr
101 integer :: m, l
102 real(kind=rp) :: tol = 1e-7_rp
103 !logging variables
104 real(kind=rp) :: proj_res
105 integer :: proj_m = 0
106 integer :: activ_step ! steps to activate projection
107 contains
108 procedure, pass(this) :: clear => bcknd_clear
109 procedure, pass(this) :: project_on => bcknd_project_on
110 procedure, pass(this) :: project_back => bcknd_project_back
111 procedure, pass(this) :: log_info => print_proj_info
112 procedure, pass(this) :: init => projection_init
113 procedure, pass(this) :: free => projection_free
114 procedure, pass(this) :: pre_solving => projection_pre_solving
115 procedure, pass(this) :: post_solving => projection_post_solving
116 end type projection_t
117
118contains
119
120 subroutine projection_init(this, n, L, activ_step)
121 class(projection_t), target, intent(inout) :: this
122 integer, intent(in) :: n
123 integer, intent(in) :: L
124 integer, optional, intent(in) :: activ_step
125 integer :: i
126 integer(c_size_t) :: ptr_size
127 type(c_ptr) :: ptr
128 real(c_rp) :: dummy
129
130 call this%free()
131
132 this%L = l
133
134 if (present(activ_step)) then
135 this%activ_step = activ_step
136 else
137 this%activ_step = 5
138 end if
139
140 this%m = 0
141
142 ! Return if the space is 0, to avoid allocating zero sized
143 ! arrays, which are not supported for all backends
144 if (this%L .le. 0) then
145 return
146 end if
147
148 allocate(this%xx(n, this%L))
149 allocate(this%bb(n, this%L))
150 allocate(this%xbar(n))
151 allocate(this%xx_d(this%L))
152 allocate(this%bb_d(this%L))
153 call rzero(this%xbar, n)
154 do i = 1, this%L
155 call rzero(this%xx(1, i), n)
156 call rzero(this%bb(1, i), n)
157 end do
158 if (neko_bcknd_device .eq. 1) then
159
160 call device_map(this%xbar, this%xbar_d, n)
161 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
162
163 call device_rzero(this%xbar_d, n)
164 call device_rzero(this%alpha_d, this%L)
165
166 do i = 1, this%L
167 this%xx_d(i) = c_null_ptr
168 call device_map(this%xx(:, i), this%xx_d(i), n)
169 call device_rzero(this%xx_d(i), n)
170 this%bb_d(i) = c_null_ptr
171 call device_map(this%bb(:, i), this%bb_d(i), n)
172 call device_rzero(this%bb_d(i), n)
173 end do
174
175 ptr_size = c_sizeof(c_null_ptr) * this%L
176 call device_alloc(this%xx_d_d, ptr_size)
177 ptr = c_loc(this%xx_d)
178 call device_memcpy(ptr, this%xx_d_d, ptr_size, &
179 host_to_device, sync = .false.)
180 call device_alloc(this%bb_d_d, ptr_size)
181 ptr = c_loc(this%bb_d)
182 call device_memcpy(ptr, this%bb_d_d, ptr_size, &
183 host_to_device, sync = .false.)
184 end if
185
186
187 end subroutine projection_init
188
189 subroutine projection_free(this)
190 class(projection_t), intent(inout) :: this
191 integer :: i
192 if (allocated(this%xx)) then
193 deallocate(this%xx)
194 end if
195 if (allocated(this%bb)) then
196 deallocate(this%bb)
197 end if
198 if (allocated(this%xbar)) then
199 deallocate(this%xbar)
200 end if
201 if (allocated(this%xx_d)) then
202 do i = 1, this%L
203 if (c_associated(this%xx_d(i))) then
204 call device_free(this%xx_d(i))
205 end if
206 end do
207 end if
208 if (c_associated(this%xx_d_d)) then
209 call device_free(this%xx_d_d)
210 end if
211 if (c_associated(this%xbar_d)) then
212 call device_free(this%xbar_d)
213 end if
214 if (c_associated(this%alpha_d)) then
215 call device_free(this%alpha_d)
216 end if
217 if (allocated(this%bb_d)) then
218 do i = 1, this%L
219 if (c_associated(this%bb_d(i))) then
220 call device_free(this%bb_d(i))
221 end if
222 end do
223 end if
224 if (c_associated(this%bb_d_d)) then
225 call device_free(this%bb_d_d)
226 end if
227
228 end subroutine projection_free
229
230 subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, &
231 string)
232 class(projection_t), intent(inout) :: this
233 integer, intent(inout) :: n
234 real(kind=rp), intent(inout), dimension(n) :: b
235 integer, intent(in) :: tstep
236 class(coef_t), intent(inout) :: coef
237 type(time_step_controller_t), intent(in) :: dt_controller
238 character(len=*), optional :: string
239
240 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
241 if (dt_controller%is_variable_dt) then
242 ! the time step at which dt is changed
243 if (dt_controller%dt_last_change .eq. 0) then
244 call this%clear(n)
245 else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
246 ! activate projection some steps after dt is changed
247 ! note that dt_last_change start from 0
248 call this%project_on(b, coef, n)
249 if (present(string)) then
250 call this%log_info(string, tstep)
251 end if
252 end if
253 else
254 call this%project_on(b, coef, n)
255 if (present(string)) then
256 call this%log_info(string, tstep)
257 end if
258 end if
259 end if
260
261 end subroutine projection_pre_solving
262
263 subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, &
264 dt_controller)
265 class(projection_t), intent(inout) :: this
266 integer, intent(inout) :: n
267 class(ax_t), intent(inout) :: Ax
268 class(coef_t), intent(inout) :: coef
269 class(bc_list_t), intent(inout) :: bclst
270 type(gs_t), intent(inout) :: gs_h
271 real(kind=rp), intent(inout), dimension(n) :: x
272 integer, intent(in) :: tstep
273 type(time_step_controller_t), intent(in) :: dt_controller
274
275 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
276 if (.not.(dt_controller%is_variable_dt) .or. &
277 (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
278 call this%project_back(x, ax, coef, bclst, gs_h, n)
279 end if
280 end if
281
282 end subroutine projection_post_solving
283
284 subroutine bcknd_project_on(this, b, coef, n)
285 class(projection_t), intent(inout) :: this
286 integer, intent(inout) :: n
287 class(coef_t), intent(inout) :: coef
288 real(kind=rp), intent(inout), dimension(n) :: b
289 call profiler_start_region('Project on', 16)
290 if (neko_bcknd_device .eq. 1) then
291 call device_project_on(this, b, coef, n)
292 else
293 call cpu_project_on(this, b, coef, n)
294 end if
295 call profiler_end_region('Project on', 16)
296 end subroutine bcknd_project_on
297
298 subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
299 class(projection_t), intent(inout) :: this
300 integer, intent(inout) :: n
301 class(ax_t), intent(inout) :: Ax
302 class(coef_t), intent(inout) :: coef
303 class(bc_list_t), intent(inout) :: bclst
304 type(gs_t), intent(inout) :: gs_h
305 real(kind=rp), intent(inout), dimension(n) :: x
306 type(c_ptr) :: x_d
307
308 call profiler_start_region('Project back', 17)
309
310 if (neko_bcknd_device .eq. 1) then
311 x_d = device_get_ptr(x)
312 ! Restore desired solution
313 if (this%m .gt. 0) call device_add2(x_d, this%xbar_d, n)
314 if (this%m .eq. this%L) then
315 this%m = 1
316 else
317 this%m = min(this%m+1, this%L)
318 end if
319
320 call device_copy(this%xx_d(this%m), x_d, n) ! Update (X,B)
321
322 else
323 if (this%m .gt. 0) call add2(x, this%xbar, n) ! Restore desired solution
324 if (this%m .eq. this%L) then
325 this%m = 1
326 else
327 this%m = min(this%m+1, this%L)
328 end if
329
330 call copy(this%xx(1, this%m), x, n) ! Update (X,B)
331 end if
332
333 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
334 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
335 call bclst%apply_scalar(this%bb(1, this%m), n)
336
337 call proj_ortho(this, coef, n)
338 call profiler_end_region('Project back', 17)
339 end subroutine bcknd_project_back
340
341
342
343 subroutine cpu_project_on(this, b, coef, n)
344 class(projection_t), intent(inout) :: this
345 integer, intent(inout) :: n
346 class(coef_t), intent(inout) :: coef
347 real(kind=rp), intent(inout), dimension(n) :: b
348 integer :: i, j, k, l, ierr
349 real(kind=rp) :: work(this%L), alpha(this%L), s
350
351 associate(xbar => this%xbar, xx => this%xx, &
352 bb => this%bb)
353
354 if (this%m .le. 0) return
355
356 !First round of CGS
357 call rzero(alpha, this%m)
358 this%proj_res = sqrt(glsc3(b, b, coef%mult, n) / coef%volume)
359 this%proj_m = this%m
360 do i = 1, n, neko_blk_size
361 j = min(neko_blk_size, n-i+1)
362 do k = 1, this%m
363 s = 0.0_rp
364 do l = 0, (j-1)
365 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
366 end do
367 alpha(k) = alpha(k) + s
368 end do
369 end do
370
371 !First one outside loop to avoid zeroing xbar and bbar
372 call mpi_allreduce(mpi_in_place, alpha, this%m, &
373 mpi_real_precision, mpi_sum, neko_comm, ierr)
374
375 call rzero(work, this%m)
376
377 do i = 1, n, neko_blk_size
378 j = min(neko_blk_size, n-i+1)
379 do l = 0, (j-1)
380 xbar(i+l) = alpha(1) * xx(i+l,1)
381 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
382 end do
383 do k = 2, this%m
384 do l = 0, (j-1)
385 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
386 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
387 end do
388 end do
389 !Second round of CGS
390 do k = 1, this%m
391 s = 0.0_rp
392 do l = 0, (j-1)
393 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
394 end do
395 work(k) = work(k) + s
396 end do
397 end do
398
399 call mpi_allreduce(work, alpha, this%m, &
400 mpi_real_precision, mpi_sum, neko_comm, ierr)
401
402 do i = 1, n, neko_blk_size
403 j = min(neko_blk_size, n-i+1)
404 do k = 1, this%m
405 do l = 0, (j-1)
406 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
407 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
408 end do
409 end do
410 end do
411 end associate
412 end subroutine cpu_project_on
413
414 subroutine device_project_on(this, b, coef, n)
415 class(projection_t), intent(inout) :: this
416 integer, intent(inout) :: n
417 class(coef_t), intent(inout) :: coef
418 real(kind=rp), intent(inout), dimension(n) :: b
419 real(kind=rp) :: alpha(this%L)
420 type(c_ptr) :: b_d
421 integer :: i
422 b_d = device_get_ptr(b)
423
424 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
425 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
426
427 if (this%m .le. 0) return
428
429
430
431 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
432 this%proj_m = this%m
433 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
434 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
435 coef%mult_d, xbar_d, this%m, n)
436 else
437 if (neko_bcknd_opencl .eq. 1) then
438 do i = 1, this%m
439 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
440 end do
441 else
442 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
443 call device_memcpy(alpha, alpha_d, this%m, &
444 host_to_device, sync = .false.)
445 end if
446 call device_rzero(xbar_d, n)
447 if (neko_bcknd_opencl .eq. 1) then
448 do i = 1, this%m
449 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
450 end do
451 call cmult(alpha, -1.0_rp, this%m)
452 else
453 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
454 call device_cmult(alpha_d, -1.0_rp, this%m)
455 end if
456
457 if (neko_bcknd_opencl .eq. 1) then
458 do i = 1, this%m
459 call device_add2s2(b_d, bb_d(i), alpha(i), n)
460 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
461 end do
462 else
463 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
464 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
465 call device_memcpy(alpha, alpha_d, this%m, &
466 host_to_device, sync = .false.)
467 end if
468
469 if (neko_bcknd_opencl .eq. 1) then
470 do i = 1, this%m
471 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
472 call cmult(alpha, -1.0_rp, this%m)
473 call device_add2s2(b_d, bb_d(i), alpha(i), n)
474 end do
475 else
476 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
477 call device_cmult(alpha_d, -1.0_rp, this%m)
478 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
479 end if
480 end if
481
482 end associate
483 end subroutine device_project_on
484
485 !Choose between CPU or device for proj_ortho
486 subroutine proj_ortho(this, coef, n)
487 class(projection_t), intent(inout) :: this
488 type(coef_t), intent(in) :: coef
489 integer, intent(in) :: n
490
491 if (neko_bcknd_device .eq. 1) then
492 call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
493 else
494 call cpu_proj_ortho (this, this%xx, this%bb, coef%mult, n)
495 end if
496 end subroutine proj_ortho
497
498 !This is a lot more primitive than on the CPU
499 subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
500 type(projection_t), intent(inout) :: this
501 integer, intent(in) :: n
502 type(c_ptr), dimension(this%L) :: xx_d, bb_d
503 type(c_ptr), intent(in) :: w_d
504 real(kind=rp) :: nrm, scl
505 real(kind=rp) :: alpha(this%L)
506 integer :: i
507
508 associate(m => this%m, xx_d_d => this%xx_d_d, &
509 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
510
511 if (m .le. 0) return
512
513 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
514 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
515 w_d, xx_d(m), this%m, n, nrm)
516 else
517 if (neko_bcknd_opencl .eq. 1)then
518 do i = 1, m
519 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
520 end do
521 else
522 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
523 end if
524 nrm = sqrt(alpha(m))
525 call cmult(alpha, -1.0_rp,m)
526 if (neko_bcknd_opencl .eq. 1)then
527 do i = 1, m - 1
528 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
529 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
530
531 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
532 end do
533 else
534 call device_memcpy(alpha, alpha_d, this%m, &
535 host_to_device, sync = .false.)
536 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
537 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
538
539 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
540 end if
541 call cmult(alpha, -1.0_rp,m)
542 if (neko_bcknd_opencl .eq. 1)then
543 do i = 1, m - 1
544 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
545 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
546 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
547 end do
548 else
549 call device_memcpy(alpha, alpha_d, m, &
550 host_to_device, sync = .false.)
551 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
552 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
553 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
554 end if
555 end if
556
557 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
558 alpha(m) = sqrt(alpha(m))
559
560 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
561 scl = 1.0_rp / alpha(m)
562 call device_cmult(xx_d(m), scl, n)
563 call device_cmult(bb_d(m), scl, n)
564
565
566 else !New vector is not linearly independent, forget about it
567 if (pe_rank .eq. 0) then
568 call neko_warning('New vector not linearly indepependent!')
569 end if
570 m = m - 1 !Remove column
571 end if
572
573 end associate
574
575 end subroutine device_proj_ortho
576
577
578 subroutine cpu_proj_ortho(this, xx, bb, w, n)
579 type(projection_t), intent(inout) :: this
580 integer, intent(in) :: n
581 real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
582 real(kind=rp), dimension(n), intent(in) :: w
583 real(kind=rp) :: nrm, scl1, scl2, c, s
584 real(kind=rp) :: alpha(this%L), beta(this%L)
585 integer :: i, j, k, l, h, ierr
586
587 associate(m => this%m)
588
589 if (m .le. 0) return !No vectors to ortho-normalize
590
591 ! AX = B
592 ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
593 call rzero(alpha, m)
594 do i = 1, n, neko_blk_size
595 j = min(neko_blk_size, n-i+1)
596 do k = 1, m !First round CGS
597 s = 0.0_rp
598 c = 0.0_rp
599 do l = 0, (j-1)
600 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
601 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
602 end do
603 alpha(k) = alpha(k) + 0.5_rp * (s + c)
604 end do
605 end do
606
607 call mpi_allreduce(mpi_in_place, alpha, this%m, &
608 mpi_real_precision, mpi_sum, neko_comm, ierr)
609
610 nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
611
612
613 do i = 1, n, neko_blk_size
614 j = min(neko_blk_size, n-i+1)
615 do k = 1, m-1
616 do l = 0, (j-1)
617 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
618 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
619 end do
620 end do
621 end do
622 call rzero(beta,m)
623
624 do i = 1, n, neko_blk_size
625 j = min(neko_blk_size, n-i+1)
626 do k = 1, m-1
627 s = 0.0_rp
628 c = 0.0_rp
629 do l = 0, (j-1)
630 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
631 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
632 end do
633 beta(k) = beta(k) + 0.5_rp * (s + c)
634 end do
635 end do
636
637 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
638 mpi_real_precision, mpi_sum, neko_comm, ierr)
639
640 alpha(m) = 0.0_rp
641
642 do i = 1, n, neko_blk_size
643 j = min(neko_blk_size,n-i+1)
644 do k = 1, m-1
645 do l = 0, (j-1)
646 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
647 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
648 end do
649 end do
650 s = 0.0_rp
651 do l = 0, (j-1)
652 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
653 end do
654 alpha(m) = alpha(m) + s
655 end do
656 do k = 1, m-1
657 alpha(k) = alpha(k) + beta(k)
658 end do
659
660 !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
661 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
662 mpi_real_precision, mpi_sum, neko_comm, ierr)
663 alpha(m) = sqrt(alpha(m))
664 !dx and db now stored in last column of xx and bb
665
666 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
667 !Normalize dx and db
668 scl1 = 1.0_rp / alpha(m)
669 do i = 0, (n - 1)
670 xx(1+i,m) = scl1 * xx(1+i,m)
671 bb(1+i,m) = scl1 * bb(1+i,m)
672 end do
673
674 else !New vector is not linearly independent, forget about it
675 k = m !location of rank deficient column
676 if (pe_rank .eq. 0) then
677 call neko_warning('New vector not linearly indepependent!')
678 end if
679 m = m - 1 !Remove column
680 end if
681
682 end associate
683
684 end subroutine cpu_proj_ortho
685
686 subroutine print_proj_info(this, string, tstep)
687 class(projection_t), intent(in) :: this
688 character(len=*), intent(in) :: string
689 integer, intent(in) :: tstep
690 character(len=LOG_SIZE) :: log_buf
691 character(len=12) :: tstep_str
692
693 if (this%proj_m .gt. 0) then
694 write(tstep_str, '(I12)') tstep
695 write(log_buf, '(A12,A14,1X,A8,A10,1X,I3,A16,1X,E10.4)') &
696 adjustl(tstep_str), ' | Projection:', string, &
697 ', Vectors:', this%proj_m, &
698 ', Original res.:', this%proj_res
699 call neko_log%message(log_buf)
700 call neko_log%newline()
701 end if
702
703 end subroutine print_proj_info
704
705 subroutine bcknd_clear(this, n)
706 class(projection_t), intent(inout) :: this
707 integer, intent(in) :: n
708 integer :: i, j
709
710 this%m = 0
711 this%proj_m = 0
712
713 do i = 1, this%L
714 if (neko_bcknd_device .eq. 1) then
715 call device_rzero(this%xx_d(i), n)
716 call device_rzero(this%xx_d(i), n)
717 else
718 do j = 1, n
719 this%xx(j,i) = 0.0_rp
720 this%bb(j,i) = 0.0_rp
721 end do
722 end if
723 end do
724
725 end subroutine bcknd_clear
726
727end module projection
Return the device pointer for an associated Fortran array.
Definition device.F90:96
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Copy data between host and device (or device and device)
Definition device.F90:66
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:50
integer, public pe_rank
MPI rank.
Definition comm.F90:55
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:42
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:214
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:187
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:70
integer, parameter, public log_size
Definition log.f90:46
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:417
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1026
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:732
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:255
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:211
Build configurations.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:109
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine, public proj_ortho(this, coef, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine projection_init(this, n, l, activ_step)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string, tstep)
Implements type time_step_controller.
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:284
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55