Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
projection.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
64 use num_types, only : rp, c_rp
65 use math, only : rzero, glsc3, add2, add2s2, copy, cmult
66 use coefs, only : coef_t
67 use ax_product, only : ax_t
68 use bc_list, only : bc_list_t
69 use gather_scatter, only : gs_t, gs_op_add
79 use logger, only : log_size, neko_log
80 use utils, only : neko_warning
81 use bc_list, only : bc_list_t
84 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum, mpi_wtime
85 use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
87 implicit none
88 private
89 public :: proj_ortho
90
91 type, public :: projection_t
92 real(kind=rp), allocatable :: xx(:,:)
93 real(kind=rp), allocatable :: bb(:,:)
94 real(kind=rp), allocatable :: xbar(:)
95 type(c_ptr), allocatable :: xx_d(:)
96 type(c_ptr), allocatable :: bb_d(:)
97 type(c_ptr) :: xbar_d = c_null_ptr
98 type(c_ptr) :: alpha_d = c_null_ptr
99 type(c_ptr) :: xx_d_d = c_null_ptr
100 type(c_ptr) :: bb_d_d = c_null_ptr
101 integer :: m, l
102 real(kind=rp) :: tol = 1e-7_rp
103 !logging variables
104 real(kind=rp) :: proj_res
105 integer :: proj_m = 0
106 integer :: activ_step ! steps to activate projection
107 logical :: prj_reorthogonalize_basis = .false.
108 contains
109 procedure, pass(this) :: clear => bcknd_clear
110 procedure, pass(this) :: project_on => bcknd_project_on
111 procedure, pass(this) :: project_back => bcknd_project_back
112 procedure, pass(this) :: log_info => print_proj_info
113 procedure, pass(this) :: init => projection_init
114 procedure, pass(this) :: free => projection_free
115 procedure, pass(this) :: pre_solving => projection_pre_solving
116 procedure, pass(this) :: post_solving => projection_post_solving
117 procedure, pass(this) :: reortho_basis => bcknd_reorthogonalize_basis
118 end type projection_t
119
120contains
121
122 subroutine projection_init(this, n, L, activ_step, reorthogonalize_basis)
123 class(projection_t), target, intent(inout) :: this
124 integer, intent(in) :: n
125 integer, intent(in) :: L
126 integer, optional, intent(in) :: activ_step
127 logical, optional, intent(in) :: reorthogonalize_basis
128 integer :: i
129 integer(c_size_t) :: ptr_size
130 type(c_ptr) :: ptr
131 real(c_rp) :: dummy
132
133 call this%free()
134
135 this%L = l
136
137 if (present(activ_step)) then
138 this%activ_step = activ_step
139 else
140 this%activ_step = 5
141 end if
142
143 if (present(reorthogonalize_basis)) then
144 this%prj_reorthogonalize_basis = reorthogonalize_basis
145 end if
146
147 this%m = 0
148
149 ! Return if the space is 0, to avoid allocating zero sized
150 ! arrays, which are not supported for all backends
151 if (this%L .le. 0) then
152 return
153 end if
154
155 allocate(this%xx(n, this%L))
156 allocate(this%bb(n, this%L))
157 allocate(this%xbar(n))
158 call rzero(this%xbar, n)
159 do i = 1, this%L
160 call rzero(this%xx(1, i), n)
161 call rzero(this%bb(1, i), n)
162 end do
163 if (neko_bcknd_device .eq. 1) then
164
165 allocate(this%xx_d(this%L))
166 allocate(this%bb_d(this%L))
167
168 call device_map(this%xbar, this%xbar_d, n)
169 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
170
171 call device_rzero(this%xbar_d, n)
172 call device_rzero(this%alpha_d, this%L)
173
174 do i = 1, this%L
175 this%xx_d(i) = c_null_ptr
176 call device_map(this%xx(:, i), this%xx_d(i), n)
177 call device_rzero(this%xx_d(i), n)
178 this%bb_d(i) = c_null_ptr
179 call device_map(this%bb(:, i), this%bb_d(i), n)
180 call device_rzero(this%bb_d(i), n)
181 end do
182
183 ptr_size = c_sizeof(c_null_ptr) * this%L
184 call device_alloc(this%xx_d_d, ptr_size)
185 ptr = c_loc(this%xx_d)
186 call device_memcpy(ptr, this%xx_d_d, ptr_size, &
187 host_to_device, sync = .false.)
188 call device_alloc(this%bb_d_d, ptr_size)
189 ptr = c_loc(this%bb_d)
190 call device_memcpy(ptr, this%bb_d_d, ptr_size, &
191 host_to_device, sync = .false.)
192 end if
193
194
195 end subroutine projection_init
196
197 subroutine projection_free(this)
198 class(projection_t), intent(inout) :: this
199 integer :: i
200 if (c_associated(this%xx_d_d)) then
201 call device_free(this%xx_d_d)
202 end if
203 if (c_associated(this%bb_d_d)) then
204 call device_free(this%bb_d_d)
205 end if
206 if (c_associated(this%alpha_d)) then
207 call device_free(this%alpha_d)
208 end if
209 if (allocated(this%xx)) then
210 if (neko_bcknd_device .eq. 1) then
211 do i = 1, this%L
212 call device_unmap(this%xx(:, i), this%xx_d(i))
213 end do
214 deallocate(this%xx_d)
215 end if
216 deallocate(this%xx)
217 end if
218 if (allocated(this%xbar)) then
219 if (neko_bcknd_device .eq. 1) then
220 call device_unmap(this%xbar, this%xbar_d)
221 end if
222 deallocate(this%xbar)
223 end if
224 if (allocated(this%bb)) then
225 if (neko_bcknd_device .eq. 1) then
226 do i = 1, this%L
227 call device_unmap(this%bb(:, i), this%bb_d(i))
228 end do
229 deallocate(this%bb_d)
230 end if
231 deallocate(this%bb)
232 end if
233
234 end subroutine projection_free
235
236 subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, &
237 string, Ax, gs_h, bclst)
238 class(projection_t), intent(inout) :: this
239 integer, intent(inout) :: n
240 real(kind=rp), intent(inout), dimension(n) :: b
241 integer, intent(in) :: tstep
242 class(coef_t), intent(inout) :: coef
243 type(time_step_controller_t), intent(in) :: dt_controller
244 class(bc_list_t), optional, intent(inout) :: bclst
245 type(gs_t), optional, intent(inout) :: gs_h
246 class(ax_t), optional, intent(in) :: Ax
247 character(len=*), optional :: string
248
249 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
250 if (dt_controller%is_variable_dt) then
251 ! the time step at which dt is changed
252 if (dt_controller%dt_last_change .eq. 0) then
253 call this%clear(n)
254 else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
255 ! Re-orthogonalize basis if requested
256 if (this%prj_reorthogonalize_basis .and. present(gs_h) &
257 .and. present(ax) .and.present(bclst)) then
258 call this%reortho_basis(ax, coef, gs_h, bclst, n)
259 end if
260 ! activate projection some steps after dt is changed
261 ! note that dt_last_change start from 0
262 call this%project_on(b, coef, n)
263 if (present(string)) then
264 call this%log_info(string, tstep)
265 end if
266 end if
267 else
268 ! Re-orthogonalize basis if requested
269 if (this%prj_reorthogonalize_basis .and. present(gs_h) &
270 .and. present(ax) .and.present(bclst)) then
271 call this%reortho_basis(ax, coef, gs_h, bclst, n)
272 end if
273 call this%project_on(b, coef, n)
274 if (present(string)) then
275 call this%log_info(string, tstep)
276 end if
277 end if
278 end if
279
280 end subroutine projection_pre_solving
281
282 subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, &
283 dt_controller)
284 class(projection_t), intent(inout) :: this
285 integer, intent(inout) :: n
286 class(ax_t), intent(inout) :: Ax
287 class(coef_t), intent(inout) :: coef
288 class(bc_list_t), intent(inout) :: bclst
289 type(gs_t), intent(inout) :: gs_h
290 real(kind=rp), intent(inout), dimension(n) :: x
291 integer, intent(in) :: tstep
292 type(time_step_controller_t), intent(in) :: dt_controller
293
294 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
295 if (.not.(dt_controller%is_variable_dt) .or. &
296 (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
297 call this%project_back(x, ax, coef, bclst, gs_h, n)
298 end if
299 end if
300
301 end subroutine projection_post_solving
302
303 subroutine bcknd_project_on(this, b, coef, n)
304 class(projection_t), intent(inout) :: this
305 integer, intent(inout) :: n
306 class(coef_t), intent(inout) :: coef
307 real(kind=rp), intent(inout), dimension(n) :: b
308 call profiler_start_region('Project on', 16)
309 if (neko_bcknd_device .eq. 1) then
310 call device_project_on(this, b, coef, n)
311 else
312 call cpu_project_on(this, b, coef, n)
313 end if
314 call profiler_end_region('Project on', 16)
315 end subroutine bcknd_project_on
316
317 subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
318 class(projection_t), intent(inout) :: this
319 integer, intent(inout) :: n
320 class(ax_t), intent(inout) :: Ax
321 class(coef_t), intent(inout) :: coef
322 class(bc_list_t), intent(inout) :: bclst
323 type(gs_t), intent(inout) :: gs_h
324 real(kind=rp), intent(inout), dimension(n) :: x
325 type(c_ptr) :: x_d
326
327 call profiler_start_region('Project back', 17)
328
329 if (neko_bcknd_device .eq. 1) then
330 x_d = device_get_ptr(x)
331 ! Restore desired solution
332 if (this%m .gt. 0) call device_add2(x_d, this%xbar_d, n)
333 if (this%m .eq. this%L) then
334 this%m = 1
335 else
336 this%m = min(this%m+1, this%L)
337 end if
338
339 call device_copy(this%xx_d(this%m), x_d, n) ! Update (X,B)
340
341 else
342 if (this%m .gt. 0) call add2(x, this%xbar, n) ! Restore desired solution
343 if (this%m .eq. this%L) then
344 this%m = 1
345 else
346 this%m = min(this%m+1, this%L)
347 end if
348
349 call copy(this%xx(1, this%m), x, n) ! Update (X,B)
350 end if
351
352 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
353 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
354 call bclst%apply_scalar(this%bb(1, this%m), n)
355
356 call proj_ortho(this, coef, n)
357 call profiler_end_region('Project back', 17)
358 end subroutine bcknd_project_back
359
360 subroutine bcknd_reorthogonalize_basis(this, Ax, coef, gs_h, blst, n)
361 class(projection_t), intent(inout) :: this
362 class(ax_t), intent(in) :: Ax
363 class(coef_t), intent(in) :: coef
364 type(gs_t), intent(inout) :: gs_h
365 type(bc_list_t), intent(inout) :: blst
366 integer, intent(in) :: n
367
368 call profiler_start_region('Project reortho basis')
369 if (neko_bcknd_device .eq. 1) then
370 call device_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
371 else
372 call cpu_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
373 end if
374 call profiler_end_region('Project reortho basis')
375 end subroutine bcknd_reorthogonalize_basis
376
377 subroutine cpu_reorthogonalize_basis(this, Ax, coef, gs_h, blst, n)
378 class(projection_t), intent(inout) :: this
379 class(ax_t), intent(in) :: Ax
380 class(coef_t), intent(in) :: coef
381 type(gs_t), intent(inout) :: gs_h
382 type(bc_list_t), intent(inout) :: blst
383 integer, intent(in) :: n
384 character(len=1000) :: msg
385
386 integer :: i, j
387 real(kind=rp) :: alpha, s, norm_fac
388 real(kind=rp) :: start_time, end_time, time
389
390 if (this%m .le. 0) return
391
392 associate(xx => this%xx, bb => this%bb)
393 start_time = mpi_wtime()
394
395 ! Recompute B = A_new * X using the new mesh metrics
396 do i = 1, this%m
397 call ax%compute(bb(1,i), xx(1,i), coef, coef%msh, coef%Xh)
398 call gs_h%gs_op_vector(bb(1,i), n, gs_op_add)
399 call blst%apply_scalar(bb(1,i), n)
400 end do
401
402 ! Modified Gram-Schmidt
403 do i = 1, this%m
404
405 ! Orthogonalize against previous vectors
406 do j = 1, i - 1
407 alpha = glsc3(xx(1,i), bb(1,j), coef%mult, n)
408 call add2s2(xx(1,i), xx(1,j), -alpha, n)
409 call add2s2(bb(1,i), bb(1,j), -alpha, n)
410 end do
411
412 s = glsc3(xx(1,i), bb(1,i), coef%mult, n)
413 norm_fac = 1.0_rp / sqrt(s)
414 call cmult(xx(1,i), norm_fac, n)
415 call cmult(bb(1,i), norm_fac, n)
416
417 end do
418 end_time = mpi_wtime()
419 time = end_time - start_time
420 write(msg, '(A, E15.7)') "Projection basis reorthogonalization (s): ", time
421 call neko_log%message(trim(msg))
422 end associate
423 end subroutine cpu_reorthogonalize_basis
424
425 subroutine device_reorthogonalize_basis(this, Ax, coef, gs_h, blst, n)
426 class(projection_t), intent(inout) :: this
427 class(ax_t), intent(in) :: Ax
428 class(coef_t), intent(in) :: coef
429 type(gs_t), intent(inout) :: gs_h
430 type(bc_list_t), intent(inout) :: blst
431 integer, intent(in) :: n
432 character(len=1000) :: msg
433
434 integer :: i, j
435 real(kind=rp) :: alpha, s, norm_fac
436 real(kind=rp) :: start_time, end_time, time
437
438 if (this%m .le. 0) return
439
440 associate(xx_d => this%xx_d, bb_d => this%bb_d)
441 start_time = mpi_wtime()
442
443 ! Recompute B = A_new * X using the new mesh metrics
444 do i = 1, this%m
445 call ax%compute(this%bb(1,i), this%xx(1,i), coef, coef%msh, coef%Xh)
446 call gs_h%gs_op_vector(this%bb(1,i), n, gs_op_add)
447 call blst%apply_scalar(this%bb(1,i), n)
448 end do
449
450 ! Modified Gram-Schmidt
451 do i = 1, this%m
452
453 ! Orthogonalize against previous vectors
454 do j = 1, i - 1
455 alpha = device_glsc3(xx_d(i), bb_d(j), coef%mult_d, n)
456 call device_add2s2(xx_d(i), xx_d(j), -alpha, n)
457 call device_add2s2(bb_d(i), bb_d(j), -alpha, n)
458 end do
459
460 s = device_glsc3(xx_d(i), bb_d(i), coef%mult_d, n)
461 norm_fac = 1.0_rp / sqrt(s)
462 call device_cmult(xx_d(i), norm_fac, n)
463 call device_cmult(bb_d(i), norm_fac, n)
464
465 end do
466 end_time = mpi_wtime()
467 time = end_time - start_time
468 write(msg, '(A, E15.7)') "Projection basis reorthogonalization (s): ", time
469 call neko_log%message(trim(msg))
470 end associate
471 end subroutine device_reorthogonalize_basis
472
473
474 subroutine cpu_project_on(this, b, coef, n)
475 class(projection_t), intent(inout) :: this
476 integer, intent(inout) :: n
477 class(coef_t), intent(inout) :: coef
478 real(kind=rp), intent(inout), dimension(n) :: b
479 integer :: i, j, k, l, ierr
480 real(kind=rp) :: work(this%L), alpha(this%L), s
481
482 associate(xbar => this%xbar, xx => this%xx, &
483 bb => this%bb)
484
485 if (this%m .le. 0) return
486
487 !First round of CGS
488 call rzero(alpha, this%m)
489 this%proj_res = sqrt(glsc3(b, b, coef%mult, n) / coef%volume)
490 this%proj_m = this%m
491 do i = 1, n, neko_blk_size
492 j = min(neko_blk_size, n-i+1)
493 do k = 1, this%m
494 s = 0.0_rp
495 do l = 0, (j-1)
496 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
497 end do
498 alpha(k) = alpha(k) + s
499 end do
500 end do
501
502 !First one outside loop to avoid zeroing xbar and bbar
503 call mpi_allreduce(mpi_in_place, alpha, this%m, &
504 mpi_real_precision, mpi_sum, neko_comm, ierr)
505
506 call rzero(work, this%m)
507
508 do i = 1, n, neko_blk_size
509 j = min(neko_blk_size, n-i+1)
510 do l = 0, (j-1)
511 xbar(i+l) = alpha(1) * xx(i+l,1)
512 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
513 end do
514 do k = 2, this%m
515 do l = 0, (j-1)
516 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
517 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
518 end do
519 end do
520 !Second round of CGS
521 do k = 1, this%m
522 s = 0.0_rp
523 do l = 0, (j-1)
524 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
525 end do
526 work(k) = work(k) + s
527 end do
528 end do
529
530 call mpi_allreduce(work, alpha, this%m, &
531 mpi_real_precision, mpi_sum, neko_comm, ierr)
532
533 do i = 1, n, neko_blk_size
534 j = min(neko_blk_size, n-i+1)
535 do k = 1, this%m
536 do l = 0, (j-1)
537 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
538 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
539 end do
540 end do
541 end do
542 end associate
543 end subroutine cpu_project_on
544
545 subroutine device_project_on(this, b, coef, n)
546 class(projection_t), intent(inout) :: this
547 integer, intent(inout) :: n
548 class(coef_t), intent(inout) :: coef
549 real(kind=rp), intent(inout), dimension(n) :: b
550 real(kind=rp) :: alpha(this%L)
551 type(c_ptr) :: b_d
552 integer :: i
553 b_d = device_get_ptr(b)
554
555 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
556 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
557
558 if (this%m .le. 0) return
559
560
561
562 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
563 this%proj_m = this%m
564 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
565 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
566 coef%mult_d, xbar_d, this%m, n)
567 else
568 if (neko_bcknd_opencl .eq. 1) then
569 do i = 1, this%m
570 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
571 end do
572 else
573 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
574 call device_memcpy(alpha, alpha_d, this%m, &
575 host_to_device, sync = .false.)
576 end if
577 call device_rzero(xbar_d, n)
578 if (neko_bcknd_opencl .eq. 1) then
579 do i = 1, this%m
580 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
581 end do
582 call cmult(alpha, -1.0_rp, this%m)
583 else
584 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
585 call device_cmult(alpha_d, -1.0_rp, this%m)
586 end if
587
588 if (neko_bcknd_opencl .eq. 1) then
589 do i = 1, this%m
590 call device_add2s2(b_d, bb_d(i), alpha(i), n)
591 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
592 end do
593 else
594 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
595 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
596 call device_memcpy(alpha, alpha_d, this%m, &
597 host_to_device, sync = .false.)
598 end if
599
600 if (neko_bcknd_opencl .eq. 1) then
601 do i = 1, this%m
602 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
603 call cmult(alpha, -1.0_rp, this%m)
604 call device_add2s2(b_d, bb_d(i), alpha(i), n)
605 end do
606 else
607 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
608 call device_cmult(alpha_d, -1.0_rp, this%m)
609 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
610 end if
611 end if
612
613 end associate
614 end subroutine device_project_on
615
616 !Choose between CPU or device for proj_ortho
617 subroutine proj_ortho(this, coef, n)
618 class(projection_t), intent(inout) :: this
619 type(coef_t), intent(in) :: coef
620 integer, intent(in) :: n
621
622 if (neko_bcknd_device .eq. 1) then
623 call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
624 else
625 call cpu_proj_ortho (this, this%xx, this%bb, coef%mult, n)
626 end if
627 end subroutine proj_ortho
628
629 !This is a lot more primitive than on the CPU
630 subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
631 type(projection_t), intent(inout) :: this
632 integer, intent(in) :: n
633 type(c_ptr), dimension(this%L) :: xx_d, bb_d
634 type(c_ptr), intent(in) :: w_d
635 real(kind=rp) :: nrm, scl
636 real(kind=rp) :: alpha(this%L)
637 integer :: i
638
639 associate(m => this%m, xx_d_d => this%xx_d_d, &
640 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
641
642 if (m .le. 0) return
643
644 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
645 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
646 w_d, xx_d(m), this%m, n, nrm)
647 else
648 if (neko_bcknd_opencl .eq. 1)then
649 do i = 1, m
650 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
651 end do
652 else
653 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
654 end if
655 nrm = sqrt(alpha(m))
656 call cmult(alpha, -1.0_rp,m)
657 if (neko_bcknd_opencl .eq. 1)then
658 do i = 1, m - 1
659 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
660 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
661
662 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
663 end do
664 else
665 call device_memcpy(alpha, alpha_d, this%m, &
666 host_to_device, sync = .false.)
667 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
668 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
669
670 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
671 end if
672 call cmult(alpha, -1.0_rp,m)
673 if (neko_bcknd_opencl .eq. 1)then
674 do i = 1, m - 1
675 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
676 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
677 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
678 end do
679 else
680 call device_memcpy(alpha, alpha_d, m, &
681 host_to_device, sync = .false.)
682 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
683 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
684 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
685 end if
686 end if
687
688 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
689 alpha(m) = sqrt(alpha(m))
690
691 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
692 scl = 1.0_rp / alpha(m)
693 call device_cmult(xx_d(m), scl, n)
694 call device_cmult(bb_d(m), scl, n)
695
696
697 else !New vector is not linearly independent, forget about it
698 if (pe_rank .eq. 0) then
699 call neko_warning('New vector not linearly indepependent!')
700 end if
701 m = m - 1 !Remove column
702 end if
703
704 end associate
705
706 end subroutine device_proj_ortho
707
708
709 subroutine cpu_proj_ortho(this, xx, bb, w, n)
710 type(projection_t), intent(inout) :: this
711 integer, intent(in) :: n
712 real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
713 real(kind=rp), dimension(n), intent(in) :: w
714 real(kind=rp) :: nrm, scl1, scl2, c, s
715 real(kind=rp) :: alpha(this%L), beta(this%L)
716 integer :: i, j, k, l, h, ierr
717
718 associate(m => this%m)
719
720 if (m .le. 0) return !No vectors to ortho-normalize
721
722 ! AX = B
723 ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
724 call rzero(alpha, m)
725 do i = 1, n, neko_blk_size
726 j = min(neko_blk_size, n-i+1)
727 do k = 1, m !First round CGS
728 s = 0.0_rp
729 c = 0.0_rp
730 do l = 0, (j-1)
731 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
732 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
733 end do
734 alpha(k) = alpha(k) + 0.5_rp * (s + c)
735 end do
736 end do
737
738 call mpi_allreduce(mpi_in_place, alpha, this%m, &
739 mpi_real_precision, mpi_sum, neko_comm, ierr)
740
741 nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
742
743
744 do i = 1, n, neko_blk_size
745 j = min(neko_blk_size, n-i+1)
746 do k = 1, m-1
747 do l = 0, (j-1)
748 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
749 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
750 end do
751 end do
752 end do
753 call rzero(beta,m)
754
755 do i = 1, n, neko_blk_size
756 j = min(neko_blk_size, n-i+1)
757 do k = 1, m-1
758 s = 0.0_rp
759 c = 0.0_rp
760 do l = 0, (j-1)
761 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
762 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
763 end do
764 beta(k) = beta(k) + 0.5_rp * (s + c)
765 end do
766 end do
767
768 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
769 mpi_real_precision, mpi_sum, neko_comm, ierr)
770
771 alpha(m) = 0.0_rp
772
773 do i = 1, n, neko_blk_size
774 j = min(neko_blk_size,n-i+1)
775 do k = 1, m-1
776 do l = 0, (j-1)
777 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
778 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
779 end do
780 end do
781 s = 0.0_rp
782 do l = 0, (j-1)
783 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
784 end do
785 alpha(m) = alpha(m) + s
786 end do
787 do k = 1, m-1
788 alpha(k) = alpha(k) + beta(k)
789 end do
790
791 !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
792 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
793 mpi_real_precision, mpi_sum, neko_comm, ierr)
794 alpha(m) = sqrt(alpha(m))
795 !dx and db now stored in last column of xx and bb
796
797 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
798 !Normalize dx and db
799 scl1 = 1.0_rp / alpha(m)
800 do i = 0, (n - 1)
801 xx(1+i,m) = scl1 * xx(1+i,m)
802 bb(1+i,m) = scl1 * bb(1+i,m)
803 end do
804
805 else !New vector is not linearly independent, forget about it
806 k = m !location of rank deficient column
807 if (pe_rank .eq. 0) then
808 call neko_warning('New vector not linearly indepependent!')
809 end if
810 m = m - 1 !Remove column
811 end if
812
813 end associate
814
815 end subroutine cpu_proj_ortho
816
817 subroutine print_proj_info(this, string, tstep)
818 class(projection_t), intent(in) :: this
819 character(len=*), intent(in) :: string
820 integer, intent(in) :: tstep
821 character(len=LOG_SIZE) :: log_buf
822 character(len=12) :: tstep_str
823
824 if (this%proj_m .gt. 0) then
825 write(tstep_str, '(I12)') tstep
826 write(log_buf, '(A12,A14,1X,A8,A10,1X,I3,A16,1X,E10.4)') &
827 adjustl(tstep_str), ' | Projection:', string, &
828 ', Vectors:', this%proj_m, &
829 ', Original res.:', this%proj_res
830 call neko_log%message(log_buf)
831 call neko_log%newline()
832 end if
833
834 end subroutine print_proj_info
835
836 subroutine bcknd_clear(this, n)
837 class(projection_t), intent(inout) :: this
838 integer, intent(in) :: n
839 integer :: i, j
840
841 this%m = 0
842 this%proj_m = 0
843
844 do i = 1, this%L
845 if (neko_bcknd_device .eq. 1) then
846 call device_rzero(this%xx_d(i), n)
847 call device_rzero(this%bb_d(i), n)
848 else
849 do j = 1, n
850 this%xx(j,i) = 0.0_rp
851 this%bb(j,i) = 0.0_rp
852 end do
853 end if
854 end do
855
856 end subroutine bcknd_clear
857
858end module projection
Return the device pointer for an associated Fortran array.
Definition device.F90:107
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:83
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
integer, public pe_rank
MPI rank.
Definition comm.F90:56
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:198
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:79
integer, parameter, public log_size
Definition log.f90:45
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1067
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:812
Build configurations.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:107
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine projection_init(this, n, l, activ_step, reorthogonalize_basis)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string, ax, gs_h, bclst)
subroutine bcknd_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine device_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine, public proj_ortho(this, coef, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine cpu_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string, tstep)
Implements type time_step_controller.
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:61