Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
projection.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
64 use num_types, only : rp, c_rp
65 use math, only : rzero, glsc3, add2, add2s2, copy, cmult
66 use coefs, only : coef_t
67 use ax_product, only : ax_t
68 use bc_list, only : bc_list_t
69 use gather_scatter, only : gs_t, gs_op_add
79 use logger, only : log_size, neko_log
80 use utils, only : neko_warning
81 use bc_list, only : bc_list_t
84 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum, mpi_wtime
85 use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, &
86 c_sizeof, c_null_ptr, c_loc, c_associated
87 implicit none
88 private
89 public :: proj_ortho
90
91 type, public :: projection_t
92 real(kind=rp), allocatable :: xx(:,:)
93 real(kind=rp), allocatable :: bb(:,:)
94 real(kind=rp), allocatable :: xbar(:)
95 type(c_ptr), allocatable :: xx_d(:)
96 type(c_ptr), allocatable :: bb_d(:)
97 type(c_ptr) :: xbar_d = c_null_ptr
98 type(c_ptr) :: alpha_d = c_null_ptr
99 type(c_ptr) :: xx_d_d = c_null_ptr
100 type(c_ptr) :: bb_d_d = c_null_ptr
101 integer :: m, l
102 real(kind=rp) :: tol = 1e-7_rp
103 !logging variables
104 real(kind=rp) :: proj_res
105 integer :: proj_m = 0
106 integer :: activ_step ! steps to activate projection
107 logical :: prj_reorthogonalize_basis = .false.
108 contains
109 procedure, pass(this) :: clear => bcknd_clear
110 procedure, pass(this) :: project_on => bcknd_project_on
111 procedure, pass(this) :: project_back => bcknd_project_back
112 procedure, pass(this) :: log_info => print_proj_info
113 procedure, pass(this) :: init => projection_init
114 procedure, pass(this) :: free => projection_free
115 procedure, pass(this) :: pre_solving => projection_pre_solving
116 procedure, pass(this) :: post_solving => projection_post_solving
117 procedure, pass(this) :: reortho_basis => bcknd_reorthogonalize_basis
118 end type projection_t
119
120contains
121
122 subroutine projection_init(this, n, L, activ_step, reorthogonalize_basis)
123 class(projection_t), target, intent(inout) :: this
124 integer, intent(in) :: n
125 integer, intent(in) :: L
126 integer, optional, intent(in) :: activ_step
127 logical, optional, intent(in) :: reorthogonalize_basis
128 integer :: i
129 integer(c_size_t) :: ptr_size
130 type(c_ptr) :: ptr
131 real(c_rp) :: dummy
132
133 call this%free()
134
135 this%L = l
136
137 if (present(activ_step)) then
138 this%activ_step = activ_step
139 else
140 this%activ_step = 5
141 end if
142
143 if (present(reorthogonalize_basis)) then
144 this%prj_reorthogonalize_basis = reorthogonalize_basis
145 end if
146
147 this%m = 0
148
149 ! Return if the space is 0, to avoid allocating zero sized
150 ! arrays, which are not supported for all backends
151 if (this%L .le. 0) then
152 return
153 end if
154
155 allocate(this%xx(n, this%L))
156 allocate(this%bb(n, this%L))
157 allocate(this%xbar(n))
158 call rzero(this%xbar, n)
159 do i = 1, this%L
160 call rzero(this%xx(1, i), n)
161 call rzero(this%bb(1, i), n)
162 end do
163 if (neko_bcknd_device .eq. 1) then
164
165 allocate(this%xx_d(this%L))
166 allocate(this%bb_d(this%L))
167
168 call device_map(this%xbar, this%xbar_d, n)
169 call device_alloc(this%alpha_d, int(c_sizeof(dummy)*this%L, c_size_t))
170
171 call device_rzero(this%xbar_d, n)
172 call device_rzero(this%alpha_d, this%L)
173
174 do i = 1, this%L
175 this%xx_d(i) = c_null_ptr
176 call device_map(this%xx(:, i), this%xx_d(i), n)
177 call device_rzero(this%xx_d(i), n)
178 this%bb_d(i) = c_null_ptr
179 call device_map(this%bb(:, i), this%bb_d(i), n)
180 call device_rzero(this%bb_d(i), n)
181 end do
182
183 ptr_size = c_sizeof(c_null_ptr) * this%L
184 call device_alloc(this%xx_d_d, ptr_size)
185 ptr = c_loc(this%xx_d)
186 call device_memcpy(ptr, this%xx_d_d, ptr_size, &
187 host_to_device, sync = .false.)
188 call device_alloc(this%bb_d_d, ptr_size)
189 ptr = c_loc(this%bb_d)
190 call device_memcpy(ptr, this%bb_d_d, ptr_size, &
191 host_to_device, sync = .false.)
192 end if
193
194
195 end subroutine projection_init
196
197 subroutine projection_free(this)
198 class(projection_t), intent(inout) :: this
199 integer :: i
200 if (c_associated(this%xx_d_d)) then
201 call device_free(this%xx_d_d)
202 end if
203 if (c_associated(this%bb_d_d)) then
204 call device_free(this%bb_d_d)
205 end if
206 if (c_associated(this%alpha_d)) then
207 call device_free(this%alpha_d)
208 end if
209 if (allocated(this%xx)) then
210 if (neko_bcknd_device .eq. 1) then
211 do i = 1, this%L
212 call device_unmap(this%xx(:, i), this%xx_d(i))
213 end do
214 deallocate(this%xx_d)
215 end if
216 deallocate(this%xx)
217 end if
218 if (allocated(this%xbar)) then
219 if (neko_bcknd_device .eq. 1) then
220 call device_unmap(this%xbar, this%xbar_d)
221 end if
222 deallocate(this%xbar)
223 end if
224 if (allocated(this%bb)) then
225 if (neko_bcknd_device .eq. 1) then
226 do i = 1, this%L
227 call device_unmap(this%bb(:, i), this%bb_d(i))
228 end do
229 deallocate(this%bb_d)
230 end if
231 deallocate(this%bb)
232 end if
233
234 end subroutine projection_free
235
236 subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, &
237 string, Ax, gs_h, bclst)
238 class(projection_t), intent(inout) :: this
239 integer, intent(inout) :: n
240 real(kind=rp), intent(inout), dimension(n) :: b
241 integer, intent(in) :: tstep
242 class(coef_t), intent(inout) :: coef
243 type(time_step_controller_t), intent(in) :: dt_controller
244 class(bc_list_t), optional, intent(inout) :: bclst
245 type(gs_t), optional, intent(inout) :: gs_h
246 class(ax_t), optional, intent(in) :: Ax
247 character(len=*), optional :: string
248
249 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
250 if (dt_controller%is_variable_dt) then
251 ! the time step at which dt is changed
252 if (dt_controller%dt_last_change .eq. 0) then
253 call this%clear(n)
254 else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
255 ! Re-orthogonalize basis if requested
256 if (this%prj_reorthogonalize_basis .and. present(gs_h) &
257 .and. present(ax) .and.present(bclst)) then
258 call this%reortho_basis(ax, coef, gs_h, bclst, n)
259 end if
260 ! activate projection some steps after dt is changed
261 ! note that dt_last_change start from 0
262 call this%project_on(b, coef, n)
263 if (present(string)) then
264 call this%log_info(string, tstep)
265 end if
266 end if
267 else
268 ! Re-orthogonalize basis if requested
269 if (this%prj_reorthogonalize_basis .and. present(gs_h) &
270 .and. present(ax) .and.present(bclst)) then
271 call this%reortho_basis(ax, coef, gs_h, bclst, n)
272 end if
273 call this%project_on(b, coef, n)
274 if (present(string)) then
275 call this%log_info(string, tstep)
276 end if
277 end if
278 end if
279
280 end subroutine projection_pre_solving
281
282 subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, &
283 dt_controller)
284 class(projection_t), intent(inout) :: this
285 integer, intent(inout) :: n
286 class(ax_t), intent(inout) :: Ax
287 class(coef_t), intent(inout) :: coef
288 class(bc_list_t), intent(inout) :: bclst
289 type(gs_t), intent(inout) :: gs_h
290 real(kind=rp), intent(inout), dimension(n) :: x
291 integer, intent(in) :: tstep
292 type(time_step_controller_t), intent(in) :: dt_controller
293
294 if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
295 if (.not.(dt_controller%is_variable_dt) .or. &
296 (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
297 call this%project_back(x, ax, coef, bclst, gs_h, n)
298 end if
299 end if
300
301 end subroutine projection_post_solving
302
303 subroutine bcknd_project_on(this, b, coef, n)
304 class(projection_t), intent(inout) :: this
305 integer, intent(inout) :: n
306 class(coef_t), intent(inout) :: coef
307 real(kind=rp), intent(inout), dimension(n) :: b
308 call profiler_start_region('Project on', 16)
309 if (neko_bcknd_device .eq. 1) then
310 call device_project_on(this, b, coef, n)
311 else
312 call cpu_project_on(this, b, coef, n)
313 end if
314 call profiler_end_region('Project on', 16)
315 end subroutine bcknd_project_on
316
317 subroutine bcknd_project_back(this, x, Ax, coef, bclst, gs_h, n)
318 class(projection_t), intent(inout) :: this
319 integer, intent(inout) :: n
320 class(ax_t), intent(inout) :: Ax
321 class(coef_t), intent(inout) :: coef
322 class(bc_list_t), intent(inout) :: bclst
323 type(gs_t), intent(inout) :: gs_h
324 real(kind=rp), intent(inout), dimension(n) :: x
325 type(c_ptr) :: x_d
326
327 call profiler_start_region('Project back', 17)
328
329 if (neko_bcknd_device .eq. 1) then
330 x_d = device_get_ptr(x)
331 ! Restore desired solution
332 if (this%m .gt. 0) call device_add2(x_d, this%xbar_d, n)
333 if (this%m .eq. this%L) then
334 this%m = 1
335 else
336 this%m = min(this%m+1, this%L)
337 end if
338
339 call device_copy(this%xx_d(this%m), x_d, n) ! Update (X,B)
340
341 else
342 if (this%m .gt. 0) call add2(x, this%xbar, n) ! Restore desired solution
343 if (this%m .eq. this%L) then
344 this%m = 1
345 else
346 this%m = min(this%m+1, this%L)
347 end if
348
349 call copy(this%xx(1, this%m), x, n) ! Update (X,B)
350 end if
351
352 call ax%compute(this%bb(1, this%m), x, coef, coef%msh, coef%Xh)
353 call gs_h%gs_op_vector(this%bb(1, this%m), n, gs_op_add)
354 call bclst%apply_scalar(this%bb(1, this%m), n)
355
356 call proj_ortho(this, coef, n)
357 call profiler_end_region('Project back', 17)
358 end subroutine bcknd_project_back
359
360 subroutine bcknd_reorthogonalize_basis(this, Ax, coef, gs_h, blst, n)
361 class(projection_t), intent(inout) :: this
362 class(ax_t), intent(in) :: Ax
363 class(coef_t), intent(in) :: coef
364 type(gs_t), intent(inout) :: gs_h
365 type(bc_list_t), intent(inout) :: blst
366 integer, intent(in) :: n
367
368 call profiler_start_region('Project reortho basis')
369 if (neko_bcknd_device .eq. 1) then
370 call device_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
371 else
372 call cpu_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
373 end if
374 call profiler_end_region('Project reortho basis')
375 end subroutine bcknd_reorthogonalize_basis
376
377 subroutine cpu_reorthogonalize_basis(this, Ax, coef, gs_h, blst, n)
378 class(projection_t), intent(inout) :: this
379 class(ax_t), intent(in) :: Ax
380 class(coef_t), intent(in) :: coef
381 type(gs_t), intent(inout) :: gs_h
382 type(bc_list_t), intent(inout) :: blst
383 integer, intent(in) :: n
384 character(len=1000) :: msg
385
386 integer :: i, j
387 real(kind=rp) :: alpha, s, norm_fac
388 real(kind=rp) :: start_time, end_time, time
389
390 if (this%m .le. 0) return
391
392 associate(xx => this%xx, bb => this%bb)
393 start_time = mpi_wtime()
394
395 ! Recompute B = A_new * X using the new mesh metrics
396 do i = 1, this%m
397 call ax%compute(bb(1,i), xx(1,i), coef, coef%msh, coef%Xh)
398 call gs_h%gs_op_vector(bb(1,i), n, gs_op_add)
399 call blst%apply_scalar(bb(1,i), n)
400 end do
401
402 ! Modified Gram-Schmidt
403 do i = 1, this%m
404
405 ! Orthogonalize against previous vectors
406 do j = 1, i - 1
407 alpha = glsc3(xx(1,i), bb(1,j), coef%mult, n)
408 call add2s2(xx(1,i), xx(1,j), -alpha, n)
409 call add2s2(bb(1,i), bb(1,j), -alpha, n)
410 end do
411
412 s = glsc3(xx(1,i), bb(1,i), coef%mult, n)
413 norm_fac = 1.0_rp / sqrt(s)
414 call cmult(xx(1,i), norm_fac, n)
415 call cmult(bb(1,i), norm_fac, n)
416
417 end do
418 end_time = mpi_wtime()
419 time = end_time - start_time
420 write(msg, '(A, E15.7)') "Projection basis reorthogonalization (s): ", time
421 call neko_log%message(trim(msg))
422 end associate
423 end subroutine cpu_reorthogonalize_basis
424
425 subroutine device_reorthogonalize_basis(this, Ax, coef, gs_h, blst, n)
426 class(projection_t), intent(inout) :: this
427 class(ax_t), intent(in) :: Ax
428 class(coef_t), intent(in) :: coef
429 type(gs_t), intent(inout) :: gs_h
430 type(bc_list_t), intent(inout) :: blst
431 integer, intent(in) :: n
432 character(len=1000) :: msg
433
434 integer :: i, j
435 real(kind=rp) :: alpha, s, norm_fac
436 real(kind=rp) :: start_time, end_time, time
437
438 if (this%m .le. 0) return
439
440 associate(xx_d => this%xx_d, bb_d => this%bb_d)
441 start_time = mpi_wtime()
442
443 ! Recompute B = A_new * X using the new mesh metrics
444 do i = 1, this%m
445 call ax%compute(this%bb(1,i), this%xx(1,i), coef, coef%msh, coef%Xh)
446 call gs_h%gs_op_vector(this%bb(1,i), n, gs_op_add)
447 call blst%apply_scalar(this%bb(1,i), n)
448 end do
449
450 ! Modified Gram-Schmidt
451 do i = 1, this%m
452
453 ! Orthogonalize against previous vectors
454 do j = 1, i - 1
455 alpha = device_glsc3(xx_d(i), bb_d(j), coef%mult_d, n)
456 call device_add2s2(xx_d(i), xx_d(j), -alpha, n)
457 call device_add2s2(bb_d(i), bb_d(j), -alpha, n)
458 end do
459
460 s = device_glsc3(xx_d(i), bb_d(i), coef%mult_d, n)
461 norm_fac = 1.0_rp / sqrt(s)
462 call device_cmult(xx_d(i), norm_fac, n)
463 call device_cmult(bb_d(i), norm_fac, n)
464
465 end do
466 end_time = mpi_wtime()
467 time = end_time - start_time
468 write(msg, '(A, E15.7)') "Projection basis reorthogonalization (s): ", time
469 call neko_log%message(trim(msg))
470 end associate
471 end subroutine device_reorthogonalize_basis
472
473
474 subroutine cpu_project_on(this, b, coef, n)
475 class(projection_t), intent(inout) :: this
476 integer, intent(inout) :: n
477 class(coef_t), intent(inout) :: coef
478 real(kind=rp), intent(inout), dimension(n) :: b
479 integer :: i, j, k, l, ierr
480 real(kind=rp) :: work(this%L), alpha(this%L), s
481
482 associate(xbar => this%xbar, xx => this%xx, &
483 bb => this%bb)
484
485 if (this%m .le. 0) return
486
487 !First round of CGS
488 call rzero(alpha, this%m)
489 this%proj_res = sqrt(glsc3(b, b, coef%mult, n) / coef%volume)
490 this%proj_m = this%m
491 !$omp parallel do private(j, k, l, s) reduction(+:alpha)
492 do i = 1, n, neko_blk_size
493 j = min(neko_blk_size, n-i+1)
494 do k = 1, this%m
495 s = 0.0_rp
496 do l = 0, (j-1)
497 s = s + xx(i+l, k) * coef%mult(i+l,1,1,1) * b(i+l)
498 end do
499 alpha(k) = alpha(k) + s
500 end do
501 end do
502 !$omp end parallel do
503
504 !First one outside loop to avoid zeroing xbar and bbar
505 call mpi_allreduce(mpi_in_place, alpha, this%m, &
506 mpi_real_precision, mpi_sum, neko_comm, ierr)
507
508 call rzero(work, this%m)
509
510 !$omp parallel do private(j, k, l, s) reduction(+:work)
511 do i = 1, n, neko_blk_size
512 j = min(neko_blk_size, n-i+1)
513 do l = 0, (j-1)
514 xbar(i+l) = alpha(1) * xx(i+l,1)
515 b(i+l) = b(i+l) - alpha(1) * bb(i+l,1)
516 end do
517 do k = 2, this%m
518 do l = 0, (j-1)
519 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
520 b(i+l) = b(i+l)- alpha(k) * bb(i+l,k)
521 end do
522 end do
523 !Second round of CGS
524 do k = 1, this%m
525 s = 0.0_rp
526 do l = 0, (j-1)
527 s = s + xx(i+l,k) * coef%mult(i+l,1,1,1) * b(i+l)
528 end do
529 work(k) = work(k) + s
530 end do
531 end do
532 !$omp end parallel do
533
534 call mpi_allreduce(work, alpha, this%m, &
535 mpi_real_precision, mpi_sum, neko_comm, ierr)
536
537 !$omp parallel do private(j, k, l)
538 do i = 1, n, neko_blk_size
539 j = min(neko_blk_size, n-i+1)
540 do k = 1, this%m
541 do l = 0, (j-1)
542 xbar(i+l) = xbar(i+l) + alpha(k) * xx(i+l,k)
543 b(i+l) = b(i+l) - alpha(k) * bb(i+l,k)
544 end do
545 end do
546 end do
547 !$omp end parallel do
548 end associate
549 end subroutine cpu_project_on
550
551 subroutine device_project_on(this, b, coef, n)
552 class(projection_t), intent(inout) :: this
553 integer, intent(inout) :: n
554 class(coef_t), intent(inout) :: coef
555 real(kind=rp), intent(inout), dimension(n) :: b
556 real(kind=rp) :: alpha(this%L)
557 type(c_ptr) :: b_d
558 integer :: i
559 b_d = device_get_ptr(b)
560
561 associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
562 bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
563
564 if (this%m .le. 0) return
565
566
567
568 this%proj_res = sqrt(device_glsc3(b_d, b_d, coef%mult_d, n)/coef%volume)
569 this%proj_m = this%m
570 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
571 call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
572 coef%mult_d, xbar_d, this%m, n)
573 else
574 if (neko_bcknd_opencl .eq. 1) then
575 do i = 1, this%m
576 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
577 end do
578 else
579 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
580 call device_memcpy(alpha, alpha_d, this%m, &
581 host_to_device, sync = .false.)
582 end if
583 call device_rzero(xbar_d, n)
584 if (neko_bcknd_opencl .eq. 1) then
585 do i = 1, this%m
586 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
587 end do
588 call cmult(alpha, -1.0_rp, this%m)
589 else
590 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
591 call device_cmult(alpha_d, -1.0_rp, this%m)
592 end if
593
594 if (neko_bcknd_opencl .eq. 1) then
595 do i = 1, this%m
596 call device_add2s2(b_d, bb_d(i), alpha(i), n)
597 alpha(i) = device_glsc3(b_d, xx_d(i), coef%mult_d, n)
598 end do
599 else
600 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
601 call device_glsc3_many(alpha, b_d, xx_d_d, coef%mult_d, this%m, n)
602 call device_memcpy(alpha, alpha_d, this%m, &
603 host_to_device, sync = .false.)
604 end if
605
606 if (neko_bcknd_opencl .eq. 1) then
607 do i = 1, this%m
608 call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
609 call cmult(alpha, -1.0_rp, this%m)
610 call device_add2s2(b_d, bb_d(i), alpha(i), n)
611 end do
612 else
613 call device_add2s2_many(xbar_d, xx_d_d, alpha_d, this%m, n)
614 call device_cmult(alpha_d, -1.0_rp, this%m)
615 call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
616 end if
617 end if
618
619 end associate
620 end subroutine device_project_on
621
622 !Choose between CPU or device for proj_ortho
623 subroutine proj_ortho(this, coef, n)
624 class(projection_t), intent(inout) :: this
625 type(coef_t), intent(in) :: coef
626 integer, intent(in) :: n
627
628 if (neko_bcknd_device .eq. 1) then
629 call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
630 else
631 call cpu_proj_ortho (this, this%xx, this%bb, coef%mult, n)
632 end if
633 end subroutine proj_ortho
634
635 !This is a lot more primitive than on the CPU
636 subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
637 type(projection_t), intent(inout) :: this
638 integer, intent(in) :: n
639 type(c_ptr), dimension(this%L) :: xx_d, bb_d
640 type(c_ptr), intent(in) :: w_d
641 real(kind=rp) :: nrm, scl
642 real(kind=rp) :: alpha(this%L)
643 integer :: i
644
645 associate(m => this%m, xx_d_d => this%xx_d_d, &
646 bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
647
648 if (m .le. 0) return
649
650 if (neko_device_mpi .and. (neko_bcknd_opencl .ne. 1)) then
651 call device_project_ortho(alpha_d, bb_d(m), xx_d_d, bb_d_d, &
652 w_d, xx_d(m), this%m, n, nrm)
653 else
654 if (neko_bcknd_opencl .eq. 1)then
655 do i = 1, m
656 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d,n)
657 end do
658 else
659 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
660 end if
661 nrm = sqrt(alpha(m))
662 call cmult(alpha, -1.0_rp,m)
663 if (neko_bcknd_opencl .eq. 1)then
664 do i = 1, m - 1
665 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
666 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
667
668 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
669 end do
670 else
671 call device_memcpy(alpha, alpha_d, this%m, &
672 host_to_device, sync = .false.)
673 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
674 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
675
676 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
677 end if
678 call cmult(alpha, -1.0_rp,m)
679 if (neko_bcknd_opencl .eq. 1)then
680 do i = 1, m - 1
681 call device_add2s2(xx_d(m), xx_d(i), alpha(i), n)
682 call device_add2s2(bb_d(m), bb_d(i), alpha(i), n)
683 alpha(i) = device_glsc3(bb_d(m), xx_d(i), w_d, n)
684 end do
685 else
686 call device_memcpy(alpha, alpha_d, m, &
687 host_to_device, sync = .false.)
688 call device_add2s2_many(xx_d(m), xx_d_d, alpha_d, m-1, n)
689 call device_add2s2_many(bb_d(m), bb_d_d, alpha_d, m-1, n)
690 call device_glsc3_many(alpha, bb_d(m), xx_d_d, w_d, m, n)
691 end if
692 end if
693
694 alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
695 alpha(m) = sqrt(alpha(m))
696
697 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
698 scl = 1.0_rp / alpha(m)
699 call device_cmult(xx_d(m), scl, n)
700 call device_cmult(bb_d(m), scl, n)
701
702
703 else !New vector is not linearly independent, forget about it
704 if (pe_rank .eq. 0) then
705 call neko_warning('New vector not linearly indepependent!')
706 end if
707 m = m - 1 !Remove column
708 end if
709
710 end associate
711
712 end subroutine device_proj_ortho
713
714
715 subroutine cpu_proj_ortho(this, xx, bb, w, n)
716 type(projection_t), intent(inout) :: this
717 integer, intent(in) :: n
718 real(kind=rp), dimension(n, this%L), intent(inout) :: xx, bb
719 real(kind=rp), dimension(n), intent(in) :: w
720 real(kind=rp) :: nrm, scl1, scl2, c, s, alpha_m
721 real(kind=rp) :: alpha(this%L), beta(this%L)
722 integer :: i, j, k, l, h, ierr
723
724 associate(m => this%m)
725
726 if (m .le. 0) return !No vectors to ortho-normalize
727
728 ! AX = B
729 ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
730 call rzero(alpha, m)
731 !$omp parallel do private(j, k, l, s, c) reduction(+:alpha)
732 do i = 1, n, neko_blk_size
733 j = min(neko_blk_size, n-i+1)
734 do k = 1, m !First round CGS
735 s = 0.0_rp
736 c = 0.0_rp
737 do l = 0, (j-1)
738 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
739 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
740 end do
741 alpha(k) = alpha(k) + 0.5_rp * (s + c)
742 end do
743 end do
744 !$omp end parallel do
745
746 call mpi_allreduce(mpi_in_place, alpha, this%m, &
747 mpi_real_precision, mpi_sum, neko_comm, ierr)
748
749 nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
750
751
752 !$omp parallel do private(j, k, l)
753 do i = 1, n, neko_blk_size
754 j = min(neko_blk_size, n-i+1)
755 do k = 1, m-1
756 do l = 0, (j-1)
757 xx(i+l,m) = xx(i+l,m) - alpha(k) * xx(i+l,k)
758 bb(i+l,m) = bb(i+l,m) - alpha(k) * bb(i+l,k)
759 end do
760 end do
761 end do
762 !$omp end parallel do
763 call rzero(beta,m)
764
765 !$omp parallel do private(j, k, l, s, c) reduction(+:beta)
766 do i = 1, n, neko_blk_size
767 j = min(neko_blk_size, n-i+1)
768 do k = 1, m-1
769 s = 0.0_rp
770 c = 0.0_rp
771 do l = 0, (j-1)
772 s = s + xx(i+l,k) * w(i+l) * bb(i+l,m)
773 c = c + bb(i+l,k) * w(i+l) * xx(i+l,m)
774 end do
775 beta(k) = beta(k) + 0.5_rp * (s + c)
776 end do
777 end do
778 !$omp end parallel do
779
780 call mpi_allreduce(mpi_in_place, beta, this%m-1, &
781 mpi_real_precision, mpi_sum, neko_comm, ierr)
782
783 alpha_m = 0.0_rp
784
785 !$omp parallel do private(j, k, l, s) reduction(+:alpha_m)
786 do i = 1, n, neko_blk_size
787 j = min(neko_blk_size,n-i+1)
788 do k = 1, m-1
789 do l = 0, (j-1)
790 xx(i+l,m) = xx(i+l,m) - beta(k) * xx(i+l,k)
791 bb(i+l,m) = bb(i+l,m) - beta(k) * bb(i+l,k)
792 end do
793 end do
794 s = 0.0_rp
795 do l = 0, (j-1)
796 s = s + xx(i+l,m) * w(i+l) * bb(i+l,m)
797 end do
798 alpha_m = alpha_m + s
799 end do
800 !$omp end parallel do
801 alpha(m) = alpha_m
802 do k = 1, m-1
803 alpha(k) = alpha(k) + beta(k)
804 end do
805
806 !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
807 call mpi_allreduce(mpi_in_place, alpha(m), 1, &
808 mpi_real_precision, mpi_sum, neko_comm, ierr)
809 alpha(m) = sqrt(alpha(m))
810 !dx and db now stored in last column of xx and bb
811
812 if (alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
813 !Normalize dx and db
814 scl1 = 1.0_rp / alpha(m)
815 !$omp parallel do
816 do i = 0, (n - 1)
817 xx(1+i,m) = scl1 * xx(1+i,m)
818 bb(1+i,m) = scl1 * bb(1+i,m)
819 end do
820 !$omp end parallel do
821
822 else !New vector is not linearly independent, forget about it
823 k = m !location of rank deficient column
824 if (pe_rank .eq. 0) then
825 call neko_warning('New vector not linearly indepependent!')
826 end if
827 m = m - 1 !Remove column
828 end if
829
830 end associate
831
832 end subroutine cpu_proj_ortho
833
834 subroutine print_proj_info(this, string, tstep)
835 class(projection_t), intent(in) :: this
836 character(len=*), intent(in) :: string
837 integer, intent(in) :: tstep
838 character(len=LOG_SIZE) :: log_buf
839 character(len=12) :: tstep_str
840
841 if (this%proj_m .gt. 0) then
842 write(tstep_str, '(I12)') tstep
843 write(log_buf, '(A12,A14,1X,A8,A10,1X,I3,A16,1X,E10.4)') &
844 adjustl(tstep_str), ' | Projection:', string, &
845 ', Vectors:', this%proj_m, &
846 ', Original res.:', this%proj_res
847 call neko_log%message(log_buf)
848 call neko_log%newline()
849 end if
850
851 end subroutine print_proj_info
852
853 subroutine bcknd_clear(this, n)
854 class(projection_t), intent(inout) :: this
855 integer, intent(in) :: n
856 integer :: i, j
857
858 this%m = 0
859 this%proj_m = 0
860
861 do i = 1, this%L
862 if (neko_bcknd_device .eq. 1) then
863 call device_rzero(this%xx_d(i), n)
864 call device_rzero(this%bb_d(i), n)
865 else
866 do j = 1, n
867 this%xx(j,i) = 0.0_rp
868 this%bb(j,i) = 0.0_rp
869 end do
870 end if
871 end do
872
873 end subroutine bcknd_clear
874
875end module projection
Return the device pointer for an associated Fortran array.
Definition device.F90:108
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Copy data between host and device (or device and device)
Definition device.F90:72
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:53
integer, public pe_rank
MPI rank.
Definition comm.F90:58
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:45
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Interface for device projection.
subroutine, public device_proj_on(alpha_d, b_d, x_d_d, b_d_d, mult_d, xbar_d, j, n)
subroutine, public device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, w_d, xm_d, j, n, nrm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:48
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:240
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:209
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:80
integer, parameter, public log_size
Definition log.f90:46
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:502
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1285
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:898
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:289
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:996
Build configurations.
integer, parameter neko_blk_size
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:79
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:116
Project x onto X, the space of old solutions and back again.
subroutine device_project_on(this, b, coef, n)
subroutine projection_free(this)
subroutine bcknd_clear(this, n)
subroutine projection_init(this, n, l, activ_step, reorthogonalize_basis)
subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
subroutine bcknd_project_back(this, x, ax, coef, bclst, gs_h, n)
subroutine cpu_proj_ortho(this, xx, bb, w, n)
subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string, ax, gs_h, bclst)
subroutine bcknd_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine device_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine, public proj_ortho(this, coef, n)
subroutine bcknd_project_on(this, b, coef, n)
subroutine cpu_reorthogonalize_basis(this, ax, coef, gs_h, blst, n)
subroutine cpu_project_on(this, b, coef, n)
subroutine projection_post_solving(this, x, ax, coef, bclst, gs_h, n, tstep, dt_controller)
subroutine print_proj_info(this, string, tstep)
Implements type time_step_controller.
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:398
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:49
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63