Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pc_hsmg.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module hsmg
63 use num_types, only : rp
64 use math, only : copy, col2, add2
65 use utils, only : neko_error
66 use precon, only : pc_t, precon_factory, precon_destroy
67 use ax_product, only : ax_t, ax_helm_factory
68 use gather_scatter, only : gs_t, gs_op_add
70 use bc, only: bc_t
71 use bc_list, only : bc_list_t
72 use dirichlet, only : dirichlet_t
73 use schwarz, only : schwarz_t
74 use jacobi, only : jacobi_t
75 use sx_jacobi, only : sx_jacobi_t
77 use device
80 use space, only : space_t, gll
81 use dofmap, only : dofmap_t
82 use field, only : field_t
83 use coefs, only : coef_t
84 use mesh, only : mesh_t
85 use json_module, only : json_file
87 use krylov, only : ksp_t, ksp_monitor_t, ksp_max_iter, &
88 krylov_solver_factory
91 use logger, only : neko_log, log_size
92 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
93 !$ use omp_lib
94 implicit none
95 private
96
97 !Struct to arrange our multigridlevels
98 type, private :: multigrid_t
99 type(dofmap_t), pointer :: dof
100 type(gs_t), pointer :: gs_h
101 type(space_t), pointer :: xh
102 type(coef_t), pointer :: coef
103 type(bc_list_t), pointer :: bclst
104 type(schwarz_t), pointer :: schwarz
105 type(field_t), pointer :: e
106 end type multigrid_t
107
108 type, public, extends(pc_t) :: hsmg_t
109 type(mesh_t), pointer :: msh
110 integer :: nlvls
111 type(multigrid_t), allocatable :: grids(:)
112 type(gs_t) :: gs_crs, gs_mg
113 type(space_t) :: xh_crs, xh_mg
114 type(dofmap_t) :: dm_crs, dm_mg
115 type(coef_t) :: c_crs, c_mg
116 type(zero_dirichlet_t) :: bc_crs, bc_mg, bc_reg
117 type(bc_list_t) :: bclst_crs, bclst_mg, bclst_reg
118 type(schwarz_t) :: schwarz, schwarz_mg, schwarz_crs
119 type(field_t) :: e, e_mg, e_crs
120 type(field_t) :: wf
121 class(ksp_t), allocatable :: crs_solver
122 type(tamg_solver_t), allocatable :: amg_solver
123 integer :: niter
124 class(pc_t), allocatable :: pc_crs
125 class(ax_t), allocatable :: ax
126 real(kind=rp), allocatable :: r(:)
127 type(interpolator_t) :: interp_fine_mid
128 type(interpolator_t) :: interp_mid_crs
129 real(kind=rp), allocatable :: w(:)
130 type(c_ptr) :: w_d = c_null_ptr
131 type(c_ptr) :: r_d = c_null_ptr
132 type(c_ptr) :: hsmg_event
133 type(c_ptr) :: gs_event
134 contains
135 procedure, pass(this) :: init => hsmg_init
136 procedure, pass(this) :: init_from_components => &
138 procedure, pass(this) :: free => hsmg_free
139 procedure, pass(this) :: solve => hsmg_solve
140 procedure, pass(this) :: update => hsmg_set_h
141 end type hsmg_t
142
143contains
144
145 subroutine hsmg_init(this, coef, bclst, hsmg_params)
146 class(hsmg_t), intent(inout), target :: this
147 type(coef_t), intent(in), target :: coef
148 type(bc_list_t), intent(inout), target :: bclst
149 type(json_file), intent(inout) :: hsmg_params
150 character(len=:), allocatable :: crs_solver, crs_pc
151 logical :: crs_monitor
152 integer :: crs_tamg_lvls, crs_tamg_itrs, crs_tamg_cheby_degree
153
154 ! Exract coarse grid parameters
155
156 ! Common parameters for the coarse grid
157 call json_get_or_default(hsmg_params, 'coarse_grid.solver', &
158 crs_solver, "cg")
159
160 !
161 ! Parameters for a Krylov based coarse grid solverthis
162 !
163 call json_get_or_default(hsmg_params, 'coarse_grid.iterations', &
164 this%niter, 10)
165
166 call json_get_or_default(hsmg_params, 'coarse_grid.preconditioner', &
167 crs_pc, "jacobi")
168
169 call json_get_or_default(hsmg_params, 'coarse_grid.monitor', &
170 crs_monitor, .false.)
171
172 !
173 ! Parameters for a tree-amg based coarse grid solver
174 !
175 call json_get_or_default(hsmg_params, 'coarse_grid.levels', &
176 crs_tamg_lvls, 3)
177
178 call json_get_or_default(hsmg_params, 'coarse_grid.iterations', &
179 crs_tamg_itrs, 1)
180
181 call json_get_or_default(hsmg_params, 'coarse_grid.cheby_degree', &
182 crs_tamg_cheby_degree, 5)
183
184 call this%init_from_components(coef, bclst, crs_solver, crs_pc, &
185 crs_monitor, crs_tamg_lvls, crs_tamg_itrs, crs_tamg_cheby_degree)
186
187 end subroutine hsmg_init
188
189 subroutine hsmg_init_from_components(this, coef, bclst, crs_solver, crs_pc, &
190 crs_monitor, crs_tamg_lvls, crs_tamg_itrs, crs_tamg_cheby_degree)
191 class(hsmg_t), intent(inout), target :: this
192 type(coef_t), intent(in), target :: coef
193 type(bc_list_t), intent(inout), target :: bclst
194 character(len=:), intent(inout), allocatable :: crs_solver, crs_pc
195 logical, intent(inout) :: crs_monitor
196 integer, intent(in) :: crs_tamg_lvls, crs_tamg_itrs, crs_tamg_cheby_degree
197 integer :: n, i
198 integer :: lx_crs, lx_mid
199 class(bc_t), pointer :: bc_i
200
201 character(len=LOG_SIZE) :: log_buf
202
203 call this%free()
205 this%nlvls = 3
206 lx_crs = 2
207 if (coef%Xh%lx .lt. 5) then
208 lx_mid = max(coef%Xh%lx-1,3)
209
210 if (coef%Xh%lx .le. 2) then
211 call neko_error('Polynomial order < 2 not supported for hsmg precon')
212 end if
213
214 else
215 lx_mid = 4
216 end if
217
218
219 call neko_log%section('HSMG')
220 if (this%nlvls .lt. 1e1) then
221 write(log_buf, '(A,I1,A)') 'HSMG hierarchy : ', &
222 this%nlvls, ' levels'
223 else if (this%nlvls .lt. 1e2) then
224 write(log_buf, '(A,I2,A)') 'HSMG hierarchy : ', &
225 this%nlvls, ' levels'
226 else if (this%nlvls .lt. 1e3) then
227 write(log_buf, '(A,I3,A)') 'HSMG hierarchy : ', this%nlvls, &
228 ' levels'
229 else
230 write(log_buf, '(A,I6,A)') 'HSMG hierarchy : ', this%nlvls, &
231 ' levels'
232 end if
233 call neko_log%message(log_buf)
234 if (trim(crs_solver) .ne. 'tamg' .or. trim(crs_solver) .eq. 'cheby') then
235 call neko_log%message('Coarse grid solver : (' // trim(crs_solver) // &
236 ', ' // trim(crs_pc) // ')')
237
238 if (this%niter .lt. 1e1) then
239 write(log_buf, '(A,I1)') 'Coarse grid iters. : ', this%niter
240 else if (this%niter .lt. 1e2) then
241 write(log_buf, '(A,I2)') 'Coarse grid iters. : ', this%niter
242 else if (this%niter .lt. 1e3) then
243 write(log_buf, '(A,I3)') 'Coarse grid iters. : ', this%niter
244 else if (this%niter .lt. 1e4) then
245 write(log_buf, '(A,I4)') 'Coarse grid iters. : ', this%niter
246 else
247 write(log_buf, '(A,I6)') 'Coarse grid iters. : ', this%niter
248 end if
249
250 call neko_log%message(log_buf)
251 else
252 call neko_log%message('Coarse grid solver : ' // trim(crs_solver) )
253 end if
254
255 this%msh => coef%msh
256 allocate(this%grids(this%nlvls))
257 allocate(this%w(coef%dof%size()))
258 allocate(this%r(coef%dof%size()))
259
260
261 ! Compute all elements as if they are deformed
262 call coef%msh%all_deformed()
263
264 n = coef%dof%size()
265 call this%e%init(coef%dof, 'work array')
266 call this%wf%init(coef%dof, 'work 2')
267
268 call this%Xh_crs%init(gll, lx_crs, lx_crs, lx_crs)
269 call this%dm_crs%init(coef%msh, this%Xh_crs)
270 call this%gs_crs%init(this%dm_crs)
271 call this%e_crs%init(this%dm_crs, 'work crs')
272 call this%c_crs%init(this%gs_crs)
273
274 call this%Xh_mg%init(gll, lx_mid, lx_mid, lx_mid)
275 call this%dm_mg%init(coef%msh, this%Xh_mg)
276 call this%gs_mg%init(this%dm_mg)
277 call this%e_mg%init(this%dm_mg, 'work midl')
278 call this%c_mg%init(this%gs_mg)
279
280 ! Create backend specific Ax operator
281 call ax_helm_factory(this%ax, full_formulation = .false.)
282
283 call this%bc_crs%init_base(this%c_crs)
284 call this%bc_mg%init_base(this%c_mg)
285 call this%bc_reg%init_base(coef)
286 if (bclst%size() .gt. 0) then
287 do i = 1, bclst%size()
288 bc_i => bclst%get(i)
289 call this%bc_reg%mark_facets(bc_i%marked_facet)
290 bc_i => bclst%get(i)
291 call this%bc_crs%mark_facets(bc_i%marked_facet)
292 bc_i => bclst%get(i)
293 call this%bc_mg%mark_facets(bc_i%marked_facet)
294 end do
295 end if
296 call this%bc_reg%finalize()
297 call this%bc_crs%finalize()
298 call this%bc_mg%finalize()
299
300 call this%bclst_reg%init()
301 call this%bclst_crs%init()
302 call this%bclst_mg%init()
303
304 call this%bclst_reg%append(this%bc_reg)
305 call this%bclst_crs%append(this%bc_crs)
306 call this%bclst_mg%append(this%bc_mg)
307
308 call this%schwarz%init(coef%Xh, coef%dof, coef%gs_h, &
309 this%bclst_reg, coef%msh)
310 call this%schwarz_mg%init(this%Xh_mg, this%dm_mg, this%gs_mg,&
311 this%bclst_mg, coef%msh)
312
313 call this%interp_fine_mid%init(coef%Xh, this%Xh_mg)
314 call this%interp_mid_crs%init(this%Xh_mg, this%Xh_crs)
315
316 call hsmg_fill_grid(coef%dof, coef%gs_h, coef%Xh, coef, &
317 this%bclst_reg, this%schwarz, this%e, this%grids, 3)
318 call hsmg_fill_grid(this%dm_mg, this%gs_mg, this%Xh_mg, this%c_mg, &
319 this%bclst_mg, this%schwarz_mg, this%e_mg, &
320 this%grids, 2)
321 call hsmg_fill_grid(this%dm_crs, this%gs_crs, this%Xh_crs, &
322 this%c_crs, this%bclst_crs, this%schwarz_crs, &
323 this%e_crs, this%grids, 1)
324
325 call hsmg_set_h(this)
326 if (neko_bcknd_device .eq. 1) then
327 call device_map(this%w, this%w_d, n)
328 call device_map(this%r, this%r_d, n)
329 end if
330
331 call device_event_create(this%hsmg_event, 2)
332 call device_event_create(this%gs_event, 2)
333
334
335
336 ! Create a backend specific krylov solver
337 if (trim(crs_solver) .eq. 'tamg') then
338 allocate(this%amg_solver)
339 call this%amg_solver%init(this%ax, this%grids(1)%e%Xh, &
340 this%grids(1)%coef, this%msh, this%grids(1)%gs_h, crs_tamg_lvls, &
341 this%grids(1)%bclst, crs_tamg_itrs, crs_tamg_cheby_degree)
342 else
343 ! Create a backend specific preconditioner
344 call precon_factory(this%pc_crs, crs_pc)
345
346 select type (pc => this%pc_crs)
347 type is (jacobi_t)
348 call pc%init(this%c_crs, this%dm_crs, this%gs_crs)
349 type is (sx_jacobi_t)
350 call pc%init(this%c_crs, this%dm_crs, this%gs_crs)
351 type is (device_jacobi_t)
352 call pc%init(this%c_crs, this%dm_crs, this%gs_crs)
353 end select
354
355 call krylov_solver_factory(this%crs_solver, &
356 this%dm_crs%size(), trim(crs_solver), ksp_max_iter, &
357 m = this%pc_crs, monitor = crs_monitor)
358 end if
359
360 call neko_log%end_section()
361
362 end subroutine hsmg_init_from_components
363
364 subroutine hsmg_set_h(this)
365 class(hsmg_t), intent(inout) :: this
366 ! integer :: i
367 ! Yeah I dont really know what to do here. For incompressible flow not
368 ! much happens
369 this%grids(1)%coef%ifh2 = .false.
370 call copy(this%grids(1)%coef%h1, this%grids(3)%coef%h1, &
371 this%grids(1)%dof%size())
372 if (neko_bcknd_device .eq. 1) then
373 call device_copy(this%grids(1)%coef%h1_d, this%grids(3)%coef%h1_d, &
374 this%grids(1)%dof%size())
375 end if
376 end subroutine hsmg_set_h
377
378
379 subroutine hsmg_fill_grid(dof, gs_h, Xh, coef, bclst, schwarz, e, grids, l)
380 type(dofmap_t), target, intent(in) :: dof
381 type(gs_t), target, intent(in) :: gs_h
382 type(space_t), target, intent(in) :: Xh
383 type(coef_t), target, intent(in) :: coef
384 type(bc_list_t), target, intent(in) :: bclst
385 type(schwarz_t), target, intent(in) :: schwarz
386 type(field_t), target, intent(in) :: e
387 integer, intent(in) :: l
388 type(multigrid_t), intent(inout), dimension(l) :: grids
389
390
391 grids(l)%dof => dof
392 grids(l)%gs_h => gs_h
393 grids(l)%Xh => xh
394 grids(l)%coef => coef
395 grids(l)%bclst => bclst
396 grids(l)%schwarz => schwarz
397 grids(l)%e => e
398
399 end subroutine hsmg_fill_grid
400
401 subroutine hsmg_free(this)
402 class(hsmg_t), intent(inout) :: this
403
404 if (allocated(this%ax)) then
405 deallocate(this%ax)
406 end if
407
408 if (allocated(this%grids)) then
409 deallocate(this%grids)
410 end if
411
412 if (allocated(this%w)) then
413 deallocate(this%w)
414 end if
415
416 if (allocated(this%r)) then
417 deallocate(this%r)
418 end if
419
420 if (c_associated(this%w_d)) then
421 call device_free(this%w_d)
422 end if
423
424 if (c_associated(this%r_d)) then
425 call device_free(this%r_d)
426 end if
427
428 call this%schwarz%free()
429 call this%schwarz_mg%free()
430
431 call this%c_crs%free()
432 call this%c_mg%free()
433 call this%e%free()
434 call this%e_mg%free()
435 call this%e_crs%free()
436 call this%wf%free()
437
438 call this%gs_crs%free()
439 call this%gs_mg%free()
440 call this%interp_mid_crs%free()
441 call this%interp_fine_mid%free()
442
443 call this%bc_crs%free()
444 call this%bc_mg%free()
445 call this%bc_reg%free()
446
447 call this%bclst_reg%free()
448 call this%bclst_crs%free()
449 call this%bclst_mg%free()
450
451 if (allocated(this%crs_solver)) then
452 call this%crs_solver%free()
453 deallocate(this%crs_solver)
454 end if
455
456 if (allocated(this%pc_crs)) then
457 call precon_destroy(this%pc_crs)
458 end if
459
460 if (c_associated(this%hsmg_event)) then
461 call device_event_destroy(this%hsmg_event)
462 end if
463 if (c_associated(this%gs_event)) then
464 call device_event_destroy(this%gs_event)
465 end if
466
467 call this%dm_crs%free()
468 call this%dm_mg%free()
469 call this%Xh_crs%free()
470 call this%Xh_mg%free()
471
472 end subroutine hsmg_free
473
475 subroutine hsmg_solve(this, z, r, n)
476 integer, intent(in) :: n
477 class(hsmg_t), intent(inout) :: this
478 real(kind=rp), dimension(n), intent(inout) :: z
479 real(kind=rp), dimension(n), intent(inout) :: r
480 type(c_ptr) :: z_d, r_d
481 type(ksp_monitor_t) :: crs_info
482 integer :: thrdid, nthrds
483
484 call profiler_start_region('HSMG_solve', 8)
485 if (neko_bcknd_device .eq. 1) then
486 z_d = device_get_ptr(z)
487 r_d = device_get_ptr(r)
488 !We should not work with the input
489 call device_copy(this%r_d, r_d, n)
490 call this%bclst_reg%apply_scalar(this%r, n)
491
492 !OVERLAPPING Schwarz exchange and solve
493 !! DOWNWARD Leg of V-cycle, we are pretty hardcoded here but w/e
494 call device_col2(this%r_d, this%grids(3)%coef%mult_d, &
495 this%grids(3)%dof%size())
496 !Restrict to middle level
497 call this%interp_fine_mid%map(this%e%x, this%r, &
498 this%msh%nelv, this%grids(2)%Xh)
499 call this%grids(2)%gs_h%op(this%e%x, &
500 this%grids(2)%dof%size(), gs_op_add, this%gs_event)
501 call device_event_sync(this%gs_event)
502 !This should probably be double checked again
503 call device_copy(this%r_d, r_d, n)
504 call this%bclst_reg%apply_scalar(this%r, n)
505 call device_copy(this%w_d, this%e%x_d, this%grids(2)%dof%size())
506 call this%bclst_mg%apply_scalar(this%w, this%grids(2)%dof%size())
507 !OVERLAPPING Schwarz exchange and solve
508 call device_col2(this%w_d, this%grids(2)%coef%mult_d, &
509 this%grids(2)%dof%size())
510 !restrict residual to crs
511 call this%interp_mid_crs%map(this%wf%x, this%w, this%msh%nelv, &
512 this%grids(1)%Xh)
513 !Crs solve
514 call device_copy(this%w_d, this%e%x_d, this%grids(2)%dof%size())
515 call this%bclst_mg%apply_scalar(this%w, this%grids(2)%dof%size())
516
517 !$omp parallel private(thrdid, nthrds)
518
519 thrdid = 0
520 nthrds = 1
521 !$ thrdid = omp_get_thread_num()
522 !$ nthrds = omp_get_num_threads()
523
524 if (thrdid .eq. 0) then
525 call profiler_start_region('HSMG_schwarz', 9)
526 call this%grids(3)%schwarz%compute(z, this%r)
527 call this%grids(2)%schwarz%compute(this%grids(2)%e%x, this%w)
528 call profiler_end_region('HSMG_schwarz', 9)
529 end if
530 if (nthrds .eq. 1 .or. thrdid .eq. 1) then
531 call profiler_start_region('HSMG_coarse_grid', 10)
532 call this%grids(1)%gs_h%op(this%wf%x, &
533 this%grids(1)%dof%size(), gs_op_add, this%gs_event)
534 call device_event_sync(this%gs_event)
535 call this%grids(1)%bclst%apply_scalar(this%wf%x, &
536 this%grids(1)%dof%size())
537 call profiler_start_region('HSMG_coarse_solve', 11)
538 if (allocated(this%amg_solver)) then
539 call this%amg_solver%solve(this%grids(1)%e%x, this%wf%x, &
540 this%grids(1)%dof%size())
541 else
542 crs_info = this%crs_solver%solve(this%Ax, this%grids(1)%e, &
543 this%wf%x, &
544 this%grids(1)%dof%size(), &
545 this%grids(1)%coef, &
546 this%grids(1)%bclst, &
547 this%grids(1)%gs_h, this%niter)
548 end if
549 call profiler_end_region('HSMG_coarse_solve', 11)
550 call this%grids(1)%bclst%apply_scalar(this%grids(1)%e%x,&
551 this%grids(1)%dof%size())
552 call profiler_end_region('HSMG_coarse_grid', 10)
553 end if
554 !$omp end parallel
555
556 call this%interp_mid_crs%map(this%w, this%grids(1)%e%x, &
557 this%msh%nelv, this%grids(2)%Xh)
558 call device_add2(this%grids(2)%e%x_d, this%w_d, this%grids(2)%dof%size())
559
560 call this%interp_fine_mid%map(this%w, this%grids(2)%e%x, &
561 this%msh%nelv, this%grids(3)%Xh)
562 call device_add2(z_d, this%w_d, this%grids(3)%dof%size())
563 call this%grids(3)%gs_h%op(z, this%grids(3)%dof%size(), &
564 gs_op_add, this%gs_event)
565 call device_event_sync(this%gs_event)
566 call device_col2(z_d, this%grids(3)%coef%mult_d, &
567 this%grids(3)%dof%size())
568 else
569 !We should not work with the input
570 call copy(this%r, r, n)
571
572 !OVERLAPPING Schwarz exchange and solve
573 call this%grids(3)%schwarz%compute(z, this%r)
574 ! DOWNWARD Leg of V-cycle, we are pretty hardcoded here but w/e
575 call col2(this%r, this%grids(3)%coef%mult, &
576 this%grids(3)%dof%size())
577 !Restrict to middle level
578 call this%interp_fine_mid%map(this%w, this%r, &
579 this%msh%nelv, this%grids(2)%Xh)
580 call this%grids(2)%gs_h%op(this%w, this%grids(2)%dof%size(), gs_op_add)
581 !OVERLAPPING Schwarz exchange and solve
582 call this%grids(2)%schwarz%compute(this%grids(2)%e%x, this%w)
583 call col2(this%w, this%grids(2)%coef%mult, this%grids(2)%dof%size())
584 !restrict residual to crs
585 call this%interp_mid_crs%map(this%r, this%w, &
586 this%msh%nelv, this%grids(1)%Xh)
587 !Crs solve
588
589 call this%grids(1)%gs_h%op(this%r, this%grids(1)%dof%size(), gs_op_add)
590 call this%grids(1)%bclst%apply(this%r, this%grids(1)%dof%size())
591
592 call profiler_start_region('HSMG_coarse-solve', 11)
593 if (allocated(this%amg_solver)) then
594 call this%amg_solver%solve(this%grids(1)%e%x, this%r, &
595 this%grids(1)%dof%size())
596 else
597 crs_info = this%crs_solver%solve(this%Ax, this%grids(1)%e, this%r, &
598 this%grids(1)%dof%size(), &
599 this%grids(1)%coef, &
600 this%grids(1)%bclst, &
601 this%grids(1)%gs_h, this%niter)
602 end if
603 call profiler_end_region('HSMG_coarse-solve', 11)
604
605 call this%grids(1)%bclst%apply_scalar(this%grids(1)%e%x, &
606 this%grids(1)%dof%size())
607
608
609 call this%interp_mid_crs%map(this%w, this%grids(1)%e%x, &
610 this%msh%nelv, this%grids(2)%Xh)
611 call add2(this%grids(2)%e%x, this%w, this%grids(2)%dof%size())
612
613 call this%interp_fine_mid%map(this%w, this%grids(2)%e%x, &
614 this%msh%nelv, this%grids(3)%Xh)
615 call add2(z, this%w, this%grids(3)%dof%size())
616 call this%grids(3)%gs_h%op(z, this%grids(3)%dof%size(), gs_op_add)
617 call col2(z, this%grids(3)%coef%mult, this%grids(3)%dof%size())
618
619 end if
620 call profiler_end_region('HSMG_solve', 8)
621 end subroutine hsmg_solve
622end module hsmg
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Retrieves a parameter by name or assigns a provided default value. In the latter case also adds the m...
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines a boundary condition.
Definition bc.f90:34
Coefficients.
Definition coef.f90:34
Jacobi preconditioner accelerator backend.
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1279
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1249
Defines a dirichlet boundary condition.
Definition dirichlet.f90:34
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Defines a field.
Definition field.f90:34
Gather-scatter.
Krylov preconditioner.
Definition pc_hsmg.f90:61
subroutine hsmg_init(this, coef, bclst, hsmg_params)
Definition pc_hsmg.f90:146
subroutine hsmg_solve(this, z, r, n)
The h1mg preconditioner from Nek5000.
Definition pc_hsmg.f90:476
subroutine hsmg_set_h(this)
Definition pc_hsmg.f90:365
subroutine hsmg_free(this)
Definition pc_hsmg.f90:402
subroutine hsmg_fill_grid(dof, gs_h, xh, coef, bclst, schwarz, e, grids, l)
Definition pc_hsmg.f90:380
subroutine hsmg_init_from_components(this, coef, bclst, crs_solver, crs_pc, crs_monitor, crs_tamg_lvls, crs_tamg_itrs, crs_tamg_cheby_degree)
Definition pc_hsmg.f90:191
Routines to interpolate between different spaces.
Jacobi preconditioner.
Definition pc_jacobi.f90:34
Utilities for retrieving parameters from the case files.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:70
integer, parameter, public log_size
Definition log.f90:46
Definition math.f90:60
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:107
Overlapping schwarz solves.
Definition schwarz.f90:61
Defines a function space.
Definition space.f90:34
integer, parameter, public gll
Definition space.f90:49
Jacobi preconditioner SX-Aurora backend.
Implements multigrid using the TreeAMG hierarchy structure. USE:
Utilities.
Definition utils.f90:35
Defines a zero-valued Dirichlet boundary condition.
Base type for a matrix-vector product providing .
Definition ax.f90:43
Base type for a boundary condition.
Definition bc.f90:61
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Defines a jacobi preconditioner.
Generic Dirichlet boundary condition on .
Definition dirichlet.f90:48
Interpolation between two space::space_t.
Defines a jacobi preconditioner.
Definition pc_jacobi.f90:45
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40
The function space for the SEM solution fields.
Definition space.f90:63
Defines a jacobi preconditioner for SX-Aurora.
Type for the TreeAMG solver.
Zero-valued Dirichlet boundary condition. Used for no-slip walls, but also for various auxillary cond...
#define max(a, b)
Definition tensor.cu:40