Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cg_coupled.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cg_cpld
35 use num_types, only: rp, xp
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc2, abscmp
45 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
46 use utils, only : neko_error
47 use operators, only : rotate_cyc
48 implicit none
49 private
50
52 type, public, extends(ksp_t) :: cg_cpld_t
53 real(kind=rp), allocatable :: w1(:)
54 real(kind=rp), allocatable :: w2(:)
55 real(kind=rp), allocatable :: w3(:)
56 real(kind=rp), allocatable :: r1(:)
57 real(kind=rp), allocatable :: r2(:)
58 real(kind=rp), allocatable :: r3(:)
59 real(kind=rp), allocatable :: p1(:)
60 real(kind=rp), allocatable :: p2(:)
61 real(kind=rp), allocatable :: p3(:)
62 real(kind=rp), allocatable :: z1(:)
63 real(kind=rp), allocatable :: z2(:)
64 real(kind=rp), allocatable :: z3(:)
65 real(kind=rp), allocatable :: tmp(:)
66 contains
67 procedure, pass(this) :: init => cg_cpld_init
68 procedure, pass(this) :: free => cg_cpld_free
69 procedure, pass(this) :: solve => cg_cpld_nop
70 procedure, pass(this) :: solve_coupled => cg_cpld_solve
71 end type cg_cpld_t
72
73contains
74
76 subroutine cg_cpld_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
77 class(cg_cpld_t), target, intent(inout) :: this
78 integer, intent(in) :: max_iter
79 class(pc_t), optional, intent(in), target :: M
80 integer, intent(in) :: n
81 real(kind=rp), optional, intent(in) :: rel_tol
82 real(kind=rp), optional, intent(in) :: abs_tol
83 logical, optional, intent(in) :: monitor
84
85 call this%free()
86
87 allocate(this%w1(n))
88 allocate(this%w2(n))
89 allocate(this%w3(n))
90 allocate(this%r1(n))
91 allocate(this%r2(n))
92 allocate(this%r3(n))
93 allocate(this%p1(n))
94 allocate(this%p2(n))
95 allocate(this%p3(n))
96 allocate(this%z1(n))
97 allocate(this%z2(n))
98 allocate(this%z3(n))
99 allocate(this%tmp(n))
100
101 if (present(m)) then
102 this%M => m
103 end if
104
105 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
106 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
107 else if (present(rel_tol) .and. present(abs_tol)) then
108 call this%ksp_init(max_iter, rel_tol, abs_tol)
109 else if (present(monitor) .and. present(abs_tol)) then
110 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
111 else if (present(rel_tol) .and. present(monitor)) then
112 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
113 else if (present(rel_tol)) then
114 call this%ksp_init(max_iter, rel_tol = rel_tol)
115 else if (present(abs_tol)) then
116 call this%ksp_init(max_iter, abs_tol = abs_tol)
117 else if (present(monitor)) then
118 call this%ksp_init(max_iter, monitor = monitor)
119 else
120 call this%ksp_init(max_iter)
121 end if
122
123 end subroutine cg_cpld_init
124
126 subroutine cg_cpld_free(this)
127 class(cg_cpld_t), intent(inout) :: this
128
129 call this%ksp_free()
130
131 if (allocated(this%w1)) then
132 deallocate(this%w1)
133 end if
134
135 if (allocated(this%w2)) then
136 deallocate(this%w2)
137 end if
138
139 if (allocated(this%w3)) then
140 deallocate(this%w3)
141 end if
142
143 if (allocated(this%r1)) then
144 deallocate(this%r1)
145 end if
146
147 if (allocated(this%r2)) then
148 deallocate(this%r2)
149 end if
150
151 if (allocated(this%r3)) then
152 deallocate(this%r3)
153 end if
154
155 if (allocated(this%p1)) then
156 deallocate(this%p1)
157 end if
158
159 if (allocated(this%p2)) then
160 deallocate(this%p2)
161 end if
162
163 if (allocated(this%p3)) then
164 deallocate(this%p3)
165 end if
166
167 if (allocated(this%z1)) then
168 deallocate(this%z1)
169 end if
170
171 if (allocated(this%z2)) then
172 deallocate(this%z2)
173 end if
174
175 if (allocated(this%z3)) then
176 deallocate(this%z3)
177 end if
178
179 if (allocated(this%tmp)) then
180 deallocate(this%tmp)
181 end if
182
183 nullify(this%M)
184
185 end subroutine cg_cpld_free
186
187 function cg_cpld_nop(this, Ax, x, f, n, coef, blst, gs_h, niter) &
188 result(ksp_results)
189 class(cg_cpld_t), intent(inout) :: this
190 class(ax_t), intent(in) :: ax
191 type(field_t), intent(inout) :: x
192 integer, intent(in) :: n
193 real(kind=rp), dimension(n), intent(in) :: f
194 type(coef_t), intent(inout) :: coef
195 type(bc_list_t), intent(inout) :: blst
196 type(gs_t), intent(inout) :: gs_h
197 type(ksp_monitor_t) :: ksp_results
198 integer, optional, intent(in) :: niter
199
200 ! Throw and error
201 call neko_error('The cpldcg solver is only defined for coupled solves')
202
203 ksp_results%res_final = 0.0
204 ksp_results%iter = 0
205 end function cg_cpld_nop
206
208 function cg_cpld_solve(this, Ax, x, y, z, fx, fy, fz, &
209 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
210 class(cg_cpld_t), intent(inout) :: this
211 class(ax_t), intent(in) :: ax
212 type(field_t), intent(inout) :: x
213 type(field_t), intent(inout) :: y
214 type(field_t), intent(inout) :: z
215 integer, intent(in) :: n
216 real(kind=rp), dimension(n), intent(in) :: fx
217 real(kind=rp), dimension(n), intent(in) :: fy
218 real(kind=rp), dimension(n), intent(in) :: fz
219 type(coef_t), intent(inout) :: coef
220 type(bc_list_t), intent(inout) :: blstx
221 type(bc_list_t), intent(inout) :: blsty
222 type(bc_list_t), intent(inout) :: blstz
223 type(gs_t), intent(inout) :: gs_h
224 type(ksp_monitor_t), dimension(3) :: ksp_results
225 integer, optional, intent(in) :: niter
226 integer :: i, iter, max_iter, ierr
227 real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
228 real(kind=rp) :: beta, pap, alpha, norm_fac
229 real(kind=xp) :: tmp_xp, r1_xp, r2_xp, r3_xp, mult_xp
230
231 if (present(niter)) then
232 max_iter = niter
233 else
234 max_iter = this%max_iter
235 end if
236 norm_fac = 1.0_rp / sqrt(coef%volume)
237
238 associate(p1 => this%p1, p2 => this%p2, p3 => this%p3, z1 => this%z1, &
239 z2 => this%z2, z3 => this%z3, r1 => this%r1, r2 => this%r2, &
240 r3 => this%r3, tmp => this%tmp, w1 => this%w1, w2 => this%w2, &
241 w3 => this%w3)
242
243 rtz1 = 1.0_rp
244 do concurrent(i = 1:n)
245 x%x(i,1,1,1) = 0.0_rp
246 y%x(i,1,1,1) = 0.0_rp
247 z%x(i,1,1,1) = 0.0_rp
248 p1(i) = 0.0_rp
249 p2(i) = 0.0_rp
250 p3(i) = 0.0_rp
251 z1(i) = 0.0_rp
252 z2(i) = 0.0_rp
253 z3(i) = 0.0_rp
254 r1(i) = fx(i)
255 r2(i) = fy(i)
256 r3(i) = fz(i)
257 tmp(i) = r1(i)**2 + r2(i)**2 + r3(i)**2
258 end do
259
260 rtr = glsc2(tmp, coef%mult, n)
261 rnorm = sqrt(rtr)*norm_fac
262 ksp_results%res_start = rnorm
263 ksp_results%res_final = rnorm
264 ksp_results%iter = 0
265 if (abscmp(rnorm, 0.0_rp)) then
266 ksp_results%converged = .true.
267 return
268 end if
269
270 call this%monitor_start('cpldCG')
271 do iter = 1, max_iter
272 call this%M%solve(z1, this%r1, n)
273 call this%M%solve(z2, this%r2, n)
274 call this%M%solve(z3, this%r3, n)
275 rtz2 = rtz1
276
277 do concurrent(i = 1:n)
278 this%tmp(i) = z1(i) * r1(i) &
279 + z2(i) * r2(i) &
280 + z3(i) * r3(i)
281 end do
282
283 rtz1 = glsc2(tmp, coef%mult, n)
284
285 beta = rtz1 / rtz2
286 if (iter .eq. 1) beta = 0.0_rp
287 do concurrent(i = 1:n)
288 p1(i) = p1(i) * beta + z1(i)
289 p2(i) = p2(i) * beta + z2(i)
290 p3(i) = p3(i) * beta + z3(i)
291 end do
292
293 call ax%compute_vector(w1, w2, w3, p1, p2, p3, coef, x%msh, x%Xh)
294
295 call rotate_cyc(w1, w2, w3, 1, coef)
296 call gs_h%op(w1, n, gs_op_add)
297 call gs_h%op(w2, n, gs_op_add)
298 call gs_h%op(w3, n, gs_op_add)
299 call rotate_cyc(w1, w2, w3, 0, coef)
300
301 call blstx%apply_scalar(w1, n)
302 call blsty%apply_scalar(w2, n)
303 call blstz%apply_scalar(w3, n)
304
305 do concurrent(i = 1:n)
306 tmp(i) = w1(i) * p1(i) &
307 + w2(i) * p2(i) &
308 + w3(i) * p3(i)
309 end do
310
311 pap = glsc2(tmp, coef%mult, n)
312
313 alpha = rtz1 / pap
314 do concurrent(i = 1:n)
315 x%x(i,1,1,1) = x%x(i,1,1,1) + alpha * p1(i)
316 y%x(i,1,1,1) = y%x(i,1,1,1) + alpha * p2(i)
317 z%x(i,1,1,1) = z%x(i,1,1,1) + alpha * p3(i)
318 end do
319
320 tmp = 0.0_rp
321 tmp_xp = 0.0_xp
322 !$omp parallel do reduction(+:tmp_xp)
323 do i = 1, n
324 r1(i) = r1(i) - alpha * w1(i)
325 r2(i) = r2(i) - alpha * w2(i)
326 r3(i) = r3(i) - alpha * w3(i)
327 r1_xp = real(r1(i), kind=xp)
328 r2_xp = real(r2(i), kind=xp)
329 r3_xp = real(r3(i), kind=xp)
330 mult_xp = real(coef%mult(i,1,1,1), kind=xp)
331 tmp_xp = tmp_xp + &
332 (r1_xp * r1_xp + r2_xp * r2_xp + r3_xp * r3_xp) * mult_xp
333 end do
334 !$omp end parallel do
335
336 call mpi_allreduce(mpi_in_place, tmp_xp, 1, mpi_extra_precision, &
337 mpi_sum, neko_comm, ierr)
338 rtr = tmp_xp
339
340 if (iter .eq. 1) rtr0 = rtr
341 rnorm = sqrt(rtr) * norm_fac
342 call this%monitor_iter(iter, rnorm)
343 if (rnorm .lt. this%abs_tol) then
344 exit
345 end if
346 end do
347 end associate
348 call this%monitor_stop()
349 ksp_results%res_final = rnorm
350 ksp_results%iter = iter
351 ksp_results%converged = this%is_converged(iter, rnorm)
352 end function cg_cpld_solve
353
354end module cg_cpld
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
double real
Apply cyclic boundary condition to a vector field.
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines a coupled Conjugate Gradient methods.
type(ksp_monitor_t) function, dimension(3) cg_cpld_solve(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Coupled PCG solve.
type(ksp_monitor_t) function cg_cpld_nop(this, ax, x, f, n, coef, blst, gs_h, niter)
subroutine cg_cpld_free(this)
Deallocate a coupled PCG solver.
subroutine cg_cpld_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a coupled PCG solver.
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:44
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:53
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1264
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
Krylov preconditioner.
Definition precon.f90:34
Utilities.
Definition utils.f90:35
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:49
Coupled preconditioned conjugate gradient method.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40