Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
schwarz.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module schwarz
62 use num_types, only : rp, i8
63 use math, only : rzero, rone
64 use mesh, only : mesh_t
65 use space, only : space_t, gll
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
71 use fdm, only : fdm_t
78 use bc_list, only : bc_list_t
79 use, intrinsic :: iso_c_binding, only : c_sizeof, c_ptr, c_null_ptr, &
80 c_associated
81 !$ use omp_lib
82 implicit none
83 private
84
85 type, public :: schwarz_t
86 real(kind=rp), allocatable :: work1(:)
87 real(kind=rp), allocatable :: work2(:)
88 real(kind=rp), allocatable :: wt(:,:,:,:,:)
89 type(c_ptr) :: work1_d = c_null_ptr
90 type(c_ptr) :: work2_d = c_null_ptr
91 type(c_ptr) :: wt_d = c_null_ptr
92 type(space_t) :: xh_schwarz
93 type(gs_t) :: gs_schwarz
94 type(dofmap_t) :: dm_schwarz
95 type(fdm_t) :: fdm
96 type(space_t), pointer :: xh => null()
97 type(bc_list_t), pointer :: bclst => null()
98 type(dofmap_t), pointer :: dof => null()
99 type(gs_t), pointer :: gs_h => null()
100 type(mesh_t), pointer :: msh => null()
101 type(c_ptr) :: event = c_null_ptr
102 logical :: local_gs = .false.
103 type(gs_t), allocatable :: gs_h_local
104 contains
105 procedure, pass(this) :: init => schwarz_init
106 procedure, pass(this) :: free => schwarz_free
107 procedure, pass(this) :: compute => schwarz_compute
108 end type schwarz_t
109
110contains
111
112 subroutine schwarz_init(this, Xh, dof, gs_h, bclst, msh)
113 class(schwarz_t), target, intent(inout) :: this
114 type(space_t), target, intent(inout) :: Xh
115 type(dofmap_t), target, intent(in) :: dof
116 type(gs_t), target, intent(inout) :: gs_h
117 type(mesh_t), target, intent(inout) :: msh
118 type(bc_list_t), target, intent(inout) :: bclst
119 integer :: nthrds
120
121 call this%free()
122
123 call this%Xh_schwarz%init(gll, xh%lx+2, xh%lx+2, xh%lx+2)
124 call this%dm_schwarz%init(msh, this%Xh_schwarz)
125 call this%gs_schwarz%init(this%dm_schwarz)
126
127 allocate(this%work1(this%dm_schwarz%size()))
128 allocate(this%work2(this%dm_schwarz%size()))
129 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
130
131 call this%fdm%init(xh, dof, gs_h)
132
133
134 this%msh => msh
135 this%Xh => xh
136 this%bclst => bclst
137 this%dof => dof
138
139 nthrds = 1
140 !$omp parallel
141 !$ nthrds = omp_get_num_threads()
142 !$omp end parallel
143
144 ! If we are running multithreaded, we need a local gs object,
145 ! otherwise we can reuse the external one
146 if (nthrds .gt. 1) then
147 allocate(this%gs_h_local)
148 call this%gs_h_local%init(this%dof)
149 this%gs_h => this%gs_h_local
150 this%local_gs = .true.
151 else
152 this%gs_h => gs_h
153 this%local_gs = .false.
154 end if
155
156 if (neko_bcknd_device .eq. 1) then
157 call device_map(this%work1, this%work1_d, this%dm_schwarz%size())
158 call device_map(this%work2, this%work2_d, this%dm_schwarz%size())
159 end if
160
161 call schwarz_setup_wt(this)
162 if (neko_bcknd_device .eq. 1) then
163 call device_alloc(this%wt_d, &
164 int(this%dof%size(), i8) * int(c_sizeof(this%work1(1)), i8))
165 call rone(this%work1, this%dof%size())
166 call schwarz_wt3d(this%work1, this%wt, xh%lx, msh%nelv)
167 call device_memcpy(this%work1, this%wt_d, this%dof%size(), &
168 host_to_device, sync = .false.)
169 call device_event_create(this%event, 2)
170 end if
171 end subroutine schwarz_init
172
173 subroutine schwarz_free(this)
174 class(schwarz_t), intent(inout) :: this
175
176 if (allocated(this%work1)) deallocate(this%work1)
177 if (allocated(this%work2)) deallocate(this%work2)
178 if (allocated(this%wt)) deallocate(this%wt)
179
180 if (c_associated(this%work1_d)) then
181 call device_free(this%work1_d)
182 end if
183
184 if (c_associated(this%work2_d)) then
185 call device_free(this%work2_d)
186 end if
187
188 if (c_associated(this%wt_d)) then
189 call device_free(this%wt_d)
190 end if
191
192 call this%Xh_schwarz%free()
193 call this%gs_schwarz%free()
194 !why cant I do this?
195 !call dofmap_free(this%dm_schwarz)
196 call this%dm_schwarz%free()
197 call this%fdm%free()
198
199 nullify(this%Xh)
200 nullify(this%bclst)
201 nullify(this%dof)
202 if (allocated(this%gs_h_local)) then
203 call this%gs_h_local%free()
204 deallocate(this%gs_h_local)
205 end if
206 nullify(this%gs_h)
207 nullify(this%msh)
208
209 this%local_gs = .false.
210 if (c_associated(this%event)) then
211 call device_event_destroy(this%event)
212 end if
213 end subroutine schwarz_free
215 subroutine schwarz_setup_wt(this)
216 class(schwarz_t), intent(inout) :: this
217 integer :: enx, eny, enz, n, ie, k, ns
218 real(kind=rp), parameter :: zero = 0.0_rp
219 real(kind=rp), parameter :: one = 1.0_rp
220 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
221 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
222
223 n = this%dof%size()
224
225 enx = xh_schwarz%lx
226 eny = xh_schwarz%ly
227 enz = xh_schwarz%lz
228 if (.not. msh%gdim .eq. 3) enz = 1
229 ns = enx * eny * enz * msh%nelv
230
231 call rone(work2, ns)
232 call rzero(work1, ns)
233
234 ! Sum overlap region (border excluded)
235 ! Cred to PFF for this, very clever
236 call schwarz_extrude(work1, 0, zero, work2, 0, one, enx, eny, enz, &
237 msh%nelv)
238 if (neko_bcknd_device .eq. 1) then
239 call device_memcpy(work2, this%work2_d, ns, &
240 host_to_device, sync = .false.)
241 call this%gs_schwarz%op(work2, ns, gs_op_add)
242 call device_memcpy(work2, this%work2_d, ns, &
243 device_to_host, sync = .true.)
244 else
245 call this%gs_schwarz%op(work2, ns, gs_op_add)
246 end if
247 call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, &
248 msh%nelv)
249 call schwarz_extrude_single(work2, 2, one, 0, one, enx, eny, enz, &
250 msh%nelv)
251
252 ! if(.not.if3d) then ! Go back to regular size array
253 ! call hsmg_schwarz_toreg2d(mg_work,mg_work(i),mg_nh(l))
254 ! else
255 call schwarz_toreg3d(work1, work2, xh%lx, msh%nelv)
256 ! endif
257
258 if (neko_bcknd_device .eq. 1) then
259 call device_memcpy(work1, this%work1_d, n, &
260 host_to_device, sync = .false.)
261 call this%gs_h%op(work1, n, gs_op_add)
262 call device_memcpy(work1, this%work1_d, n, &
263 device_to_host, sync = .true.)
264 else
265 call this%gs_h%op(work1, n, gs_op_add)
266 end if
267
268 k = 1
269 do ie = 1, msh%nelv
270 if (msh%gdim .eq. 2) then
271 call schwarz_setup_schwarz_wt2d_2(this%wt, ie, xh%lx, &
272 work1(k), msh%nelv)
273 end if
274 if (this%msh%gdim .eq. 3) then
275 call schwarz_setup_schwarz_wt3d_2(this%wt, ie, xh%lx, &
276 work1(k), msh%nelv)
277 k = k + xh%lxyz
278 end if
279 end do
280 end associate
281 end subroutine schwarz_setup_wt
282
284 subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
285 integer, intent(in) :: n, nelv
286 real(kind=rp), intent(inout) :: wt(n, 4, 2, nelv)
287 real(kind=rp), intent(inout) :: work(n, n)
288 integer :: ie, i, j
289 do j = 1, n
290 wt(j, 1, 1, ie) = 1.0_rp / work(1, j)
291 wt(j, 2, 1, ie) = 1.0_rp / work(2, j)
292 wt(j, 3, 1, ie) = 1.0_rp / work(n - 1, j)
293 wt(j, 4, 1, ie) = 1.0_rp / work(n, j)
294 end do
295 do i = 1, n
296 wt(i, 1, 2, ie) = 1.0_rp / work(i, 1)
297 wt(i, 2, 2, ie) = 1.0_rp / work(i, 2)
298 wt(i, 3, 2, ie) = 1.0_rp / work(i, n - 1)
299 wt(i, 4, 2, ie) = 1.0_rp / work(i, n)
300 end do
301
302 return
303 end subroutine schwarz_setup_schwarz_wt2d_2
304
306 subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
307 integer, intent(in) :: n, nelv, ie
308 real(kind=rp), intent(inout) :: wt(n, n, 4, 3, nelv)
309 real(kind=rp), intent(inout) :: work(n, n, n)
310 integer :: i, j, k
311
312 do k = 1, n
313 do j = 1, n
314 wt(j, k, 1, 1, ie) = 1.0_rp / work(1, j, k)
315 wt(j, k, 2, 1, ie) = 1.0_rp / work(2, j, k)
316 wt(j, k, 3, 1, ie) = 1.0_rp / work(n - 1, j, k)
317 wt(j, k, 4, 1, ie) = 1.0_rp / work(n, j, k)
318 end do
319 end do
320
321 do k = 1, n
322 do i = 1, n
323 wt(i, k, 1, 2, ie) = 1.0_rp / work(i, 1, k)
324 wt(i, k, 2, 2, ie) = 1.0_rp / work(i, 2, k)
325 wt(i, k, 3, 2, ie) = 1.0_rp / work(i, n - 1, k)
326 wt(i, k, 4, 2, ie) = 1.0_rp / work(i, n, k)
327 end do
328 end do
329
330 do j = 1, n
331 do i = 1, n
332 wt(i, j, 1, 3, ie) = 1.0_rp / work(i, j, 1)
333 wt(i, j, 2, 3, ie) = 1.0_rp / work(i, j, 2)
334 wt(i, j, 3, 3, ie) = 1.0_rp / work(i, j, n - 1)
335 wt(i, j, 4, 3, ie) = 1.0_rp / work(i, j, n)
336 end do
337 end do
338
339 end subroutine schwarz_setup_schwarz_wt3d_2
340
342 subroutine schwarz_toreg3d(b, a, n, nelv)
343 integer, intent(in) :: n, nelv
344 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv)
345 real(kind=rp), intent(inout) :: b(n, n, n, nelv)
346 integer :: i, j, k, ie
347 !$omp parallel do private(ie, i, j, k)
348 do ie = 1, nelv
349 do k = 1, n
350 do j = 1, n
351 do i = 1, n
352 b(i, j, k, ie) = a(i, j, k, ie)
353 end do
354 end do
355 end do
356 end do
357 !$omp end parallel do
358 end subroutine schwarz_toreg3d
359
361 subroutine schwarz_toext3d(a, b, n, nelv)
362 integer, intent(in) :: n, nelv
363 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv), &
364 b(n, n, n, nelv)
365 integer :: i, j, k, ie
366
367 call rzero(a, (n+2)*(n+2)*(n+2)*nelv)
368 !$omp parallel do private(ie, i, j, k)
369 do ie = 1, nelv
370 do k = 1, n
371 do j = 1, n
372 do i = 1, n
373 a(i, j, k, ie) = b(i, j, k, ie)
374 end do
375 end do
376 end do
377 end do
378 !$omp end parallel do
379 end subroutine schwarz_toext3d
380
386 subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
387 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
388 real(kind=rp), intent(inout) :: arr1(nx, ny, nz, nelv)
389 real(kind=rp), intent(in) :: arr2(nx, ny, nz, nelv)
390 real(kind=rp), intent(in) :: f1, f2
391 integer :: i, j, k, ie, i0, i1
392 i0 = 2
393 i1 = nx - 1
394
395 if (nz .eq. 1) then
396 !$omp parallel do private(ie, i, j)
397 do ie = 1, nelv
398 do j = i0, i1
399 arr1(l1 + 1, j, 1, ie) = f1 * arr1(l1 + 1, j, 1, ie) &
400 + f2 * arr2(l2 + 1, j, 1, ie)
401 arr1(nx - l1, j, 1, ie) = f1 * arr1(nx - l1, j, 1, ie) &
402 + f2 * arr2(nx - l2, j, 1, ie)
403 end do
404 do i = i0, i1
405 arr1(i, l1 + 1, 1, ie) = f1 * arr1(i, l1 + 1, 1, ie) &
406 + f2 * arr2(i, l2 + 1, 1, ie)
407 arr1(i, ny - l1, 1, ie) = f1 * arr1(i, ny - l1, 1, ie) &
408 + f2 * arr2(i, nx - l2, 1, ie)
409 end do
410 end do
411 !$omp end parallel do
412 else
413 !$omp parallel do private(ie, i, j, k)
414 do ie = 1, nelv
415 do k = i0, i1
416 do j = i0, i1
417 arr1(l1 + 1, j, k, ie) = f1 * arr1(l1 + 1, j, k, ie) &
418 + f2 * arr2(l2 + 1, j, k, ie)
419 arr1(nx - l1, j, k, ie) = f1 * arr1(nx - l1, j, k, ie) &
420 + f2 * arr2(nx - l2, j, k, ie)
421 end do
422 end do
423 do k = i0, i1
424 do i = i0, i1
425 arr1(i, l1 + 1, k, ie) = f1 * arr1(i, l1 + 1, k, ie) &
426 + f2 * arr2(i, l2 + 1, k, ie)
427 arr1(i, nx - l1, k, ie) = f1 * arr1(i, nx - l1, k, ie) &
428 + f2 * arr2(i, nx - l2, k, ie)
429 end do
430 end do
431 do j = i0, i1
432 do i = i0, i1
433 arr1(i, j, l1 + 1, ie) = f1 * arr1(i, j, l1 + 1, ie) &
434 + f2 * arr2(i, j, l2 + 1, ie)
435 arr1(i, j, nx - l1, ie) = f1 * arr1(i, j, nx - l1, ie) &
436 + f2 * arr2(i, j, nx - l2, ie)
437 end do
438 end do
439 end do
440 !$omp end parallel do
441 end if
442 end subroutine schwarz_extrude
443
447 subroutine schwarz_extrude_single(arr, l1, f1, l2, f2, nx, ny, nz, nelv)
448 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
449 real(kind=rp), intent(inout) :: arr(nx, ny, nz, nelv)
450 real(kind=rp), intent(in) :: f1, f2
451 integer :: i, j, k, ie, i0, i1
452 i0 = 2
453 i1 = nx - 1
454
455 if (nz .eq. 1) then
456 !$omp parallel do private(ie, i, j)
457 do ie = 1, nelv
458 do j = i0, i1
459 arr(l1 + 1, j, 1, ie) = f1 * arr(l1 + 1, j, 1, ie) &
460 + f2 * arr(l2 + 1, j, 1, ie)
461 arr(nx - l1, j, 1, ie) = f1 * arr(nx - l1, j, 1, ie) &
462 + f2 * arr(nx - l2, j, 1, ie)
463 end do
464 do i = i0, i1
465 arr(i, l1 + 1, 1, ie) = f1 * arr(i, l1 + 1, 1, ie) &
466 + f2 * arr(i, l2 + 1, 1, ie)
467 arr(i, ny - l1, 1, ie) = f1 * arr(i, ny - l1, 1, ie) &
468 + f2 * arr(i, nx - l2, 1, ie)
469 end do
470 end do
471 !$omp end parallel do
472 else
473 !$omp parallel do private(ie, i, j, k)
474 do ie = 1, nelv
475 do k = i0, i1
476 do j = i0, i1
477 arr(l1 + 1, j, k, ie) = f1 * arr(l1 + 1, j, k, ie) &
478 + f2 * arr(l2 + 1, j, k, ie)
479 arr(nx - l1, j, k, ie) = f1 * arr(nx - l1, j, k, ie) &
480 + f2 * arr(nx - l2, j, k, ie)
481 end do
482 end do
483 do k = i0, i1
484 do i = i0, i1
485 arr(i, l1 + 1, k, ie) = f1 * arr(i, l1 + 1, k, ie) &
486 + f2 * arr(i, l2 + 1, k, ie)
487 arr(i, nx - l1, k, ie) = f1 * arr(i, nx - l1, k, ie) &
488 + f2 * arr(i, nx - l2, k, ie)
489 end do
490 end do
491 do j = i0, i1
492 do i = i0, i1
493 arr(i, j, l1 + 1, ie) = f1 * arr(i, j, l1 + 1, ie) &
494 + f2 * arr(i, j, l2 + 1, ie)
495 arr(i, j, nx - l1, ie) = f1 * arr(i, j, nx - l1, ie) &
496 + f2 * arr(i, j, nx - l2, ie)
497 end do
498 end do
499 end do
500 !$omp end parallel do
501 end if
502 end subroutine schwarz_extrude_single
503
504 subroutine schwarz_compute(this, e, r)
505 class(schwarz_t), intent(inout) :: this
506 real(kind=rp), dimension(this%dof%size()), intent(inout) :: e, r
507 integer :: n, enx, eny, enz, ns
508 real(kind=rp), parameter :: zero = 0.0_rp
509 real(kind=rp), parameter :: one = 1.0_rp
510 type(c_ptr) :: e_d, r_d
511 associate(work1 => this%work1, work1_d => this%work1_d, &
512 work2 => this%work2, work2_d => this%work2_d)
513
514 n = this%dof%size()
515 enx = this%Xh_schwarz%lx
516 eny = this%Xh_schwarz%ly
517 enz = this%Xh_schwarz%lz
518 if (.not. this%msh%gdim .eq. 3) enz = 1
519 ns = enx * eny * enz * this%msh%nelv
520 if (neko_bcknd_device .eq. 1) then
521 r_d = device_get_ptr(r)
522 e_d = device_get_ptr(e)
523 call device_event_record(this%event, glb_cmd_queue)
524 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
525 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
526 this%msh%nelv, aux_cmd_queue)
527 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
528 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
529
530 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
531 call this%gs_schwarz%op(work1, ns, gs_op_add, this%event)
532 call device_event_sync(this%event)
533 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
534 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
535
536 call this%fdm%compute(work2, work1, aux_cmd_queue) ! do local solves
537
538 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
539 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
540 call this%gs_schwarz%op(work2, ns, gs_op_add, this%event)
541 call device_event_sync(this%event)
542
543 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
544 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
545 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
546 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
547 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
548 this%msh%nelv, aux_cmd_queue)
549
550 this%gs_h%bcknd%gs_stream = aux_cmd_queue
551 call this%gs_h%op(e, n, gs_op_add, this%event)
552
553 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
554 call device_col2(e_d, this%wt_d, n, aux_cmd_queue)
555
556 ! switch back to the default stream on the shared gs
557 if (.not. this%local_gs) then
558 call device_event_sync(this%event)
559 this%gs_h%bcknd%gs_stream = glb_cmd_queue
560 end if
561 else
562 call this%bclst%apply_scalar(r, n)
563 call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
564
565 ! exchange interior nodes
566 call schwarz_extrude_single(work1, 0, zero, 2, one, &
567 enx, eny, enz, this%msh%nelv)
568 call this%gs_schwarz%op(work1, ns, gs_op_add)
569 call schwarz_extrude_single(work1, 0, one, 2, -one, &
570 enx, eny, enz, this%msh%nelv)
571
572 call this%fdm%compute(work2, work1) ! do local solves
573
574 ! Sum overlap region (border excluded)
575 call schwarz_extrude(work1, 0, zero, work2, 0, one, &
576 enx, eny, enz, this%msh%nelv)
577 call this%gs_schwarz%op(work2, ns, gs_op_add)
578 call schwarz_extrude(work2, 0, one, work1, 0, -one, &
579 enx, eny, enz, this%msh%nelv)
580 call schwarz_extrude_single(work2, 2, one, 0, one, &
581 enx, eny, enz, this%msh%nelv)
582
583 call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
584
585 ! sum border nodes
586 call this%gs_h%op(e, n, gs_op_add)
587 call this%bclst%apply_scalar(e, n)
588
589 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
590 end if
591 end associate
592 end subroutine schwarz_compute
593
594 !Apply schwarz weights along the boundary of each element.
595 subroutine schwarz_wt3d(e, wt, n, nelv)
596 integer, intent(in) :: n, nelv
597 real(kind=rp), intent(inout) :: e(n, n, n, nelv)
598 real(kind=rp), intent(inout) :: wt(n, n, 4, 3, nelv)
599 integer :: ie, i, j, k
600
601 !$omp parallel do private(ie, i, j, k)
602 do ie = 1, nelv
603 do k = 1, n
604 do j = 1, n
605 e(1, j, k, ie) = e(1, j, k, ie) * wt(j, k, 1, 1, ie)
606 e(2, j, k, ie) = e(2, j, k, ie) * wt(j, k, 2, 1, ie)
607 e(n - 1, j, k, ie) = e(n - 1, j, k, ie) * wt(j, k, 3, 1, ie)
608 e(n, j, k, ie) = e(n, j, k, ie) * wt(j, k, 4, 1, ie)
609 end do
610 end do
611 do k = 1, n
612 do i = 3, n-2
613 e(i, 1, k, ie) = e(i, 1, k, ie) * wt(i, k, 1, 2, ie)
614 e(i, 2, k, ie) = e(i, 2, k, ie) * wt(i, k, 2, 2, ie)
615 e(i, n - 1, k, ie) = e(i, n - 1, k, ie) * wt(i, k, 3, 2, ie)
616 e(i, n, k, ie) = e(i, n, k, ie) * wt(i, k, 4, 2, ie)
617 end do
618 end do
619 do j = 3, n-2
620 do i = 3, n-2
621 e(i, j, 1, ie) = e(i, j, 1, ie) * wt(i, j, 1, 3, ie)
622 e(i, j, 2, ie) = e(i, j, 2, ie) * wt(i, j, 2, 3, ie)
623 e(i, j, n - 1, ie) = e(i, j, n - 1, ie) * wt(i, j, 3, 3, ie)
624 e(i, j, n, ie) = e(i, j, n, ie) * wt(i, j, 4, 3, ie)
625 end do
626 end do
627 end do
628 !$omp end parallel do
629 end subroutine schwarz_wt3d
630end module schwarz
Return the device pointer for an associated Fortran array.
Definition device.F90:107
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_schwarz_toreg3d(b_d, a_d, nx, nelv, stream)
subroutine, public device_schwarz_extrude(arr1_d, l1, f1, arr2_d, l2, f2, nx, ny, nz, nelv, stream)
subroutine, public device_schwarz_toext3d(a_d, b_d, nx, nelv, stream)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1496
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1515
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1480
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:198
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1409
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1450
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
Gather-scatter.
Definition math.f90:60
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:241
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:208
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Overlapping schwarz solves.
Definition schwarz.f90:61
subroutine schwarz_toreg3d(b, a, n, nelv)
convert array a from extended size to regular
Definition schwarz.f90:343
subroutine schwarz_setup_wt(this)
setup weights
Definition schwarz.f90:216
subroutine schwarz_init(this, xh, dof, gs_h, bclst, msh)
Definition schwarz.f90:113
subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
Sum values along rows l1, l2 with weights f1, f2 and store along row l1. Helps us avoid complicated c...
Definition schwarz.f90:387
subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 3d, second step.
Definition schwarz.f90:307
subroutine schwarz_compute(this, e, r)
Definition schwarz.f90:505
subroutine schwarz_wt3d(e, wt, n, nelv)
Definition schwarz.f90:596
subroutine schwarz_extrude_single(arr, l1, f1, l2, f2, nx, ny, nz, nelv)
Same as schwarz_extrude but for the case when arr1 and arr2 are the same array. Avoids aliasing by us...
Definition schwarz.f90:448
subroutine schwarz_toext3d(a, b, n, nelv)
convert array a from original size to size extended array with border
Definition schwarz.f90:362
subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 2d, second step.
Definition schwarz.f90:285
subroutine schwarz_free(this)
Definition schwarz.f90:174
Defines a function space.
Definition space.f90:34
integer, parameter, public gll
Definition space.f90:49
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
The function space for the SEM solution fields.
Definition space.f90:63