Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
schwarz.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module schwarz
62 use num_types, only : rp, i8
63 use math, only : rzero, rone
64 use mesh, only : mesh_t
65 use space, only : space_t, gll
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
71 use fdm, only : fdm_t
78 use bc_list, only : bc_list_t
79 use, intrinsic :: iso_c_binding, only : c_sizeof, c_ptr, c_null_ptr, &
80 c_associated
81 !$ use omp_lib
82 implicit none
83 private
84
85 type, public :: schwarz_t
86 real(kind=rp), allocatable :: work1(:)
87 real(kind=rp), allocatable :: work2(:)
88 real(kind=rp), allocatable :: wt(:,:,:,:,:)
89 type(c_ptr) :: work1_d = c_null_ptr
90 type(c_ptr) :: work2_d = c_null_ptr
91 type(c_ptr) :: wt_d = c_null_ptr
92 type(space_t) :: xh_schwarz
93 type(gs_t) :: gs_schwarz
94 type(dofmap_t) :: dm_schwarz
95 type(fdm_t) :: fdm
96 type(space_t), pointer :: xh => null()
97 type(bc_list_t), pointer :: bclst => null()
98 type(dofmap_t), pointer :: dof => null()
99 type(gs_t), pointer :: gs_h => null()
100 type(mesh_t), pointer :: msh => null()
101 type(c_ptr) :: event = c_null_ptr
102 logical :: local_gs = .false.
103 type(gs_t), allocatable :: gs_h_local
104 contains
105 procedure, pass(this) :: init => schwarz_init
106 procedure, pass(this) :: free => schwarz_free
107 procedure, pass(this) :: compute => schwarz_compute
108 end type schwarz_t
109
110contains
111
112 subroutine schwarz_init(this, Xh, dof, gs_h, bclst, msh)
113 class(schwarz_t), target, intent(inout) :: this
114 type(space_t), target, intent(inout) :: Xh
115 type(dofmap_t), target, intent(in) :: dof
116 type(gs_t), target, intent(inout) :: gs_h
117 type(mesh_t), target, intent(inout) :: msh
118 type(bc_list_t), target, intent(inout) :: bclst
119 integer :: nthrds
120
121 call this%free()
122
123 call this%Xh_schwarz%init(gll, xh%lx+2, xh%lx+2, xh%lx+2)
124 call this%dm_schwarz%init(msh, this%Xh_schwarz)
125 call this%gs_schwarz%init(this%dm_schwarz)
126
127 allocate(this%work1(this%dm_schwarz%size()))
128 allocate(this%work2(this%dm_schwarz%size()))
129 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
130
131 call this%fdm%init(xh, dof, gs_h)
132
133
134 this%msh => msh
135 this%Xh => xh
136 this%bclst => bclst
137 this%dof => dof
138
139 nthrds = 1
140 !$omp parallel
141 !$ nthrds = omp_get_num_threads()
142 !$omp end parallel
143
144 ! If we are running multithreaded, we need a local gs object,
145 ! otherwise we can reuse the external one
146 if (nthrds .gt. 1) then
147 allocate(this%gs_h_local)
148 call this%gs_h_local%init(this%dof)
149 this%gs_h => this%gs_h_local
150 this%local_gs = .true.
151 else
152 this%gs_h => gs_h
153 this%local_gs = .false.
154 end if
155
156 if (neko_bcknd_device .eq. 1) then
157 call device_map(this%work1, this%work1_d, this%dm_schwarz%size())
158 call device_map(this%work2, this%work2_d, this%dm_schwarz%size())
159 end if
160
161 call schwarz_setup_wt(this)
162 if (neko_bcknd_device .eq. 1) then
163 call device_alloc(this%wt_d, &
164 int(this%dof%size(), i8) * int(c_sizeof(this%work1(1)), i8))
165 call rone(this%work1, this%dof%size())
166 call schwarz_wt3d(this%work1, this%wt, xh%lx, msh%nelv)
167 call device_memcpy(this%work1, this%wt_d, this%dof%size(), &
168 host_to_device, sync = .false.)
169 call device_event_create(this%event, 2)
170 end if
171 end subroutine schwarz_init
172
173 subroutine schwarz_free(this)
174 class(schwarz_t), intent(inout) :: this
175
176 if (allocated(this%work1)) then
177 if (neko_bcknd_device .eq. 1) then
178 call device_unmap(this%work1, this%work1_d)
179 end if
180 deallocate(this%work1)
181 end if
182 if (allocated(this%work2)) then
183 if (neko_bcknd_device .eq. 1) then
184 call device_unmap(this%work2, this%work2_d)
185 end if
186 deallocate(this%work2)
187 end if
188 if (allocated(this%wt)) deallocate(this%wt)
189
190 if (c_associated(this%wt_d)) then
191 call device_free(this%wt_d)
192 end if
193
194 call this%Xh_schwarz%free()
195 call this%gs_schwarz%free()
196 !why cant I do this?
197 !call dofmap_free(this%dm_schwarz)
198 call this%dm_schwarz%free()
199 call this%fdm%free()
200
201 nullify(this%Xh)
202 nullify(this%bclst)
203 nullify(this%dof)
204 if (allocated(this%gs_h_local)) then
205 call this%gs_h_local%free()
206 deallocate(this%gs_h_local)
207 end if
208 nullify(this%gs_h)
209 nullify(this%msh)
210
211 this%local_gs = .false.
212 if (c_associated(this%event)) then
213 call device_event_destroy(this%event)
214 end if
215 end subroutine schwarz_free
217 subroutine schwarz_setup_wt(this)
218 class(schwarz_t), intent(inout) :: this
219 integer :: enx, eny, enz, n, ie, k, ns
220 real(kind=rp), parameter :: zero = 0.0_rp
221 real(kind=rp), parameter :: one = 1.0_rp
222 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
223 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
224
225 n = this%dof%size()
226
227 enx = xh_schwarz%lx
228 eny = xh_schwarz%ly
229 enz = xh_schwarz%lz
230 if (.not. msh%gdim .eq. 3) enz = 1
231 ns = enx * eny * enz * msh%nelv
232
233 call rone(work2, ns)
234 call rzero(work1, ns)
235
236 ! Sum overlap region (border excluded)
237 ! Cred to PFF for this, very clever
238 call schwarz_extrude(work1, 0, zero, work2, 0, one, enx, eny, enz, &
239 msh%nelv)
240 if (neko_bcknd_device .eq. 1) then
241 call device_memcpy(work2, this%work2_d, ns, &
242 host_to_device, sync = .false.)
243 call this%gs_schwarz%op(work2, ns, gs_op_add)
244 call device_memcpy(work2, this%work2_d, ns, &
245 device_to_host, sync = .true.)
246 else
247 call this%gs_schwarz%op(work2, ns, gs_op_add)
248 end if
249 call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, &
250 msh%nelv)
251 call schwarz_extrude_single(work2, 2, one, 0, one, enx, eny, enz, &
252 msh%nelv)
253
254 ! if(.not.if3d) then ! Go back to regular size array
255 ! call hsmg_schwarz_toreg2d(mg_work,mg_work(i),mg_nh(l))
256 ! else
257 call schwarz_toreg3d(work1, work2, xh%lx, msh%nelv)
258 ! endif
259
260 if (neko_bcknd_device .eq. 1) then
261 call device_memcpy(work1, this%work1_d, n, &
262 host_to_device, sync = .false.)
263 call this%gs_h%op(work1, n, gs_op_add)
264 call device_memcpy(work1, this%work1_d, n, &
265 device_to_host, sync = .true.)
266 else
267 call this%gs_h%op(work1, n, gs_op_add)
268 end if
269
270 k = 1
271 do ie = 1, msh%nelv
272 if (msh%gdim .eq. 2) then
273 call schwarz_setup_schwarz_wt2d_2(this%wt, ie, xh%lx, &
274 work1(k), msh%nelv)
275 end if
276 if (this%msh%gdim .eq. 3) then
277 call schwarz_setup_schwarz_wt3d_2(this%wt, ie, xh%lx, &
278 work1(k), msh%nelv)
279 k = k + xh%lxyz
280 end if
281 end do
282 end associate
283 end subroutine schwarz_setup_wt
284
286 subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
287 integer, intent(in) :: n, nelv
288 real(kind=rp), intent(inout) :: wt(n, 4, 2, nelv)
289 real(kind=rp), intent(inout) :: work(n, n)
290 integer :: ie, i, j
291 do j = 1, n
292 wt(j, 1, 1, ie) = 1.0_rp / work(1, j)
293 wt(j, 2, 1, ie) = 1.0_rp / work(2, j)
294 wt(j, 3, 1, ie) = 1.0_rp / work(n - 1, j)
295 wt(j, 4, 1, ie) = 1.0_rp / work(n, j)
296 end do
297 do i = 1, n
298 wt(i, 1, 2, ie) = 1.0_rp / work(i, 1)
299 wt(i, 2, 2, ie) = 1.0_rp / work(i, 2)
300 wt(i, 3, 2, ie) = 1.0_rp / work(i, n - 1)
301 wt(i, 4, 2, ie) = 1.0_rp / work(i, n)
302 end do
303
304 return
305 end subroutine schwarz_setup_schwarz_wt2d_2
306
308 subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
309 integer, intent(in) :: n, nelv, ie
310 real(kind=rp), intent(inout) :: wt(n, n, 4, 3, nelv)
311 real(kind=rp), intent(inout) :: work(n, n, n)
312 integer :: i, j, k
313
314 do k = 1, n
315 do j = 1, n
316 wt(j, k, 1, 1, ie) = 1.0_rp / work(1, j, k)
317 wt(j, k, 2, 1, ie) = 1.0_rp / work(2, j, k)
318 wt(j, k, 3, 1, ie) = 1.0_rp / work(n - 1, j, k)
319 wt(j, k, 4, 1, ie) = 1.0_rp / work(n, j, k)
320 end do
321 end do
322
323 do k = 1, n
324 do i = 1, n
325 wt(i, k, 1, 2, ie) = 1.0_rp / work(i, 1, k)
326 wt(i, k, 2, 2, ie) = 1.0_rp / work(i, 2, k)
327 wt(i, k, 3, 2, ie) = 1.0_rp / work(i, n - 1, k)
328 wt(i, k, 4, 2, ie) = 1.0_rp / work(i, n, k)
329 end do
330 end do
331
332 do j = 1, n
333 do i = 1, n
334 wt(i, j, 1, 3, ie) = 1.0_rp / work(i, j, 1)
335 wt(i, j, 2, 3, ie) = 1.0_rp / work(i, j, 2)
336 wt(i, j, 3, 3, ie) = 1.0_rp / work(i, j, n - 1)
337 wt(i, j, 4, 3, ie) = 1.0_rp / work(i, j, n)
338 end do
339 end do
340
341 end subroutine schwarz_setup_schwarz_wt3d_2
342
344 subroutine schwarz_toreg3d(b, a, n, nelv)
345 integer, intent(in) :: n, nelv
346 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv)
347 real(kind=rp), intent(inout) :: b(n, n, n, nelv)
348 integer :: i, j, k, ie
349 !$omp parallel do private(ie, i, j, k)
350 do ie = 1, nelv
351 do k = 1, n
352 do j = 1, n
353 do i = 1, n
354 b(i, j, k, ie) = a(i, j, k, ie)
355 end do
356 end do
357 end do
358 end do
359 !$omp end parallel do
360 end subroutine schwarz_toreg3d
361
363 subroutine schwarz_toext3d(a, b, n, nelv)
364 integer, intent(in) :: n, nelv
365 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv), &
366 b(n, n, n, nelv)
367 integer :: i, j, k, ie
368
369 call rzero(a, (n+2)*(n+2)*(n+2)*nelv)
370 !$omp parallel do private(ie, i, j, k)
371 do ie = 1, nelv
372 do k = 1, n
373 do j = 1, n
374 do i = 1, n
375 a(i, j, k, ie) = b(i, j, k, ie)
376 end do
377 end do
378 end do
379 end do
380 !$omp end parallel do
381 end subroutine schwarz_toext3d
382
388 subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
389 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
390 real(kind=rp), intent(inout) :: arr1(nx, ny, nz, nelv)
391 real(kind=rp), intent(in) :: arr2(nx, ny, nz, nelv)
392 real(kind=rp), intent(in) :: f1, f2
393 integer :: i, j, k, ie, i0, i1
394 i0 = 2
395 i1 = nx - 1
396
397 if (nz .eq. 1) then
398 !$omp parallel do private(ie, i, j)
399 do ie = 1, nelv
400 do j = i0, i1
401 arr1(l1 + 1, j, 1, ie) = f1 * arr1(l1 + 1, j, 1, ie) &
402 + f2 * arr2(l2 + 1, j, 1, ie)
403 arr1(nx - l1, j, 1, ie) = f1 * arr1(nx - l1, j, 1, ie) &
404 + f2 * arr2(nx - l2, j, 1, ie)
405 end do
406 do i = i0, i1
407 arr1(i, l1 + 1, 1, ie) = f1 * arr1(i, l1 + 1, 1, ie) &
408 + f2 * arr2(i, l2 + 1, 1, ie)
409 arr1(i, ny - l1, 1, ie) = f1 * arr1(i, ny - l1, 1, ie) &
410 + f2 * arr2(i, nx - l2, 1, ie)
411 end do
412 end do
413 !$omp end parallel do
414 else
415 !$omp parallel do private(ie, i, j, k)
416 do ie = 1, nelv
417 do k = i0, i1
418 do j = i0, i1
419 arr1(l1 + 1, j, k, ie) = f1 * arr1(l1 + 1, j, k, ie) &
420 + f2 * arr2(l2 + 1, j, k, ie)
421 arr1(nx - l1, j, k, ie) = f1 * arr1(nx - l1, j, k, ie) &
422 + f2 * arr2(nx - l2, j, k, ie)
423 end do
424 end do
425 do k = i0, i1
426 do i = i0, i1
427 arr1(i, l1 + 1, k, ie) = f1 * arr1(i, l1 + 1, k, ie) &
428 + f2 * arr2(i, l2 + 1, k, ie)
429 arr1(i, nx - l1, k, ie) = f1 * arr1(i, nx - l1, k, ie) &
430 + f2 * arr2(i, nx - l2, k, ie)
431 end do
432 end do
433 do j = i0, i1
434 do i = i0, i1
435 arr1(i, j, l1 + 1, ie) = f1 * arr1(i, j, l1 + 1, ie) &
436 + f2 * arr2(i, j, l2 + 1, ie)
437 arr1(i, j, nx - l1, ie) = f1 * arr1(i, j, nx - l1, ie) &
438 + f2 * arr2(i, j, nx - l2, ie)
439 end do
440 end do
441 end do
442 !$omp end parallel do
443 end if
444 end subroutine schwarz_extrude
445
449 subroutine schwarz_extrude_single(arr, l1, f1, l2, f2, nx, ny, nz, nelv)
450 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
451 real(kind=rp), intent(inout) :: arr(nx, ny, nz, nelv)
452 real(kind=rp), intent(in) :: f1, f2
453 integer :: i, j, k, ie, i0, i1
454 i0 = 2
455 i1 = nx - 1
456
457 if (nz .eq. 1) then
458 !$omp parallel do private(ie, i, j)
459 do ie = 1, nelv
460 do j = i0, i1
461 arr(l1 + 1, j, 1, ie) = f1 * arr(l1 + 1, j, 1, ie) &
462 + f2 * arr(l2 + 1, j, 1, ie)
463 arr(nx - l1, j, 1, ie) = f1 * arr(nx - l1, j, 1, ie) &
464 + f2 * arr(nx - l2, j, 1, ie)
465 end do
466 do i = i0, i1
467 arr(i, l1 + 1, 1, ie) = f1 * arr(i, l1 + 1, 1, ie) &
468 + f2 * arr(i, l2 + 1, 1, ie)
469 arr(i, ny - l1, 1, ie) = f1 * arr(i, ny - l1, 1, ie) &
470 + f2 * arr(i, nx - l2, 1, ie)
471 end do
472 end do
473 !$omp end parallel do
474 else
475 !$omp parallel do private(ie, i, j, k)
476 do ie = 1, nelv
477 do k = i0, i1
478 do j = i0, i1
479 arr(l1 + 1, j, k, ie) = f1 * arr(l1 + 1, j, k, ie) &
480 + f2 * arr(l2 + 1, j, k, ie)
481 arr(nx - l1, j, k, ie) = f1 * arr(nx - l1, j, k, ie) &
482 + f2 * arr(nx - l2, j, k, ie)
483 end do
484 end do
485 do k = i0, i1
486 do i = i0, i1
487 arr(i, l1 + 1, k, ie) = f1 * arr(i, l1 + 1, k, ie) &
488 + f2 * arr(i, l2 + 1, k, ie)
489 arr(i, nx - l1, k, ie) = f1 * arr(i, nx - l1, k, ie) &
490 + f2 * arr(i, nx - l2, k, ie)
491 end do
492 end do
493 do j = i0, i1
494 do i = i0, i1
495 arr(i, j, l1 + 1, ie) = f1 * arr(i, j, l1 + 1, ie) &
496 + f2 * arr(i, j, l2 + 1, ie)
497 arr(i, j, nx - l1, ie) = f1 * arr(i, j, nx - l1, ie) &
498 + f2 * arr(i, j, nx - l2, ie)
499 end do
500 end do
501 end do
502 !$omp end parallel do
503 end if
504 end subroutine schwarz_extrude_single
505
506 subroutine schwarz_compute(this, e, r)
507 class(schwarz_t), intent(inout) :: this
508 real(kind=rp), dimension(this%dof%size()), intent(inout) :: e, r
509 integer :: n, enx, eny, enz, ns
510 real(kind=rp), parameter :: zero = 0.0_rp
511 real(kind=rp), parameter :: one = 1.0_rp
512 type(c_ptr) :: e_d, r_d
513 associate(work1 => this%work1, work1_d => this%work1_d, &
514 work2 => this%work2, work2_d => this%work2_d)
515
516 n = this%dof%size()
517 enx = this%Xh_schwarz%lx
518 eny = this%Xh_schwarz%ly
519 enz = this%Xh_schwarz%lz
520 if (.not. this%msh%gdim .eq. 3) enz = 1
521 ns = enx * eny * enz * this%msh%nelv
522 if (neko_bcknd_device .eq. 1) then
523 r_d = device_get_ptr(r)
524 e_d = device_get_ptr(e)
525 call device_event_record(this%event, glb_cmd_queue)
526 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
527 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
528 this%msh%nelv, aux_cmd_queue)
529 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
530 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
531
532 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
533 call this%gs_schwarz%op(work1, ns, gs_op_add, this%event)
534 call device_event_sync(this%event)
535 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
536 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
537
538 call this%fdm%compute(work2, work1, aux_cmd_queue) ! do local solves
539
540 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
541 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
542 call this%gs_schwarz%op(work2, ns, gs_op_add, this%event)
543 call device_event_sync(this%event)
544
545 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
546 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
547 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
548 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
549 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
550 this%msh%nelv, aux_cmd_queue)
551
552 this%gs_h%bcknd%gs_stream = aux_cmd_queue
553 call this%gs_h%op(e, n, gs_op_add, this%event)
554
555 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
556 call device_col2(e_d, this%wt_d, n, aux_cmd_queue)
557
558 ! switch back to the default stream on the shared gs
559 if (.not. this%local_gs) then
560 call device_event_sync(this%event)
561 this%gs_h%bcknd%gs_stream = glb_cmd_queue
562 end if
563 else
564 call this%bclst%apply_scalar(r, n)
565 call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
566
567 ! exchange interior nodes
568 call schwarz_extrude_single(work1, 0, zero, 2, one, &
569 enx, eny, enz, this%msh%nelv)
570 call this%gs_schwarz%op(work1, ns, gs_op_add)
571 call schwarz_extrude_single(work1, 0, one, 2, -one, &
572 enx, eny, enz, this%msh%nelv)
573
574 call this%fdm%compute(work2, work1) ! do local solves
575
576 ! Sum overlap region (border excluded)
577 call schwarz_extrude(work1, 0, zero, work2, 0, one, &
578 enx, eny, enz, this%msh%nelv)
579 call this%gs_schwarz%op(work2, ns, gs_op_add)
580 call schwarz_extrude(work2, 0, one, work1, 0, -one, &
581 enx, eny, enz, this%msh%nelv)
582 call schwarz_extrude_single(work2, 2, one, 0, one, &
583 enx, eny, enz, this%msh%nelv)
584
585 call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
586
587 ! sum border nodes
588 call this%gs_h%op(e, n, gs_op_add)
589 call this%bclst%apply_scalar(e, n)
590
591 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
592 end if
593 end associate
594 end subroutine schwarz_compute
595
596 !Apply schwarz weights along the boundary of each element.
597 subroutine schwarz_wt3d(e, wt, n, nelv)
598 integer, intent(in) :: n, nelv
599 real(kind=rp), intent(inout) :: e(n, n, n, nelv)
600 real(kind=rp), intent(inout) :: wt(n, n, 4, 3, nelv)
601 integer :: ie, i, j, k
602
603 !$omp parallel do private(ie, i, j, k)
604 do ie = 1, nelv
605 do k = 1, n
606 do j = 1, n
607 e(1, j, k, ie) = e(1, j, k, ie) * wt(j, k, 1, 1, ie)
608 e(2, j, k, ie) = e(2, j, k, ie) * wt(j, k, 2, 1, ie)
609 e(n - 1, j, k, ie) = e(n - 1, j, k, ie) * wt(j, k, 3, 1, ie)
610 e(n, j, k, ie) = e(n, j, k, ie) * wt(j, k, 4, 1, ie)
611 end do
612 end do
613 do k = 1, n
614 do i = 3, n-2
615 e(i, 1, k, ie) = e(i, 1, k, ie) * wt(i, k, 1, 2, ie)
616 e(i, 2, k, ie) = e(i, 2, k, ie) * wt(i, k, 2, 2, ie)
617 e(i, n - 1, k, ie) = e(i, n - 1, k, ie) * wt(i, k, 3, 2, ie)
618 e(i, n, k, ie) = e(i, n, k, ie) * wt(i, k, 4, 2, ie)
619 end do
620 end do
621 do j = 3, n-2
622 do i = 3, n-2
623 e(i, j, 1, ie) = e(i, j, 1, ie) * wt(i, j, 1, 3, ie)
624 e(i, j, 2, ie) = e(i, j, 2, ie) * wt(i, j, 2, 3, ie)
625 e(i, j, n - 1, ie) = e(i, j, n - 1, ie) * wt(i, j, 3, 3, ie)
626 e(i, j, n, ie) = e(i, j, n, ie) * wt(i, j, 4, 3, ie)
627 end do
628 end do
629 end do
630 !$omp end parallel do
631 end subroutine schwarz_wt3d
632end module schwarz
Return the device pointer for an associated Fortran array.
Definition device.F90:108
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Copy data between host and device (or device and device)
Definition device.F90:72
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_schwarz_toreg3d(b_d, a_d, nx, nelv, stream)
subroutine, public device_schwarz_extrude(arr1_d, l1, f1, arr2_d, l2, f2, nx, ny, nz, nelv, stream)
subroutine, public device_schwarz_toext3d(a_d, b_d, nx, nelv, stream)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1571
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1594
integer, parameter, public host_to_device
Definition device.F90:48
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:240
integer, parameter, public device_to_host
Definition device.F90:48
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1550
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:209
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1471
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:52
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1516
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:55
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
Gather-scatter.
Definition math.f90:60
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:275
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Overlapping schwarz solves.
Definition schwarz.f90:61
subroutine schwarz_toreg3d(b, a, n, nelv)
convert array a from extended size to regular
Definition schwarz.f90:345
subroutine schwarz_setup_wt(this)
setup weights
Definition schwarz.f90:218
subroutine schwarz_init(this, xh, dof, gs_h, bclst, msh)
Definition schwarz.f90:113
subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
Sum values along rows l1, l2 with weights f1, f2 and store along row l1. Helps us avoid complicated c...
Definition schwarz.f90:389
subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 3d, second step.
Definition schwarz.f90:309
subroutine schwarz_compute(this, e, r)
Definition schwarz.f90:507
subroutine schwarz_wt3d(e, wt, n, nelv)
Definition schwarz.f90:598
subroutine schwarz_extrude_single(arr, l1, f1, l2, f2, nx, ny, nz, nelv)
Same as schwarz_extrude but for the case when arr1 and arr2 are the same array. Avoids aliasing by us...
Definition schwarz.f90:450
subroutine schwarz_toext3d(a, b, n, nelv)
convert array a from original size to size extended array with border
Definition schwarz.f90:364
subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 2d, second step.
Definition schwarz.f90:287
subroutine schwarz_free(this)
Definition schwarz.f90:174
Defines a function space.
Definition space.f90:34
integer, parameter, public gll
Definition space.f90:49
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:49
The function space for the SEM solution fields.
Definition space.f90:63