Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
schwarz.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module schwarz
62 use num_types, only : rp, i8
63 use math, only : rzero, rone
64 use mesh, only : mesh_t
65 use space, only : space_t, gll
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
71 use fdm, only : fdm_t
78 use bc_list, only : bc_list_t
79 use, intrinsic :: iso_c_binding, only : c_sizeof, c_ptr, c_null_ptr, &
80 c_associated
81 !$ use omp_lib
82 implicit none
83 private
84
85 type, public :: schwarz_t
86 real(kind=rp), allocatable :: work1(:)
87 real(kind=rp), allocatable :: work2(:)
88 real(kind=rp), allocatable :: wt(:,:,:,:,:)
89 type(c_ptr) :: work1_d = c_null_ptr
90 type(c_ptr) :: work2_d = c_null_ptr
91 type(c_ptr) :: wt_d = c_null_ptr
92 type(space_t) :: xh_schwarz
93 type(gs_t) :: gs_schwarz
94 type(dofmap_t) :: dm_schwarz
95 type(fdm_t) :: fdm
96 type(space_t), pointer :: xh => null()
97 type(bc_list_t), pointer :: bclst => null()
98 type(dofmap_t), pointer :: dof => null()
99 type(gs_t), pointer :: gs_h => null()
100 type(mesh_t), pointer :: msh => null()
101 type(c_ptr) :: event = c_null_ptr
102 logical :: local_gs = .false.
103 type(gs_t), allocatable :: gs_h_local
104 contains
105 procedure, pass(this) :: init => schwarz_init
106 procedure, pass(this) :: free => schwarz_free
107 procedure, pass(this) :: compute => schwarz_compute
108 end type schwarz_t
109
110contains
111
112 subroutine schwarz_init(this, Xh, dof, gs_h, bclst, msh)
113 class(schwarz_t), target, intent(inout) :: this
114 type(space_t), target, intent(inout) :: Xh
115 type(dofmap_t), target, intent(in) :: dof
116 type(gs_t), target, intent(inout) :: gs_h
117 type(mesh_t), target, intent(inout) :: msh
118 type(bc_list_t), target, intent(inout) :: bclst
119 integer :: nthrds
120
121 call this%free()
122
123 call this%Xh_schwarz%init(gll, xh%lx+2, xh%lx+2, xh%lx+2)
124 call this%dm_schwarz%init(msh, this%Xh_schwarz)
125 call this%gs_schwarz%init(this%dm_schwarz)
126
127 allocate(this%work1(this%dm_schwarz%size()))
128 allocate(this%work2(this%dm_schwarz%size()))
129 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
130
131 call this%fdm%init(xh, dof, gs_h)
132
133
134 this%msh => msh
135 this%Xh => xh
136 this%bclst => bclst
137 this%dof => dof
138
139 nthrds = 1
140 !$omp parallel
141 !$ nthrds = omp_get_num_threads()
142 !$omp end parallel
143
144 ! If we are running multithreaded, we need a local gs object,
145 ! otherwise we can reuse the external one
146 if (nthrds .gt. 1) then
147 allocate(this%gs_h_local)
148 call this%gs_h_local%init(this%dof)
149 this%gs_h => this%gs_h_local
150 this%local_gs = .true.
151 else
152 this%gs_h => gs_h
153 this%local_gs = .false.
154 end if
155
156 if (neko_bcknd_device .eq. 1) then
157 call device_map(this%work1, this%work1_d, this%dm_schwarz%size())
158 call device_map(this%work2, this%work2_d, this%dm_schwarz%size())
159 end if
160
161 call schwarz_setup_wt(this)
162 if (neko_bcknd_device .eq. 1) then
163 call device_alloc(this%wt_d, &
164 int(this%dof%size(), i8) * int(c_sizeof(this%work1(1)), i8))
165 call rone(this%work1, this%dof%size())
166 call schwarz_wt3d(this%work1, this%wt, xh%lx, msh%nelv)
167 call device_memcpy(this%work1, this%wt_d, this%dof%size(), &
168 host_to_device, sync = .false.)
169 call device_event_create(this%event, 2)
170 end if
171 end subroutine schwarz_init
172
173 subroutine schwarz_free(this)
174 class(schwarz_t), intent(inout) :: this
175
176 if (allocated(this%work1)) deallocate(this%work1)
177 if (allocated(this%work2)) deallocate(this%work2)
178 if (allocated(this%wt)) deallocate(this%wt)
179
180 if (c_associated(this%work1_d)) then
181 call device_free(this%work1_d)
182 end if
183
184 if (c_associated(this%work2_d)) then
185 call device_free(this%work2_d)
186 end if
187
188 if (c_associated(this%wt_d)) then
189 call device_free(this%wt_d)
190 end if
191
192 call this%Xh_schwarz%free()
193 call this%gs_schwarz%free()
194 !why cant I do this?
195 !call dofmap_free(this%dm_schwarz)
196 call this%dm_schwarz%free()
197 call this%fdm%free()
198
199 nullify(this%Xh)
200 nullify(this%bclst)
201 nullify(this%dof)
202 if (allocated(this%gs_h_local)) then
203 call this%gs_h_local%free()
204 deallocate(this%gs_h_local)
205 end if
206 nullify(this%gs_h)
207 nullify(this%msh)
208
209 this%local_gs = .false.
210 if (c_associated(this%event)) then
211 call device_event_destroy(this%event)
212 end if
213 end subroutine schwarz_free
215 subroutine schwarz_setup_wt(this)
216 class(schwarz_t), intent(inout) :: this
217 integer :: enx, eny, enz, n, ie, k, ns
218 real(kind=rp), parameter :: zero = 0.0
219 real(kind=rp), parameter :: one = 1.0
220 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
221 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
222
223 n = this%dof%size()
224
225 enx = xh_schwarz%lx
226 eny = xh_schwarz%ly
227 enz = xh_schwarz%lz
228 if (.not. msh%gdim .eq. 3) enz = 1
229 ns = enx * eny * enz * msh%nelv
230
231 call rone(work2, ns)
232 call rzero(work1, ns)
233
234 ! Sum overlap region (border excluded)
235 ! Cred to PFF for this, very clever
236 call schwarz_extrude(work1, 0, zero, work2, 0, one, enx, eny, enz, &
237 msh%nelv)
238 if (neko_bcknd_device .eq. 1) then
239 call device_memcpy(work2, this%work2_d, ns, &
240 host_to_device, sync = .false.)
241 call this%gs_schwarz%op(work2, ns, gs_op_add)
242 call device_memcpy(work2, this%work2_d, ns, &
243 device_to_host, sync = .true.)
244 else
245 call this%gs_schwarz%op(work2, ns, gs_op_add)
246 end if
247 call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, &
248 msh%nelv)
249 call schwarz_extrude(work2, 2, one, work2, 0, one, enx, eny, enz, &
250 msh%nelv)
251
252 ! if(.not.if3d) then ! Go back to regular size array
253 ! call hsmg_schwarz_toreg2d(mg_work,mg_work(i),mg_nh(l))
254 ! else
255 call schwarz_toreg3d(work1, work2, xh%lx, msh%nelv)
256 ! endif
257
258 if (neko_bcknd_device .eq. 1) then
259 call device_memcpy(work1, this%work1_d, n, &
260 host_to_device, sync = .false.)
261 call this%gs_h%op(work1, n, gs_op_add)
262 call device_memcpy(work1, this%work1_d, n, &
263 device_to_host, sync = .true.)
264 else
265 call this%gs_h%op(work1, n, gs_op_add)
266 end if
267
268 k = 1
269 do ie = 1, msh%nelv
270 if (msh%gdim .eq. 2) then
271 call schwarz_setup_schwarz_wt2d_2(this%wt, ie, xh%lx, &
272 work1(k), msh%nelv)
273 end if
274 if (this%msh%gdim .eq. 3) then
275 call schwarz_setup_schwarz_wt3d_2(this%wt, ie, xh%lx, &
276 work1(k), msh%nelv)
277 k = k + xh%lxyz
278 end if
279 end do
280 end associate
281 end subroutine schwarz_setup_wt
282
284 subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
285 integer, intent(in) :: n, nelv
286 real(kind=rp), intent(inout) :: wt(n, 4, 2, nelv)
287 real(kind=rp), intent(inout) :: work(n, n)
288 integer :: ie, i, j
289 do j = 1, n
290 wt(j, 1, 1, ie) = 1.0_rp / work(1, j)
291 wt(j, 2, 1, ie) = 1.0_rp / work(2, j)
292 wt(j, 3, 1, ie) = 1.0_rp / work(n - 1, j)
293 wt(j, 4, 1, ie) = 1.0_rp / work(n, j)
294 end do
295 do i = 1, n
296 wt(i, 1, 2, ie) = 1.0_rp / work(i, 1)
297 wt(i, 2, 2, ie) = 1.0_rp / work(i, 2)
298 wt(i, 3, 2, ie) = 1.0_rp / work(i, n - 1)
299 wt(i, 4, 2, ie) = 1.0_rp / work(i, n)
300 end do
301
302 return
303 end subroutine schwarz_setup_schwarz_wt2d_2
304
306 subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
307 integer, intent(in) :: n, nelv, ie
308 real(kind=rp), intent(inout) :: wt(n, n, 4, 3, nelv)
309 real(kind=rp), intent(inout) :: work(n, n, n)
310 integer :: i, j, k
311
312 do k = 1, n
313 do j = 1, n
314 wt(j, k, 1, 1, ie) = 1.0_rp / work(1, j, k)
315 wt(j, k, 2, 1, ie) = 1.0_rp / work(2, j, k)
316 wt(j, k, 3, 1, ie) = 1.0_rp / work(n - 1, j, k)
317 wt(j, k, 4, 1, ie) = 1.0_rp / work(n, j, k)
318 end do
319 end do
320
321 do k = 1, n
322 do i = 1, n
323 wt(i, k, 1, 2, ie) = 1.0_rp / work(i, 1, k)
324 wt(i, k, 2, 2, ie) = 1.0_rp / work(i, 2, k)
325 wt(i, k, 3, 2, ie) = 1.0_rp / work(i, n - 1, k)
326 wt(i, k, 4, 2, ie) = 1.0_rp / work(i, n, k)
327 end do
328 end do
329
330 do j = 1, n
331 do i = 1, n
332 wt(i, j, 1, 3, ie) = 1.0_rp / work(i, j, 1)
333 wt(i, j, 2, 3, ie) = 1.0_rp / work(i, j, 2)
334 wt(i, j, 3, 3, ie) = 1.0_rp / work(i, j, n - 1)
335 wt(i, j, 4, 3, ie) = 1.0_rp / work(i, j, n)
336 end do
337 end do
338
339 end subroutine schwarz_setup_schwarz_wt3d_2
340
342 subroutine schwarz_toreg3d(b, a, n, nelv)
343 integer, intent(in) :: n, nelv
344 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv)
345 real(kind=rp), intent(inout) :: b(n, n, n, nelv)
346 integer :: i, j, k, ie
347 do ie = 1, nelv
348 do k = 1, n
349 do j = 1, n
350 do i = 1, n
351 b(i, j, k, ie) = a(i, j, k, ie)
352 end do
353 end do
354 end do
355 end do
356 end subroutine schwarz_toreg3d
357
359 subroutine schwarz_toext3d(a, b, n, nelv)
360 integer, intent(in) :: n, nelv
361 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv), &
362 b(n, n, n, nelv)
363 integer :: i, j, k, ie
364
365 call rzero(a, (n+2)*(n+2)*(n+2)*nelv)
366 do ie = 1, nelv
367 do k = 1, n
368 do j = 1, n
369 do i = 1, n
370 a(i, j, k, ie) = b(i, j, k, ie)
371 end do
372 end do
373 end do
374 end do
375 end subroutine schwarz_toext3d
376
380 subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
381 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
382 real(kind=rp), intent(inout) :: arr1(nx, ny, nz, nelv), &
383 arr2(nx, ny, nz, nelv)
384 real(kind=rp), intent(in) :: f1, f2
385 integer :: i, j, k, ie, i0, i1
386 i0 = 2
387 i1 = nx - 1
388
389 if (nz .eq. 1) then
390 do ie = 1, nelv
391 do j = i0, i1
392 arr1(l1 + 1, j, 1, ie) = f1 * arr1(l1 + 1, j, 1, ie) &
393 + f2 * arr2(l2 + 1, j, 1, ie)
394 arr1(nx - l1, j, 1, ie) = f1 * arr1(nx - l1, j, 1, ie) &
395 + f2 * arr2(nx - l2, j, 1, ie)
396 end do
397 do i = i0, i1
398 arr1(i, l1 + 1, 1, ie) = f1 * arr1(i, l1 + 1, 1, ie) &
399 + f2 * arr2(i, l2 + 1, 1, ie)
400 arr1(i, ny - l1, 1, ie) = f1 * arr1(i, ny - l1, 1, ie) &
401 + f2 * arr2(i, nx - l2, 1, ie)
402 end do
403 end do
404 else
405 do ie = 1, nelv
406 do k = i0, i1
407 do j = i0, i1
408 arr1(l1 + 1, j, k, ie) = f1 * arr1(l1 + 1, j, k, ie) &
409 + f2 * arr2(l2 + 1, j, k, ie)
410 arr1(nx - l1, j, k, ie) = f1 * arr1(nx - l1, j, k, ie) &
411 + f2 * arr2(nx - l2, j, k, ie)
412 end do
413 end do
414 do k = i0, i1
415 do i = i0, i1
416 arr1(i, l1 + 1, k, ie) = f1 * arr1(i, l1 + 1, k, ie) &
417 + f2 * arr2(i, l2 + 1, k, ie)
418 arr1(i, nx - l1, k, ie) = f1 * arr1(i, nx - l1, k, ie) &
419 + f2 * arr2(i, nx - l2, k, ie)
420 end do
421 end do
422 do j = i0, i1
423 do i = i0, i1
424 arr1(i, j, l1 + 1, ie) = f1 * arr1(i, j, l1 + 1, ie) &
425 + f2 * arr2(i, j, l2 + 1, ie)
426 arr1(i, j, nx - l1, ie) = f1 * arr1(i, j, nx - l1, ie) &
427 + f2 * arr2(i, j, nx - l2, ie)
428 end do
429 end do
430 end do
431 end if
432 end subroutine schwarz_extrude
433
434 subroutine schwarz_compute(this, e, r)
435 class(schwarz_t), intent(inout) :: this
436 real(kind=rp), dimension(this%dof%size()), intent(inout) :: e, r
437 integer :: n, enx, eny, enz, ns
438 real(kind=rp), parameter :: zero = 0.0_rp
439 real(kind=rp), parameter :: one = 1.0_rp
440 type(c_ptr) :: e_d, r_d
441 associate(work1 => this%work1, work1_d => this%work1_d, &
442 work2 => this%work2, work2_d => this%work2_d)
443
444 n = this%dof%size()
445 enx = this%Xh_schwarz%lx
446 eny = this%Xh_schwarz%ly
447 enz = this%Xh_schwarz%lz
448 if (.not. this%msh%gdim .eq. 3) enz = 1
449 ns = enx * eny * enz * this%msh%nelv
450 if (neko_bcknd_device .eq. 1) then
451 r_d = device_get_ptr(r)
452 e_d = device_get_ptr(e)
453 call device_event_record(this%event, glb_cmd_queue)
454 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
455 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
456 this%msh%nelv, aux_cmd_queue)
457 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
458 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
459
460 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
461 call this%gs_schwarz%op(work1, ns, gs_op_add, this%event)
462 call device_event_sync(this%event)
463 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
464 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
465
466 call this%fdm%compute(work2, work1, aux_cmd_queue) ! do local solves
467
468 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
469 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
470 call this%gs_schwarz%op(work2, ns, gs_op_add, this%event)
471 call device_event_sync(this%event)
472
473 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
474 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
475 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
476 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
477 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
478 this%msh%nelv, aux_cmd_queue)
479
480 this%gs_h%bcknd%gs_stream = aux_cmd_queue
481 call this%gs_h%op(e, n, gs_op_add, this%event)
482
483 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
484 call device_col2(e_d, this%wt_d, n, aux_cmd_queue)
485
486 ! switch back to the default stream on the shared gs
487 if (.not. this%local_gs) then
488 call device_event_sync(this%event)
489 this%gs_h%bcknd%gs_stream = glb_cmd_queue
490 end if
491 else
492 call this%bclst%apply_scalar(r, n)
493 call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
494
495 ! exchange interior nodes
496 call schwarz_extrude(work1, 0, zero, work1, 2, one, &
497 enx, eny, enz, this%msh%nelv)
498 call this%gs_schwarz%op(work1, ns, gs_op_add)
499 call schwarz_extrude(work1, 0, one, work1, 2, -one, &
500 enx, eny, enz, this%msh%nelv)
501
502 call this%fdm%compute(work2, work1) ! do local solves
503
504 ! Sum overlap region (border excluded)
505 call schwarz_extrude(work1, 0, zero, work2, 0, one, &
506 enx, eny, enz, this%msh%nelv)
507 call this%gs_schwarz%op(work2, ns, gs_op_add)
508 call schwarz_extrude(work2, 0, one, work1, 0, -one, &
509 enx, eny, enz, this%msh%nelv)
510 call schwarz_extrude(work2, 2, one, work2, 0, one, &
511 enx, eny, enz, this%msh%nelv)
512
513 call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
514
515 ! sum border nodes
516 call this%gs_h%op(e, n, gs_op_add)
517 call this%bclst%apply_scalar(e, n)
518
519 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
520 end if
521 end associate
522 end subroutine schwarz_compute
523
524 !Apply schwarz weights along the boundary of each element.
525 subroutine schwarz_wt3d(e, wt, n, nelv)
526 integer, intent(in) :: n, nelv
527 real(kind=rp), intent(inout) :: e(n, n, n, nelv)
528 real(kind=rp), intent(inout) :: wt(n, n, 4, 3, nelv)
529 integer :: ie, i, j, k
530
531 do ie = 1, nelv
532 do k = 1, n
533 do j = 1, n
534 e(1, j, k, ie) = e(1, j, k, ie) * wt(j, k, 1, 1, ie)
535 e(2, j, k, ie) = e(2, j, k, ie) * wt(j, k, 2, 1, ie)
536 e(n - 1, j, k, ie) = e(n - 1, j, k, ie) * wt(j, k, 3, 1, ie)
537 e(n, j, k, ie) = e(n, j, k, ie) * wt(j, k, 4, 1, ie)
538 end do
539 end do
540 do k = 1, n
541 do i = 3, n-2
542 e(i, 1, k, ie) = e(i, 1, k, ie) * wt(i, k, 1, 2, ie)
543 e(i, 2, k, ie) = e(i, 2, k, ie) * wt(i, k, 2, 2, ie)
544 e(i, n - 1, k, ie) = e(i, n - 1, k, ie) * wt(i, k, 3, 2, ie)
545 e(i, n, k, ie) = e(i, n, k, ie) * wt(i, k, 4, 2, ie)
546 end do
547 end do
548 do j = 3, n-2
549 do i = 3, n-2
550 e(i, j, 1, ie) = e(i, j, 1, ie) * wt(i, j, 1, 3, ie)
551 e(i, j, 2, ie) = e(i, j, 2, ie) * wt(i, j, 2, 3, ie)
552 e(i, j, n - 1, ie) = e(i, j, n - 1, ie) * wt(i, j, 3, 3, ie)
553 e(i, j, n, ie) = e(i, j, n, ie) * wt(i, j, 4, 3, ie)
554 end do
555 end do
556 end do
557 end subroutine schwarz_wt3d
558end module schwarz
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_schwarz_toreg3d(b_d, a_d, nx, nelv, stream)
subroutine, public device_schwarz_extrude(arr1_d, l1, f1, arr2_d, l2, f2, nx, ny, nz, nelv, stream)
subroutine, public device_schwarz_toext3d(a_d, b_d, nx, nelv, stream)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1295
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1279
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1208
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1249
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
Gather-scatter.
Definition math.f90:60
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Overlapping schwarz solves.
Definition schwarz.f90:61
subroutine schwarz_toreg3d(b, a, n, nelv)
convert array a from extended size to regular
Definition schwarz.f90:343
subroutine schwarz_setup_wt(this)
setup weights
Definition schwarz.f90:216
subroutine schwarz_init(this, xh, dof, gs_h, bclst, msh)
Definition schwarz.f90:113
subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
Sum values along rows l1, l2 with weights f1, f2 and store along row l1. Helps us avoid complicated c...
Definition schwarz.f90:381
subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 3d, second step.
Definition schwarz.f90:307
subroutine schwarz_compute(this, e, r)
Definition schwarz.f90:435
subroutine schwarz_wt3d(e, wt, n, nelv)
Definition schwarz.f90:526
subroutine schwarz_toext3d(a, b, n, nelv)
convert array a from original size to size extended array with border
Definition schwarz.f90:360
subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 2d, second step.
Definition schwarz.f90:285
subroutine schwarz_free(this)
Definition schwarz.f90:174
Defines a function space.
Definition space.f90:34
integer, parameter, public gll
Definition space.f90:49
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
The function space for the SEM solution fields.
Definition space.f90:63