Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
schwarz.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module schwarz
62 use num_types, only : rp, i8
63 use math, only : rzero, rone
64 use mesh, only : mesh_t
65 use space, only : space_t, gll
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
71 use fdm, only : fdm_t
78 use bc_list, only : bc_list_t
79 use, intrinsic :: iso_c_binding, only : c_sizeof, c_ptr, c_null_ptr, &
80 c_associated
81 !$ use omp_lib
82 implicit none
83 private
84
85 type, public :: schwarz_t
86 real(kind=rp), allocatable :: work1(:)
87 real(kind=rp), allocatable :: work2(:)
88 real(kind=rp), allocatable :: wt(:,:,:,:,:)
89 type(c_ptr) :: work1_d = c_null_ptr
90 type(c_ptr) :: work2_d = c_null_ptr
91 type(c_ptr) :: wt_d = c_null_ptr
92 type(space_t) :: xh_schwarz
93 type(gs_t) :: gs_schwarz
94 type(dofmap_t) :: dm_schwarz
95 type(fdm_t) :: fdm
96 type(space_t), pointer :: xh => null()
97 type(bc_list_t), pointer :: bclst => null()
98 type(dofmap_t), pointer :: dof => null()
99 type(gs_t), pointer :: gs_h => null()
100 type(mesh_t), pointer :: msh => null()
101 type(c_ptr) :: event = c_null_ptr
102 logical :: local_gs = .false.
103 type(gs_t), allocatable :: gs_h_local
104 contains
105 procedure, pass(this) :: init => schwarz_init
106 procedure, pass(this) :: free => schwarz_free
107 procedure, pass(this) :: compute => schwarz_compute
108 end type schwarz_t
109
110contains
111
112 subroutine schwarz_init(this, Xh, dof, gs_h, bclst, msh)
113 class(schwarz_t), target, intent(inout) :: this
114 type(space_t), target, intent(inout) :: Xh
115 type(dofmap_t), target, intent(in) :: dof
116 type(gs_t), target, intent(inout) :: gs_h
117 type(mesh_t), target, intent(inout) :: msh
118 type(bc_list_t), target, intent(inout) :: bclst
119 integer :: nthrds
120
121 call this%free()
122
123 call this%Xh_schwarz%init(gll, xh%lx+2, xh%lx+2, xh%lx+2)
124 call this%dm_schwarz%init(msh, this%Xh_schwarz)
125 call this%gs_schwarz%init(this%dm_schwarz)
126
127 allocate(this%work1(this%dm_schwarz%size()))
128 allocate(this%work2(this%dm_schwarz%size()))
129 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
130
131 call this%fdm%init(xh, dof, gs_h)
132
133
134 this%msh => msh
135 this%Xh => xh
136 this%bclst => bclst
137 this%dof => dof
138
139 nthrds = 1
140 !$omp parallel
141 !$ nthrds = omp_get_num_threads()
142 !$omp end parallel
143
144 ! If we are running multithreaded, we need a local gs object,
145 ! otherwise we can reuse the external one
146 if (nthrds .gt. 1) then
147 allocate(this%gs_h_local)
148 call this%gs_h_local%init(this%dof)
149 this%gs_h => this%gs_h_local
150 this%local_gs = .true.
151 else
152 this%gs_h => gs_h
153 this%local_gs = .false.
154 end if
155
156 if (neko_bcknd_device .eq. 1) then
157 call device_map(this%work1, this%work1_d, this%dm_schwarz%size())
158 call device_map(this%work2, this%work2_d, this%dm_schwarz%size())
159 end if
160
161 call schwarz_setup_wt(this)
162 if (neko_bcknd_device .eq. 1) then
163 call device_alloc(this%wt_d, &
164 int(this%dof%size(), i8) * int(c_sizeof(this%work1(1)), i8))
165 call rone(this%work1, this%dof%size())
166 call schwarz_wt3d(this%work1, this%wt, xh%lx, msh%nelv)
167 call device_memcpy(this%work1, this%wt_d, this%dof%size(), &
168 host_to_device, sync = .false.)
169 call device_event_create(this%event, 2)
170 end if
171 end subroutine schwarz_init
172
173 subroutine schwarz_free(this)
174 class(schwarz_t), intent(inout) :: this
175
176 if (allocated(this%work1)) deallocate(this%work1)
177 if (allocated(this%work2)) deallocate(this%work2)
178 if (allocated(this%wt)) deallocate(this%wt)
179
180 if (c_associated(this%work1_d)) then
181 call device_free(this%work1_d)
182 end if
183
184 if (c_associated(this%work2_d)) then
185 call device_free(this%work2_d)
186 end if
187
188 if (c_associated(this%wt_d)) then
189 call device_free(this%wt_d)
190 end if
191
192 call this%Xh_schwarz%free()
193 call this%gs_schwarz%free()
194 !why cant I do this?
195 !call dofmap_free(this%dm_schwarz)
196 call this%dm_schwarz%free()
197 call this%fdm%free()
198
199 nullify(this%Xh)
200 nullify(this%bclst)
201 nullify(this%dof)
202 if (allocated(this%gs_h_local)) then
203 call this%gs_h_local%free()
204 deallocate(this%gs_h_local)
205 end if
206 nullify(this%gs_h)
207 nullify(this%msh)
208
209 this%local_gs = .false.
210 if (c_associated(this%event)) then
211 call device_event_destroy(this%event)
212 end if
213 end subroutine schwarz_free
215 subroutine schwarz_setup_wt(this)
216 class(schwarz_t), intent(inout) :: this
217 integer :: enx, eny, enz, n, ie, k, ns
218 real(kind=rp), parameter :: zero = 0.0_rp
219 real(kind=rp), parameter :: one = 1.0_rp
220 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
221 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
222
223 n = this%dof%size()
224
225 enx = xh_schwarz%lx
226 eny = xh_schwarz%ly
227 enz = xh_schwarz%lz
228 if (.not. msh%gdim .eq. 3) enz = 1
229 ns = enx * eny * enz * msh%nelv
230
231 call rone(work2, ns)
232 call rzero(work1, ns)
233
234 ! Sum overlap region (border excluded)
235 ! Cred to PFF for this, very clever
236 call schwarz_extrude(work1, 0, zero, work2, 0, one, enx, eny, enz, &
237 msh%nelv)
238 if (neko_bcknd_device .eq. 1) then
239 call device_memcpy(work2, this%work2_d, ns, &
240 host_to_device, sync = .false.)
241 call this%gs_schwarz%op(work2, ns, gs_op_add)
242 call device_memcpy(work2, this%work2_d, ns, &
243 device_to_host, sync = .true.)
244 else
245 call this%gs_schwarz%op(work2, ns, gs_op_add)
246 end if
247 call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, &
248 msh%nelv)
249 call schwarz_extrude(work2, 2, one, work2, 0, one, enx, eny, enz, &
250 msh%nelv)
251
252 ! if(.not.if3d) then ! Go back to regular size array
253 ! call hsmg_schwarz_toreg2d(mg_work,mg_work(i),mg_nh(l))
254 ! else
255 call schwarz_toreg3d(work1, work2, xh%lx, msh%nelv)
256 ! endif
257
258 if (neko_bcknd_device .eq. 1) then
259 call device_memcpy(work1, this%work1_d, n, &
260 host_to_device, sync = .false.)
261 call this%gs_h%op(work1, n, gs_op_add)
262 call device_memcpy(work1, this%work1_d, n, &
263 device_to_host, sync = .true.)
264 else
265 call this%gs_h%op(work1, n, gs_op_add)
266 end if
267
268 k = 1
269 do ie = 1, msh%nelv
270 if (msh%gdim .eq. 2) then
271 call schwarz_setup_schwarz_wt2d_2(this%wt, ie, xh%lx, &
272 work1(k), msh%nelv)
273 end if
274 if (this%msh%gdim .eq. 3) then
275 call schwarz_setup_schwarz_wt3d_2(this%wt, ie, xh%lx, &
276 work1(k), msh%nelv)
277 k = k + xh%lxyz
278 end if
279 end do
280 end associate
281 end subroutine schwarz_setup_wt
282
284 subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
285 integer, intent(in) :: n, nelv
286 real(kind=rp), intent(inout) :: wt(n, 4, 2, nelv)
287 real(kind=rp), intent(inout) :: work(n, n)
288 integer :: ie, i, j
289 do j = 1, n
290 wt(j, 1, 1, ie) = 1.0_rp / work(1, j)
291 wt(j, 2, 1, ie) = 1.0_rp / work(2, j)
292 wt(j, 3, 1, ie) = 1.0_rp / work(n - 1, j)
293 wt(j, 4, 1, ie) = 1.0_rp / work(n, j)
294 end do
295 do i = 1, n
296 wt(i, 1, 2, ie) = 1.0_rp / work(i, 1)
297 wt(i, 2, 2, ie) = 1.0_rp / work(i, 2)
298 wt(i, 3, 2, ie) = 1.0_rp / work(i, n - 1)
299 wt(i, 4, 2, ie) = 1.0_rp / work(i, n)
300 end do
301
302 return
303 end subroutine schwarz_setup_schwarz_wt2d_2
304
306 subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
307 integer, intent(in) :: n, nelv, ie
308 real(kind=rp), intent(inout) :: wt(n, n, 4, 3, nelv)
309 real(kind=rp), intent(inout) :: work(n, n, n)
310 integer :: i, j, k
311
312 do k = 1, n
313 do j = 1, n
314 wt(j, k, 1, 1, ie) = 1.0_rp / work(1, j, k)
315 wt(j, k, 2, 1, ie) = 1.0_rp / work(2, j, k)
316 wt(j, k, 3, 1, ie) = 1.0_rp / work(n - 1, j, k)
317 wt(j, k, 4, 1, ie) = 1.0_rp / work(n, j, k)
318 end do
319 end do
320
321 do k = 1, n
322 do i = 1, n
323 wt(i, k, 1, 2, ie) = 1.0_rp / work(i, 1, k)
324 wt(i, k, 2, 2, ie) = 1.0_rp / work(i, 2, k)
325 wt(i, k, 3, 2, ie) = 1.0_rp / work(i, n - 1, k)
326 wt(i, k, 4, 2, ie) = 1.0_rp / work(i, n, k)
327 end do
328 end do
329
330 do j = 1, n
331 do i = 1, n
332 wt(i, j, 1, 3, ie) = 1.0_rp / work(i, j, 1)
333 wt(i, j, 2, 3, ie) = 1.0_rp / work(i, j, 2)
334 wt(i, j, 3, 3, ie) = 1.0_rp / work(i, j, n - 1)
335 wt(i, j, 4, 3, ie) = 1.0_rp / work(i, j, n)
336 end do
337 end do
338
339 end subroutine schwarz_setup_schwarz_wt3d_2
340
342 subroutine schwarz_toreg3d(b, a, n, nelv)
343 integer, intent(in) :: n, nelv
344 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv)
345 real(kind=rp), intent(inout) :: b(n, n, n, nelv)
346 integer :: i, j, k, ie
347 !$omp parallel do private(ie, i, j, k)
348 do ie = 1, nelv
349 do k = 1, n
350 do j = 1, n
351 do i = 1, n
352 b(i, j, k, ie) = a(i, j, k, ie)
353 end do
354 end do
355 end do
356 end do
357 !$omp end parallel do
358 end subroutine schwarz_toreg3d
359
361 subroutine schwarz_toext3d(a, b, n, nelv)
362 integer, intent(in) :: n, nelv
363 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv), &
364 b(n, n, n, nelv)
365 integer :: i, j, k, ie
366
367 call rzero(a, (n+2)*(n+2)*(n+2)*nelv)
368 !$omp parallel do private(ie, i, j, k)
369 do ie = 1, nelv
370 do k = 1, n
371 do j = 1, n
372 do i = 1, n
373 a(i, j, k, ie) = b(i, j, k, ie)
374 end do
375 end do
376 end do
377 end do
378 !$omp end parallel do
379 end subroutine schwarz_toext3d
380
384 subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
385 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
386 real(kind=rp), intent(inout) :: arr1(nx, ny, nz, nelv), &
387 arr2(nx, ny, nz, nelv)
388 real(kind=rp), intent(in) :: f1, f2
389 integer :: i, j, k, ie, i0, i1
390 i0 = 2
391 i1 = nx - 1
392
393 if (nz .eq. 1) then
394 !$omp parallel do private(ie, i, j)
395 do ie = 1, nelv
396 do j = i0, i1
397 arr1(l1 + 1, j, 1, ie) = f1 * arr1(l1 + 1, j, 1, ie) &
398 + f2 * arr2(l2 + 1, j, 1, ie)
399 arr1(nx - l1, j, 1, ie) = f1 * arr1(nx - l1, j, 1, ie) &
400 + f2 * arr2(nx - l2, j, 1, ie)
401 end do
402 do i = i0, i1
403 arr1(i, l1 + 1, 1, ie) = f1 * arr1(i, l1 + 1, 1, ie) &
404 + f2 * arr2(i, l2 + 1, 1, ie)
405 arr1(i, ny - l1, 1, ie) = f1 * arr1(i, ny - l1, 1, ie) &
406 + f2 * arr2(i, nx - l2, 1, ie)
407 end do
408 end do
409 !$omp end parallel do
410 else
411 !$omp parallel do private(ie, i, j, k)
412 do ie = 1, nelv
413 do k = i0, i1
414 do j = i0, i1
415 arr1(l1 + 1, j, k, ie) = f1 * arr1(l1 + 1, j, k, ie) &
416 + f2 * arr2(l2 + 1, j, k, ie)
417 arr1(nx - l1, j, k, ie) = f1 * arr1(nx - l1, j, k, ie) &
418 + f2 * arr2(nx - l2, j, k, ie)
419 end do
420 end do
421 do k = i0, i1
422 do i = i0, i1
423 arr1(i, l1 + 1, k, ie) = f1 * arr1(i, l1 + 1, k, ie) &
424 + f2 * arr2(i, l2 + 1, k, ie)
425 arr1(i, nx - l1, k, ie) = f1 * arr1(i, nx - l1, k, ie) &
426 + f2 * arr2(i, nx - l2, k, ie)
427 end do
428 end do
429 do j = i0, i1
430 do i = i0, i1
431 arr1(i, j, l1 + 1, ie) = f1 * arr1(i, j, l1 + 1, ie) &
432 + f2 * arr2(i, j, l2 + 1, ie)
433 arr1(i, j, nx - l1, ie) = f1 * arr1(i, j, nx - l1, ie) &
434 + f2 * arr2(i, j, nx - l2, ie)
435 end do
436 end do
437 end do
438 !$omp end parallel do
439 end if
440 end subroutine schwarz_extrude
441
442 subroutine schwarz_compute(this, e, r)
443 class(schwarz_t), intent(inout) :: this
444 real(kind=rp), dimension(this%dof%size()), intent(inout) :: e, r
445 integer :: n, enx, eny, enz, ns
446 real(kind=rp), parameter :: zero = 0.0_rp
447 real(kind=rp), parameter :: one = 1.0_rp
448 type(c_ptr) :: e_d, r_d
449 associate(work1 => this%work1, work1_d => this%work1_d, &
450 work2 => this%work2, work2_d => this%work2_d)
451
452 n = this%dof%size()
453 enx = this%Xh_schwarz%lx
454 eny = this%Xh_schwarz%ly
455 enz = this%Xh_schwarz%lz
456 if (.not. this%msh%gdim .eq. 3) enz = 1
457 ns = enx * eny * enz * this%msh%nelv
458 if (neko_bcknd_device .eq. 1) then
459 r_d = device_get_ptr(r)
460 e_d = device_get_ptr(e)
461 call device_event_record(this%event, glb_cmd_queue)
462 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
463 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
464 this%msh%nelv, aux_cmd_queue)
465 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
466 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
467
468 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
469 call this%gs_schwarz%op(work1, ns, gs_op_add, this%event)
470 call device_event_sync(this%event)
471 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
472 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
473
474 call this%fdm%compute(work2, work1, aux_cmd_queue) ! do local solves
475
476 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
477 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
478 call this%gs_schwarz%op(work2, ns, gs_op_add, this%event)
479 call device_event_sync(this%event)
480
481 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
482 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
483 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
484 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
485 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
486 this%msh%nelv, aux_cmd_queue)
487
488 this%gs_h%bcknd%gs_stream = aux_cmd_queue
489 call this%gs_h%op(e, n, gs_op_add, this%event)
490
491 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
492 call device_col2(e_d, this%wt_d, n, aux_cmd_queue)
493
494 ! switch back to the default stream on the shared gs
495 if (.not. this%local_gs) then
496 call device_event_sync(this%event)
497 this%gs_h%bcknd%gs_stream = glb_cmd_queue
498 end if
499 else
500 call this%bclst%apply_scalar(r, n)
501 call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
502
503 ! exchange interior nodes
504 call schwarz_extrude(work1, 0, zero, work1, 2, one, &
505 enx, eny, enz, this%msh%nelv)
506 call this%gs_schwarz%op(work1, ns, gs_op_add)
507 call schwarz_extrude(work1, 0, one, work1, 2, -one, &
508 enx, eny, enz, this%msh%nelv)
509
510 call this%fdm%compute(work2, work1) ! do local solves
511
512 ! Sum overlap region (border excluded)
513 call schwarz_extrude(work1, 0, zero, work2, 0, one, &
514 enx, eny, enz, this%msh%nelv)
515 call this%gs_schwarz%op(work2, ns, gs_op_add)
516 call schwarz_extrude(work2, 0, one, work1, 0, -one, &
517 enx, eny, enz, this%msh%nelv)
518 call schwarz_extrude(work2, 2, one, work2, 0, one, &
519 enx, eny, enz, this%msh%nelv)
520
521 call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
522
523 ! sum border nodes
524 call this%gs_h%op(e, n, gs_op_add)
525 call this%bclst%apply_scalar(e, n)
526
527 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
528 end if
529 end associate
530 end subroutine schwarz_compute
531
532 !Apply schwarz weights along the boundary of each element.
533 subroutine schwarz_wt3d(e, wt, n, nelv)
534 integer, intent(in) :: n, nelv
535 real(kind=rp), intent(inout) :: e(n, n, n, nelv)
536 real(kind=rp), intent(inout) :: wt(n, n, 4, 3, nelv)
537 integer :: ie, i, j, k
538
539 !$omp parallel do private(ie, i, j, k)
540 do ie = 1, nelv
541 do k = 1, n
542 do j = 1, n
543 e(1, j, k, ie) = e(1, j, k, ie) * wt(j, k, 1, 1, ie)
544 e(2, j, k, ie) = e(2, j, k, ie) * wt(j, k, 2, 1, ie)
545 e(n - 1, j, k, ie) = e(n - 1, j, k, ie) * wt(j, k, 3, 1, ie)
546 e(n, j, k, ie) = e(n, j, k, ie) * wt(j, k, 4, 1, ie)
547 end do
548 end do
549 do k = 1, n
550 do i = 3, n-2
551 e(i, 1, k, ie) = e(i, 1, k, ie) * wt(i, k, 1, 2, ie)
552 e(i, 2, k, ie) = e(i, 2, k, ie) * wt(i, k, 2, 2, ie)
553 e(i, n - 1, k, ie) = e(i, n - 1, k, ie) * wt(i, k, 3, 2, ie)
554 e(i, n, k, ie) = e(i, n, k, ie) * wt(i, k, 4, 2, ie)
555 end do
556 end do
557 do j = 3, n-2
558 do i = 3, n-2
559 e(i, j, 1, ie) = e(i, j, 1, ie) * wt(i, j, 1, 3, ie)
560 e(i, j, 2, ie) = e(i, j, 2, ie) * wt(i, j, 2, 3, ie)
561 e(i, j, n - 1, ie) = e(i, j, n - 1, ie) * wt(i, j, 3, 3, ie)
562 e(i, j, n, ie) = e(i, j, n, ie) * wt(i, j, 4, 3, ie)
563 end do
564 end do
565 end do
566 !$omp end parallel do
567 end subroutine schwarz_wt3d
568end module schwarz
Return the device pointer for an associated Fortran array.
Definition device.F90:107
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_schwarz_toreg3d(b_d, a_d, nx, nelv, stream)
subroutine, public device_schwarz_extrude(arr1_d, l1, f1, arr2_d, l2, f2, nx, ny, nz, nelv, stream)
subroutine, public device_schwarz_toext3d(a_d, b_d, nx, nelv, stream)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1449
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1468
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1433
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:198
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1362
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1403
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
Gather-scatter.
Definition math.f90:60
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Overlapping schwarz solves.
Definition schwarz.f90:61
subroutine schwarz_toreg3d(b, a, n, nelv)
convert array a from extended size to regular
Definition schwarz.f90:343
subroutine schwarz_setup_wt(this)
setup weights
Definition schwarz.f90:216
subroutine schwarz_init(this, xh, dof, gs_h, bclst, msh)
Definition schwarz.f90:113
subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
Sum values along rows l1, l2 with weights f1, f2 and store along row l1. Helps us avoid complicated c...
Definition schwarz.f90:385
subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 3d, second step.
Definition schwarz.f90:307
subroutine schwarz_compute(this, e, r)
Definition schwarz.f90:443
subroutine schwarz_wt3d(e, wt, n, nelv)
Definition schwarz.f90:534
subroutine schwarz_toext3d(a, b, n, nelv)
convert array a from original size to size extended array with border
Definition schwarz.f90:362
subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 2d, second step.
Definition schwarz.f90:285
subroutine schwarz_free(this)
Definition schwarz.f90:174
Defines a function space.
Definition space.f90:34
integer, parameter, public gll
Definition space.f90:49
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
The function space for the SEM solution fields.
Definition space.f90:63