! Fragment of the Schwarz set-up routine. The procedure statement and
! several interior lines are outside this view, so only the visible
! statements are described.
! Dummy arguments: the passed object and five collaborators, all declared
! with the target attribute. Only dof is intent(in).
113 class(
schwarz_t),
target,
intent(inout) :: this
114 type(
space_t),
target,
intent(inout) :: Xh
115 type(
dofmap_t),
target,
intent(in) :: dof
116 type(
gs_t),
target,
intent(inout) :: gs_h
117 type(
mesh_t),
target,
intent(inout) :: msh
118 type(
bc_list_t),
target,
intent(inout) :: bclst
! Build the extended ("schwarz") space with xh%lx+2 in each of its three
! size arguments, then a dofmap on (msh, Xh_schwarz) and a gather-scatter
! on that dofmap.
123 call this%Xh_schwarz%init(
gll, xh%lx+2, xh%lx+2, xh%lx+2)
124 call this%dm_schwarz%init(msh, this%Xh_schwarz)
125 call this%gs_schwarz%init(this%dm_schwarz)
! work1/work2 are sized by the extended dofmap; wt is sized from the
! regular space (xh%lx) and the mesh (gdim, nelv).
127 allocate(this%work1(this%dm_schwarz%size()))
128 allocate(this%work2(this%dm_schwarz%size()))
129 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
! The fdm component is initialised from the regular space, dofmap and
! gather-scatter passed in.
131 call this%fdm%init(xh, dof, gs_h)
! When nthrds > 1, allocate and initialise a gather-scatter owned by this
! object and point this%gs_h at it; local_gs records that choice.
! NOTE(review): line 153 presumably belongs to the else branch, whose
! remaining statements are not visible here - confirm.
146 if (nthrds .gt. 1)
then
147 allocate(this%gs_h_local)
148 call this%gs_h_local%init(this%dof)
149 this%gs_h => this%gs_h_local
150 this%local_gs = .true.
153 this%local_gs = .false.
! device_map is called for each work array with its *_d handle and the
! extended dofmap size.
157 call device_map(this%work1, this%work1_d, this%dm_schwarz%size())
158 call device_map(this%work2, this%work2_d, this%dm_schwarz%size())
! Tail of a call whose beginning is outside this view. The visible
! argument is a byte count: dof%size() times c_sizeof of one work1
! element, both converted to kind i8 before multiplying.
164 int(this%dof%size(),
i8) * int(c_sizeof(this%work1(1)),
i8))
! rone is applied to the first dof%size() entries of work1.
! NOTE(review): presumably this sets them to 1.0 - confirm against rone.
165 call rone(this%work1, this%dof%size())
! Fragment of the weight set-up routine. The procedure statement, the
! assignments to enx/eny/enz/n and the branch bodies are outside this view.
217 integer :: enx, eny, enz, n, ie, k, ns
218 real(kind=
rp),
parameter :: zero = 0.0_rp
219 real(kind=
rp),
parameter :: one = 1.0_rp
! Local aliases for the work arrays, the mesh and the two spaces.
220 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
221 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
! Any mesh that is not three-dimensional uses a single plane in z.
228 if (.not. msh%gdim .eq. 3) enz = 1
! ns is the number of extended-space points over all elements.
229 ns = enx * eny * enz * msh%nelv
232 call rzero(work1, ns)
! The same additive gather-scatter on work2 appears twice.
! NOTE(review): presumably one call per backend branch (device / host);
! the enclosing if is not visible - confirm.
241 call this%gs_schwarz%op(work2, ns, gs_op_add)
245 call this%gs_schwarz%op(work2, ns, gs_op_add)
! Likewise for the regular gather-scatter on work1 with n entries.
261 call this%gs_h%op(work1, n, gs_op_add)
265 call this%gs_h%op(work1, n, gs_op_add)
! Dimension-specific handling follows; both branch bodies are outside
! this view.
270 if (msh%gdim .eq. 2)
then
274 if (this%msh%gdim .eq. 3)
then
! Fragment of the 2-D weight fill for one element ie. The procedure
! statement and the loops over i and j are outside this view.
! NOTE(review): ie is not in the visible intent(in) list here, unlike the
! 3-D counterpart; its declaration is outside this view - confirm.
285 integer,
intent(in) :: n, nelv
286 real(kind=rp),
intent(inout) :: wt(n, 4, 2, nelv)
287 real(kind=rp),
intent(inout) :: work(n, n)
! Direction 1: slots 1..4 hold the reciprocals of work at first-index
! positions 1, 2, n-1 and n.
! NOTE(review): there is no visible guard against zero entries in work.
290 wt(j, 1, 1, ie) = 1.0_rp / work(1, j)
291 wt(j, 2, 1, ie) = 1.0_rp / work(2, j)
292 wt(j, 3, 1, ie) = 1.0_rp / work(n - 1, j)
293 wt(j, 4, 1, ie) = 1.0_rp / work(n, j)
! Direction 2: the same four positions taken along the second index.
296 wt(i, 1, 2, ie) = 1.0_rp / work(i, 1)
297 wt(i, 2, 2, ie) = 1.0_rp / work(i, 2)
298 wt(i, 3, 2, ie) = 1.0_rp / work(i, n - 1)
299 wt(i, 4, 2, ie) = 1.0_rp / work(i, n)
! Fragment of the 3-D weight fill for one element ie. The procedure
! statement and the loops over i, j and k are outside this view.
307 integer,
intent(in) :: n, nelv, ie
308 real(kind=rp),
intent(inout) :: wt(n, n, 4, 3, nelv)
309 real(kind=rp),
intent(inout) :: work(n, n, n)
! Direction 1: slots 1..4 hold the reciprocals of work at first-index
! positions 1, 2, n-1 and n.
! NOTE(review): there is no visible guard against zero entries in work.
314 wt(j, k, 1, 1, ie) = 1.0_rp / work(1, j, k)
315 wt(j, k, 2, 1, ie) = 1.0_rp / work(2, j, k)
316 wt(j, k, 3, 1, ie) = 1.0_rp / work(n - 1, j, k)
317 wt(j, k, 4, 1, ie) = 1.0_rp / work(n, j, k)
! Direction 2: the same four positions along the second index.
323 wt(i, k, 1, 2, ie) = 1.0_rp / work(i, 1, k)
324 wt(i, k, 2, 2, ie) = 1.0_rp / work(i, 2, k)
325 wt(i, k, 3, 2, ie) = 1.0_rp / work(i, n - 1, k)
326 wt(i, k, 4, 2, ie) = 1.0_rp / work(i, n, k)
! Direction 3: the same four positions along the third index.
332 wt(i, j, 1, 3, ie) = 1.0_rp / work(i, j, 1)
333 wt(i, j, 2, 3, ie) = 1.0_rp / work(i, j, 2)
334 wt(i, j, 3, 3, ie) = 1.0_rp / work(i, j, n - 1)
335 wt(i, j, 4, 3, ie) = 1.0_rp / work(i, j, n)
! Fragment of the two-array face-layer blend. The procedure statement,
! the loop headers and the 2-D/3-D branch structure are outside this view.
! Every visible statement has the form
!   arr1(layer l1) = f1 * arr1(layer l1) + f2 * arr2(layer l2)
! where the layer sits l1 (resp. l2) points in from a face: index l+1 at
! the low end and extent-l at the high end of the direction.
387 integer,
intent(in) :: l1, l2, nx, ny, nz, nelv
388 real(kind=rp),
intent(inout) :: arr1(nx, ny, nz, nelv)
389 real(kind=rp),
intent(in) :: arr2(nx, ny, nz, nelv)
390 real(kind=rp),
intent(in) :: f1, f2
391 integer :: i, j, k, ie, i0, i1
! Third index fixed at 1: low and high layers along the first dimension.
399 arr1(l1 + 1, j, 1, ie) = f1 * arr1(l1 + 1, j, 1, ie) &
400 + f2 * arr2(l2 + 1, j, 1, ie)
401 arr1(nx - l1, j, 1, ie) = f1 * arr1(nx - l1, j, 1, ie) &
402 + f2 * arr2(nx - l2, j, 1, ie)
! Third index fixed at 1: low and high layers along the second dimension.
405 arr1(i, l1 + 1, 1, ie) = f1 * arr1(i, l1 + 1, 1, ie) &
406 + f2 * arr2(i, l2 + 1, 1, ie)
! The high-end source layer is measured from ny, the declared extent of
! the second dimension, so that it mirrors the destination subscript.
! Using nx here reads the wrong layer whenever nx /= ny.
407 arr1(i, ny - l1, 1, ie) = f1 * arr1(i, ny - l1, 1, ie) &
408 + f2 * arr2(i, ny - l2, 1, ie)
! General k: layers along the first dimension.
417 arr1(l1 + 1, j, k, ie) = f1 * arr1(l1 + 1, j, k, ie) &
418 + f2 * arr2(l2 + 1, j, k, ie)
419 arr1(nx - l1, j, k, ie) = f1 * arr1(nx - l1, j, k, ie) &
420 + f2 * arr2(nx - l2, j, k, ie)
! General k: layers along the second dimension.
! NOTE(review): here and below the high-end subscripts of the second and
! third dimensions are measured from nx on both sides; that is only valid
! when nx == ny == nz - confirm against the callers.
425 arr1(i, l1 + 1, k, ie) = f1 * arr1(i, l1 + 1, k, ie) &
426 + f2 * arr2(i, l2 + 1, k, ie)
427 arr1(i, nx - l1, k, ie) = f1 * arr1(i, nx - l1, k, ie) &
428 + f2 * arr2(i, nx - l2, k, ie)
! Layers along the third dimension.
433 arr1(i, j, l1 + 1, ie) = f1 * arr1(i, j, l1 + 1, ie) &
434 + f2 * arr2(i, j, l2 + 1, ie)
435 arr1(i, j, nx - l1, ie) = f1 * arr1(i, j, nx - l1, ie) &
436 + f2 * arr2(i, j, nx - l2, ie)
! Fragment of the single-array face-layer blend. The procedure statement,
! the loop headers and the 2-D/3-D branch structure are outside this view.
! Every visible statement has the form
!   arr(layer l1) = f1 * arr(layer l1) + f2 * arr(layer l2)
! so source and destination are layers of the same array: index l+1 at
! the low end and extent-l at the high end of the direction.
448 integer,
intent(in) :: l1, l2, nx, ny, nz, nelv
449 real(kind=rp),
intent(inout) :: arr(nx, ny, nz, nelv)
450 real(kind=rp),
intent(in) :: f1, f2
451 integer :: i, j, k, ie, i0, i1
! Third index fixed at 1: low and high layers along the first dimension.
459 arr(l1 + 1, j, 1, ie) = f1 * arr(l1 + 1, j, 1, ie) &
460 + f2 * arr(l2 + 1, j, 1, ie)
461 arr(nx - l1, j, 1, ie) = f1 * arr(nx - l1, j, 1, ie) &
462 + f2 * arr(nx - l2, j, 1, ie)
! Third index fixed at 1: low and high layers along the second dimension.
465 arr(i, l1 + 1, 1, ie) = f1 * arr(i, l1 + 1, 1, ie) &
466 + f2 * arr(i, l2 + 1, 1, ie)
! The high-end source layer is measured from ny, the declared extent of
! the second dimension, so that it mirrors the destination subscript.
! Using nx here reads the wrong layer whenever nx /= ny.
467 arr(i, ny - l1, 1, ie) = f1 * arr(i, ny - l1, 1, ie) &
468 + f2 * arr(i, ny - l2, 1, ie)
! General k: layers along the first dimension.
477 arr(l1 + 1, j, k, ie) = f1 * arr(l1 + 1, j, k, ie) &
478 + f2 * arr(l2 + 1, j, k, ie)
479 arr(nx - l1, j, k, ie) = f1 * arr(nx - l1, j, k, ie) &
480 + f2 * arr(nx - l2, j, k, ie)
! General k: layers along the second dimension.
! NOTE(review): here and below the high-end subscripts of the second and
! third dimensions are measured from nx on both sides; that is only valid
! when nx == ny == nz - confirm against the callers.
485 arr(i, l1 + 1, k, ie) = f1 * arr(i, l1 + 1, k, ie) &
486 + f2 * arr(i, l2 + 1, k, ie)
487 arr(i, nx - l1, k, ie) = f1 * arr(i, nx - l1, k, ie) &
488 + f2 * arr(i, nx - l2, k, ie)
! Layers along the third dimension.
493 arr(i, j, l1 + 1, ie) = f1 * arr(i, j, l1 + 1, ie) &
494 + f2 * arr(i, j, l2 + 1, ie)
495 arr(i, j, nx - l1, ie) = f1 * arr(i, j, nx - l1, ie) &
496 + f2 * arr(i, j, nx - l2, ie)
! Fragment of the routine that applies the preconditioner: e is written,
! r is the input. The procedure statement, the assignment to n and
! several call heads are outside this view.
506 real(kind=rp),
dimension(this%dof%size()),
intent(inout) :: e, r
507 integer :: n, enx, eny, enz, ns
508 real(kind=rp),
parameter :: zero = 0.0_rp
509 real(kind=rp),
parameter :: one = 1.0_rp
510 type(c_ptr) :: e_d, r_d
511 associate(work1 => this%work1, work1_d => this%work1_d, &
512 work2 => this%work2, work2_d => this%work2_d)
! Extended-space sizes; a non-3-D mesh uses a single plane in z.
515 enx = this%Xh_schwarz%lx
516 eny = this%Xh_schwarz%ly
517 enz = this%Xh_schwarz%lz
518 if (.not. this%msh%gdim .eq. 3) enz = 1
519 ns = enx * eny * enz * this%msh%nelv
! Device branch: all kernels below are issued on aux_cmd_queue.
520 if (neko_bcknd_device .eq. 1)
then
521 r_d = device_get_ptr(r)
522 e_d = device_get_ptr(e)
! Record an event on glb_cmd_queue and make aux_cmd_queue wait on it.
523 call device_event_record(this%event, glb_cmd_queue)
524 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
! Copy r into the extended layout in work1, then blend layers of work1.
525 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
526 this%msh%nelv, aux_cmd_queue)
527 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
528 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
! Point the extended gather-scatter at aux_cmd_queue, run an additive
! gather-scatter on work1 and wait for this%event.
530 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
531 call this%gs_schwarz%op(work1, ns, gs_op_add, this%event)
532 call device_event_sync(this%event)
533 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
534 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
! Local solve: fdm%compute takes work2 first and work1 second.
! NOTE(review): presumably work2 is the output - confirm against fdm.
536 call this%fdm%compute(work2, work1, aux_cmd_queue)
538 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
539 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
540 call this%gs_schwarz%op(work2, ns, gs_op_add, this%event)
541 call device_event_sync(this%event)
543 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
544 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
545 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
546 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
! Copy the extended result back into e, then gather-scatter, apply the
! boundary conditions and multiply by the device weights wt_d.
547 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
548 this%msh%nelv, aux_cmd_queue)
550 this%gs_h%bcknd%gs_stream = aux_cmd_queue
551 call this%gs_h%op(e, n, gs_op_add, this%event)
553 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
554 call device_col2(e_d, this%wt_d, n, aux_cmd_queue)
! If gs_h is not owned by this object, wait for the event and point its
! stream back at glb_cmd_queue.
557 if (.not. this%local_gs)
then
558 call device_event_sync(this%event)
559 this%gs_h%bcknd%gs_stream = glb_cmd_queue
! Host path. NOTE(review): presumably the else branch of the device test;
! the else line is not visible - confirm.
562 call this%bclst%apply_scalar(r, n)
! The lines ending in "this%msh%nelv)" are tails of calls whose first
! lines are outside this view.
567 enx, eny, enz, this%msh%nelv)
568 call this%gs_schwarz%op(work1, ns, gs_op_add)
570 enx, eny, enz, this%msh%nelv)
572 call this%fdm%compute(work2, work1)
576 enx, eny, enz, this%msh%nelv)
577 call this%gs_schwarz%op(work2, ns, gs_op_add)
579 enx, eny, enz, this%msh%nelv)
581 enx, eny, enz, this%msh%nelv)
! Gather-scatter e, apply the boundary conditions, then apply the face
! weights.
586 call this%gs_h%op(e, n, gs_op_add)
587 call this%bclst%apply_scalar(e, n)
589 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
! Fragment of the 3-D face-weight application. The procedure statement
! and the loops over ie, i, j and k are outside this view.
! Each visible statement multiplies one layer of e (index 1, 2, n-1 or n
! along a direction) by the matching slot 1..4 of wt for that direction.
596 integer,
intent(in) :: n, nelv
597 real(kind=rp),
intent(inout) :: e(n, n, n, nelv)
! NOTE(review): wt is only read in the visible statements, so intent(in)
! may suffice - confirm against the lines outside this view.
598 real(kind=rp),
intent(inout) :: wt(n, n, 4, 3, nelv)
599 integer :: ie, i, j, k
! Direction 1: layers at first index 1, 2, n-1 and n.
605 e(1, j, k, ie) = e(1, j, k, ie) * wt(j, k, 1, 1, ie)
606 e(2, j, k, ie) = e(2, j, k, ie) * wt(j, k, 2, 1, ie)
607 e(n - 1, j, k, ie) = e(n - 1, j, k, ie) * wt(j, k, 3, 1, ie)
608 e(n, j, k, ie) = e(n, j, k, ie) * wt(j, k, 4, 1, ie)
! Direction 2: layers at second index 1, 2, n-1 and n.
613 e(i, 1, k, ie) = e(i, 1, k, ie) * wt(i, k, 1, 2, ie)
614 e(i, 2, k, ie) = e(i, 2, k, ie) * wt(i, k, 2, 2, ie)
615 e(i, n - 1, k, ie) = e(i, n - 1, k, ie) * wt(i, k, 3, 2, ie)
616 e(i, n, k, ie) = e(i, n, k, ie) * wt(i, k, 4, 2, ie)
! Direction 3: layers at third index 1, 2, n-1 and n.
621 e(i, j, 1, ie) = e(i, j, 1, ie) * wt(i, j, 1, 3, ie)
622 e(i, j, 2, ie) = e(i, j, 2, ie) * wt(i, j, 2, 3, ie)
623 e(i, j, n - 1, ie) = e(i, j, n - 1, ie) * wt(i, j, 3, 3, ie)
624 e(i, j, n, ie) = e(i, j, n, ie) * wt(i, j, 4, 3, ie)