113 class(
schwarz_t),
target,
intent(inout) :: this
114 type(
space_t),
target,
intent(inout) :: Xh
115 type(
dofmap_t),
target,
intent(in) :: dof
116 type(
gs_t),
target,
intent(inout) :: gs_h
117 type(
mesh_t),
target,
intent(inout) :: msh
118 type(
bc_list_t),
target,
intent(inout) :: bclst
123 call this%Xh_schwarz%init(
gll, xh%lx+2, xh%lx+2, xh%lx+2)
124 call this%dm_schwarz%init(msh, this%Xh_schwarz)
125 call this%gs_schwarz%init(this%dm_schwarz)
127 allocate(this%work1(this%dm_schwarz%size()))
128 allocate(this%work2(this%dm_schwarz%size()))
129 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
131 call this%fdm%init(xh, dof, gs_h)
146 if (nthrds .gt. 1)
then
147 allocate(this%gs_h_local)
148 call this%gs_h_local%init(this%dof)
149 this%gs_h => this%gs_h_local
150 this%local_gs = .true.
153 this%local_gs = .false.
157 call device_map(this%work1, this%work1_d, this%dm_schwarz%size())
158 call device_map(this%work2, this%work2_d, this%dm_schwarz%size())
164 int(this%dof%size(),
i8) * int(c_sizeof(this%work1(1)),
i8))
165 call rone(this%work1, this%dof%size())
217 integer :: enx, eny, enz, n, ie, k, ns
218 real(kind=
rp),
parameter :: zero = 0.0
219 real(kind=
rp),
parameter :: one = 1.0
220 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
221 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
228 if (.not. msh%gdim .eq. 3) enz = 1
229 ns = enx * eny * enz * msh%nelv
232 call rzero(work1, ns)
241 call this%gs_schwarz%op(work2, ns, gs_op_add)
245 call this%gs_schwarz%op(work2, ns, gs_op_add)
261 call this%gs_h%op(work1, n, gs_op_add)
265 call this%gs_h%op(work1, n, gs_op_add)
270 if (msh%gdim .eq. 2)
then
274 if (this%msh%gdim .eq. 3)
then
285 integer,
intent(in) :: n, nelv
286 real(kind=rp),
intent(inout) :: wt(n, 4, 2, nelv)
287 real(kind=rp),
intent(inout) :: work(n, n)
290 wt(j, 1, 1, ie) = 1.0_rp / work(1, j)
291 wt(j, 2, 1, ie) = 1.0_rp / work(2, j)
292 wt(j, 3, 1, ie) = 1.0_rp / work(n - 1, j)
293 wt(j, 4, 1, ie) = 1.0_rp / work(n, j)
296 wt(i, 1, 2, ie) = 1.0_rp / work(i, 1)
297 wt(i, 2, 2, ie) = 1.0_rp / work(i, 2)
298 wt(i, 3, 2, ie) = 1.0_rp / work(i, n - 1)
299 wt(i, 4, 2, ie) = 1.0_rp / work(i, n)
307 integer,
intent(in) :: n, nelv, ie
308 real(kind=rp),
intent(inout) :: wt(n, n, 4, 3, nelv)
309 real(kind=rp),
intent(inout) :: work(n, n, n)
314 wt(j, k, 1, 1, ie) = 1.0_rp / work(1, j, k)
315 wt(j, k, 2, 1, ie) = 1.0_rp / work(2, j, k)
316 wt(j, k, 3, 1, ie) = 1.0_rp / work(n - 1, j, k)
317 wt(j, k, 4, 1, ie) = 1.0_rp / work(n, j, k)
323 wt(i, k, 1, 2, ie) = 1.0_rp / work(i, 1, k)
324 wt(i, k, 2, 2, ie) = 1.0_rp / work(i, 2, k)
325 wt(i, k, 3, 2, ie) = 1.0_rp / work(i, n - 1, k)
326 wt(i, k, 4, 2, ie) = 1.0_rp / work(i, n, k)
332 wt(i, j, 1, 3, ie) = 1.0_rp / work(i, j, 1)
333 wt(i, j, 2, 3, ie) = 1.0_rp / work(i, j, 2)
334 wt(i, j, 3, 3, ie) = 1.0_rp / work(i, j, n - 1)
335 wt(i, j, 4, 3, ie) = 1.0_rp / work(i, j, n)
381 integer,
intent(in) :: l1, l2, nx, ny, nz, nelv
382 real(kind=rp),
intent(inout) :: arr1(nx, ny, nz, nelv), &
383 arr2(nx, ny, nz, nelv)
384 real(kind=rp),
intent(in) :: f1, f2
385 integer :: i, j, k, ie, i0, i1
392 arr1(l1 + 1, j, 1, ie) = f1 * arr1(l1 + 1, j, 1, ie) &
393 + f2 * arr2(l2 + 1, j, 1, ie)
394 arr1(nx - l1, j, 1, ie) = f1 * arr1(nx - l1, j, 1, ie) &
395 + f2 * arr2(nx - l2, j, 1, ie)
398 arr1(i, l1 + 1, 1, ie) = f1 * arr1(i, l1 + 1, 1, ie) &
399 + f2 * arr2(i, l2 + 1, 1, ie)
400 arr1(i, ny - l1, 1, ie) = f1 * arr1(i, ny - l1, 1, ie) &
401 + f2 * arr2(i, nx - l2, 1, ie)
408 arr1(l1 + 1, j, k, ie) = f1 * arr1(l1 + 1, j, k, ie) &
409 + f2 * arr2(l2 + 1, j, k, ie)
410 arr1(nx - l1, j, k, ie) = f1 * arr1(nx - l1, j, k, ie) &
411 + f2 * arr2(nx - l2, j, k, ie)
416 arr1(i, l1 + 1, k, ie) = f1 * arr1(i, l1 + 1, k, ie) &
417 + f2 * arr2(i, l2 + 1, k, ie)
418 arr1(i, nx - l1, k, ie) = f1 * arr1(i, nx - l1, k, ie) &
419 + f2 * arr2(i, nx - l2, k, ie)
424 arr1(i, j, l1 + 1, ie) = f1 * arr1(i, j, l1 + 1, ie) &
425 + f2 * arr2(i, j, l2 + 1, ie)
426 arr1(i, j, nx - l1, ie) = f1 * arr1(i, j, nx - l1, ie) &
427 + f2 * arr2(i, j, nx - l2, ie)
436 real(kind=rp),
dimension(this%dof%size()),
intent(inout) :: e, r
437 integer :: n, enx, eny, enz, ns
438 real(kind=rp),
parameter :: zero = 0.0_rp
439 real(kind=rp),
parameter :: one = 1.0_rp
440 type(c_ptr) :: e_d, r_d
441 associate(work1 => this%work1, work1_d => this%work1_d, &
442 work2 => this%work2, work2_d => this%work2_d)
445 enx = this%Xh_schwarz%lx
446 eny = this%Xh_schwarz%ly
447 enz = this%Xh_schwarz%lz
448 if (.not. this%msh%gdim .eq. 3) enz = 1
449 ns = enx * eny * enz * this%msh%nelv
450 if (neko_bcknd_device .eq. 1)
then
451 r_d = device_get_ptr(r)
452 e_d = device_get_ptr(e)
453 call device_event_record(this%event, glb_cmd_queue)
454 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
455 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
456 this%msh%nelv, aux_cmd_queue)
457 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
458 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
460 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
461 call this%gs_schwarz%op(work1, ns, gs_op_add, this%event)
462 call device_event_sync(this%event)
463 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
464 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
466 call this%fdm%compute(work2, work1, aux_cmd_queue)
468 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
469 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
470 call this%gs_schwarz%op(work2, ns, gs_op_add, this%event)
471 call device_event_sync(this%event)
473 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
474 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
475 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
476 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
477 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
478 this%msh%nelv, aux_cmd_queue)
480 this%gs_h%bcknd%gs_stream = aux_cmd_queue
481 call this%gs_h%op(e, n, gs_op_add, this%event)
483 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
484 call device_col2(e_d, this%wt_d, n, aux_cmd_queue)
487 if (.not. this%local_gs)
then
488 call device_event_sync(this%event)
489 this%gs_h%bcknd%gs_stream = glb_cmd_queue
492 call this%bclst%apply_scalar(r, n)
497 enx, eny, enz, this%msh%nelv)
498 call this%gs_schwarz%op(work1, ns, gs_op_add)
500 enx, eny, enz, this%msh%nelv)
502 call this%fdm%compute(work2, work1)
506 enx, eny, enz, this%msh%nelv)
507 call this%gs_schwarz%op(work2, ns, gs_op_add)
509 enx, eny, enz, this%msh%nelv)
511 enx, eny, enz, this%msh%nelv)
516 call this%gs_h%op(e, n, gs_op_add)
517 call this%bclst%apply_scalar(e, n)
519 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
526 integer,
intent(in) :: n, nelv
527 real(kind=rp),
intent(inout) :: e(n, n, n, nelv)
528 real(kind=rp),
intent(inout) :: wt(n, n, 4, 3, nelv)
529 integer :: ie, i, j, k
534 e(1, j, k, ie) = e(1, j, k, ie) * wt(j, k, 1, 1, ie)
535 e(2, j, k, ie) = e(2, j, k, ie) * wt(j, k, 2, 1, ie)
536 e(n - 1, j, k, ie) = e(n - 1, j, k, ie) * wt(j, k, 3, 1, ie)
537 e(n, j, k, ie) = e(n, j, k, ie) * wt(j, k, 4, 1, ie)
542 e(i, 1, k, ie) = e(i, 1, k, ie) * wt(i, k, 1, 2, ie)
543 e(i, 2, k, ie) = e(i, 2, k, ie) * wt(i, k, 2, 2, ie)
544 e(i, n - 1, k, ie) = e(i, n - 1, k, ie) * wt(i, k, 3, 2, ie)
545 e(i, n, k, ie) = e(i, n, k, ie) * wt(i, k, 4, 2, ie)
550 e(i, j, 1, ie) = e(i, j, 1, ie) * wt(i, j, 1, 3, ie)
551 e(i, j, 2, ie) = e(i, j, 2, ie) * wt(i, j, 2, 3, ie)
552 e(i, j, n - 1, ie) = e(i, j, n - 1, ie) * wt(i, j, 3, 3, ie)
553 e(i, j, n, ie) = e(i, j, n, ie) * wt(i, j, 4, 3, ie)