74 use,
intrinsic :: iso_c_binding
79 real(kind=
rp),
allocatable :: work1(:)
80 real(kind=
rp),
allocatable :: work2(:)
81 real(kind=
rp),
allocatable :: wt(:,:,:,:,:)
82 type(c_ptr) :: work1_d = c_null_ptr
83 type(c_ptr) :: work2_d = c_null_ptr
84 type(c_ptr) :: wt_d = c_null_ptr
92 type(
gs_t),
pointer :: gs_h
104 class(
schwarz_t),
target,
intent(inout) :: this
105 type(
space_t),
target,
intent(inout) :: Xh
106 type(
dofmap_t),
target,
intent(inout) :: dm
107 type(
gs_t),
target,
intent(inout) :: gs_h
108 type(
mesh_t),
target,
intent(inout) :: msh
109 type(
bc_list_t),
target,
intent(inout):: bclst
113 call this%Xh_schwarz%init(
gll, xh%lx+2, xh%lx+2, xh%lx+2)
114 call this%dm_schwarz%init(msh, this%Xh_schwarz)
115 call this%gs_schwarz%init(this%dm_schwarz)
117 allocate(this%work1(this%dm_schwarz%size()))
118 allocate(this%work2(this%dm_schwarz%size()))
119 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
121 call this%fdm%init(xh, dm, gs_h)
130 call device_map(this%work1, this%work1_d,this%dm_schwarz%size())
131 call device_map(this%work2, this%work2_d,this%dm_schwarz%size())
138 call rone(this%work1, this%dm%size())
149 if(
allocated(this%work1))
deallocate(this%work1)
150 if(
allocated(this%work2))
deallocate(this%work2)
151 if(
allocated(this%wt))
deallocate(this%wt)
153 call this%Xh_schwarz%free()
154 call this%gs_schwarz%free()
168 integer :: enx,eny,enz, n, ie, k, ns
169 real(kind=
rp),
parameter :: zero = 0.0
170 real(kind=
rp),
parameter :: one = 1.0
171 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
172 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
179 if(.not. msh%gdim .eq. 3) enz=1
180 ns = enx*eny*enz*msh%nelv
183 call rzero(work1, ns)
187 call schwarz_extrude(work1, 0, zero, work2, 0, one , enx, eny, enz, msh%nelv)
191 call this%gs_schwarz%op(work2, ns, gs_op_add)
195 call this%gs_schwarz%op(work2, ns, gs_op_add)
197 call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, msh%nelv)
198 call schwarz_extrude(work2, 2, one, work2, 0, one, enx, eny, enz, msh%nelv)
209 call this%gs_h%op(work1, n, gs_op_add)
213 call this%gs_h%op(work1, n, gs_op_add)
218 if (msh%gdim .eq. 2)
then
221 if (this%msh%gdim.eq. 3)
then
231 integer,
intent(in) :: n, nelv
232 real(kind=rp),
intent(inout) :: wt(n,4,2,nelv)
233 real(kind=rp),
intent(inout) :: work(n,n)
236 wt(j,1,1,ie) = 1.0_rp / work(1,j)
237 wt(j,2,1,ie) = 1.0_rp / work(2,j)
238 wt(j,3,1,ie) = 1.0_rp / work(n-1,j)
239 wt(j,4,1,ie) = 1.0_rp / work(n,j)
242 wt(i,1,2,ie) = 1.0_rp / work(i,1)
243 wt(i,2,2,ie) = 1.0_rp / work(i,2)
244 wt(i,3,2,ie) = 1.0_rp / work(i,n-1)
245 wt(i,4,2,ie) = 1.0_rp / work(i,n)
253 integer,
intent(in) ::n, nelv, ie
254 real(kind=rp),
intent(inout) :: wt(n,n,4,3,nelv)
255 real(kind=rp),
intent(inout) :: work(n,n,n)
260 wt(j,k,1,1,ie) = 1.0_rp / work(1,j,k)
261 wt(j,k,2,1,ie) = 1.0_rp / work(2,j,k)
262 wt(j,k,3,1,ie) = 1.0_rp / work(n-1,j,k)
263 wt(j,k,4,1,ie) = 1.0_rp / work(n,j,k)
269 wt(i,k,1,2,ie) = 1.0_rp / work(i,1,k)
270 wt(i,k,2,2,ie) = 1.0_rp / work(i,2,k)
271 wt(i,k,3,2,ie) = 1.0_rp / work(i,n-1,k)
272 wt(i,k,4,2,ie) = 1.0_rp / work(i,n,k)
278 wt(i,j,1,3,ie) = 1.0_rp / work(i,j,1)
279 wt(i,j,2,3,ie) = 1.0_rp / work(i,j,2)
280 wt(i,j,3,3,ie) = 1.0_rp / work(i,j,n-1)
281 wt(i,j,4,3,ie) = 1.0_rp / work(i,j,n)
289 integer,
intent(in) :: n, nelv
290 real(kind=rp),
intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv)
291 real(kind=rp),
intent(inout) :: b(n,n,n,nelv)
292 integer :: i, j, k, ie
297 b(i,j,k,ie) = a(i,j,k,ie)
306 integer,
intent(in) :: n, nelv
307 real (kind=rp),
intent(inout) :: a(0:n+1,0:n+1,0:n+1,nelv), b(n,n,n,nelv)
310 call rzero(a, (n+2)*(n+2)*(n+2)*nelv)
315 a(i,j,k,ie) = b(i,j,k,ie)
326 integer,
intent(in) :: l1, l2, nx, ny, nz, nelv
327 real(kind=rp),
intent(inout) :: arr1(nx,ny,nz,nelv), arr2(nx,ny,nz,nelv)
328 real(kind=rp),
intent(in) :: f1, f2
329 integer :: i, j, k, ie, i0, i1
336 arr1(l1+1 ,j,1,ie) = f1*arr1(l1+1 ,j,1,ie) &
337 +f2*arr2(l2+1 ,j,1,ie)
338 arr1(nx-l1,j,1,ie) = f1*arr1(nx-l1,j,1,ie) &
339 +f2*arr2(nx-l2,j,1,ie)
342 arr1(i,l1+1 ,1,ie) = f1*arr1(i,l1+1 ,1,ie) &
343 +f2*arr2(i,l2+1 ,1,ie)
344 arr1(i,ny-l1,1,ie) = f1*arr1(i,ny-l1,1,ie) &
345 +f2*arr2(i,nx-l2,1,ie)
352 arr1(l1+1 ,j,k,ie) = f1*arr1(l1+1 ,j,k,ie) &
353 +f2*arr2(l2+1 ,j,k,ie)
354 arr1(nx-l1,j,k,ie) = f1*arr1(nx-l1,j,k,ie) &
355 +f2*arr2(nx-l2,j,k,ie)
360 arr1(i,l1+1 ,k,ie) = f1*arr1(i,l1+1 ,k,ie) &
361 +f2*arr2(i,l2+1 ,k,ie)
362 arr1(i,nx-l1,k,ie) = f1*arr1(i,nx-l1,k,ie) &
363 +f2*arr2(i,nx-l2,k,ie)
368 arr1(i,j,l1+1 ,ie) = f1*arr1(i,j,l1+1 ,ie) &
369 +f2*arr2(i,j,l2+1 ,ie)
370 arr1(i,j,nx-l1,ie) = f1*arr1(i,j,nx-l1,ie) &
371 +f2*arr2(i,j,nx-l2,ie)
380 real(kind=rp),
dimension(this%dm%size()),
intent(inout) :: e, r
381 integer :: n, enx, eny, enz, ns
382 real(kind=rp),
parameter :: zero = 0.0_rp
383 real(kind=rp),
parameter :: one = 1.0_rp
384 type(c_ptr) :: e_d, r_d
385 associate(work1 => this%work1, work1_d => this%work1_d,&
386 work2 => this%work2, work2_d => this%work2_d)
389 enx=this%Xh_schwarz%lx
390 eny=this%Xh_schwarz%ly
391 enz=this%Xh_schwarz%lz
392 if(.not. this%msh%gdim .eq. 3) enz=1
393 ns = enx*eny*enz*this%msh%nelv
394 if (neko_bcknd_device .eq. 1)
then
395 r_d = device_get_ptr(r)
396 e_d = device_get_ptr(e)
397 call device_event_record(this%event, glb_cmd_queue)
398 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
399 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
400 this%msh%nelv, aux_cmd_queue)
401 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
402 enx,eny,enz, this%msh%nelv,aux_cmd_queue)
404 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
405 call this%gs_schwarz%op(work1, ns, gs_op_add,this%event)
406 call device_event_sync(this%event)
407 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
408 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
410 call this%fdm%compute(work2, work1,aux_cmd_queue)
412 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
413 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
414 call this%gs_schwarz%op(work2, ns, gs_op_add,this%event)
415 call device_event_sync(this%event)
417 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
418 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
419 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
420 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
421 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
422 this%msh%nelv, aux_cmd_queue)
424 call device_event_record(this%event,aux_cmd_queue)
425 call device_event_sync(this%event)
427 call this%gs_h%op(e, n, gs_op_add, this%event)
428 call bc_list_apply_scalar(this%bclst, e, n)
429 call device_col2(e_d,this%wt_d, n)
430 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
432 call bc_list_apply_scalar(this%bclst, r, n)
437 enx, eny, enz, this%msh%nelv)
438 call this%gs_schwarz%op(work1, ns, gs_op_add)
440 enx, eny, enz, this%msh%nelv)
442 call this%fdm%compute(work2, work1)
446 enx, eny, enz, this%msh%nelv)
447 call this%gs_schwarz%op(work2, ns, gs_op_add)
449 enx, eny, enz, this%msh%nelv)
451 enx, eny, enz, this%msh%nelv)
456 call this%gs_h%op(e, n, gs_op_add)
457 call bc_list_apply_scalar(this%bclst, e, n)
459 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
466 integer,
intent(in) :: n, nelv
467 real(kind=rp),
intent(inout) :: e(n,n,n,nelv)
468 real(kind=rp),
intent(inout) :: wt(n,n,4,3,nelv)
469 integer :: ie, i, j, k
474 e(1 ,j,k,ie) = e(1 ,j,k,ie) * wt(j,k,1,1,ie)
475 e(2 ,j,k,ie) = e(2 ,j,k,ie) * wt(j,k,2,1,ie)
476 e(n-1,j,k,ie) = e(n-1,j,k,ie) * wt(j,k,3,1,ie)
477 e(n ,j,k,ie) = e(n ,j,k,ie) * wt(j,k,4,1,ie)
482 e(i,1 ,k,ie) = e(i,1 ,k,ie) * wt(i,k,1,2,ie)
483 e(i,2 ,k,ie) = e(i,2 ,k,ie) * wt(i,k,2,2,ie)
484 e(i,n-1,k,ie) = e(i,n-1,k,ie) * wt(i,k,3,2,ie)
485 e(i,n ,k,ie) = e(i,n ,k,ie) * wt(i,k,4,2,ie)
490 e(i,j,1 ,ie) = e(i,j,1 ,ie) * wt(i,j,1,3,ie)
491 e(i,j,2 ,ie) = e(i,j,2 ,ie) * wt(i,j,2,3,ie)
492 e(i,j,n-1,ie) = e(i,j,n-1,ie) * wt(i,j,3,3,ie)
493 e(i,j,n ,ie) = e(i,j,n ,ie) * wt(i,j,4,3,ie)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a boundary condition.
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
integer, parameter, public device_to_host
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Defines a mapping of the degrees of freedom.
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
subroutine, public rone(a, n)
Set all elements to one.
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_device
integer, parameter, public i8
integer, parameter, public rp
Global precision used in computations.
Overlapping schwarz solves.
subroutine schwarz_toreg3d(b, a, n, nelv)
convert array a from extended size to regular
subroutine schwarz_setup_wt(this)
setup weights
subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
Sum values along rows l1, l2 with weights f1, f2 and store along row l1. Helps us avoid complicated c...
subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 3d, second step.
subroutine schwarz_compute(this, e, r)
subroutine schwarz_wt3d(e, wt, n, nelv)
subroutine schwarz_toext3d(a, b, n, nelv)
convert array a from original size to size extended array with border
subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 2d, second step.
subroutine schwarz_init(this, Xh, dm, gs_h, bclst, msh)
subroutine schwarz_free(this)
Defines a function space.
integer, parameter, public gll
A list of boundary conditions.
The function space for the SEM solution fields.