Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
schwarz.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module schwarz
62 use num_types, only : rp, i8
63 use math, only : rzero, rone
64 use mesh, only : mesh_t
65 use space, only : space_t, gll
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
71 use fdm, only : fdm_t
78 use bc_list, only : bc_list_t
79 use, intrinsic :: iso_c_binding, only : c_sizeof, c_ptr, c_null_ptr, c_associated
80 !$ use omp_lib
81 implicit none
82 private
83
84 type, public :: schwarz_t
85 real(kind=rp), allocatable :: work1(:)
86 real(kind=rp), allocatable :: work2(:)
87 real(kind=rp), allocatable :: wt(:,:,:,:,:)
88 type(c_ptr) :: work1_d = c_null_ptr
89 type(c_ptr) :: work2_d = c_null_ptr
90 type(c_ptr) :: wt_d = c_null_ptr
91 type(space_t) :: xh_schwarz
92 type(gs_t) :: gs_schwarz
93 type(dofmap_t) :: dm_schwarz
94 type(fdm_t) :: fdm
95 type(space_t), pointer :: xh
96 type(bc_list_t), pointer :: bclst
97 type(dofmap_t), pointer :: dof
98 type(gs_t), pointer :: gs_h
99 type(mesh_t), pointer :: msh
100 type(c_ptr) :: event
101 logical :: local_gs = .false.
102 contains
103 procedure, pass(this) :: init => schwarz_init
104 procedure, pass(this) :: free => schwarz_free
105 procedure, pass(this) :: compute => schwarz_compute
106 end type schwarz_t
107
108contains
109
110 subroutine schwarz_init(this, Xh, dof, gs_h, bclst, msh)
111 class(schwarz_t), target, intent(inout) :: this
112 type(space_t), target, intent(inout) :: Xh
113 type(dofmap_t), target, intent(in) :: dof
114 type(gs_t), target, intent(inout) :: gs_h
115 type(mesh_t), target, intent(inout) :: msh
116 type(bc_list_t), target, intent(inout):: bclst
117 integer :: nthrds
118
119 call this%free()
120
121 call this%Xh_schwarz%init(gll, xh%lx+2, xh%lx+2, xh%lx+2)
122 call this%dm_schwarz%init(msh, this%Xh_schwarz)
123 call this%gs_schwarz%init(this%dm_schwarz)
124
125 allocate(this%work1(this%dm_schwarz%size()))
126 allocate(this%work2(this%dm_schwarz%size()))
127 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
128
129 call this%fdm%init(xh, dof, gs_h)
130
131
132 this%msh => msh
133 this%Xh => xh
134 this%bclst => bclst
135 this%dof => dof
136
137 nthrds = 1
138 !$omp parallel
139 !$ nthrds = omp_get_num_threads()
140 !$omp end parallel
141
142 ! If we are running multithreaded, we need a local gs object,
143 ! otherwise we can reuse the external one
144 if (nthrds .gt. 1) then
145 allocate(this%gs_h)
146 call this%gs_h%init(this%dof)
147 this%local_gs = .true.
148 else
149 this%gs_h => gs_h
150 this%local_gs = .false.
151 end if
152
153 if (neko_bcknd_device .eq. 1) then
154 call device_map(this%work1, this%work1_d,this%dm_schwarz%size())
155 call device_map(this%work2, this%work2_d,this%dm_schwarz%size())
156 end if
157
158 call schwarz_setup_wt(this)
159 if (neko_bcknd_device .eq. 1) then
160 call device_alloc(this%wt_d, &
161 int(this%dof%size(), i8) * int(c_sizeof(this%work1(1)), i8))
162 call rone(this%work1, this%dof%size())
163 call schwarz_wt3d(this%work1, this%wt, xh%lx, msh%nelv)
164 call device_memcpy(this%work1, this%wt_d, this%dof%size(), &
165 host_to_device, sync=.false.)
166 call device_event_create(this%event, 2)
167 end if
168 end subroutine schwarz_init
169
170 subroutine schwarz_free(this)
171 class(schwarz_t), intent(inout) :: this
172
173 if(allocated(this%work1)) deallocate(this%work1)
174 if(allocated(this%work2)) deallocate(this%work2)
175 if(allocated(this%wt)) deallocate(this%wt)
176
177 if (c_associated(this%work1_d)) then
178 call device_free(this%work1_d)
179 end if
180
181 if (c_associated(this%work2_d)) then
182 call device_free(this%work2_d)
183 end if
184
185 if (c_associated(this%wt_d)) then
186 call device_free(this%wt_d)
187 end if
188
189 call this%Xh_schwarz%free()
190 call this%gs_schwarz%free()
191 !why cant I do this?
192 !call dofmap_free(this%dm_schwarz)
193 call this%dm_schwarz%free()
194 call this%fdm%free()
195
196 nullify(this%Xh)
197 nullify(this%bclst)
198 nullify(this%dof)
199 if (this%local_gs) then
200 call this%gs_h%free()
201 deallocate(this%gs_h)
202 end if
203 nullify(this%gs_h)
204 nullify(this%msh)
205
206 this%local_gs = .false.
207 if (c_associated(this%event)) then
208 call device_event_destroy(this%event)
209 end if
210 end subroutine schwarz_free
212 subroutine schwarz_setup_wt(this)
213 class(schwarz_t), intent(inout) :: this
214 integer :: enx,eny,enz, n, ie, k, ns
215 real(kind=rp), parameter :: zero = 0.0
216 real(kind=rp), parameter :: one = 1.0
217 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
218 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
219
220 n = this%dof%size()
221
222 enx = xh_schwarz%lx
223 eny = xh_schwarz%ly
224 enz = xh_schwarz%lz
225 if(.not. msh%gdim .eq. 3) enz=1
226 ns = enx*eny*enz*msh%nelv
227
228 call rone(work2, ns)
229 call rzero(work1, ns)
230
231 ! Sum overlap region (border excluded)
232 ! Cred to PFF for this, very clever
233 call schwarz_extrude(work1, 0, zero, work2, 0, one , enx, eny, enz, msh%nelv)
234 if (neko_bcknd_device .eq. 1) then
235 call device_memcpy(work2, this%work2_d, ns, &
236 host_to_device, sync=.false.)
237 call this%gs_schwarz%op(work2, ns, gs_op_add)
238 call device_memcpy(work2, this%work2_d, ns, &
239 device_to_host, sync=.true.)
240 else
241 call this%gs_schwarz%op(work2, ns, gs_op_add)
242 end if
243 call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, msh%nelv)
244 call schwarz_extrude(work2, 2, one, work2, 0, one, enx, eny, enz, msh%nelv)
245
246 ! if(.not.if3d) then ! Go back to regular size array
247 ! call hsmg_schwarz_toreg2d(mg_work,mg_work(i),mg_nh(l))
248 ! else
249 call schwarz_toreg3d(work1, work2, xh%lx, msh%nelv)
250 ! endif
251
252 if (neko_bcknd_device .eq. 1) then
253 call device_memcpy(work1, this%work1_d, n, &
254 host_to_device, sync=.false.)
255 call this%gs_h%op(work1, n, gs_op_add)
256 call device_memcpy(work1, this%work1_d, n, &
257 device_to_host, sync=.true.)
258 else
259 call this%gs_h%op(work1, n, gs_op_add)
260 end if
261
262 k = 1
263 do ie = 1,msh%nelv
264 if (msh%gdim .eq. 2) then
265 call schwarz_setup_schwarz_wt2d_2(this%wt,ie,xh%lx, work1(k), msh%nelv)
266 end if
267 if (this%msh%gdim.eq. 3) then
268 call schwarz_setup_schwarz_wt3d_2(this%wt,ie,xh%lx, work1(k), msh%nelv)
269 k = k + xh%lxyz
270 end if
271 end do
272 end associate
273 end subroutine schwarz_setup_wt
274
276 subroutine schwarz_setup_schwarz_wt2d_2(wt,ie,n,work, nelv)
277 integer, intent(in) :: n, nelv
278 real(kind=rp), intent(inout) :: wt(n,4,2,nelv)
279 real(kind=rp), intent(inout) :: work(n,n)
280 integer :: ie,i,j
281 do j = 1, n
282 wt(j,1,1,ie) = 1.0_rp / work(1,j)
283 wt(j,2,1,ie) = 1.0_rp / work(2,j)
284 wt(j,3,1,ie) = 1.0_rp / work(n-1,j)
285 wt(j,4,1,ie) = 1.0_rp / work(n,j)
286 end do
287 do i = 1, n
288 wt(i,1,2,ie) = 1.0_rp / work(i,1)
289 wt(i,2,2,ie) = 1.0_rp / work(i,2)
290 wt(i,3,2,ie) = 1.0_rp / work(i,n-1)
291 wt(i,4,2,ie) = 1.0_rp / work(i,n)
292 end do
293
294 return
295 end subroutine schwarz_setup_schwarz_wt2d_2
296
298 subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
299 integer, intent(in) ::n, nelv, ie
300 real(kind=rp), intent(inout) :: wt(n,n,4,3,nelv)
301 real(kind=rp), intent(inout) :: work(n,n,n)
302 integer :: i,j,k
303
304 do k = 1, n
305 do j = 1, n
306 wt(j,k,1,1,ie) = 1.0_rp / work(1,j,k)
307 wt(j,k,2,1,ie) = 1.0_rp / work(2,j,k)
308 wt(j,k,3,1,ie) = 1.0_rp / work(n-1,j,k)
309 wt(j,k,4,1,ie) = 1.0_rp / work(n,j,k)
310 end do
311 end do
312
313 do k = 1, n
314 do i = 1, n
315 wt(i,k,1,2,ie) = 1.0_rp / work(i,1,k)
316 wt(i,k,2,2,ie) = 1.0_rp / work(i,2,k)
317 wt(i,k,3,2,ie) = 1.0_rp / work(i,n-1,k)
318 wt(i,k,4,2,ie) = 1.0_rp / work(i,n,k)
319 end do
320 end do
321
322 do j = 1, n
323 do i = 1, n
324 wt(i,j,1,3,ie) = 1.0_rp / work(i,j,1)
325 wt(i,j,2,3,ie) = 1.0_rp / work(i,j,2)
326 wt(i,j,3,3,ie) = 1.0_rp / work(i,j,n-1)
327 wt(i,j,4,3,ie) = 1.0_rp / work(i,j,n)
328 end do
329 end do
330
331 end subroutine schwarz_setup_schwarz_wt3d_2
332
334 subroutine schwarz_toreg3d(b, a, n, nelv)
335 integer, intent(in) :: n, nelv
336 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv)
337 real(kind=rp), intent(inout) :: b(n,n,n,nelv)
338 integer :: i, j, k, ie
339 do ie = 1, nelv
340 do k = 1, n
341 do j = 1, n
342 do i = 1, n
343 b(i,j,k,ie) = a(i,j,k,ie)
344 end do
345 end do
346 end do
347 end do
348 end subroutine schwarz_toreg3d
349
351 subroutine schwarz_toext3d(a, b, n, nelv)
352 integer, intent(in) :: n, nelv
353 real (kind=rp), intent(inout) :: a(0:n+1,0:n+1,0:n+1,nelv), b(n,n,n,nelv)
354 integer :: i,j,k,ie
355
356 call rzero(a, (n+2)*(n+2)*(n+2)*nelv)
357 do ie = 1, nelv
358 do k = 1, n
359 do j = 1, n
360 do i = 1, n
361 a(i,j,k,ie) = b(i,j,k,ie)
362 end do
363 end do
364 end do
365 end do
366 end subroutine schwarz_toext3d
367
371 subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
372 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
373 real(kind=rp), intent(inout) :: arr1(nx,ny,nz,nelv), arr2(nx,ny,nz,nelv)
374 real(kind=rp), intent(in) :: f1, f2
375 integer :: i, j, k, ie, i0, i1
376 i0=2
377 i1=nx-1
378
379 if(nz .eq. 1) then
380 do ie = 1, nelv
381 do j = i0, i1
382 arr1(l1+1 ,j,1,ie) = f1*arr1(l1+1 ,j,1,ie) &
383 +f2*arr2(l2+1 ,j,1,ie)
384 arr1(nx-l1,j,1,ie) = f1*arr1(nx-l1,j,1,ie) &
385 +f2*arr2(nx-l2,j,1,ie)
386 end do
387 do i = i0, i1
388 arr1(i,l1+1 ,1,ie) = f1*arr1(i,l1+1 ,1,ie) &
389 +f2*arr2(i,l2+1 ,1,ie)
390 arr1(i,ny-l1,1,ie) = f1*arr1(i,ny-l1,1,ie) &
391 +f2*arr2(i,nx-l2,1,ie)
392 end do
393 end do
394 else
395 do ie = 1, nelv
396 do k = i0, i1
397 do j = i0, i1
398 arr1(l1+1 ,j,k,ie) = f1*arr1(l1+1 ,j,k,ie) &
399 +f2*arr2(l2+1 ,j,k,ie)
400 arr1(nx-l1,j,k,ie) = f1*arr1(nx-l1,j,k,ie) &
401 +f2*arr2(nx-l2,j,k,ie)
402 end do
403 end do
404 do k = i0, i1
405 do i = i0, i1
406 arr1(i,l1+1 ,k,ie) = f1*arr1(i,l1+1 ,k,ie) &
407 +f2*arr2(i,l2+1 ,k,ie)
408 arr1(i,nx-l1,k,ie) = f1*arr1(i,nx-l1,k,ie) &
409 +f2*arr2(i,nx-l2,k,ie)
410 end do
411 end do
412 do j = i0, i1
413 do i = i0, i1
414 arr1(i,j,l1+1 ,ie) = f1*arr1(i,j,l1+1 ,ie) &
415 +f2*arr2(i,j,l2+1 ,ie)
416 arr1(i,j,nx-l1,ie) = f1*arr1(i,j,nx-l1,ie) &
417 +f2*arr2(i,j,nx-l2,ie)
418 end do
419 end do
420 end do
421 endif
422 end subroutine schwarz_extrude
423
424 subroutine schwarz_compute(this, e, r)
425 class(schwarz_t), intent(inout) :: this
426 real(kind=rp), dimension(this%dof%size()), intent(inout) :: e, r
427 integer :: n, enx, eny, enz, ns
428 real(kind=rp), parameter :: zero = 0.0_rp
429 real(kind=rp), parameter :: one = 1.0_rp
430 type(c_ptr) :: e_d, r_d
431 associate(work1 => this%work1, work1_d => this%work1_d,&
432 work2 => this%work2, work2_d => this%work2_d)
433
434 n = this%dof%size()
435 enx=this%Xh_schwarz%lx
436 eny=this%Xh_schwarz%ly
437 enz=this%Xh_schwarz%lz
438 if(.not. this%msh%gdim .eq. 3) enz=1
439 ns = enx*eny*enz*this%msh%nelv
440 if (neko_bcknd_device .eq. 1) then
441 r_d = device_get_ptr(r)
442 e_d = device_get_ptr(e)
443 call device_event_record(this%event, glb_cmd_queue)
444 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
445 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
446 this%msh%nelv, aux_cmd_queue)
447 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
448 enx,eny,enz, this%msh%nelv,aux_cmd_queue)
449
450 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
451 call this%gs_schwarz%op(work1, ns, gs_op_add,this%event)
452 call device_event_sync(this%event)
453 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
454 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
455
456 call this%fdm%compute(work2, work1,aux_cmd_queue) ! do local solves
457
458 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
459 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
460 call this%gs_schwarz%op(work2, ns, gs_op_add,this%event)
461 call device_event_sync(this%event)
462
463 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
464 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
465 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
466 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
467 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
468 this%msh%nelv, aux_cmd_queue)
469
470 this%gs_h%bcknd%gs_stream = aux_cmd_queue
471 call this%gs_h%op(e, n, gs_op_add, this%event)
472
473 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
474 call device_col2(e_d,this%wt_d, n, aux_cmd_queue)
475
476 ! switch back to the default stream on the shared gs
477 if (.not. this%local_gs) then
478 call device_event_sync(this%event)
479 this%gs_h%bcknd%gs_stream = glb_cmd_queue
480 end if
481 else
482 call this%bclst%apply_scalar(r, n)
483 call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
484
485 ! exchange interior nodes
486 call schwarz_extrude(work1, 0, zero, work1, 2, one, &
487 enx, eny, enz, this%msh%nelv)
488 call this%gs_schwarz%op(work1, ns, gs_op_add)
489 call schwarz_extrude(work1, 0, one, work1, 2, -one, &
490 enx, eny, enz, this%msh%nelv)
491
492 call this%fdm%compute(work2, work1) ! do local solves
493
494 ! Sum overlap region (border excluded)
495 call schwarz_extrude(work1, 0, zero, work2, 0, one, &
496 enx, eny, enz, this%msh%nelv)
497 call this%gs_schwarz%op(work2, ns, gs_op_add)
498 call schwarz_extrude(work2, 0, one, work1, 0, -one, &
499 enx, eny, enz, this%msh%nelv)
500 call schwarz_extrude(work2, 2, one, work2, 0, one, &
501 enx, eny, enz, this%msh%nelv)
502
503 call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
504
505 ! sum border nodes
506 call this%gs_h%op(e, n, gs_op_add)
507 call this%bclst%apply_scalar(e, n)
508
509 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
510 end if
511 end associate
512 end subroutine schwarz_compute
513
514 !Apply schwarz weights along the boundary of each element.
515 subroutine schwarz_wt3d(e,wt,n, nelv)
516 integer, intent(in) :: n, nelv
517 real(kind=rp), intent(inout) :: e(n,n,n,nelv)
518 real(kind=rp), intent(inout) :: wt(n,n,4,3,nelv)
519 integer :: ie, i, j, k
520
521 do ie = 1, nelv
522 do k = 1, n
523 do j = 1, n
524 e(1,j,k,ie) = e(1,j,k,ie) * wt(j,k,1,1,ie)
525 e(2,j,k,ie) = e(2,j,k,ie) * wt(j,k,2,1,ie)
526 e(n-1,j,k,ie) = e(n-1,j,k,ie) * wt(j,k,3,1,ie)
527 e(n,j,k,ie) = e(n,j,k,ie) * wt(j,k,4,1,ie)
528 end do
529 end do
530 do k = 1, n
531 do i = 3, n-2
532 e(i,1,k,ie) = e(i,1,k,ie) * wt(i,k,1,2,ie)
533 e(i,2,k,ie) = e(i,2,k,ie) * wt(i,k,2,2,ie)
534 e(i,n-1,k,ie) = e(i,n-1,k,ie) * wt(i,k,3,2,ie)
535 e(i,n,k,ie) = e(i,n,k,ie) * wt(i,k,4,2,ie)
536 end do
537 end do
538 do j = 3, n-2
539 do i = 3, n-2
540 e(i,j,1,ie) = e(i,j,1,ie) * wt(i,j,1,3,ie)
541 e(i,j,2,ie) = e(i,j,2,ie) * wt(i,j,2,3,ie)
542 e(i,j,n-1,ie) = e(i,j,n-1,ie) * wt(i,j,3,3,ie)
543 e(i,j,n,ie) = e(i,j,n,ie) * wt(i,j,4,3,ie)
544 end do
545 end do
546 end do
547 end subroutine schwarz_wt3d
548end module schwarz
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_schwarz_toreg3d(b_d, a_d, nx, nelv, stream)
subroutine, public device_schwarz_extrude(arr1_d, l1, f1, arr2_d, l2, f2, nx, ny, nz, nelv, stream)
subroutine, public device_schwarz_toext3d(a_d, b_d, nx, nelv, stream)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1295
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1279
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1208
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1249
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
Gather-scatter.
Definition math.f90:60
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Overlapping schwarz solves.
Definition schwarz.f90:61
subroutine schwarz_toreg3d(b, a, n, nelv)
convert array a from extended size to regular
Definition schwarz.f90:335
subroutine schwarz_setup_wt(this)
setup weights
Definition schwarz.f90:213
subroutine schwarz_init(this, xh, dof, gs_h, bclst, msh)
Definition schwarz.f90:111
subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
Sum values along rows l1, l2 with weights f1, f2 and store along row l1. Helps us avoid complicated c...
Definition schwarz.f90:372
subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 3d, second step.
Definition schwarz.f90:299
subroutine schwarz_compute(this, e, r)
Definition schwarz.f90:425
subroutine schwarz_wt3d(e, wt, n, nelv)
Definition schwarz.f90:516
subroutine schwarz_toext3d(a, b, n, nelv)
convert array a from original size to size extended array with border
Definition schwarz.f90:352
subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 2d, second step.
Definition schwarz.f90:277
subroutine schwarz_free(this)
Definition schwarz.f90:171
Defines a function space.
Definition space.f90:34
integer, parameter, public gll
Definition space.f90:49
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
The function space for the SEM solution fields.
Definition space.f90:63