Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
schwarz.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module schwarz
62 use num_types, only : rp, i8
63 use math, only : rzero, rone
64 use mesh, only : mesh_t
65 use space, only : space_t, gll
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
71 use fdm, only : fdm_t
77 use bc_list, only : bc_list_t
78 use, intrinsic :: iso_c_binding, only : c_sizeof, c_ptr, c_null_ptr
79 !$ use omp_lib
80 implicit none
81 private
82
83 type, public :: schwarz_t
84 real(kind=rp), allocatable :: work1(:)
85 real(kind=rp), allocatable :: work2(:)
86 real(kind=rp), allocatable :: wt(:,:,:,:,:)
87 type(c_ptr) :: work1_d = c_null_ptr
88 type(c_ptr) :: work2_d = c_null_ptr
89 type(c_ptr) :: wt_d = c_null_ptr
90 type(space_t) :: xh_schwarz
91 type(gs_t) :: gs_schwarz
92 type(dofmap_t) :: dm_schwarz
93 type(fdm_t) :: fdm
94 type(space_t), pointer :: xh
95 type(bc_list_t), pointer :: bclst
96 type(dofmap_t), pointer :: dof
97 type(gs_t), pointer :: gs_h
98 type(mesh_t), pointer :: msh
99 type(c_ptr) :: event
100 logical :: local_gs = .false.
101 contains
102 procedure, pass(this) :: init => schwarz_init
103 procedure, pass(this) :: free => schwarz_free
104 procedure, pass(this) :: compute => schwarz_compute
105 end type schwarz_t
106
107contains
108
109 subroutine schwarz_init(this, Xh, dof, gs_h, bclst, msh)
110 class(schwarz_t), target, intent(inout) :: this
111 type(space_t), target, intent(inout) :: Xh
112 type(dofmap_t), target, intent(in) :: dof
113 type(gs_t), target, intent(inout) :: gs_h
114 type(mesh_t), target, intent(inout) :: msh
115 type(bc_list_t), target, intent(inout):: bclst
116 integer :: nthrds
117
118 call this%free()
119
120 call this%Xh_schwarz%init(gll, xh%lx+2, xh%lx+2, xh%lx+2)
121 call this%dm_schwarz%init(msh, this%Xh_schwarz)
122 call this%gs_schwarz%init(this%dm_schwarz)
123
124 allocate(this%work1(this%dm_schwarz%size()))
125 allocate(this%work2(this%dm_schwarz%size()))
126 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
127
128 call this%fdm%init(xh, dof, gs_h)
129
130
131 this%msh => msh
132 this%Xh => xh
133 this%bclst => bclst
134 this%dof => dof
135
136 nthrds = 1
137 !$omp parallel
138 !$ nthrds = omp_get_num_threads()
139 !$omp end parallel
140
141 ! If we are running multithreaded, we need a local gs object,
142 ! otherwise we can reuse the external one
143 if (nthrds .gt. 1) then
144 allocate(this%gs_h)
145 call this%gs_h%init(this%dof)
146 this%local_gs = .true.
147 else
148 this%gs_h => gs_h
149 this%local_gs = .false.
150 end if
151
152 if (neko_bcknd_device .eq. 1) then
153 call device_map(this%work1, this%work1_d,this%dm_schwarz%size())
154 call device_map(this%work2, this%work2_d,this%dm_schwarz%size())
155 end if
156
157 call schwarz_setup_wt(this)
158 if (neko_bcknd_device .eq. 1) then
159 call device_alloc(this%wt_d, &
160 int(this%dof%size() * c_sizeof(this%work1(1)), i8))
161 call rone(this%work1, this%dof%size())
162 call schwarz_wt3d(this%work1, this%wt, xh%lx, msh%nelv)
163 call device_memcpy(this%work1, this%wt_d, this%dof%size(), &
164 host_to_device, sync=.false.)
165 call device_event_create(this%event, 2)
166 end if
167 end subroutine schwarz_init
168
169 subroutine schwarz_free(this)
170 class(schwarz_t), intent(inout) :: this
171
172 if(allocated(this%work1)) deallocate(this%work1)
173 if(allocated(this%work2)) deallocate(this%work2)
174 if(allocated(this%wt)) deallocate(this%wt)
175
176 call this%Xh_schwarz%free()
177 call this%gs_schwarz%free()
178 !why cant I do this?
179 !call dofmap_free(this%dm_schwarz)
180 call this%fdm%free()
181
182 nullify(this%Xh)
183 nullify(this%bclst)
184 nullify(this%dof)
185 if (this%local_gs) then
186 call this%gs_h%free()
187 deallocate(this%gs_h)
188 end if
189 nullify(this%gs_h)
190 nullify(this%msh)
191
192 this%local_gs = .false.
193 end subroutine schwarz_free
195 subroutine schwarz_setup_wt(this)
196 class(schwarz_t), intent(inout) :: this
197 integer :: enx,eny,enz, n, ie, k, ns
198 real(kind=rp), parameter :: zero = 0.0
199 real(kind=rp), parameter :: one = 1.0
200 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
201 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
202
203 n = this%dof%size()
204
205 enx = xh_schwarz%lx
206 eny = xh_schwarz%ly
207 enz = xh_schwarz%lz
208 if(.not. msh%gdim .eq. 3) enz=1
209 ns = enx*eny*enz*msh%nelv
210
211 call rone(work2, ns)
212 call rzero(work1, ns)
213
214 ! Sum overlap region (border excluded)
215 ! Cred to PFF for this, very clever
216 call schwarz_extrude(work1, 0, zero, work2, 0, one , enx, eny, enz, msh%nelv)
217 if (neko_bcknd_device .eq. 1) then
218 call device_memcpy(work2, this%work2_d, ns, &
219 host_to_device, sync=.false.)
220 call this%gs_schwarz%op(work2, ns, gs_op_add)
221 call device_memcpy(work2, this%work2_d, ns, &
222 device_to_host, sync=.true.)
223 else
224 call this%gs_schwarz%op(work2, ns, gs_op_add)
225 end if
226 call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, msh%nelv)
227 call schwarz_extrude(work2, 2, one, work2, 0, one, enx, eny, enz, msh%nelv)
228
229 ! if(.not.if3d) then ! Go back to regular size array
230 ! call hsmg_schwarz_toreg2d(mg_work,mg_work(i),mg_nh(l))
231 ! else
232 call schwarz_toreg3d(work1, work2, xh%lx, msh%nelv)
233 ! endif
234
235 if (neko_bcknd_device .eq. 1) then
236 call device_memcpy(work1, this%work1_d, n, &
237 host_to_device, sync=.false.)
238 call this%gs_h%op(work1, n, gs_op_add)
239 call device_memcpy(work1, this%work1_d, n, &
240 device_to_host, sync=.true.)
241 else
242 call this%gs_h%op(work1, n, gs_op_add)
243 end if
244
245 k = 1
246 do ie = 1,msh%nelv
247 if (msh%gdim .eq. 2) then
248 call schwarz_setup_schwarz_wt2d_2(this%wt,ie,xh%lx, work1(k), msh%nelv)
249 end if
250 if (this%msh%gdim.eq. 3) then
251 call schwarz_setup_schwarz_wt3d_2(this%wt,ie,xh%lx, work1(k), msh%nelv)
252 k = k + xh%lxyz
253 end if
254 end do
255 end associate
256 end subroutine schwarz_setup_wt
257
259 subroutine schwarz_setup_schwarz_wt2d_2(wt,ie,n,work, nelv)
260 integer, intent(in) :: n, nelv
261 real(kind=rp), intent(inout) :: wt(n,4,2,nelv)
262 real(kind=rp), intent(inout) :: work(n,n)
263 integer :: ie,i,j
264 do j = 1, n
265 wt(j,1,1,ie) = 1.0_rp / work(1,j)
266 wt(j,2,1,ie) = 1.0_rp / work(2,j)
267 wt(j,3,1,ie) = 1.0_rp / work(n-1,j)
268 wt(j,4,1,ie) = 1.0_rp / work(n,j)
269 end do
270 do i = 1, n
271 wt(i,1,2,ie) = 1.0_rp / work(i,1)
272 wt(i,2,2,ie) = 1.0_rp / work(i,2)
273 wt(i,3,2,ie) = 1.0_rp / work(i,n-1)
274 wt(i,4,2,ie) = 1.0_rp / work(i,n)
275 end do
276
277 return
278 end subroutine schwarz_setup_schwarz_wt2d_2
279
281 subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
282 integer, intent(in) ::n, nelv, ie
283 real(kind=rp), intent(inout) :: wt(n,n,4,3,nelv)
284 real(kind=rp), intent(inout) :: work(n,n,n)
285 integer :: i,j,k
286
287 do k = 1, n
288 do j = 1, n
289 wt(j,k,1,1,ie) = 1.0_rp / work(1,j,k)
290 wt(j,k,2,1,ie) = 1.0_rp / work(2,j,k)
291 wt(j,k,3,1,ie) = 1.0_rp / work(n-1,j,k)
292 wt(j,k,4,1,ie) = 1.0_rp / work(n,j,k)
293 end do
294 end do
295
296 do k = 1, n
297 do i = 1, n
298 wt(i,k,1,2,ie) = 1.0_rp / work(i,1,k)
299 wt(i,k,2,2,ie) = 1.0_rp / work(i,2,k)
300 wt(i,k,3,2,ie) = 1.0_rp / work(i,n-1,k)
301 wt(i,k,4,2,ie) = 1.0_rp / work(i,n,k)
302 end do
303 end do
304
305 do j = 1, n
306 do i = 1, n
307 wt(i,j,1,3,ie) = 1.0_rp / work(i,j,1)
308 wt(i,j,2,3,ie) = 1.0_rp / work(i,j,2)
309 wt(i,j,3,3,ie) = 1.0_rp / work(i,j,n-1)
310 wt(i,j,4,3,ie) = 1.0_rp / work(i,j,n)
311 end do
312 end do
313
314 end subroutine schwarz_setup_schwarz_wt3d_2
315
317 subroutine schwarz_toreg3d(b, a, n, nelv)
318 integer, intent(in) :: n, nelv
319 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv)
320 real(kind=rp), intent(inout) :: b(n,n,n,nelv)
321 integer :: i, j, k, ie
322 do ie = 1, nelv
323 do k = 1, n
324 do j = 1, n
325 do i = 1, n
326 b(i,j,k,ie) = a(i,j,k,ie)
327 end do
328 end do
329 end do
330 end do
331 end subroutine schwarz_toreg3d
332
334 subroutine schwarz_toext3d(a, b, n, nelv)
335 integer, intent(in) :: n, nelv
336 real (kind=rp), intent(inout) :: a(0:n+1,0:n+1,0:n+1,nelv), b(n,n,n,nelv)
337 integer :: i,j,k,ie
338
339 call rzero(a, (n+2)*(n+2)*(n+2)*nelv)
340 do ie = 1, nelv
341 do k = 1, n
342 do j = 1, n
343 do i = 1, n
344 a(i,j,k,ie) = b(i,j,k,ie)
345 end do
346 end do
347 end do
348 end do
349 end subroutine schwarz_toext3d
350
354 subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
355 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
356 real(kind=rp), intent(inout) :: arr1(nx,ny,nz,nelv), arr2(nx,ny,nz,nelv)
357 real(kind=rp), intent(in) :: f1, f2
358 integer :: i, j, k, ie, i0, i1
359 i0=2
360 i1=nx-1
361
362 if(nz .eq. 1) then
363 do ie = 1, nelv
364 do j = i0, i1
365 arr1(l1+1 ,j,1,ie) = f1*arr1(l1+1 ,j,1,ie) &
366 +f2*arr2(l2+1 ,j,1,ie)
367 arr1(nx-l1,j,1,ie) = f1*arr1(nx-l1,j,1,ie) &
368 +f2*arr2(nx-l2,j,1,ie)
369 end do
370 do i = i0, i1
371 arr1(i,l1+1 ,1,ie) = f1*arr1(i,l1+1 ,1,ie) &
372 +f2*arr2(i,l2+1 ,1,ie)
373 arr1(i,ny-l1,1,ie) = f1*arr1(i,ny-l1,1,ie) &
374 +f2*arr2(i,nx-l2,1,ie)
375 end do
376 end do
377 else
378 do ie = 1, nelv
379 do k = i0, i1
380 do j = i0, i1
381 arr1(l1+1 ,j,k,ie) = f1*arr1(l1+1 ,j,k,ie) &
382 +f2*arr2(l2+1 ,j,k,ie)
383 arr1(nx-l1,j,k,ie) = f1*arr1(nx-l1,j,k,ie) &
384 +f2*arr2(nx-l2,j,k,ie)
385 end do
386 end do
387 do k = i0, i1
388 do i = i0, i1
389 arr1(i,l1+1 ,k,ie) = f1*arr1(i,l1+1 ,k,ie) &
390 +f2*arr2(i,l2+1 ,k,ie)
391 arr1(i,nx-l1,k,ie) = f1*arr1(i,nx-l1,k,ie) &
392 +f2*arr2(i,nx-l2,k,ie)
393 end do
394 end do
395 do j = i0, i1
396 do i = i0, i1
397 arr1(i,j,l1+1 ,ie) = f1*arr1(i,j,l1+1 ,ie) &
398 +f2*arr2(i,j,l2+1 ,ie)
399 arr1(i,j,nx-l1,ie) = f1*arr1(i,j,nx-l1,ie) &
400 +f2*arr2(i,j,nx-l2,ie)
401 end do
402 end do
403 end do
404 endif
405 end subroutine schwarz_extrude
406
407 subroutine schwarz_compute(this, e, r)
408 class(schwarz_t), intent(inout) :: this
409 real(kind=rp), dimension(this%dof%size()), intent(inout) :: e, r
410 integer :: n, enx, eny, enz, ns
411 real(kind=rp), parameter :: zero = 0.0_rp
412 real(kind=rp), parameter :: one = 1.0_rp
413 type(c_ptr) :: e_d, r_d
414 associate(work1 => this%work1, work1_d => this%work1_d,&
415 work2 => this%work2, work2_d => this%work2_d)
416
417 n = this%dof%size()
418 enx=this%Xh_schwarz%lx
419 eny=this%Xh_schwarz%ly
420 enz=this%Xh_schwarz%lz
421 if(.not. this%msh%gdim .eq. 3) enz=1
422 ns = enx*eny*enz*this%msh%nelv
423 if (neko_bcknd_device .eq. 1) then
424 r_d = device_get_ptr(r)
425 e_d = device_get_ptr(e)
426 call device_event_record(this%event, glb_cmd_queue)
427 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
428 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
429 this%msh%nelv, aux_cmd_queue)
430 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
431 enx,eny,enz, this%msh%nelv,aux_cmd_queue)
432
433 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
434 call this%gs_schwarz%op(work1, ns, gs_op_add,this%event)
435 call device_event_sync(this%event)
436 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
437 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
438
439 call this%fdm%compute(work2, work1,aux_cmd_queue) ! do local solves
440
441 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
442 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
443 call this%gs_schwarz%op(work2, ns, gs_op_add,this%event)
444 call device_event_sync(this%event)
445
446 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
447 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
448 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
449 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
450 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
451 this%msh%nelv, aux_cmd_queue)
452
453 this%gs_h%bcknd%gs_stream = aux_cmd_queue
454 call this%gs_h%op(e, n, gs_op_add, this%event)
455
456 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
457 call device_col2(e_d,this%wt_d, n, aux_cmd_queue)
458
459 ! switch back to the default stream on the shared gs
460 if (.not. this%local_gs) then
461 call device_event_sync(this%event)
462 this%gs_h%bcknd%gs_stream = glb_cmd_queue
463 end if
464 else
465 call this%bclst%apply_scalar(r, n)
466 call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
467
468 ! exchange interior nodes
469 call schwarz_extrude(work1, 0, zero, work1, 2, one, &
470 enx, eny, enz, this%msh%nelv)
471 call this%gs_schwarz%op(work1, ns, gs_op_add)
472 call schwarz_extrude(work1, 0, one, work1, 2, -one, &
473 enx, eny, enz, this%msh%nelv)
474
475 call this%fdm%compute(work2, work1) ! do local solves
476
477 ! Sum overlap region (border excluded)
478 call schwarz_extrude(work1, 0, zero, work2, 0, one, &
479 enx, eny, enz, this%msh%nelv)
480 call this%gs_schwarz%op(work2, ns, gs_op_add)
481 call schwarz_extrude(work2, 0, one, work1, 0, -one, &
482 enx, eny, enz, this%msh%nelv)
483 call schwarz_extrude(work2, 2, one, work2, 0, one, &
484 enx, eny, enz, this%msh%nelv)
485
486 call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
487
488 ! sum border nodes
489 call this%gs_h%op(e, n, gs_op_add)
490 call this%bclst%apply_scalar(e, n)
491
492 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
493 end if
494 end associate
495 end subroutine schwarz_compute
496
497 !Apply schwarz weights along the boundary of each element.
498 subroutine schwarz_wt3d(e,wt,n, nelv)
499 integer, intent(in) :: n, nelv
500 real(kind=rp), intent(inout) :: e(n,n,n,nelv)
501 real(kind=rp), intent(inout) :: wt(n,n,4,3,nelv)
502 integer :: ie, i, j, k
503
504 do ie = 1, nelv
505 do k = 1, n
506 do j = 1, n
507 e(1,j,k,ie) = e(1,j,k,ie) * wt(j,k,1,1,ie)
508 e(2,j,k,ie) = e(2,j,k,ie) * wt(j,k,2,1,ie)
509 e(n-1,j,k,ie) = e(n-1,j,k,ie) * wt(j,k,3,1,ie)
510 e(n,j,k,ie) = e(n,j,k,ie) * wt(j,k,4,1,ie)
511 end do
512 end do
513 do k = 1, n
514 do i = 3, n-2
515 e(i,1,k,ie) = e(i,1,k,ie) * wt(i,k,1,2,ie)
516 e(i,2,k,ie) = e(i,2,k,ie) * wt(i,k,2,2,ie)
517 e(i,n-1,k,ie) = e(i,n-1,k,ie) * wt(i,k,3,2,ie)
518 e(i,n,k,ie) = e(i,n,k,ie) * wt(i,k,4,2,ie)
519 end do
520 end do
521 do j = 3, n-2
522 do i = 3, n-2
523 e(i,j,1,ie) = e(i,j,1,ie) * wt(i,j,1,3,ie)
524 e(i,j,2,ie) = e(i,j,2,ie) * wt(i,j,2,3,ie)
525 e(i,j,n-1,ie) = e(i,j,n-1,ie) * wt(i,j,3,3,ie)
526 e(i,j,n,ie) = e(i,j,n,ie) * wt(i,j,4,3,ie)
527 end do
528 end do
529 end do
530 end subroutine schwarz_wt3d
531end module schwarz
Return the device pointer for an associated Fortran array.
Definition device.F90:96
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Copy data between host and device (or device and device)
Definition device.F90:66
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_schwarz_toreg3d(b_d, a_d, nx, nelv, stream)
subroutine, public device_schwarz_extrude(arr1_d, l1, f1, arr2_d, l2, f2, nx, ny, nz, nelv, stream)
subroutine, public device_schwarz_toext3d(a_d, b_d, nx, nelv, stream)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1290
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1309
integer, parameter, public host_to_device
Definition device.F90:47
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:187
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1203
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1244
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
Gather-scatter.
Definition math.f90:60
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:244
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:211
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Overlapping schwarz solves.
Definition schwarz.f90:61
subroutine schwarz_toreg3d(b, a, n, nelv)
convert array a from extended size to regular
Definition schwarz.f90:318
subroutine schwarz_setup_wt(this)
setup weights
Definition schwarz.f90:196
subroutine schwarz_init(this, xh, dof, gs_h, bclst, msh)
Definition schwarz.f90:110
subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
Sum values along rows l1, l2 with weights f1, f2 and store along row l1. Helps us avoid complicated c...
Definition schwarz.f90:355
subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 3d, second step.
Definition schwarz.f90:282
subroutine schwarz_compute(this, e, r)
Definition schwarz.f90:408
subroutine schwarz_wt3d(e, wt, n, nelv)
Definition schwarz.f90:499
subroutine schwarz_toext3d(a, b, n, nelv)
convert array a from original size to size extended array with border
Definition schwarz.f90:335
subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 2d, second step.
Definition schwarz.f90:260
subroutine schwarz_free(this)
Definition schwarz.f90:170
Defines a function space.
Definition space.f90:34
integer, parameter, public gll
Definition space.f90:48
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
The function space for the SEM solution fields.
Definition space.f90:62