Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
schwarz.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
61module schwarz
62 use num_types, only : rp, i8
63 use math, only : rzero, rone
64 use mesh, only : mesh_t
65 use space, only : space_t, gll
66 use dofmap, only : dofmap_t
67 use gather_scatter, only : gs_t, gs_op_add
71 use fdm, only : fdm_t
78 use bc_list, only : bc_list_t
79 use, intrinsic :: iso_c_binding, only : c_sizeof, c_ptr, c_null_ptr, c_associated
80 !$ use omp_lib
81 implicit none
82 private
83
84 type, public :: schwarz_t
85 real(kind=rp), allocatable :: work1(:)
86 real(kind=rp), allocatable :: work2(:)
87 real(kind=rp), allocatable :: wt(:,:,:,:,:)
88 type(c_ptr) :: work1_d = c_null_ptr
89 type(c_ptr) :: work2_d = c_null_ptr
90 type(c_ptr) :: wt_d = c_null_ptr
91 type(space_t) :: xh_schwarz
92 type(gs_t) :: gs_schwarz
93 type(dofmap_t) :: dm_schwarz
94 type(fdm_t) :: fdm
95 type(space_t), pointer :: xh => null()
96 type(bc_list_t), pointer :: bclst => null()
97 type(dofmap_t), pointer :: dof => null()
98 type(gs_t), pointer :: gs_h => null()
99 type(mesh_t), pointer :: msh => null()
100 type(c_ptr) :: event = c_null_ptr
101 logical :: local_gs = .false.
102 type(gs_t), allocatable :: gs_h_local
103 contains
104 procedure, pass(this) :: init => schwarz_init
105 procedure, pass(this) :: free => schwarz_free
106 procedure, pass(this) :: compute => schwarz_compute
107 end type schwarz_t
108
109contains
110
111 subroutine schwarz_init(this, Xh, dof, gs_h, bclst, msh)
112 class(schwarz_t), target, intent(inout) :: this
113 type(space_t), target, intent(inout) :: Xh
114 type(dofmap_t), target, intent(in) :: dof
115 type(gs_t), target, intent(inout) :: gs_h
116 type(mesh_t), target, intent(inout) :: msh
117 type(bc_list_t), target, intent(inout):: bclst
118 integer :: nthrds
119
120 call this%free()
121
122 call this%Xh_schwarz%init(gll, xh%lx+2, xh%lx+2, xh%lx+2)
123 call this%dm_schwarz%init(msh, this%Xh_schwarz)
124 call this%gs_schwarz%init(this%dm_schwarz)
125
126 allocate(this%work1(this%dm_schwarz%size()))
127 allocate(this%work2(this%dm_schwarz%size()))
128 allocate(this%wt(xh%lx, xh%lx, 4, msh%gdim, msh%nelv))
129
130 call this%fdm%init(xh, dof, gs_h)
131
132
133 this%msh => msh
134 this%Xh => xh
135 this%bclst => bclst
136 this%dof => dof
137
138 nthrds = 1
139 !$omp parallel
140 !$ nthrds = omp_get_num_threads()
141 !$omp end parallel
142
143 ! If we are running multithreaded, we need a local gs object,
144 ! otherwise we can reuse the external one
145 if (nthrds .gt. 1) then
146 allocate(this%gs_h_local)
147 call this%gs_h_local%init(this%dof)
148 this%gs_h => this%gs_h_local
149 this%local_gs = .true.
150 else
151 this%gs_h => gs_h
152 this%local_gs = .false.
153 end if
154
155 if (neko_bcknd_device .eq. 1) then
156 call device_map(this%work1, this%work1_d,this%dm_schwarz%size())
157 call device_map(this%work2, this%work2_d,this%dm_schwarz%size())
158 end if
159
160 call schwarz_setup_wt(this)
161 if (neko_bcknd_device .eq. 1) then
162 call device_alloc(this%wt_d, &
163 int(this%dof%size(), i8) * int(c_sizeof(this%work1(1)), i8))
164 call rone(this%work1, this%dof%size())
165 call schwarz_wt3d(this%work1, this%wt, xh%lx, msh%nelv)
166 call device_memcpy(this%work1, this%wt_d, this%dof%size(), &
167 host_to_device, sync=.false.)
168 call device_event_create(this%event, 2)
169 end if
170 end subroutine schwarz_init
171
172 subroutine schwarz_free(this)
173 class(schwarz_t), intent(inout) :: this
174
175 if(allocated(this%work1)) deallocate(this%work1)
176 if(allocated(this%work2)) deallocate(this%work2)
177 if(allocated(this%wt)) deallocate(this%wt)
178
179 if (c_associated(this%work1_d)) then
180 call device_free(this%work1_d)
181 end if
182
183 if (c_associated(this%work2_d)) then
184 call device_free(this%work2_d)
185 end if
186
187 if (c_associated(this%wt_d)) then
188 call device_free(this%wt_d)
189 end if
190
191 call this%Xh_schwarz%free()
192 call this%gs_schwarz%free()
193 !why cant I do this?
194 !call dofmap_free(this%dm_schwarz)
195 call this%dm_schwarz%free()
196 call this%fdm%free()
197
198 nullify(this%Xh)
199 nullify(this%bclst)
200 nullify(this%dof)
201 if (allocated(this%gs_h_local)) then
202 call this%gs_h_local%free()
203 deallocate(this%gs_h_local)
204 end if
205 nullify(this%gs_h)
206 nullify(this%msh)
207
208 this%local_gs = .false.
209 if (c_associated(this%event)) then
210 call device_event_destroy(this%event)
211 end if
212 end subroutine schwarz_free
214 subroutine schwarz_setup_wt(this)
215 class(schwarz_t), intent(inout) :: this
216 integer :: enx,eny,enz, n, ie, k, ns
217 real(kind=rp), parameter :: zero = 0.0
218 real(kind=rp), parameter :: one = 1.0
219 associate(work1 => this%work1, work2 => this%work2, msh => this%msh, &
220 xh => this%Xh, xh_schwarz => this%Xh_schwarz)
221
222 n = this%dof%size()
223
224 enx = xh_schwarz%lx
225 eny = xh_schwarz%ly
226 enz = xh_schwarz%lz
227 if(.not. msh%gdim .eq. 3) enz=1
228 ns = enx*eny*enz*msh%nelv
229
230 call rone(work2, ns)
231 call rzero(work1, ns)
232
233 ! Sum overlap region (border excluded)
234 ! Cred to PFF for this, very clever
235 call schwarz_extrude(work1, 0, zero, work2, 0, one , enx, eny, enz, msh%nelv)
236 if (neko_bcknd_device .eq. 1) then
237 call device_memcpy(work2, this%work2_d, ns, &
238 host_to_device, sync=.false.)
239 call this%gs_schwarz%op(work2, ns, gs_op_add)
240 call device_memcpy(work2, this%work2_d, ns, &
241 device_to_host, sync=.true.)
242 else
243 call this%gs_schwarz%op(work2, ns, gs_op_add)
244 end if
245 call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, msh%nelv)
246 call schwarz_extrude(work2, 2, one, work2, 0, one, enx, eny, enz, msh%nelv)
247
248 ! if(.not.if3d) then ! Go back to regular size array
249 ! call hsmg_schwarz_toreg2d(mg_work,mg_work(i),mg_nh(l))
250 ! else
251 call schwarz_toreg3d(work1, work2, xh%lx, msh%nelv)
252 ! endif
253
254 if (neko_bcknd_device .eq. 1) then
255 call device_memcpy(work1, this%work1_d, n, &
256 host_to_device, sync=.false.)
257 call this%gs_h%op(work1, n, gs_op_add)
258 call device_memcpy(work1, this%work1_d, n, &
259 device_to_host, sync=.true.)
260 else
261 call this%gs_h%op(work1, n, gs_op_add)
262 end if
263
264 k = 1
265 do ie = 1,msh%nelv
266 if (msh%gdim .eq. 2) then
267 call schwarz_setup_schwarz_wt2d_2(this%wt,ie,xh%lx, work1(k), msh%nelv)
268 end if
269 if (this%msh%gdim.eq. 3) then
270 call schwarz_setup_schwarz_wt3d_2(this%wt,ie,xh%lx, work1(k), msh%nelv)
271 k = k + xh%lxyz
272 end if
273 end do
274 end associate
275 end subroutine schwarz_setup_wt
276
278 subroutine schwarz_setup_schwarz_wt2d_2(wt,ie,n,work, nelv)
279 integer, intent(in) :: n, nelv
280 real(kind=rp), intent(inout) :: wt(n,4,2,nelv)
281 real(kind=rp), intent(inout) :: work(n,n)
282 integer :: ie,i,j
283 do j = 1, n
284 wt(j,1,1,ie) = 1.0_rp / work(1,j)
285 wt(j,2,1,ie) = 1.0_rp / work(2,j)
286 wt(j,3,1,ie) = 1.0_rp / work(n-1,j)
287 wt(j,4,1,ie) = 1.0_rp / work(n,j)
288 end do
289 do i = 1, n
290 wt(i,1,2,ie) = 1.0_rp / work(i,1)
291 wt(i,2,2,ie) = 1.0_rp / work(i,2)
292 wt(i,3,2,ie) = 1.0_rp / work(i,n-1)
293 wt(i,4,2,ie) = 1.0_rp / work(i,n)
294 end do
295
296 return
297 end subroutine schwarz_setup_schwarz_wt2d_2
298
300 subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
301 integer, intent(in) ::n, nelv, ie
302 real(kind=rp), intent(inout) :: wt(n,n,4,3,nelv)
303 real(kind=rp), intent(inout) :: work(n,n,n)
304 integer :: i,j,k
305
306 do k = 1, n
307 do j = 1, n
308 wt(j,k,1,1,ie) = 1.0_rp / work(1,j,k)
309 wt(j,k,2,1,ie) = 1.0_rp / work(2,j,k)
310 wt(j,k,3,1,ie) = 1.0_rp / work(n-1,j,k)
311 wt(j,k,4,1,ie) = 1.0_rp / work(n,j,k)
312 end do
313 end do
314
315 do k = 1, n
316 do i = 1, n
317 wt(i,k,1,2,ie) = 1.0_rp / work(i,1,k)
318 wt(i,k,2,2,ie) = 1.0_rp / work(i,2,k)
319 wt(i,k,3,2,ie) = 1.0_rp / work(i,n-1,k)
320 wt(i,k,4,2,ie) = 1.0_rp / work(i,n,k)
321 end do
322 end do
323
324 do j = 1, n
325 do i = 1, n
326 wt(i,j,1,3,ie) = 1.0_rp / work(i,j,1)
327 wt(i,j,2,3,ie) = 1.0_rp / work(i,j,2)
328 wt(i,j,3,3,ie) = 1.0_rp / work(i,j,n-1)
329 wt(i,j,4,3,ie) = 1.0_rp / work(i,j,n)
330 end do
331 end do
332
333 end subroutine schwarz_setup_schwarz_wt3d_2
334
336 subroutine schwarz_toreg3d(b, a, n, nelv)
337 integer, intent(in) :: n, nelv
338 real(kind=rp), intent(inout) :: a(0:n+1, 0:n+1, 0:n+1, nelv)
339 real(kind=rp), intent(inout) :: b(n,n,n,nelv)
340 integer :: i, j, k, ie
341 do ie = 1, nelv
342 do k = 1, n
343 do j = 1, n
344 do i = 1, n
345 b(i,j,k,ie) = a(i,j,k,ie)
346 end do
347 end do
348 end do
349 end do
350 end subroutine schwarz_toreg3d
351
353 subroutine schwarz_toext3d(a, b, n, nelv)
354 integer, intent(in) :: n, nelv
355 real (kind=rp), intent(inout) :: a(0:n+1,0:n+1,0:n+1,nelv), b(n,n,n,nelv)
356 integer :: i,j,k,ie
357
358 call rzero(a, (n+2)*(n+2)*(n+2)*nelv)
359 do ie = 1, nelv
360 do k = 1, n
361 do j = 1, n
362 do i = 1, n
363 a(i,j,k,ie) = b(i,j,k,ie)
364 end do
365 end do
366 end do
367 end do
368 end subroutine schwarz_toext3d
369
373 subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
374 integer, intent(in) :: l1, l2, nx, ny, nz, nelv
375 real(kind=rp), intent(inout) :: arr1(nx,ny,nz,nelv), arr2(nx,ny,nz,nelv)
376 real(kind=rp), intent(in) :: f1, f2
377 integer :: i, j, k, ie, i0, i1
378 i0=2
379 i1=nx-1
380
381 if(nz .eq. 1) then
382 do ie = 1, nelv
383 do j = i0, i1
384 arr1(l1+1 ,j,1,ie) = f1*arr1(l1+1 ,j,1,ie) &
385 +f2*arr2(l2+1 ,j,1,ie)
386 arr1(nx-l1,j,1,ie) = f1*arr1(nx-l1,j,1,ie) &
387 +f2*arr2(nx-l2,j,1,ie)
388 end do
389 do i = i0, i1
390 arr1(i,l1+1 ,1,ie) = f1*arr1(i,l1+1 ,1,ie) &
391 +f2*arr2(i,l2+1 ,1,ie)
392 arr1(i,ny-l1,1,ie) = f1*arr1(i,ny-l1,1,ie) &
393 +f2*arr2(i,nx-l2,1,ie)
394 end do
395 end do
396 else
397 do ie = 1, nelv
398 do k = i0, i1
399 do j = i0, i1
400 arr1(l1+1 ,j,k,ie) = f1*arr1(l1+1 ,j,k,ie) &
401 +f2*arr2(l2+1 ,j,k,ie)
402 arr1(nx-l1,j,k,ie) = f1*arr1(nx-l1,j,k,ie) &
403 +f2*arr2(nx-l2,j,k,ie)
404 end do
405 end do
406 do k = i0, i1
407 do i = i0, i1
408 arr1(i,l1+1 ,k,ie) = f1*arr1(i,l1+1 ,k,ie) &
409 +f2*arr2(i,l2+1 ,k,ie)
410 arr1(i,nx-l1,k,ie) = f1*arr1(i,nx-l1,k,ie) &
411 +f2*arr2(i,nx-l2,k,ie)
412 end do
413 end do
414 do j = i0, i1
415 do i = i0, i1
416 arr1(i,j,l1+1 ,ie) = f1*arr1(i,j,l1+1 ,ie) &
417 +f2*arr2(i,j,l2+1 ,ie)
418 arr1(i,j,nx-l1,ie) = f1*arr1(i,j,nx-l1,ie) &
419 +f2*arr2(i,j,nx-l2,ie)
420 end do
421 end do
422 end do
423 endif
424 end subroutine schwarz_extrude
425
426 subroutine schwarz_compute(this, e, r)
427 class(schwarz_t), intent(inout) :: this
428 real(kind=rp), dimension(this%dof%size()), intent(inout) :: e, r
429 integer :: n, enx, eny, enz, ns
430 real(kind=rp), parameter :: zero = 0.0_rp
431 real(kind=rp), parameter :: one = 1.0_rp
432 type(c_ptr) :: e_d, r_d
433 associate(work1 => this%work1, work1_d => this%work1_d,&
434 work2 => this%work2, work2_d => this%work2_d)
435
436 n = this%dof%size()
437 enx=this%Xh_schwarz%lx
438 eny=this%Xh_schwarz%ly
439 enz=this%Xh_schwarz%lz
440 if(.not. this%msh%gdim .eq. 3) enz=1
441 ns = enx*eny*enz*this%msh%nelv
442 if (neko_bcknd_device .eq. 1) then
443 r_d = device_get_ptr(r)
444 e_d = device_get_ptr(e)
445 call device_event_record(this%event, glb_cmd_queue)
446 call device_stream_wait_event(aux_cmd_queue, this%event, 0)
447 call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
448 this%msh%nelv, aux_cmd_queue)
449 call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
450 enx,eny,enz, this%msh%nelv,aux_cmd_queue)
451
452 this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
453 call this%gs_schwarz%op(work1, ns, gs_op_add,this%event)
454 call device_event_sync(this%event)
455 call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
456 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
457
458 call this%fdm%compute(work2, work1,aux_cmd_queue) ! do local solves
459
460 call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
461 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
462 call this%gs_schwarz%op(work2, ns, gs_op_add,this%event)
463 call device_event_sync(this%event)
464
465 call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
466 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
467 call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
468 enx, eny, enz, this%msh%nelv, aux_cmd_queue)
469 call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
470 this%msh%nelv, aux_cmd_queue)
471
472 this%gs_h%bcknd%gs_stream = aux_cmd_queue
473 call this%gs_h%op(e, n, gs_op_add, this%event)
474
475 call this%bclst%apply_scalar(e, n, strm = aux_cmd_queue)
476 call device_col2(e_d,this%wt_d, n, aux_cmd_queue)
477
478 ! switch back to the default stream on the shared gs
479 if (.not. this%local_gs) then
480 call device_event_sync(this%event)
481 this%gs_h%bcknd%gs_stream = glb_cmd_queue
482 end if
483 else
484 call this%bclst%apply_scalar(r, n)
485 call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
486
487 ! exchange interior nodes
488 call schwarz_extrude(work1, 0, zero, work1, 2, one, &
489 enx, eny, enz, this%msh%nelv)
490 call this%gs_schwarz%op(work1, ns, gs_op_add)
491 call schwarz_extrude(work1, 0, one, work1, 2, -one, &
492 enx, eny, enz, this%msh%nelv)
493
494 call this%fdm%compute(work2, work1) ! do local solves
495
496 ! Sum overlap region (border excluded)
497 call schwarz_extrude(work1, 0, zero, work2, 0, one, &
498 enx, eny, enz, this%msh%nelv)
499 call this%gs_schwarz%op(work2, ns, gs_op_add)
500 call schwarz_extrude(work2, 0, one, work1, 0, -one, &
501 enx, eny, enz, this%msh%nelv)
502 call schwarz_extrude(work2, 2, one, work2, 0, one, &
503 enx, eny, enz, this%msh%nelv)
504
505 call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
506
507 ! sum border nodes
508 call this%gs_h%op(e, n, gs_op_add)
509 call this%bclst%apply_scalar(e, n)
510
511 call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
512 end if
513 end associate
514 end subroutine schwarz_compute
515
516 !Apply schwarz weights along the boundary of each element.
517 subroutine schwarz_wt3d(e,wt,n, nelv)
518 integer, intent(in) :: n, nelv
519 real(kind=rp), intent(inout) :: e(n,n,n,nelv)
520 real(kind=rp), intent(inout) :: wt(n,n,4,3,nelv)
521 integer :: ie, i, j, k
522
523 do ie = 1, nelv
524 do k = 1, n
525 do j = 1, n
526 e(1,j,k,ie) = e(1,j,k,ie) * wt(j,k,1,1,ie)
527 e(2,j,k,ie) = e(2,j,k,ie) * wt(j,k,2,1,ie)
528 e(n-1,j,k,ie) = e(n-1,j,k,ie) * wt(j,k,3,1,ie)
529 e(n,j,k,ie) = e(n,j,k,ie) * wt(j,k,4,1,ie)
530 end do
531 end do
532 do k = 1, n
533 do i = 3, n-2
534 e(i,1,k,ie) = e(i,1,k,ie) * wt(i,k,1,2,ie)
535 e(i,2,k,ie) = e(i,2,k,ie) * wt(i,k,2,2,ie)
536 e(i,n-1,k,ie) = e(i,n-1,k,ie) * wt(i,k,3,2,ie)
537 e(i,n,k,ie) = e(i,n,k,ie) * wt(i,k,4,2,ie)
538 end do
539 end do
540 do j = 3, n-2
541 do i = 3, n-2
542 e(i,j,1,ie) = e(i,j,1,ie) * wt(i,j,1,3,ie)
543 e(i,j,2,ie) = e(i,j,2,ie) * wt(i,j,2,3,ie)
544 e(i,j,n-1,ie) = e(i,j,n-1,ie) * wt(i,j,3,3,ie)
545 e(i,j,n,ie) = e(i,j,n,ie) * wt(i,j,4,3,ie)
546 end do
547 end do
548 end do
549 end subroutine schwarz_wt3d
550end module schwarz
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_schwarz_toreg3d(b_d, a_d, nx, nelv, stream)
subroutine, public device_schwarz_extrude(arr1_d, l1, f1, arr2_d, l2, f2, nx, ny, nz, nelv, stream)
subroutine, public device_schwarz_toext3d(a_d, b_d, nx, nelv, stream)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1295
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1279
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1208
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1249
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Type for the Fast Diagonalization connected with the schwarz overlapping solves.
Definition fdm.f90:61
Gather-scatter.
Definition math.f90:60
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Overlapping schwarz solves.
Definition schwarz.f90:61
subroutine schwarz_toreg3d(b, a, n, nelv)
convert array a from extended size to regular
Definition schwarz.f90:337
subroutine schwarz_setup_wt(this)
setup weights
Definition schwarz.f90:215
subroutine schwarz_init(this, xh, dof, gs_h, bclst, msh)
Definition schwarz.f90:112
subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
Sum values along rows l1, l2 with weights f1, f2 and store along row l1. Helps us avoid complicated c...
Definition schwarz.f90:374
subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 3d, second step.
Definition schwarz.f90:301
subroutine schwarz_compute(this, e, r)
Definition schwarz.f90:427
subroutine schwarz_wt3d(e, wt, n, nelv)
Definition schwarz.f90:518
subroutine schwarz_toext3d(a, b, n, nelv)
convert array a from original size to size extended array with border
Definition schwarz.f90:354
subroutine schwarz_setup_schwarz_wt2d_2(wt, ie, n, work, nelv)
Setup schwarz weights, 2d, second step.
Definition schwarz.f90:279
subroutine schwarz_free(this)
Definition schwarz.f90:173
Defines a function space.
Definition space.f90:34
integer, parameter, public gll
Definition space.f90:49
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
The function space for the SEM solution fields.
Definition space.f90:63