Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gather_scatter.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use neko_config
37 use gs_device, only : gs_device_t
38 use gs_sx, only : gs_sx_t
39 use gs_cpu, only : gs_cpu_t
42 use gs_mpi, only : gs_mpi_t
44 use mesh, only : mesh_t
45 use comm
46 use dofmap, only : dofmap_t
47 use field, only : field_t
48 use num_types, only : rp, dp, i2
50 use stack, only : stack_i4_t
51 use utils, only : neko_error, linear_index
52 use logger, only : neko_log, log_size
54 use device
55 implicit none
56 private
57
58 type, public :: gs_t
59 real(kind=rp), allocatable :: local_gs(:)
60 integer, allocatable :: local_dof_gs(:)
61 integer, allocatable :: local_gs_dof(:)
62 integer, allocatable :: local_blk_len(:)
63 real(kind=rp), allocatable :: shared_gs(:)
64 integer, allocatable :: shared_dof_gs(:)
65 integer, allocatable :: shared_gs_dof(:)
66 integer, allocatable :: shared_blk_len(:)
67 type(dofmap_t), pointer ::dofmap
68 type(htable_i8_t) :: shared_dofs
69 integer :: nlocal
70 integer :: nshared
71 integer :: nlocal_blks
72 integer :: nshared_blks
73 integer :: local_facet_offset
74 integer :: shared_facet_offset
75 class(gs_bcknd_t), allocatable :: bcknd
76 class(gs_comm_t), allocatable :: comm
77 contains
78 procedure, private, pass(gs) :: gs_op_fld
79 procedure, private, pass(gs) :: gs_op_r4
80 procedure, pass(gs) :: gs_op_vector
81 procedure, pass(gs) :: init => gs_init
82 procedure, pass(gs) :: free => gs_free
83 generic :: op => gs_op_fld, gs_op_r4, gs_op_vector
84 end type gs_t
85
86 ! Expose available gather-scatter operation
88
89 ! Expose available gather-scatter backends
91
92 ! Expose available gather-scatter comm. backends
94
95
96contains
97
99 subroutine gs_init(gs, dofmap, bcknd, comm_bcknd)
100 class(gs_t), intent(inout) :: gs
101 type(dofmap_t), target, intent(inout) :: dofmap
102 character(len=LOG_SIZE) :: log_buf
103 character(len=20) :: bcknd_str
104 integer, optional :: bcknd, comm_bcknd
105 integer :: i, j, ierr, bcknd_, comm_bcknd_
106 integer(i8) :: glb_nshared, glb_nlocal
107 logical :: use_device_mpi
108 real(kind=rp), allocatable :: tmp(:)
109 type(c_ptr) :: tmp_d = c_null_ptr
110 integer :: strtgy(4) = (/ int(b'00'), int(b'01'), int(b'10'), int(b'11') /)
111 integer :: avg_strtgy, env_len
112 character(len=255) :: env_strtgy
113 real(kind=dp) :: strtgy_time(4)
114
115 call gs%free()
116
117 call neko_log%section('Gather-Scatter')
118
119 gs%dofmap => dofmap
120
121 use_device_mpi = .false.
122 if (present(comm_bcknd)) then
123 comm_bcknd_ = comm_bcknd
124 else
125 if (neko_device_mpi) then
126 comm_bcknd_ = gs_comm_mpigpu
127 use_device_mpi = .true.
128 else
129 comm_bcknd_ = gs_comm_mpi
130 end if
131 end if
132
133 select case (comm_bcknd_)
134 case (gs_comm_mpi)
135 call neko_log%message('Comm : MPI')
136 allocate(gs_mpi_t::gs%comm)
137 case (gs_comm_mpigpu)
138 call neko_log%message('Comm : Device MPI')
139 allocate(gs_device_mpi_t::gs%comm)
140 case default
141 call neko_error('Unknown Gather-scatter comm. backend')
142 end select
143
144 call gs%comm%init_dofs()
145 call gs_init_mapping(gs)
146
147 call gs_schedule(gs)
148
149 glb_nlocal = int(gs%nlocal, i8)
150 glb_nshared = int(gs%nshared, i8)
151
152 if (pe_rank .eq. 0) then
153 call mpi_reduce(mpi_in_place, glb_nlocal, 1, &
154 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
155
156 call mpi_reduce(mpi_in_place, glb_nshared, 1, &
157 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
158 else
159 call mpi_reduce(glb_nlocal, glb_nlocal, 1, &
160 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
161
162 call mpi_reduce(glb_nshared, glb_nshared, 1, &
163 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
164 end if
165
166 write(log_buf, '(A,I12)') 'Avg. internal: ', glb_nlocal/pe_size
167 call neko_log%message(log_buf)
168 write(log_buf, '(A,I12)') 'Avg. external: ', glb_nshared/pe_size
169 call neko_log%message(log_buf)
170
171 if (present(bcknd)) then
172 bcknd_ = bcknd
173 else
174 if (neko_bcknd_sx .eq. 1) then
175 bcknd_ = gs_bcknd_sx
176 else if (neko_bcknd_device .eq. 1) then
177 bcknd_ = gs_bcknd_dev
178 else
179 bcknd_ = gs_bcknd_cpu
180 end if
181 end if
182
183 ! Setup Gather-scatter backend
184 select case(bcknd_)
185 case(gs_bcknd_cpu)
186 allocate(gs_cpu_t::gs%bcknd)
187 bcknd_str = ' std'
188 case(gs_bcknd_dev)
189 allocate(gs_device_t::gs%bcknd)
190 if (neko_bcknd_hip .eq. 1) then
191 bcknd_str = ' hip'
192 else if (neko_bcknd_cuda .eq. 1) then
193 bcknd_str = ' cuda'
194 else if (neko_bcknd_opencl .eq. 1) then
195 bcknd_str = ' opencl'
196 end if
197 case(gs_bcknd_sx)
198 allocate(gs_sx_t::gs%bcknd)
199 bcknd_str = ' sx'
200 case default
201 call neko_error('Unknown Gather-scatter backend')
202 end select
203
204 write(log_buf, '(A)') 'Backend : ' // trim(bcknd_str)
205 call neko_log%message(log_buf)
206
207
208
209 call gs%bcknd%init(gs%nlocal, gs%nshared, gs%nlocal_blks, gs%nshared_blks)
210
211 if (use_device_mpi) then
212 select type(b => gs%bcknd)
213 type is (gs_device_t)
214 b%shared_on_host = .false.
215 end select
216
217 if(pe_size .gt. 1) then
218 ! Select fastest device MPI strategy at runtime
219 select type(c => gs%comm)
220 type is (gs_device_mpi_t)
221 call get_environment_variable("NEKO_GS_STRTGY", env_strtgy, env_len)
222 if (env_len .eq. 0) then
223 allocate(tmp(dofmap%size()))
224 call device_map(tmp, tmp_d, dofmap%size())
225 tmp = 1.0_rp
226 call device_memcpy(tmp, tmp_d, dofmap%size(), &
227 host_to_device, sync=.false.)
228 call gs_op_vector(gs, tmp, dofmap%size(), gs_op_add)
229
230 do i = 1, size(strtgy)
231 c%nb_strtgy = strtgy(i)
232 call device_sync
233 call mpi_barrier(neko_comm)
234 strtgy_time(i) = mpi_wtime()
235 do j = 1, 100
236 call gs_op_vector(gs, tmp, dofmap%size(), gs_op_add)
237 end do
238 strtgy_time(i) = (mpi_wtime() - strtgy_time(i)) / 100d0
239 end do
240
241 call device_deassociate(tmp)
242 call device_free(tmp_d)
243 deallocate(tmp)
244
245 c%nb_strtgy = strtgy(minloc(strtgy_time, 1))
246
247 avg_strtgy = minloc(strtgy_time, 1)
248 call mpi_allreduce(mpi_in_place, avg_strtgy, 1, &
249 mpi_integer, mpi_sum, neko_comm)
250 avg_strtgy = avg_strtgy / pe_size
251
252 write(log_buf, '(A,B0.2,A)') 'Avg. strtgy : [', &
253 strtgy(avg_strtgy),']'
254
255 else
256 read(env_strtgy(1:env_len), *) i
257
258 if (i .lt. 1 .or. i .gt. 4) then
259 call neko_error('Invalid gs sync strtgy')
260 end if
261
262 c%nb_strtgy = strtgy(i)
263 avg_strtgy = i
264
265 write(log_buf, '(A,B0.2,A)') 'Env. strtgy : [', &
266 strtgy(avg_strtgy),']'
267 end if
268
269 call neko_log%message(log_buf)
270
271 end select
272 end if
273 end if
274
275 call neko_log%end_section()
276
277 end subroutine gs_init
278
280 subroutine gs_free(gs)
281 class(gs_t), intent(inout) :: gs
282
283 nullify(gs%dofmap)
284
285 if (allocated(gs%local_gs)) then
286 deallocate(gs%local_gs)
287 end if
288
289 if (allocated(gs%local_dof_gs)) then
290 deallocate(gs%local_dof_gs)
291 end if
292
293 if (allocated(gs%local_gs_dof)) then
294 deallocate(gs%local_gs_dof)
295 end if
296
297 if (allocated(gs%local_blk_len)) then
298 deallocate(gs%local_blk_len)
299 end if
300
301 if (allocated(gs%shared_gs)) then
302 deallocate(gs%shared_gs)
303 end if
304
305 if (allocated(gs%shared_dof_gs)) then
306 deallocate(gs%shared_dof_gs)
307 end if
308
309 if (allocated(gs%shared_gs_dof)) then
310 deallocate(gs%shared_gs_dof)
311 end if
312
313 if (allocated(gs%shared_blk_len)) then
314 deallocate(gs%shared_blk_len)
315 end if
316
317 gs%nlocal = 0
318 gs%nshared = 0
319 gs%nlocal_blks = 0
320 gs%nshared_blks = 0
321
322 call gs%shared_dofs%free()
323
324 if (allocated(gs%bcknd)) then
325 call gs%bcknd%free()
326 deallocate(gs%bcknd)
327 end if
328
329 if (allocated(gs%comm)) then
330 call gs%comm%free()
331 deallocate(gs%comm)
332 end if
333
334 end subroutine gs_free
335
337 subroutine gs_init_mapping(gs)
338 type(gs_t), target, intent(inout) :: gs
339 type(mesh_t), pointer :: msh
340 type(dofmap_t), pointer :: dofmap
341 type(stack_i4_t), target :: local_dof, dof_local, shared_dof, dof_shared
342 type(stack_i4_t), target :: local_face_dof, face_dof_local
343 type(stack_i4_t), target :: shared_face_dof, face_dof_shared
344 integer :: i, j, k, l, lx, ly, lz, max_id, max_sid, id, lid, dm_size
345 type(htable_i8_t) :: dm
346 type(htable_i8_t), pointer :: sdm
347
348 dofmap => gs%dofmap
349 msh => dofmap%msh
350 sdm => gs%shared_dofs
351
352 lx = dofmap%Xh%lx
353 ly = dofmap%Xh%ly
354 lz = dofmap%Xh%lz
355 dm_size = dofmap%size()/lx
356
357 call dm%init(dm_size, i)
360 call sdm%init(dofmap%size(), i)
361
362
363 call local_dof%init()
364 call dof_local%init()
365
366 call local_face_dof%init()
367 call face_dof_local%init()
368
369 call shared_dof%init()
370 call dof_shared%init()
371
372 call shared_face_dof%init()
373 call face_dof_shared%init()
374
375 !
376 ! Setup mapping for dofs points
377 !
378
379 max_id = 0
380 max_sid = 0
381 do i = 1, msh%nelv
382 lid = linear_index(1, 1, 1, i, lx, ly, lz)
383 if (dofmap%shared_dof(1, 1, 1, i)) then
384 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, 1, i), max_sid)
385 call shared_dof%push(id)
386 call dof_shared%push(lid)
387 else
388 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, 1, i), max_id)
389 call local_dof%push(id)
390 call dof_local%push(lid)
391 end if
392
393 lid = linear_index(lx, 1, 1, i, lx, ly, lz)
394 if (dofmap%shared_dof(lx, 1, 1, i)) then
395 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, 1, i), max_sid)
396 call shared_dof%push(id)
397 call dof_shared%push(lid)
398 else
399 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, 1, i), max_id)
400 call local_dof%push(id)
401 call dof_local%push(lid)
402 end if
403
404 lid = linear_index(1, ly, 1, i, lx, ly, lz)
405 if (dofmap%shared_dof(1, ly, 1, i)) then
406 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, 1, i), max_sid)
407 call shared_dof%push(id)
408 call dof_shared%push(lid)
409 else
410 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, 1, i), max_id)
411 call local_dof%push(id)
412 call dof_local%push(lid)
413 end if
414
415 lid = linear_index(lx, ly, 1, i, lx, ly, lz)
416 if (dofmap%shared_dof(lx, ly, 1, i)) then
417 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, 1, i), max_sid)
418 call shared_dof%push(id)
419 call dof_shared%push(lid)
420 else
421 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, 1, i), max_id)
422 call local_dof%push(id)
423 call dof_local%push(lid)
424 end if
425 if (lz .gt. 1) then
426 lid = linear_index(1, 1, lz, i, lx, ly, lz)
427 if (dofmap%shared_dof(1, 1, lz, i)) then
428 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, lz, i), max_sid)
429 call shared_dof%push(id)
430 call dof_shared%push(lid)
431 else
432 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, lz, i), max_id)
433 call local_dof%push(id)
434 call dof_local%push(lid)
435 end if
436
437 lid = linear_index(lx, 1, lz, i, lx, ly, lz)
438 if (dofmap%shared_dof(lx, 1, lz, i)) then
439 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, lz, i), max_sid)
440 call shared_dof%push(id)
441 call dof_shared%push(lid)
442 else
443 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, lz, i), max_id)
444 call local_dof%push(id)
445 call dof_local%push(lid)
446 end if
447
448 lid = linear_index(1, ly, lz, i, lx, ly, lz)
449 if (dofmap%shared_dof(1, ly, lz, i)) then
450 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, lz, i), max_sid)
451 call shared_dof%push(id)
452 call dof_shared%push(lid)
453 else
454 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, lz, i), max_id)
455 call local_dof%push(id)
456 call dof_local%push(lid)
457 end if
458
459 lid = linear_index(lx, ly, lz, i, lx, ly, lz)
460 if (dofmap%shared_dof(lx, ly, lz, i)) then
461 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, lz, i), max_sid)
462 call shared_dof%push(id)
463 call dof_shared%push(lid)
464 else
465 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, lz, i), max_id)
466 call local_dof%push(id)
467 call dof_local%push(lid)
468 end if
469 end if
470 end do
471
472 ! Clear local dofmap table
473 call dm%clear()
474
475 if (lz .gt. 1) then
476 !
477 ! Setup mapping for dofs on edges
478 !
479 do i = 1, msh%nelv
480
481 !
482 ! dofs on edges in x-direction
483 !
484 if (dofmap%shared_dof(2, 1, 1, i)) then
485 do j = 2, lx - 1
486 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, 1, i), max_sid)
487 call shared_dof%push(id)
488 id = linear_index(j, 1, 1, i, lx, ly, lz)
489 call dof_shared%push(id)
490 end do
491 else
492 do j = 2, lx - 1
493 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, 1, i), max_id)
494 call local_dof%push(id)
495 id = linear_index(j, 1, 1, i, lx, ly, lz)
496 call dof_local%push(id)
497 end do
498 end if
499 if (dofmap%shared_dof(2, 1, lz, i)) then
500 do j = 2, lx - 1
501 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, lz, i), max_sid)
502 call shared_dof%push(id)
503 id = linear_index(j, 1, lz, i, lx, ly, lz)
504 call dof_shared%push(id)
505 end do
506 else
507 do j = 2, lx - 1
508 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, lz, i), max_id)
509 call local_dof%push(id)
510 id = linear_index(j, 1, lz, i, lx, ly, lz)
511 call dof_local%push(id)
512 end do
513 end if
514
515 if (dofmap%shared_dof(2, ly, 1, i)) then
516 do j = 2, lx - 1
517 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, 1, i), max_sid)
518 call shared_dof%push(id)
519 id = linear_index(j, ly, 1, i, lx, ly, lz)
520 call dof_shared%push(id)
521 end do
522
523 else
524 do j = 2, lx - 1
525 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
526 call local_dof%push(id)
527 id = linear_index(j, ly, 1, i, lx, ly, lz)
528 call dof_local%push(id)
529 end do
530 end if
531 if (dofmap%shared_dof(2, ly, lz, i)) then
532 do j = 2, lx - 1
533 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, lz, i), max_sid)
534 call shared_dof%push(id)
535 id = linear_index(j, ly, lz, i, lx, ly, lz)
536 call dof_shared%push(id)
537 end do
538 else
539 do j = 2, lx - 1
540 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, lz, i), max_id)
541 call local_dof%push(id)
542 id = linear_index(j, ly, lz, i, lx, ly, lz)
543 call dof_local%push(id)
544 end do
545 end if
546
547 !
548 ! dofs on edges in y-direction
549 !
550 if (dofmap%shared_dof(1, 2, 1, i)) then
551 do k = 2, ly - 1
552 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, 1, i), max_sid)
553 call shared_dof%push(id)
554 id = linear_index(1, k, 1, i, lx, ly, lz)
555 call dof_shared%push(id)
556 end do
557 else
558 do k = 2, ly - 1
559 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, 1, i), max_id)
560 call local_dof%push(id)
561 id = linear_index(1, k, 1, i, lx, ly, lz)
562 call dof_local%push(id)
563 end do
564 end if
565 if (dofmap%shared_dof(1, 2, lz, i)) then
566 do k = 2, ly - 1
567 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, lz, i), max_sid)
568 call shared_dof%push(id)
569 id = linear_index(1, k, lz, i, lx, ly, lz)
570 call dof_shared%push(id)
571 end do
572 else
573 do k = 2, ly - 1
574 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, lz, i), max_id)
575 call local_dof%push(id)
576 id = linear_index(1, k, lz, i, lx, ly, lz)
577 call dof_local%push(id)
578 end do
579 end if
580
581 if (dofmap%shared_dof(lx, 2, 1, i)) then
582 do k = 2, ly - 1
583 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, 1, i), max_sid)
584 call shared_dof%push(id)
585 id = linear_index(lx, k, 1, i, lx, ly, lz)
586 call dof_shared%push(id)
587 end do
588 else
589 do k = 2, ly - 1
590 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, 1, i), max_id)
591 call local_dof%push(id)
592 id = linear_index(lx, k, 1, i, lx, ly, lz)
593 call dof_local%push(id)
594 end do
595 end if
596 if (dofmap%shared_dof(lx, 2, lz, i)) then
597 do k = 2, ly - 1
598 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, lz, i), max_sid)
599 call shared_dof%push(id)
600 id = linear_index(lx, k, lz, i, lx, ly, lz)
601 call dof_shared%push(id)
602 end do
603 else
604 do k = 2, ly - 1
605 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, lz, i), max_id)
606 call local_dof%push(id)
607 id = linear_index(lx, k, lz, i, lx, ly, lz)
608 call dof_local%push(id)
609 end do
610 end if
611 !
612 ! dofs on edges in z-direction
613 !
614 if (dofmap%shared_dof(1, 1, 2, i)) then
615 do l = 2, lz - 1
616 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, l, i), max_sid)
617 call shared_dof%push(id)
618 id = linear_index(1, 1, l, i, lx, ly, lz)
619 call dof_shared%push(id)
620 end do
621 else
622 do l = 2, lz - 1
623 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, l, i), max_id)
624 call local_dof%push(id)
625 id = linear_index(1, 1, l, i, lx, ly, lz)
626 call dof_local%push(id)
627 end do
628 end if
629
630 if (dofmap%shared_dof(lx, 1, 2, i)) then
631 do l = 2, lz - 1
632 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, l, i), max_sid)
633 call shared_dof%push(id)
634 id = linear_index(lx, 1, l, i, lx, ly, lz)
635 call dof_shared%push(id)
636 end do
637 else
638 do l = 2, lz - 1
639 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, l, i), max_id)
640 call local_dof%push(id)
641 id = linear_index(lx, 1, l, i, lx, ly, lz)
642 call dof_local%push(id)
643 end do
644 end if
645
646 if (dofmap%shared_dof(1, ly, 2, i)) then
647 do l = 2, lz - 1
648 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, l, i), max_sid)
649 call shared_dof%push(id)
650 id = linear_index(1, ly, l, i, lx, ly, lz)
651 call dof_shared%push(id)
652 end do
653 else
654 do l = 2, lz - 1
655 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, l, i), max_id)
656 call local_dof%push(id)
657 id = linear_index(1, ly, l, i, lx, ly, lz)
658 call dof_local%push(id)
659 end do
660 end if
661
662 if (dofmap%shared_dof(lx, ly, 2, i)) then
663 do l = 2, lz - 1
664 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, l, i), max_sid)
665 call shared_dof%push(id)
666 id = linear_index(lx, ly, l, i, lx, ly, lz)
667 call dof_shared%push(id)
668 end do
669 else
670 do l = 2, lz - 1
671 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, l, i), max_id)
672 call local_dof%push(id)
673 id = linear_index(lx, ly, l, i, lx, ly, lz)
674 call dof_local%push(id)
675 end do
676 end if
677 end do
678 end if
679
680 ! Clear local dofmap table
681 call dm%clear()
682
683 !
684 ! Setup mapping for dofs on facets
685 !
686 if (lz .eq. 1) then
687 do i = 1, msh%nelv
688
689 !
690 ! dofs on edges in x-direction
691 !
692 if (msh%facet_neigh(3, i) .ne. 0) then
693 if (dofmap%shared_dof(2, 1, 1, i)) then
694 do j = 2, lx - 1
695 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, 1, i), max_sid)
696 call shared_face_dof%push(id)
697 id = linear_index(j, 1, 1, i, lx, ly, lz)
698 call face_dof_shared%push(id)
699 end do
700 else
701 do j = 2, lx - 1
702 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, 1, i), max_id)
703 call local_face_dof%push(id)
704 id = linear_index(j, 1, 1, i, lx, ly, lz)
705 call face_dof_local%push(id)
706 end do
707 end if
708 end if
709
710 if (msh%facet_neigh(4, i) .ne. 0) then
711 if (dofmap%shared_dof(2, ly, 1, i)) then
712 do j = 2, lx - 1
713 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, 1, i), max_sid)
714 call shared_face_dof%push(id)
715 id = linear_index(j, ly, 1, i, lx, ly, lz)
716 call face_dof_shared%push(id)
717 end do
718
719 else
720 do j = 2, lx - 1
721 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
722 call local_face_dof%push(id)
723 id = linear_index(j, ly, 1, i, lx, ly, lz)
724 call face_dof_local%push(id)
725 end do
726 end if
727 end if
728
729 !
730 ! dofs on edges in y-direction
731 !
732 if (msh%facet_neigh(1, i) .ne. 0) then
733 if (dofmap%shared_dof(1, 2, 1, i)) then
734 do k = 2, ly - 1
735 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, 1, i), max_sid)
736 call shared_face_dof%push(id)
737 id = linear_index(1, k, 1, i, lx, ly, lz)
738 call face_dof_shared%push(id)
739 end do
740 else
741 do k = 2, ly - 1
742 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, 1, i), max_id)
743 call local_face_dof%push(id)
744 id = linear_index(1, k, 1, i, lx, ly, lz)
745 call face_dof_local%push(id)
746 end do
747 end if
748 end if
749
750 if (msh%facet_neigh(2, i) .ne. 0) then
751 if (dofmap%shared_dof(lx, 2, 1, i)) then
752 do k = 2, ly - 1
753 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, 1, i), max_sid)
754 call shared_face_dof%push(id)
755 id = linear_index(lx, k, 1, i, lx, ly, lz)
756 call face_dof_shared%push(id)
757 end do
758 else
759 do k = 2, ly - 1
760 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, 1, i), max_id)
761 call local_face_dof%push(id)
762 id = linear_index(lx, k, 1, i, lx, ly, lz)
763 call face_dof_local%push(id)
764 end do
765 end if
766 end if
767 end do
768 else
769 do i = 1, msh%nelv
770
771 ! Facets in x-direction (s, t)-plane
772 if (msh%facet_neigh(1, i) .ne. 0) then
773 if (dofmap%shared_dof(1, 2, 2, i)) then
774 do l = 2, lz - 1
775 do k = 2, ly - 1
776 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, l, i), max_sid)
777 call shared_face_dof%push(id)
778 id = linear_index(1, k, l, i, lx, ly, lz)
779 call face_dof_shared%push(id)
780 end do
781 end do
782 else
783 do l = 2, lz - 1
784 do k = 2, ly - 1
785 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, l, i), max_id)
786 call local_face_dof%push(id)
787 id = linear_index(1, k, l, i, lx, ly, lz)
788 call face_dof_local%push(id)
789 end do
790 end do
791 end if
792 end if
793
794 if (msh%facet_neigh(2, i) .ne. 0) then
795 if (dofmap%shared_dof(lx, 2, 2, i)) then
796 do l = 2, lz - 1
797 do k = 2, ly - 1
798 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, l, i), max_sid)
799 call shared_face_dof%push(id)
800 id = linear_index(lx, k, l, i, lx, ly, lz)
801 call face_dof_shared%push(id)
802 end do
803 end do
804 else
805 do l = 2, lz - 1
806 do k = 2, ly - 1
807 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, l, i), max_id)
808 call local_face_dof%push(id)
809 id = linear_index(lx, k, l, i, lx, ly, lz)
810 call face_dof_local%push(id)
811 end do
812 end do
813 end if
814 end if
815
816 ! Facets in y-direction (r, t)-plane
817 if (msh%facet_neigh(3, i) .ne. 0) then
818 if (dofmap%shared_dof(2, 1, 2, i)) then
819 do l = 2, lz - 1
820 do j = 2, lx - 1
821 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, l, i), max_sid)
822 call shared_face_dof%push(id)
823 id = linear_index(j, 1, l, i, lx, ly, lz)
824 call face_dof_shared%push(id)
825 end do
826 end do
827 else
828 do l = 2, lz - 1
829 do j = 2, lx - 1
830 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, l, i), max_id)
831 call local_face_dof%push(id)
832 id = linear_index(j, 1, l, i, lx, ly, lz)
833 call face_dof_local%push(id)
834 end do
835 end do
836 end if
837 end if
838
839 if (msh%facet_neigh(4, i) .ne. 0) then
840 if (dofmap%shared_dof(2, ly, 2, i)) then
841 do l = 2, lz - 1
842 do j = 2, lx - 1
843 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, l, i), max_sid)
844 call shared_face_dof%push(id)
845 id = linear_index(j, ly, l, i, lx, ly, lz)
846 call face_dof_shared%push(id)
847 end do
848 end do
849 else
850 do l = 2, lz - 1
851 do j = 2, lx - 1
852 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, l, i), max_id)
853 call local_face_dof%push(id)
854 id = linear_index(j, ly, l, i, lx, ly, lz)
855 call face_dof_local%push(id)
856 end do
857 end do
858 end if
859 end if
860
861 ! Facets in z-direction (r, s)-plane
862 if (msh%facet_neigh(5, i) .ne. 0) then
863 if (dofmap%shared_dof(2, 2, 1, i)) then
864 do k = 2, ly - 1
865 do j = 2, lx - 1
866 id = gs_mapping_add_dof(sdm, dofmap%dof(j, k, 1, i), max_sid)
867 call shared_face_dof%push(id)
868 id = linear_index(j, k, 1, i, lx, ly, lz)
869 call face_dof_shared%push(id)
870 end do
871 end do
872 else
873 do k = 2, ly - 1
874 do j = 2, lx - 1
875 id = gs_mapping_add_dof(dm, dofmap%dof(j, k, 1, i), max_id)
876 call local_face_dof%push(id)
877 id = linear_index(j, k, 1, i, lx, ly, lz)
878 call face_dof_local%push(id)
879 end do
880 end do
881 end if
882 end if
883
884 if (msh%facet_neigh(6, i) .ne. 0) then
885 if (dofmap%shared_dof(2, 2, lz, i)) then
886 do k = 2, ly - 1
887 do j = 2, lx - 1
888 id = gs_mapping_add_dof(sdm, dofmap%dof(j, k, lz, i), max_sid)
889 call shared_face_dof%push(id)
890 id = linear_index(j, k, lz, i, lx, ly, lz)
891 call face_dof_shared%push(id)
892 end do
893 end do
894 else
895 do k = 2, ly - 1
896 do j = 2, lx - 1
897 id = gs_mapping_add_dof(dm, dofmap%dof(j, k, lz, i), max_id)
898 call local_face_dof%push(id)
899 id = linear_index(j, k, lz, i, lx, ly, lz)
900 call face_dof_local%push(id)
901 end do
902 end do
903 end if
904 end if
905 end do
906 end if
907
908
909 call dm%free()
910
911 gs%nlocal = local_dof%size() + local_face_dof%size()
912 gs%local_facet_offset = local_dof%size() + 1
913
914 ! Finalize local dof to gather-scatter index
915 allocate(gs%local_dof_gs(gs%nlocal))
916
917 ! Add dofs on points and edges
918
919 ! We should use the %array() procedure, which works great for
920 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
921 ! certain data types
922 select type(dof_array => local_dof%data)
923 type is (integer)
924 j = local_dof%size()
925 do i = 1, j
926 gs%local_dof_gs(i) = dof_array(i)
927 end do
928 end select
929 call local_dof%free()
930
931 ! Add dofs on faces
932
933 ! We should use the %array() procedure, which works great for
934 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
935 ! certain data types
936 select type(dof_array => local_face_dof%data)
937 type is (integer)
938 do i = 1, local_face_dof%size()
939 gs%local_dof_gs(i + j) = dof_array(i)
940 end do
941 end select
942 call local_face_dof%free()
943
944 ! Finalize local gather-scatter index to dof
945 allocate(gs%local_gs_dof(gs%nlocal))
946
947 ! Add gather-scatter index on points and edges
948
949 ! We should use the %array() procedure, which works great for
950 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
951 ! certain data types
952 select type(dof_array => dof_local%data)
953 type is (integer)
954 j = dof_local%size()
955 do i = 1, j
956 gs%local_gs_dof(i) = dof_array(i)
957 end do
958 end select
959 call dof_local%free()
960
961 ! We should use the %array() procedure, which works great for
962 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
963 ! certain data types
964 select type(dof_array => face_dof_local%data)
965 type is (integer)
966 do i = 1, face_dof_local%size()
967 gs%local_gs_dof(i+j) = dof_array(i)
968 end do
969 end select
970 call face_dof_local%free()
971
972 call gs_qsort_dofmap(gs%local_dof_gs, gs%local_gs_dof, &
973 gs%nlocal, 1, gs%nlocal)
974
975 call gs_find_blks(gs%local_dof_gs, gs%local_blk_len, &
976 gs%nlocal_blks, gs%nlocal, gs%local_facet_offset)
977
978 ! Allocate buffer for local gs-ops
979 allocate(gs%local_gs(gs%nlocal))
980
981 gs%nshared = shared_dof%size() + shared_face_dof%size()
982 gs%shared_facet_offset = shared_dof%size() + 1
983
984 ! Finalize shared dof to gather-scatter index
985 allocate(gs%shared_dof_gs(gs%nshared))
986
987 ! Add shared dofs on points and edges
988
989 ! We should use the %array() procedure, which works great for
990 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
991 ! certain data types
992 select type(dof_array => shared_dof%data)
993 type is (integer)
994 j = shared_dof%size()
995 do i = 1, j
996 gs%shared_dof_gs(i) = dof_array(i)
997 end do
998 end select
999 call shared_dof%free()
1000
1001 ! Add shared dofs on faces
1002
1003 ! We should use the %array() procedure, which works great for
1004 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1005 ! certain data types
1006 select type(dof_array => shared_face_dof%data)
1007 type is (integer)
1008 do i = 1, shared_face_dof%size()
1009 gs%shared_dof_gs(i + j) = dof_array(i)
1010 end do
1011 end select
1012 call shared_face_dof%free()
1013
1014 ! Finalize shared gather-scatter index to dof
1015 allocate(gs%shared_gs_dof(gs%nshared))
1016
1017 ! Add dofs on points and edges
1018
1019 ! We should use the %array() procedure, which works great for
1020 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1021 ! certain data types
1022 select type(dof_array => dof_shared%data)
1023 type is(integer)
1024 j = dof_shared%size()
1025 do i = 1, j
1026 gs%shared_gs_dof(i) = dof_array(i)
1027 end do
1028 end select
1029 call dof_shared%free()
1030
1031 ! We should use the %array() procedure, which works great for
1032 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1033 ! certain data types
1034 select type(dof_array => face_dof_shared%data)
1035 type is (integer)
1036 do i = 1, face_dof_shared%size()
1037 gs%shared_gs_dof(i + j) = dof_array(i)
1038 end do
1039 end select
1040 call face_dof_shared%free()
1041
1042 ! Allocate buffer for shared gs-ops
1043 allocate(gs%shared_gs(gs%nshared))
1044
1045 if (gs%nshared .gt. 0) then
1046 call gs_qsort_dofmap(gs%shared_dof_gs, gs%shared_gs_dof, &
1047 gs%nshared, 1, gs%nshared)
1048
1049 call gs_find_blks(gs%shared_dof_gs, gs%shared_blk_len, &
1050 gs%nshared_blks, gs%nshared, gs%shared_facet_offset)
1051 end if
1052
1053 contains
1054
1056 function gs_mapping_add_dof(map_, dof, max_id) result(id)
1057 type(htable_i8_t), intent(inout) :: map_
1058 integer(kind=i8), intent(inout) :: dof
1059 integer, intent(inout) :: max_id
1060 integer :: id
1061
1062 if (map_%get(dof, id) .gt. 0) then
1063 max_id = max_id + 1
1064 call map_%set(dof, max_id)
1065 id = max_id
1066 end if
1067
1068 end function gs_mapping_add_dof
1069
1071 recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
1072 integer, intent(inout) :: n
1073 integer, dimension(n), intent(inout) :: dg
1074 integer, dimension(n), intent(inout) :: gd
1075 integer :: lo, hi
1076 integer :: tmp, i, j, pivot
1077
1078 i = lo - 1
1079 j = hi + 1
1080 pivot = dg((lo + hi) / 2)
1081 do
1082 do
1083 i = i + 1
1084 if (dg(i) .ge. pivot) exit
1085 end do
1086
1087 do
1088 j = j - 1
1089 if (dg(j) .le. pivot) exit
1090 end do
1091
1092 if (i .lt. j) then
1093 tmp = dg(i)
1094 dg(i) = dg(j)
1095 dg(j) = tmp
1096
1097 tmp = gd(i)
1098 gd(i) = gd(j)
1099 gd(j) = tmp
1100 else if (i .eq. j) then
1101 i = i + 1
1102 exit
1103 else
1104 exit
1105 end if
1106 end do
1107 if (lo .lt. j) call gs_qsort_dofmap(dg, gd, n, lo, j)
1108 if (i .lt. hi) call gs_qsort_dofmap(dg, gd, n, i, hi)
1109
1110 end subroutine gs_qsort_dofmap
1111
1113 subroutine gs_find_blks(dg, blk_len, nblks, n, m)
1114 integer, intent(in) :: n
1115 integer, intent(in) :: m
1116 integer, dimension(n), intent(inout) :: dg
1117 integer, allocatable, intent(inout) :: blk_len(:)
1118 integer, intent(inout) :: nblks
1119 integer :: i, j
1120 integer :: id, count
1121 type(stack_i4_t), target :: blks
1122
1123 call blks%init()
1124 i = 1
1125 do while( i .lt. m)
1126 id = dg(i)
1127 count = 1
1128 j = i
1129 do while ( j+1 .le. n .and. dg(j+1) .eq. id)
1130 j = j + 1
1131 count = count + 1
1132 end do
1133 call blks%push(count)
1134 i = j + 1
1135 end do
1136
1137 select type(blk_array => blks%data)
1138 type is(integer)
1139 nblks = blks%size()
1140 allocate(blk_len(nblks))
1141 do i = 1, nblks
1142 blk_len(i) = blk_array(i)
1143 end do
1144 end select
1145 call blks%free()
1146
1147 end subroutine gs_find_blks
1148
1149 end subroutine gs_init_mapping
1150
1152 subroutine gs_schedule(gs)
1153 type(gs_t), target, intent(inout) :: gs
1154 integer(kind=i8), allocatable :: send_buf(:), recv_buf(:)
1155 integer(kind=i2), allocatable :: shared_flg(:), recv_flg(:)
1156 type(htable_iter_i8_t) :: it
1157 type(stack_i4_t) :: send_pe, recv_pe
1158 type(mpi_status) :: status
1159 type(mpi_request) :: send_req, recv_req
1160 integer :: i, j, max_recv, src, dst, ierr, n_recv
1161 integer :: tmp, shared_gs_id
1162 integer :: nshared_unique
1163
1164 nshared_unique = gs%shared_dofs%num_entries()
1165
1166 call it%init(gs%shared_dofs)
1167 allocate(send_buf(nshared_unique))
1168 i = 1
1169 do while(it%next())
1170 send_buf(i) = it%key()
1171 i = i + 1
1172 end do
1173
1174 call send_pe%init()
1175 call recv_pe%init()
1176
1177
1178 !
1179 ! Schedule exchange of shared dofs
1180 !
1181
1182 call mpi_allreduce(nshared_unique, max_recv, 1, &
1183 mpi_integer, mpi_max, neko_comm, ierr)
1184
1185 allocate(recv_buf(max_recv))
1186 allocate(shared_flg(max_recv))
1187 allocate(recv_flg(max_recv))
1188
1190 do i = 1, size(gs%dofmap%msh%neigh_order)
1191 src = modulo(pe_rank - gs%dofmap%msh%neigh_order(i) + pe_size, pe_size)
1192 dst = modulo(pe_rank + gs%dofmap%msh%neigh_order(i), pe_size)
1193
1194 if (gs%dofmap%msh%neigh(src)) then
1195 call mpi_irecv(recv_buf, max_recv, mpi_integer8, &
1196 src, 0, neko_comm, recv_req, ierr)
1197 end if
1198
1199 if (gs%dofmap%msh%neigh(dst)) then
1200 call mpi_isend(send_buf, nshared_unique, mpi_integer8, &
1201 dst, 0, neko_comm, send_req, ierr)
1202 end if
1203
1204 if (gs%dofmap%msh%neigh(src)) then
1205 call mpi_wait(recv_req, status, ierr)
1206 call mpi_get_count(status, mpi_integer8, n_recv, ierr)
1207
1208 do j = 1, n_recv
1209 shared_flg(j) = gs%shared_dofs%get(recv_buf(j), shared_gs_id)
1210 if (shared_flg(j) .eq. 0) then
1212 call gs%comm%recv_dof(src)%push(shared_gs_id)
1213 end if
1214 end do
1215
1216 if (gs%comm%recv_dof(src)%size() .gt. 0) then
1217 call recv_pe%push(src)
1218 end if
1219 end if
1220
1221 if (gs%dofmap%msh%neigh(dst)) then
1222 call mpi_wait(send_req, mpi_status_ignore, ierr)
1223 call mpi_irecv(recv_flg, max_recv, mpi_integer2, &
1224 dst, 0, neko_comm, recv_req, ierr)
1225 end if
1226
1227 if (gs%dofmap%msh%neigh(src)) then
1228 call mpi_isend(shared_flg, n_recv, mpi_integer2, &
1229 src, 0, neko_comm, send_req, ierr)
1230 end if
1231
1232 if (gs%dofmap%msh%neigh(dst)) then
1233 call mpi_wait(recv_req, status, ierr)
1234 call mpi_get_count(status, mpi_integer2, n_recv, ierr)
1235
1236 do j = 1, n_recv
1237 if (recv_flg(j) .eq. 0) then
1238 tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id)
1240 call gs%comm%send_dof(dst)%push(shared_gs_id)
1241 end if
1242 end do
1243
1244 if (gs%comm%send_dof(dst)%size() .gt. 0) then
1245 call send_pe%push(dst)
1246 end if
1247 end if
1248
1249 if (gs%dofmap%msh%neigh(src)) then
1250 call mpi_wait(send_req, mpi_status_ignore, ierr)
1251 end if
1252
1253 end do
1254
1255 call gs%comm%init(send_pe, recv_pe)
1256
1257 call send_pe%free()
1258 call recv_pe%free()
1259
1260 deallocate(send_buf)
1261 deallocate(recv_flg)
1262 deallocate(shared_flg)
1263 !This arrays seems to take massive amounts of memory...
1264 call gs%shared_dofs%free()
1265
1266 end subroutine gs_schedule
1267
1269 subroutine gs_op_fld(gs, u, op, event)
1270 class(gs_t), intent(inout) :: gs
1271 type(field_t), intent(inout) :: u
1272 type(c_ptr), optional, intent(inout) :: event
1273 integer :: n, op
1274
1275 n = u%msh%nelv * u%Xh%lx * u%Xh%ly * u%Xh%lz
1276 if (present(event)) then
1277 call gs_op_vector(gs, u%x, n, op, event)
1278 else
1279 call gs_op_vector(gs, u%x, n, op)
1280 end if
1281
1282 end subroutine gs_op_fld
1283
1285 subroutine gs_op_r4(gs, u, n, op, event)
1286 class(gs_t), intent(inout) :: gs
1287 integer, intent(in) :: n
1288 real(kind=rp), contiguous, dimension(:,:,:,:), intent(inout) :: u
1289 type(c_ptr), optional, intent(inout) :: event
1290 integer :: op
1291
1292 if (present(event)) then
1293 call gs_op_vector(gs, u, n, op, event)
1294 else
1295 call gs_op_vector(gs, u, n, op)
1296 end if
1297
1298 end subroutine gs_op_r4
1299
1301 subroutine gs_op_vector(gs, u, n, op, event)
1302 class(gs_t), intent(inout) :: gs
1303 integer, intent(in) :: n
1304 real(kind=rp), dimension(n), intent(inout) :: u
1305 type(c_ptr), optional, intent(inout) :: event
1306 integer :: m, l, op, lo, so
1307
1308 lo = gs%local_facet_offset
1309 so = -gs%shared_facet_offset
1310 m = gs%nlocal
1311 l = gs%nshared
1312
1313 call profiler_start_region("gather_scatter", 5)
1314 ! Gather shared dofs
1315 if (pe_size .gt. 1) then
1316 call profiler_start_region("gs_nbrecv", 13)
1317 call gs%comm%nbrecv()
1318 call profiler_end_region("gs_nbrecv", 13)
1319 call profiler_start_region("gs_gather_shared", 14)
1320 call gs%bcknd%gather(gs%shared_gs, l, so, gs%shared_dof_gs, u, n, &
1321 gs%shared_gs_dof, gs%nshared_blks, gs%shared_blk_len, op, .true.)
1322 call profiler_end_region("gs_gather_shared", 14)
1323 call profiler_start_region("gs_nbsend", 6)
1324 call gs%comm%nbsend(gs%shared_gs, l, &
1325 gs%bcknd%gather_event, gs%bcknd%gs_stream)
1326 call profiler_end_region("gs_nbsend", 6)
1327
1328 end if
1329
1330 ! Gather-scatter local dofs
1331 call profiler_start_region("gs_local", 12)
1332 call gs%bcknd%gather(gs%local_gs, m, lo, gs%local_dof_gs, u, n, &
1333 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, op, .false.)
1334 call gs%bcknd%scatter(gs%local_gs, m, gs%local_dof_gs, u, n, &
1335 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, .false., c_null_ptr)
1336 call profiler_end_region("gs_local", 12)
1337 ! Scatter shared dofs
1338 if (pe_size .gt. 1) then
1339 call profiler_start_region("gs_nbwait", 7)
1340 call gs%comm%nbwait(gs%shared_gs, l, op, gs%bcknd%gs_stream)
1341 call profiler_end_region("gs_nbwait", 7)
1342 call profiler_start_region("gs_scatter_shared", 15)
1343 if (present(event)) then
1344 call gs%bcknd%scatter(gs%shared_gs, l,&
1345 gs%shared_dof_gs, u, n, &
1346 gs%shared_gs_dof, gs%nshared_blks, &
1347 gs%shared_blk_len, .true., event)
1348 else
1349 call gs%bcknd%scatter(gs%shared_gs, l,&
1350 gs%shared_dof_gs, u, n, &
1351 gs%shared_gs_dof, gs%nshared_blks, &
1352 gs%shared_blk_len, .true., c_null_ptr)
1353 end if
1354 call profiler_end_region("gs_scatter_shared", 15)
1355 end if
1356
1357 call profiler_end_region("gather_scatter", 5)
1358
1359 end subroutine gs_op_vector
1360
1361end module gather_scatter
subroutine gs_find_blks(dg, blk_len, nblks, n, m)
Find blocks sharing dofs in non-facet data.
recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
Sort the dof lists based on the dof to gather-scatter list.
integer function gs_mapping_add_dof(map_, dof, max_id)
Register a unique dof.
Deassociate a Fortran array from a device pointer.
Definition device.F90:75
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Synchronize a device or stream.
Definition device.F90:87
Definition comm.F90:1
integer pe_rank
MPI rank.
Definition comm.F90:28
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
integer pe_size
MPI size of communicator.
Definition comm.F90:31
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Defines a field.
Definition field.f90:34
Gather-scatter.
subroutine gs_init_mapping(gs)
Setup mapping of dofs to gather-scatter operations.
subroutine gs_schedule(gs)
Schedule shared gather-scatter operations.
subroutine gs_free(gs)
Deallocate a gather-scatter kernel.
subroutine gs_op_r4(gs, u, n, op, event)
Gather-scatter operation on a rank 4 array.
subroutine gs_init(gs, dofmap, bcknd, comm_bcknd)
Initialize a gather-scatter kernel.
subroutine gs_op_vector(gs, u, n, op, event)
Gather-scatter operation on a vector u with op op.
subroutine gs_op_fld(gs, u, op, event)
Gather-scatter operation on a field u with op op.
Defines a gather-scatter backend.
Definition gs_bcknd.f90:34
integer, parameter, public gs_bcknd_cpu
Definition gs_bcknd.f90:40
integer, parameter, public gs_bcknd_sx
Definition gs_bcknd.f90:40
integer, parameter, public gs_bcknd_dev
Definition gs_bcknd.f90:40
Defines a gather-scatter communication method.
Definition gs_comm.f90:34
integer, parameter, public gs_comm_mpigpu
Definition gs_comm.f90:42
integer, parameter, public gs_comm_mpi
Definition gs_comm.f90:42
Generic Gather-scatter backend for CPUs.
Definition gs_cpu.f90:34
Defines GPU aware MPI gather-scatter communication.
Generic Gather-scatter backend for accelerators.
Definition gs_device.F90:34
Defines MPI gather-scatter communication.
Definition gs_mpi.f90:34
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
integer, parameter, public gs_op_max
Definition gs_ops.f90:36
integer, parameter, public gs_op_min
Definition gs_ops.f90:36
integer, parameter, public gs_op_mul
Definition gs_ops.f90:36
Generic Gather-scatter backend for NEC Vector Engines.
Definition gs_sx.f90:34
Implements a hash table ADT.
Definition htable.f90:36
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:65
integer, parameter, public log_size
Definition log.f90:42
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_hip
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_cuda
integer, parameter, public i2
Definition num_types.f90:5
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:109
Implements a dynamic stack ADT.
Definition stack.f90:35
Utilities.
Definition utils.f90:35
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:174
Gather-scatter backend.
Definition gs_bcknd.f90:44
Gather-scatter communication method.
Definition gs_comm.f90:45
Gather-scatter backend for CPUs.
Definition gs_cpu.f90:43
Gather-scatter backend for offloading devices.
Definition gs_device.F90:47
Gather-scatter communication using device MPI. The arrays are indexed per PE like send_pe and @ recv_...
Gather-scatter communication using MPI.
Definition gs_mpi.f90:54
Gather-scatter backend for NEC SX-Aurora.
Definition gs_sx.f90:43
Integer*8 based hash table.
Definition htable.f90:92
Iterator for an integer*8 based hash table.
Definition htable.f90:175
Integer based stack.
Definition stack.f90:63