Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gather_scatter.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
38 use gs_device, only : gs_device_t
39 use gs_sx, only : gs_sx_t
40 use gs_cpu, only : gs_cpu_t
44 use gs_mpi, only : gs_mpi_t
48 use mesh, only : mesh_t
49 use comm, only : pe_rank, pe_size, neko_comm
50 use mpi_f08, only : mpi_reduce, mpi_allreduce, mpi_barrier, mpi_in_place, &
51 mpi_wait, mpi_irecv, mpi_isend, mpi_wtime, mpi_sum, mpi_max, &
52 mpi_integer, mpi_integer2, mpi_integer8, mpi_request, mpi_status, &
53 mpi_status_ignore, mpi_get_count
54 use dofmap, only : dofmap_t
55 use field, only : field_t
56 use num_types, only : rp, dp, i2, i8
58 use stack, only : stack_i4_t
59 use utils, only : neko_error, linear_index
60 use logger, only : neko_log, log_size
64 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr
65 implicit none
66 private
67
68 type, public :: gs_t
69 real(kind=rp), allocatable :: local_gs(:)
70 integer, allocatable :: local_dof_gs(:)
71 integer, allocatable :: local_gs_dof(:)
72 integer, allocatable :: local_blk_len(:)
73 integer, allocatable :: local_blk_off(:)
74 real(kind=rp), allocatable :: shared_gs(:)
75 integer, allocatable :: shared_dof_gs(:)
76 integer, allocatable :: shared_gs_dof(:)
77 integer, allocatable :: shared_blk_len(:)
78 integer, allocatable :: shared_blk_off(:)
79 type(dofmap_t), pointer ::dofmap
80 type(htable_i8_t) :: shared_dofs
81 integer :: nlocal
82 integer :: nshared
83 integer :: nlocal_blks
84 integer :: nshared_blks
85 integer :: local_facet_offset
86 integer :: shared_facet_offset
87 class(gs_bcknd_t), allocatable :: bcknd
88 class(gs_comm_t), allocatable :: comm
89 contains
90 procedure, private, pass(gs) :: gs_op_fld
91 procedure, private, pass(gs) :: gs_op_r4
92 procedure, pass(gs) :: gs_op_vector
93 procedure, pass(gs) :: init => gs_init
94 procedure, pass(gs) :: free => gs_free
95 generic :: op => gs_op_fld, gs_op_r4, gs_op_vector
96 end type gs_t
97
98 ! Expose available gather-scatter operation
100
101 ! Expose available gather-scatter backends
103
104 ! Expose available gather-scatter comm. backends
106
107
108contains
109
114 subroutine gs_init(gs, dofmap, bcknd, comm_bcknd)
115 class(gs_t), intent(inout) :: gs
116 type(dofmap_t), target, intent(inout) :: dofmap
117 character(len=LOG_SIZE) :: log_buf
118 character(len=20) :: bcknd_str
119 integer, optional :: bcknd, comm_bcknd
120 integer :: i, j, ierr, bcknd_, comm_bcknd_
121 integer(i8) :: glb_nshared, glb_nlocal
122 logical :: use_device_mpi, use_device_nccl, use_device_shmem, use_host_mpi
123 real(kind=rp), allocatable :: tmp(:)
124 type(c_ptr) :: tmp_d = c_null_ptr
125 integer :: strtgy(4) = [int(b'00'), int(b'01'), int(b'10'), int(b'11')]
126 integer :: avg_strtgy, env_len
127 character(len=255) :: env_strtgy, env_gscomm
128 real(kind=dp) :: strtgy_time(4)
129
130 call gs%free()
131
132 call neko_log%section('Gather-Scatter')
133 ! Currently this uses the dofmap which also contains geometric information
134 ! Only connectivity/numbering of points is technically necessary for gs
135 gs%dofmap => dofmap
136
137 use_device_mpi = .false.
138 use_device_nccl = .false.
139 use_device_shmem = .false.
140 use_host_mpi = .false.
141 ! Check if a comm-backend is requested via env. variables
142 call get_environment_variable("NEKO_GS_COMM", env_gscomm, env_len)
143 if (env_len .gt. 0) then
144 if (env_gscomm(1:env_len) .eq. "MPI") then
145 use_host_mpi = .true.
146 else if (env_gscomm(1:env_len) .eq. "MPIGPU") then
147 use_device_mpi = .true.
148 else if (env_gscomm(1:env_len) .eq. "NCCL") then
149 use_device_nccl = .true.
150 else if (env_gscomm(1:env_len) .eq. "SHMEM") then
151 use_device_shmem = .true.
152 else
153 call neko_error('Unknown Gather-scatter comm. backend')
154 end if
155 end if
156
157
158 if (present(comm_bcknd)) then
159 comm_bcknd_ = comm_bcknd
160 else if (use_host_mpi) then
161 comm_bcknd_ = gs_comm_mpi
162 else if (use_device_mpi) then
163 comm_bcknd_ = gs_comm_mpigpu
164 else if (use_device_nccl) then
165 comm_bcknd_ = gs_comm_nccl
166 else if (use_device_shmem) then
167 comm_bcknd_ = gs_comm_nvshmem
168 else
169 if (neko_device_mpi) then
170 comm_bcknd_ = gs_comm_mpigpu
171 use_device_mpi = .true.
172 else
173 comm_bcknd_ = gs_comm_mpi
174 end if
175 end if
176
177 select case (comm_bcknd_)
178 case (gs_comm_mpi)
179 call neko_log%message('Comm : MPI')
180 allocate(gs_mpi_t::gs%comm)
181 case (gs_comm_mpigpu)
182 call neko_log%message('Comm : Device MPI')
183 allocate(gs_device_mpi_t::gs%comm)
184 case (gs_comm_nccl)
185 call neko_log%message('Comm : NCCL')
186 allocate(gs_device_nccl_t::gs%comm)
187 case (gs_comm_nvshmem)
188 call neko_log%message('Comm : NVSHMEM')
189 allocate(gs_device_shmem_t::gs%comm)
190 case default
191 call neko_error('Unknown Gather-scatter comm. backend')
192 end select
193 ! Initialize a stack for each rank containing which dofs to send/recv at
194 ! that rank
195 call gs%comm%init_dofs()
196 ! Initialize mapping between local ids and gather-scatter ids
197 ! based on the global numbering in dofmap
198 call gs_init_mapping(gs)
199 ! Setup buffers and which ranks to send/recv data from based on mapping
200 ! and initializes gs%comm (sets up gs%comm%send_dof and gs%comm%recv_dof and
201 ! recv_pe/send_pe)
202 call gs_schedule(gs)
203 ! Global number of points not needing to be sent over mpi for gs operations
204 ! "Internal points"
205 glb_nlocal = int(gs%nlocal, i8)
206 ! Global number of points needing to be communicated with other pes/ranks
207 ! "external points"
208 glb_nshared = int(gs%nshared, i8)
209 ! Can be thought of a measure of the volume of this rank (glb_nlocal) and
210 ! the surface area (glb_nshared) that is shared with other ranks
211 ! Lots of internal volume compared to surface that needs communication is
212 ! good
213
214 if (pe_rank .eq. 0) then
215 call mpi_reduce(mpi_in_place, glb_nlocal, 1, &
216 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
217
218 call mpi_reduce(mpi_in_place, glb_nshared, 1, &
219 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
220 else
221 call mpi_reduce(glb_nlocal, glb_nlocal, 1, &
222 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
223
224 call mpi_reduce(glb_nshared, glb_nshared, 1, &
225 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
226 end if
227
228 write(log_buf, '(A,I12)') 'Avg. internal: ', glb_nlocal/pe_size
229 call neko_log%message(log_buf)
230 write(log_buf, '(A,I12)') 'Avg. external: ', glb_nshared/pe_size
231 call neko_log%message(log_buf)
232
233 if (present(bcknd)) then
234 bcknd_ = bcknd
235 else
236 if (neko_bcknd_sx .eq. 1) then
237 bcknd_ = gs_bcknd_sx
238 else if (neko_bcknd_device .eq. 1) then
239 bcknd_ = gs_bcknd_dev
240 else
241 bcknd_ = gs_bcknd_cpu
242 end if
243 end if
244
245 ! Setup Gather-scatter backend
246 select case (bcknd_)
247 case (gs_bcknd_cpu)
248 allocate(gs_cpu_t::gs%bcknd)
249 bcknd_str = ' std'
250 case (gs_bcknd_dev)
251 allocate(gs_device_t::gs%bcknd)
252 if (neko_bcknd_hip .eq. 1) then
253 bcknd_str = ' hip'
254 else if (neko_bcknd_cuda .eq. 1) then
255 bcknd_str = ' cuda'
256 else if (neko_bcknd_opencl .eq. 1) then
257 bcknd_str = ' opencl'
258 end if
259 case (gs_bcknd_sx)
260 allocate(gs_sx_t::gs%bcknd)
261 bcknd_str = ' sx'
262 case default
263 call neko_error('Unknown Gather-scatter backend')
264 end select
265
266 write(log_buf, '(A)') 'Backend : ' // trim(bcknd_str)
267 call neko_log%message(log_buf)
268
269
270
271 call gs%bcknd%init(gs%nlocal, gs%nshared, gs%nlocal_blks, gs%nshared_blks)
272
273 if (use_device_mpi .or. use_device_nccl .or. use_device_shmem) then
274 select type (b => gs%bcknd)
275 type is (gs_device_t)
276 b%shared_on_host = .false.
277 end select
278 end if
279
280 if (use_device_mpi) then
281 if (pe_size .gt. 1) then
282 ! Select fastest device MPI strategy at runtime
283 select type (c => gs%comm)
284 type is (gs_device_mpi_t)
285 call get_environment_variable("NEKO_GS_STRTGY", env_strtgy, &
286 env_len)
287 if (env_len .eq. 0) then
288 allocate(tmp(dofmap%size()))
289 call device_map(tmp, tmp_d, dofmap%size())
290 tmp = 1.0_rp
291 call device_memcpy(tmp, tmp_d, dofmap%size(), &
292 host_to_device, sync = .false.)
293 call gs_op_vector(gs, tmp, dofmap%size(), gs_op_add)
294
295 do i = 1, size(strtgy)
296 c%nb_strtgy = strtgy(i)
297 call device_sync
298 call mpi_barrier(neko_comm)
299 strtgy_time(i) = mpi_wtime()
300 do j = 1, 100
301 call gs_op_vector(gs, tmp, dofmap%size(), gs_op_add)
302 end do
303 strtgy_time(i) = (mpi_wtime() - strtgy_time(i)) / 100d0
304 end do
305
306 call device_deassociate(tmp)
307 call device_free(tmp_d)
308 deallocate(tmp)
309
310 c%nb_strtgy = strtgy(minloc(strtgy_time, 1))
311
312 avg_strtgy = minloc(strtgy_time, 1)
313 call mpi_allreduce(mpi_in_place, avg_strtgy, 1, &
314 mpi_integer, mpi_sum, neko_comm)
315 avg_strtgy = avg_strtgy / pe_size
316
317 write(log_buf, '(A,B0.2,A)') 'Avg. strtgy : [', &
318 strtgy(avg_strtgy), ']'
319
320 else
321 read(env_strtgy(1:env_len), *) i
322
323 if (i .lt. 1 .or. i .gt. 4) then
324 call neko_error('Invalid gs sync strtgy')
325 end if
326
327 c%nb_strtgy = strtgy(i)
328 avg_strtgy = i
329
330 write(log_buf, '(A,B0.2,A)') 'Env. strtgy : [', &
331 strtgy(avg_strtgy), ']'
332 end if
333
334 call neko_log%message(log_buf)
335
336 end select
337 end if
338 end if
339
340 call neko_log%end_section()
341
342 end subroutine gs_init
343
345 subroutine gs_free(gs)
346 class(gs_t), intent(inout) :: gs
347
348 nullify(gs%dofmap)
349
350 if (allocated(gs%local_gs)) then
351 deallocate(gs%local_gs)
352 end if
353
354 if (allocated(gs%local_dof_gs)) then
355 deallocate(gs%local_dof_gs)
356 end if
357
358 if (allocated(gs%local_gs_dof)) then
359 deallocate(gs%local_gs_dof)
360 end if
361
362 if (allocated(gs%local_blk_len)) then
363 deallocate(gs%local_blk_len)
364 end if
365
366 if (allocated(gs%local_blk_off)) then
367 deallocate(gs%local_blk_off)
368 end if
369
370 if (allocated(gs%shared_gs)) then
371 deallocate(gs%shared_gs)
372 end if
373
374 if (allocated(gs%shared_dof_gs)) then
375 deallocate(gs%shared_dof_gs)
376 end if
377
378 if (allocated(gs%shared_gs_dof)) then
379 deallocate(gs%shared_gs_dof)
380 end if
381
382 if (allocated(gs%shared_blk_len)) then
383 deallocate(gs%shared_blk_len)
384 end if
385
386 if (allocated(gs%shared_blk_off)) then
387 deallocate(gs%shared_blk_off)
388 end if
389
390 gs%nlocal = 0
391 gs%nshared = 0
392 gs%nlocal_blks = 0
393 gs%nshared_blks = 0
394
395 call gs%shared_dofs%free()
396
397 if (allocated(gs%bcknd)) then
398 call gs%bcknd%free()
399 deallocate(gs%bcknd)
400 end if
401
402 if (allocated(gs%comm)) then
403 call gs%comm%free()
404 deallocate(gs%comm)
405 end if
406
407 end subroutine gs_free
408
410 subroutine gs_init_mapping(gs)
411 type(gs_t), target, intent(inout) :: gs
412 type(mesh_t), pointer :: msh
413 type(dofmap_t), pointer :: dofmap
414 type(stack_i4_t), target :: local_dof, dof_local, shared_dof, dof_shared
415 type(stack_i4_t), target :: local_face_dof, face_dof_local
416 type(stack_i4_t), target :: shared_face_dof, face_dof_shared
417 integer :: i, j, k, l, lx, ly, lz, max_id, max_sid, id, lid, dm_size
418 type(htable_i8_t) :: dm
419 type(htable_i8_t), pointer :: sdm
420
421 dofmap => gs%dofmap
422 msh => dofmap%msh
423 sdm => gs%shared_dofs
424
425 lx = dofmap%Xh%lx
426 ly = dofmap%Xh%ly
427 lz = dofmap%Xh%lz
428 dm_size = dofmap%size()/lx
429
430 call dm%init(dm_size, i)
434 call sdm%init(dofmap%size(), i)
435
436
437 call local_dof%init()
438 call dof_local%init()
439
440 call local_face_dof%init()
441 call face_dof_local%init()
442
443 call shared_dof%init()
444 call dof_shared%init()
445
446 call shared_face_dof%init()
447 call face_dof_shared%init()
448
449 !
450 ! Setup mapping for dofs points
451 !
452
453 max_id = 0
454 max_sid = 0
455 do i = 1, msh%nelv
456 ! Local id of vertices
457 lid = linear_index(1, 1, 1, i, lx, ly, lz)
458 ! Check if this dof is shared among ranks or not
459 if (dofmap%shared_dof(1, 1, 1, i)) then
460 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, 1, i), max_sid)
461 !If add unique gather-scatter id to shared_dof stack
462 call shared_dof%push(id)
463 !If add local id to dof_shared stack
464 call dof_shared%push(lid)
465 !Now we have the mapping of local id <-> gather scatter id!
466 else
467 ! Same here, only here we know the point is local
468 ! It will as such not need to be sent to other ranks later
469 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, 1, i), max_id)
470 call local_dof%push(id)
471 call dof_local%push(lid)
472 end if
473 ! This procedure is then repeated for all vertices and edges
474 ! Facets can be treated a little bit differently since they only have one
475 ! neighbor
476
477 lid = linear_index(lx, 1, 1, i, lx, ly, lz)
478 if (dofmap%shared_dof(lx, 1, 1, i)) then
479 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, 1, i), max_sid)
480 call shared_dof%push(id)
481 call dof_shared%push(lid)
482 else
483 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, 1, i), max_id)
484 call local_dof%push(id)
485 call dof_local%push(lid)
486 end if
487
488 lid = linear_index(1, ly, 1, i, lx, ly, lz)
489 if (dofmap%shared_dof(1, ly, 1, i)) then
490 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, 1, i), max_sid)
491 call shared_dof%push(id)
492 call dof_shared%push(lid)
493 else
494 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, 1, i), max_id)
495 call local_dof%push(id)
496 call dof_local%push(lid)
497 end if
498
499 lid = linear_index(lx, ly, 1, i, lx, ly, lz)
500 if (dofmap%shared_dof(lx, ly, 1, i)) then
501 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, 1, i), max_sid)
502 call shared_dof%push(id)
503 call dof_shared%push(lid)
504 else
505 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, 1, i), max_id)
506 call local_dof%push(id)
507 call dof_local%push(lid)
508 end if
509 if (lz .gt. 1) then
510 lid = linear_index(1, 1, lz, i, lx, ly, lz)
511 if (dofmap%shared_dof(1, 1, lz, i)) then
512 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, lz, i), max_sid)
513 call shared_dof%push(id)
514 call dof_shared%push(lid)
515 else
516 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, lz, i), max_id)
517 call local_dof%push(id)
518 call dof_local%push(lid)
519 end if
520
521 lid = linear_index(lx, 1, lz, i, lx, ly, lz)
522 if (dofmap%shared_dof(lx, 1, lz, i)) then
523 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, lz, i), max_sid)
524 call shared_dof%push(id)
525 call dof_shared%push(lid)
526 else
527 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, lz, i), max_id)
528 call local_dof%push(id)
529 call dof_local%push(lid)
530 end if
531
532 lid = linear_index(1, ly, lz, i, lx, ly, lz)
533 if (dofmap%shared_dof(1, ly, lz, i)) then
534 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, lz, i), max_sid)
535 call shared_dof%push(id)
536 call dof_shared%push(lid)
537 else
538 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, lz, i), max_id)
539 call local_dof%push(id)
540 call dof_local%push(lid)
541 end if
542
543 lid = linear_index(lx, ly, lz, i, lx, ly, lz)
544 if (dofmap%shared_dof(lx, ly, lz, i)) then
545 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, lz, i), max_sid)
546 call shared_dof%push(id)
547 call dof_shared%push(lid)
548 else
549 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, lz, i), max_id)
550 call local_dof%push(id)
551 call dof_local%push(lid)
552 end if
553 end if
554 end do
555
556 ! Clear local dofmap table
557 call dm%clear()
558 ! Get gather scatter ids and local ids of edges
559 if (lz .gt. 1) then
560 !
561 ! Setup mapping for dofs on edges
562 !
563 do i = 1, msh%nelv
564
565 !
566 ! dofs on edges in x-direction
567 !
568 if (dofmap%shared_dof(2, 1, 1, i)) then
569 do j = 2, lx - 1
570 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, 1, i), max_sid)
571 call shared_dof%push(id)
572 id = linear_index(j, 1, 1, i, lx, ly, lz)
573 call dof_shared%push(id)
574 end do
575 else
576 do j = 2, lx - 1
577 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, 1, i), max_id)
578 call local_dof%push(id)
579 id = linear_index(j, 1, 1, i, lx, ly, lz)
580 call dof_local%push(id)
581 end do
582 end if
583 if (dofmap%shared_dof(2, 1, lz, i)) then
584 do j = 2, lx - 1
585 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, lz, i), max_sid)
586 call shared_dof%push(id)
587 id = linear_index(j, 1, lz, i, lx, ly, lz)
588 call dof_shared%push(id)
589 end do
590 else
591 do j = 2, lx - 1
592 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, lz, i), max_id)
593 call local_dof%push(id)
594 id = linear_index(j, 1, lz, i, lx, ly, lz)
595 call dof_local%push(id)
596 end do
597 end if
598
599 if (dofmap%shared_dof(2, ly, 1, i)) then
600 do j = 2, lx - 1
601 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, 1, i), max_sid)
602 call shared_dof%push(id)
603 id = linear_index(j, ly, 1, i, lx, ly, lz)
604 call dof_shared%push(id)
605 end do
606
607 else
608 do j = 2, lx - 1
609 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
610 call local_dof%push(id)
611 id = linear_index(j, ly, 1, i, lx, ly, lz)
612 call dof_local%push(id)
613 end do
614 end if
615 if (dofmap%shared_dof(2, ly, lz, i)) then
616 do j = 2, lx - 1
617 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, lz, i), max_sid)
618 call shared_dof%push(id)
619 id = linear_index(j, ly, lz, i, lx, ly, lz)
620 call dof_shared%push(id)
621 end do
622 else
623 do j = 2, lx - 1
624 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, lz, i), max_id)
625 call local_dof%push(id)
626 id = linear_index(j, ly, lz, i, lx, ly, lz)
627 call dof_local%push(id)
628 end do
629 end if
630
631 !
632 ! dofs on edges in y-direction
633 !
634 if (dofmap%shared_dof(1, 2, 1, i)) then
635 do k = 2, ly - 1
636 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, 1, i), max_sid)
637 call shared_dof%push(id)
638 id = linear_index(1, k, 1, i, lx, ly, lz)
639 call dof_shared%push(id)
640 end do
641 else
642 do k = 2, ly - 1
643 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, 1, i), max_id)
644 call local_dof%push(id)
645 id = linear_index(1, k, 1, i, lx, ly, lz)
646 call dof_local%push(id)
647 end do
648 end if
649 if (dofmap%shared_dof(1, 2, lz, i)) then
650 do k = 2, ly - 1
651 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, lz, i), max_sid)
652 call shared_dof%push(id)
653 id = linear_index(1, k, lz, i, lx, ly, lz)
654 call dof_shared%push(id)
655 end do
656 else
657 do k = 2, ly - 1
658 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, lz, i), max_id)
659 call local_dof%push(id)
660 id = linear_index(1, k, lz, i, lx, ly, lz)
661 call dof_local%push(id)
662 end do
663 end if
664
665 if (dofmap%shared_dof(lx, 2, 1, i)) then
666 do k = 2, ly - 1
667 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, 1, i), max_sid)
668 call shared_dof%push(id)
669 id = linear_index(lx, k, 1, i, lx, ly, lz)
670 call dof_shared%push(id)
671 end do
672 else
673 do k = 2, ly - 1
674 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, 1, i), max_id)
675 call local_dof%push(id)
676 id = linear_index(lx, k, 1, i, lx, ly, lz)
677 call dof_local%push(id)
678 end do
679 end if
680 if (dofmap%shared_dof(lx, 2, lz, i)) then
681 do k = 2, ly - 1
682 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, lz, i), max_sid)
683 call shared_dof%push(id)
684 id = linear_index(lx, k, lz, i, lx, ly, lz)
685 call dof_shared%push(id)
686 end do
687 else
688 do k = 2, ly - 1
689 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, lz, i), max_id)
690 call local_dof%push(id)
691 id = linear_index(lx, k, lz, i, lx, ly, lz)
692 call dof_local%push(id)
693 end do
694 end if
695 !
696 ! dofs on edges in z-direction
697 !
698 if (dofmap%shared_dof(1, 1, 2, i)) then
699 do l = 2, lz - 1
700 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, l, i), max_sid)
701 call shared_dof%push(id)
702 id = linear_index(1, 1, l, i, lx, ly, lz)
703 call dof_shared%push(id)
704 end do
705 else
706 do l = 2, lz - 1
707 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, l, i), max_id)
708 call local_dof%push(id)
709 id = linear_index(1, 1, l, i, lx, ly, lz)
710 call dof_local%push(id)
711 end do
712 end if
713
714 if (dofmap%shared_dof(lx, 1, 2, i)) then
715 do l = 2, lz - 1
716 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, l, i), max_sid)
717 call shared_dof%push(id)
718 id = linear_index(lx, 1, l, i, lx, ly, lz)
719 call dof_shared%push(id)
720 end do
721 else
722 do l = 2, lz - 1
723 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, l, i), max_id)
724 call local_dof%push(id)
725 id = linear_index(lx, 1, l, i, lx, ly, lz)
726 call dof_local%push(id)
727 end do
728 end if
729
730 if (dofmap%shared_dof(1, ly, 2, i)) then
731 do l = 2, lz - 1
732 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, l, i), max_sid)
733 call shared_dof%push(id)
734 id = linear_index(1, ly, l, i, lx, ly, lz)
735 call dof_shared%push(id)
736 end do
737 else
738 do l = 2, lz - 1
739 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, l, i), max_id)
740 call local_dof%push(id)
741 id = linear_index(1, ly, l, i, lx, ly, lz)
742 call dof_local%push(id)
743 end do
744 end if
745
746 if (dofmap%shared_dof(lx, ly, 2, i)) then
747 do l = 2, lz - 1
748 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, l, i), max_sid)
749 call shared_dof%push(id)
750 id = linear_index(lx, ly, l, i, lx, ly, lz)
751 call dof_shared%push(id)
752 end do
753 else
754 do l = 2, lz - 1
755 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, l, i), max_id)
756 call local_dof%push(id)
757 id = linear_index(lx, ly, l, i, lx, ly, lz)
758 call dof_local%push(id)
759 end do
760 end if
761 end do
762 end if
763
764 ! Clear local dofmap table
765 call dm%clear()
766
767 !
768 ! Setup mapping for dofs on facets
769 !
770 ! This is for 2d
771 if (lz .eq. 1) then
772 do i = 1, msh%nelv
773
774 !
775 ! dofs on edges in x-direction
776 !
777 if (msh%facet_neigh(3, i) .ne. 0) then
778 if (dofmap%shared_dof(2, 1, 1, i)) then
779 do j = 2, lx - 1
780 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, 1, i), max_sid)
781 call shared_face_dof%push(id)
782 id = linear_index(j, 1, 1, i, lx, ly, lz)
783 call face_dof_shared%push(id)
784 end do
785 else
786 do j = 2, lx - 1
787 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, 1, i), max_id)
788 call local_face_dof%push(id)
789 id = linear_index(j, 1, 1, i, lx, ly, lz)
790 call face_dof_local%push(id)
791 end do
792 end if
793 end if
794
795 if (msh%facet_neigh(4, i) .ne. 0) then
796 if (dofmap%shared_dof(2, ly, 1, i)) then
797 do j = 2, lx - 1
798 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, 1, i), &
799 max_sid)
800 call shared_face_dof%push(id)
801 id = linear_index(j, ly, 1, i, lx, ly, lz)
802 call face_dof_shared%push(id)
803 end do
804
805 else
806 do j = 2, lx - 1
807 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), &
808 max_id)
809 call local_face_dof%push(id)
810 id = linear_index(j, ly, 1, i, lx, ly, lz)
811 call face_dof_local%push(id)
812 end do
813 end if
814 end if
815
816 !
817 ! dofs on edges in y-direction
818 !
819 if (msh%facet_neigh(1, i) .ne. 0) then
820 if (dofmap%shared_dof(1, 2, 1, i)) then
821 do k = 2, ly - 1
822 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, 1, i), max_sid)
823 call shared_face_dof%push(id)
824 id = linear_index(1, k, 1, i, lx, ly, lz)
825 call face_dof_shared%push(id)
826 end do
827 else
828 do k = 2, ly - 1
829 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, 1, i), max_id)
830 call local_face_dof%push(id)
831 id = linear_index(1, k, 1, i, lx, ly, lz)
832 call face_dof_local%push(id)
833 end do
834 end if
835 end if
836
837 if (msh%facet_neigh(2, i) .ne. 0) then
838 if (dofmap%shared_dof(lx, 2, 1, i)) then
839 do k = 2, ly - 1
840 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, 1, i), &
841 max_sid)
842 call shared_face_dof%push(id)
843 id = linear_index(lx, k, 1, i, lx, ly, lz)
844 call face_dof_shared%push(id)
845 end do
846 else
847 do k = 2, ly - 1
848 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, 1, i), &
849 max_id)
850 call local_face_dof%push(id)
851 id = linear_index(lx, k, 1, i, lx, ly, lz)
852 call face_dof_local%push(id)
853 end do
854 end if
855 end if
856 end do
857 else
858 do i = 1, msh%nelv
859
860 ! Facets in x-direction (s, t)-plane
861 if (msh%facet_neigh(1, i) .ne. 0) then
862 if (dofmap%shared_dof(1, 2, 2, i)) then
863 do l = 2, lz - 1
864 do k = 2, ly - 1
865 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, l, i), &
866 max_sid)
867 call shared_face_dof%push(id)
868 id = linear_index(1, k, l, i, lx, ly, lz)
869 call face_dof_shared%push(id)
870 end do
871 end do
872 else
873 do l = 2, lz - 1
874 do k = 2, ly - 1
875 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, l, i), &
876 max_id)
877 call local_face_dof%push(id)
878 id = linear_index(1, k, l, i, lx, ly, lz)
879 call face_dof_local%push(id)
880 end do
881 end do
882 end if
883 end if
884
885 if (msh%facet_neigh(2, i) .ne. 0) then
886 if (dofmap%shared_dof(lx, 2, 2, i)) then
887 do l = 2, lz - 1
888 do k = 2, ly - 1
889 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, l, i), &
890 max_sid)
891 call shared_face_dof%push(id)
892 id = linear_index(lx, k, l, i, lx, ly, lz)
893 call face_dof_shared%push(id)
894 end do
895 end do
896 else
897 do l = 2, lz - 1
898 do k = 2, ly - 1
899 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, l, i), &
900 max_id)
901 call local_face_dof%push(id)
902 id = linear_index(lx, k, l, i, lx, ly, lz)
903 call face_dof_local%push(id)
904 end do
905 end do
906 end if
907 end if
908
909 ! Facets in y-direction (r, t)-plane
910 if (msh%facet_neigh(3, i) .ne. 0) then
911 if (dofmap%shared_dof(2, 1, 2, i)) then
912 do l = 2, lz - 1
913 do j = 2, lx - 1
914 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, l, i), &
915 max_sid)
916 call shared_face_dof%push(id)
917 id = linear_index(j, 1, l, i, lx, ly, lz)
918 call face_dof_shared%push(id)
919 end do
920 end do
921 else
922 do l = 2, lz - 1
923 do j = 2, lx - 1
924 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, l, i), &
925 max_id)
926 call local_face_dof%push(id)
927 id = linear_index(j, 1, l, i, lx, ly, lz)
928 call face_dof_local%push(id)
929 end do
930 end do
931 end if
932 end if
933
934 if (msh%facet_neigh(4, i) .ne. 0) then
935 if (dofmap%shared_dof(2, ly, 2, i)) then
936 do l = 2, lz - 1
937 do j = 2, lx - 1
938 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, l, i), &
939 max_sid)
940 call shared_face_dof%push(id)
941 id = linear_index(j, ly, l, i, lx, ly, lz)
942 call face_dof_shared%push(id)
943 end do
944 end do
945 else
946 do l = 2, lz - 1
947 do j = 2, lx - 1
948 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, l, i), &
949 max_id)
950 call local_face_dof%push(id)
951 id = linear_index(j, ly, l, i, lx, ly, lz)
952 call face_dof_local%push(id)
953 end do
954 end do
955 end if
956 end if
957
958 ! Facets in z-direction (r, s)-plane
959 if (msh%facet_neigh(5, i) .ne. 0) then
960 if (dofmap%shared_dof(2, 2, 1, i)) then
961 do k = 2, ly - 1
962 do j = 2, lx - 1
963 id = gs_mapping_add_dof(sdm, dofmap%dof(j, k, 1, i), &
964 max_sid)
965 call shared_face_dof%push(id)
966 id = linear_index(j, k, 1, i, lx, ly, lz)
967 call face_dof_shared%push(id)
968 end do
969 end do
970 else
971 do k = 2, ly - 1
972 do j = 2, lx - 1
973 id = gs_mapping_add_dof(dm, dofmap%dof(j, k, 1, i), &
974 max_id)
975 call local_face_dof%push(id)
976 id = linear_index(j, k, 1, i, lx, ly, lz)
977 call face_dof_local%push(id)
978 end do
979 end do
980 end if
981 end if
982
983 if (msh%facet_neigh(6, i) .ne. 0) then
984 if (dofmap%shared_dof(2, 2, lz, i)) then
985 do k = 2, ly - 1
986 do j = 2, lx - 1
987 id = gs_mapping_add_dof(sdm, dofmap%dof(j, k, lz, i), &
988 max_sid)
989 call shared_face_dof%push(id)
990 id = linear_index(j, k, lz, i, lx, ly, lz)
991 call face_dof_shared%push(id)
992 end do
993 end do
994 else
995 do k = 2, ly - 1
996 do j = 2, lx - 1
997 id = gs_mapping_add_dof(dm, dofmap%dof(j, k, lz, i), &
998 max_id)
999 call local_face_dof%push(id)
1000 id = linear_index(j, k, lz, i, lx, ly, lz)
1001 call face_dof_local%push(id)
1002 end do
1003 end do
1004 end if
1005 end if
1006 end do
1007 end if
1008
1009
1010 call dm%free()
1011
1012 gs%nlocal = local_dof%size() + local_face_dof%size()
1013 gs%local_facet_offset = local_dof%size() + 1
1014
1015 ! Finalize local dof to gather-scatter index
1016 allocate(gs%local_dof_gs(gs%nlocal))
1017
1018 ! Add dofs on points and edges
1019
1020 ! We should use the %array() procedure, which works great for
1021 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1022 ! certain data types
1023 select type (dof_array => local_dof%data)
1024 type is (integer)
1025 j = local_dof%size()
1026 do i = 1, j
1027 gs%local_dof_gs(i) = dof_array(i)
1028 end do
1029 end select
1030 call local_dof%free()
1031
1032 ! Add dofs on faces
1033
1034 ! We should use the %array() procedure, which works great for
1035 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1036 ! certain data types
1037 select type (dof_array => local_face_dof%data)
1038 type is (integer)
1039 do i = 1, local_face_dof%size()
1040 gs%local_dof_gs(i + j) = dof_array(i)
1041 end do
1042 end select
1043 call local_face_dof%free()
1044
1045 ! Finalize local gather-scatter index to dof
1046 allocate(gs%local_gs_dof(gs%nlocal))
1047
1048 ! Add gather-scatter index on points and edges
1049
1050 ! We should use the %array() procedure, which works great for
1051 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1052 ! certain data types
1053 select type (dof_array => dof_local%data)
1054 type is (integer)
1055 j = dof_local%size()
1056 do i = 1, j
1057 gs%local_gs_dof(i) = dof_array(i)
1058 end do
1059 end select
1060 call dof_local%free()
1061
1062 ! We should use the %array() procedure, which works great for
1063 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1064 ! certain data types
1065 select type (dof_array => face_dof_local%data)
1066 type is (integer)
1067 do i = 1, face_dof_local%size()
1068 gs%local_gs_dof(i+j) = dof_array(i)
1069 end do
1070 end select
1071 call face_dof_local%free()
1072
1073 call gs_qsort_dofmap(gs%local_dof_gs, gs%local_gs_dof, &
1074 gs%nlocal, 1, gs%nlocal)
1075
1076 call gs_find_blks(gs%local_dof_gs, gs%local_blk_len, &
1077 gs%local_blk_off, gs%nlocal_blks, gs%nlocal, gs%local_facet_offset)
1078
1079 ! Allocate buffer for local gs-ops
1080 allocate(gs%local_gs(gs%nlocal))
1081
1082 gs%nshared = shared_dof%size() + shared_face_dof%size()
1083 gs%shared_facet_offset = shared_dof%size() + 1
1084
1085 ! Finalize shared dof to gather-scatter index
1086 allocate(gs%shared_dof_gs(gs%nshared))
1087
1088 ! Add shared dofs on points and edges
1089
1090 ! We should use the %array() procedure, which works great for
1091 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1092 ! certain data types
1093 select type (dof_array => shared_dof%data)
1094 type is (integer)
1095 j = shared_dof%size()
1096 do i = 1, j
1097 gs%shared_dof_gs(i) = dof_array(i)
1098 end do
1099 end select
1100 call shared_dof%free()
1101
1102 ! Add shared dofs on faces
1103
1104 ! We should use the %array() procedure, which works great for
1105 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1106 ! certain data types
1107 select type (dof_array => shared_face_dof%data)
1108 type is (integer)
1109 do i = 1, shared_face_dof%size()
1110 gs%shared_dof_gs(i + j) = dof_array(i)
1111 end do
1112 end select
1113 call shared_face_dof%free()
1114
1115 ! Finalize shared gather-scatter index to dof
1116 allocate(gs%shared_gs_dof(gs%nshared))
1117
1118 ! Add dofs on points and edges
1119
1120 ! We should use the %array() procedure, which works great for
1121 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1122 ! certain data types
1123 select type (dof_array => dof_shared%data)
1124 type is (integer)
1125 j = dof_shared%size()
1126 do i = 1, j
1127 gs%shared_gs_dof(i) = dof_array(i)
1128 end do
1129 end select
1130 call dof_shared%free()
1131
1132 ! We should use the %array() procedure, which works great for
1133 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1134 ! certain data types
1135 select type (dof_array => face_dof_shared%data)
1136 type is (integer)
1137 do i = 1, face_dof_shared%size()
1138 gs%shared_gs_dof(i + j) = dof_array(i)
1139 end do
1140 end select
1141 call face_dof_shared%free()
1142
1143 ! Allocate buffer for shared gs-ops
1144 allocate(gs%shared_gs(gs%nshared))
1145
1146 if (gs%nshared .gt. 0) then
1147 call gs_qsort_dofmap(gs%shared_dof_gs, gs%shared_gs_dof, &
1148 gs%nshared, 1, gs%nshared)
1149
1150 call gs_find_blks(gs%shared_dof_gs, gs%shared_blk_len, &
1151 gs%shared_blk_off, gs%nshared_blks, gs%nshared, &
1152 gs%shared_facet_offset)
1153 end if
1154
1155 contains
1156
1165 function gs_mapping_add_dof(map_, dof, max_id) result(id)
1166 type(htable_i8_t), intent(inout) :: map_
1167 integer(kind=i8), intent(inout) :: dof
1168 integer, intent(inout) :: max_id
1169 integer :: id
1170
1171 if (map_%get(dof, id) .gt. 0) then
1172 max_id = max_id + 1
1173 call map_%set(dof, max_id)
1174 id = max_id
1175 end if
1176
1177 end function gs_mapping_add_dof
1178
1180 recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
1181 integer, intent(inout) :: n
1182 integer, dimension(n), intent(inout) :: dg
1183 integer, dimension(n), intent(inout) :: gd
1184 integer :: lo, hi
1185 integer :: tmp, i, j, pivot
1186
1187 i = lo - 1
1188 j = hi + 1
1189 pivot = dg((lo + hi) / 2)
1190 do
1191 do
1192 i = i + 1
1193 if (dg(i) .ge. pivot) exit
1194 end do
1195
1196 do
1197 j = j - 1
1198 if (dg(j) .le. pivot) exit
1199 end do
1200
1201 if (i .lt. j) then
1202 tmp = dg(i)
1203 dg(i) = dg(j)
1204 dg(j) = tmp
1205
1206 tmp = gd(i)
1207 gd(i) = gd(j)
1208 gd(j) = tmp
1209 else if (i .eq. j) then
1210 i = i + 1
1211 exit
1212 else
1213 exit
1214 end if
1215 end do
1216 if (lo .lt. j) call gs_qsort_dofmap(dg, gd, n, lo, j)
1217 if (i .lt. hi) call gs_qsort_dofmap(dg, gd, n, i, hi)
1218
1219 end subroutine gs_qsort_dofmap
1220
1222 subroutine gs_find_blks(dg, blk_len, blk_off, nblks, n, m)
1223 integer, intent(in) :: n
1224 integer, intent(in) :: m
1225 integer, dimension(n), intent(inout) :: dg
1226 integer, allocatable, intent(inout) :: blk_len(:)
1227 integer, allocatable, intent(inout) :: blk_off(:)
1228 integer, intent(inout) :: nblks
1229 integer :: i, j
1230 integer :: id, count
1231 type(stack_i4_t), target :: blks
1232
1233 call blks%init()
1234 i = 1
1235 do while (i .lt. m)
1236 id = dg(i)
1237 count = 1
1238 j = i
1239 do while ( j+1 .le. n .and. dg(j+1) .eq. id)
1240 j = j + 1
1241 count = count + 1
1242 end do
1243 call blks%push(count)
1244 i = j + 1
1245 end do
1246
1247 select type (blk_array => blks%data)
1248 type is (integer)
1249 nblks = blks%size()
1250 allocate(blk_len(nblks))
1251 do i = 1, nblks
1252 blk_len(i) = blk_array(i)
1253 end do
1254 allocate(blk_off(nblks))
1255 blk_off(1) = 0
1256 do i = 2, nblks
1257 blk_off(i) = blk_off(i - 1) + blk_len(i - 1)
1258 end do
1259 end select
1260 call blks%free()
1261
1262 end subroutine gs_find_blks
1263
1264 end subroutine gs_init_mapping
1265
1267 subroutine gs_schedule(gs)
1268 type(gs_t), target, intent(inout) :: gs
1269 integer(kind=i8), allocatable :: send_buf(:), recv_buf(:)
1270 integer(kind=i2), allocatable :: shared_flg(:), recv_flg(:)
1271 type(htable_iter_i8_t) :: it
1272 type(stack_i4_t) :: send_pe, recv_pe
1273 type(mpi_status) :: status
1274 type(mpi_request) :: send_req, recv_req
1275 integer :: i, j, max_recv, src, dst, ierr, n_recv
1276 integer :: tmp, shared_gs_id
1277 integer :: nshared_unique
1278
1279 nshared_unique = gs%shared_dofs%num_entries()
1280
1281 call it%init(gs%shared_dofs)
1282 allocate(send_buf(nshared_unique))
1283 i = 1
1284 do while (it%next())
1285 send_buf(i) = it%key()
1286 i = i + 1
1287 end do
1288
1289 call send_pe%init()
1290 call recv_pe%init()
1291
1292
1293 !
1294 ! Schedule exchange of shared dofs
1295 !
1296
1297 call mpi_allreduce(nshared_unique, max_recv, 1, &
1298 mpi_integer, mpi_max, neko_comm, ierr)
1299
1300 allocate(recv_buf(max_recv))
1301 allocate(shared_flg(max_recv))
1302 allocate(recv_flg(max_recv))
1303
1305 do i = 1, size(gs%dofmap%msh%neigh_order)
1306 src = modulo(pe_rank - gs%dofmap%msh%neigh_order(i) + pe_size, pe_size)
1307 dst = modulo(pe_rank + gs%dofmap%msh%neigh_order(i), pe_size)
1308
1309 if (gs%dofmap%msh%neigh(src)) then
1310 call mpi_irecv(recv_buf, max_recv, mpi_integer8, &
1311 src, 0, neko_comm, recv_req, ierr)
1312 end if
1313
1314 if (gs%dofmap%msh%neigh(dst)) then
1315 call mpi_isend(send_buf, nshared_unique, mpi_integer8, &
1316 dst, 0, neko_comm, send_req, ierr)
1317 end if
1318
1319 if (gs%dofmap%msh%neigh(src)) then
1320 call mpi_wait(recv_req, status, ierr)
1321 call mpi_get_count(status, mpi_integer8, n_recv, ierr)
1322
1323 do j = 1, n_recv
1324 shared_flg(j) = gs%shared_dofs%get(recv_buf(j), shared_gs_id)
1325 if (shared_flg(j) .eq. 0) then
1327 call gs%comm%recv_dof(src)%push(shared_gs_id)
1328 end if
1329 end do
1330
1331 if (gs%comm%recv_dof(src)%size() .gt. 0) then
1332 call recv_pe%push(src)
1333 end if
1334 end if
1335
1336 if (gs%dofmap%msh%neigh(dst)) then
1337 call mpi_wait(send_req, mpi_status_ignore, ierr)
1338 call mpi_irecv(recv_flg, max_recv, mpi_integer2, &
1339 dst, 0, neko_comm, recv_req, ierr)
1340 end if
1341
1342 if (gs%dofmap%msh%neigh(src)) then
1343 call mpi_isend(shared_flg, n_recv, mpi_integer2, &
1344 src, 0, neko_comm, send_req, ierr)
1345 end if
1346
1347 if (gs%dofmap%msh%neigh(dst)) then
1348 call mpi_wait(recv_req, status, ierr)
1349 call mpi_get_count(status, mpi_integer2, n_recv, ierr)
1350
1351 do j = 1, n_recv
1352 if (recv_flg(j) .eq. 0) then
1353 tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id)
1355 call gs%comm%send_dof(dst)%push(shared_gs_id)
1356 end if
1357 end do
1358
1359 if (gs%comm%send_dof(dst)%size() .gt. 0) then
1360 call send_pe%push(dst)
1361 end if
1362 end if
1363
1364 if (gs%dofmap%msh%neigh(src)) then
1365 call mpi_wait(send_req, mpi_status_ignore, ierr)
1366 end if
1367
1368 end do
1369
1370 call gs%comm%init(send_pe, recv_pe)
1371
1372 call send_pe%free()
1373 call recv_pe%free()
1374
1375 deallocate(send_buf)
1376 deallocate(recv_flg)
1377 deallocate(shared_flg)
1378 !This arrays seems to take massive amounts of memory...
1379 call gs%shared_dofs%free()
1380
1381 end subroutine gs_schedule
1382
1384 subroutine gs_op_fld(gs, u, op, event)
1385 class(gs_t), intent(inout) :: gs
1386 type(field_t), intent(inout) :: u
1387 type(c_ptr), optional, intent(inout) :: event
1388 integer :: n, op
1389
1390 n = u%msh%nelv * u%Xh%lx * u%Xh%ly * u%Xh%lz
1391 if (present(event)) then
1392 call gs_op_vector(gs, u%x, n, op, event)
1393 else
1394 call gs_op_vector(gs, u%x, n, op)
1395 end if
1396
1397 end subroutine gs_op_fld
1398
1400 subroutine gs_op_r4(gs, u, n, op, event)
1401 class(gs_t), intent(inout) :: gs
1402 integer, intent(in) :: n
1403 real(kind=rp), contiguous, dimension(:,:,:,:), intent(inout) :: u
1404 type(c_ptr), optional, intent(inout) :: event
1405 integer :: op
1406
1407 if (present(event)) then
1408 call gs_op_vector(gs, u, n, op, event)
1409 else
1410 call gs_op_vector(gs, u, n, op)
1411 end if
1412
1413 end subroutine gs_op_r4
1414
1416 subroutine gs_op_vector(gs, u, n, op, event)
1417 class(gs_t), intent(inout) :: gs
1418 integer, intent(in) :: n
1419 real(kind=rp), dimension(n), intent(inout) :: u
1420 type(c_ptr), optional, intent(inout) :: event
1421 integer :: m, l, op, lo, so
1422
1423 lo = gs%local_facet_offset
1424 so = -gs%shared_facet_offset
1425 m = gs%nlocal
1426 l = gs%nshared
1427
1428 call profiler_start_region("gather_scatter", 5)
1429 ! Gather shared dofs
1430 if (pe_size .gt. 1) then
1431 call profiler_start_region("gs_nbrecv", 13)
1432 call gs%comm%nbrecv()
1433 call profiler_end_region("gs_nbrecv", 13)
1434 call profiler_start_region("gs_gather_shared", 14)
1435 call gs%bcknd%gather(gs%shared_gs, l, so, gs%shared_dof_gs, u, n, &
1436 gs%shared_gs_dof, gs%nshared_blks, gs%shared_blk_len, &
1437 gs%shared_blk_off, op, .true.)
1438 call profiler_end_region("gs_gather_shared", 14)
1439 call profiler_start_region("gs_nbsend", 6)
1440 call gs%comm%nbsend(gs%shared_gs, l, &
1441 gs%bcknd%gather_event, gs%bcknd%gs_stream)
1442 call profiler_end_region("gs_nbsend", 6)
1443
1444 end if
1445
1446 ! Gather-scatter local dofs
1447 call profiler_start_region("gs_local", 12)
1448 call gs%bcknd%gather(gs%local_gs, m, lo, gs%local_dof_gs, u, n, &
1449 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, gs%local_blk_off, &
1450 op, .false.)
1451 call gs%bcknd%scatter(gs%local_gs, m, gs%local_dof_gs, u, n, &
1452 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, gs%local_blk_off, &
1453 .false., c_null_ptr)
1454 call profiler_end_region("gs_local", 12)
1455 ! Scatter shared dofs
1456 if (pe_size .gt. 1) then
1457 call profiler_start_region("gs_nbwait", 7)
1458 call gs%comm%nbwait(gs%shared_gs, l, op, gs%bcknd%gs_stream)
1459 call profiler_end_region("gs_nbwait", 7)
1460 call profiler_start_region("gs_scatter_shared", 15)
1461 if (present(event)) then
1462 call gs%bcknd%scatter(gs%shared_gs, l,&
1463 gs%shared_dof_gs, u, n, &
1464 gs%shared_gs_dof, gs%nshared_blks, &
1465 gs%shared_blk_len, gs%shared_blk_off, .true., event)
1466 else
1467 call gs%bcknd%scatter(gs%shared_gs, l,&
1468 gs%shared_dof_gs, u, n, &
1469 gs%shared_gs_dof, gs%nshared_blks, &
1470 gs%shared_blk_len, gs%shared_blk_off, .true., c_null_ptr)
1471 end if
1472 call profiler_end_region("gs_scatter_shared", 15)
1473 end if
1474
1475 call profiler_end_region("gather_scatter", 5)
1476
1477 end subroutine gs_op_vector
1478
1479end module gather_scatter
recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
Sort the dof lists based on the dof to gather-scatter list.
subroutine gs_find_blks(dg, blk_len, blk_off, nblks, n, m)
Find blocks sharing dofs in non-facet data.
integer function gs_mapping_add_dof(map_, dof, max_id)
Register a unique dof Takes the unique id dof and checks if it is in the htable map_ If it is we retu...
Deassociate a Fortran array from a device pointer.
Definition device.F90:95
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Synchronize a device or stream.
Definition device.F90:107
Definition comm.F90:1
integer, public pe_size
MPI size of communicator.
Definition comm.F90:59
integer, public pe_rank
MPI rank.
Definition comm.F90:56
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Defines a field.
Definition field.f90:34
Gather-scatter.
subroutine gs_init_mapping(gs)
Setup mapping of dofs to gather-scatter operations.
subroutine gs_schedule(gs)
Schedule shared gather-scatter operations.
subroutine gs_free(gs)
Deallocate a gather-scatter kernel.
subroutine gs_op_r4(gs, u, n, op, event)
Gather-scatter operation on a rank 4 array.
subroutine gs_init(gs, dofmap, bcknd, comm_bcknd)
Initialize a gather-scatter kernel.
subroutine gs_op_vector(gs, u, n, op, event)
Gather-scatter operation on a vector u with op op.
subroutine gs_op_fld(gs, u, op, event)
Gather-scatter operation on a field u with op op.
Defines a gather-scatter backend.
Definition gs_bcknd.f90:34
integer, parameter, public gs_bcknd_cpu
Definition gs_bcknd.f90:40
integer, parameter, public gs_bcknd_sx
Definition gs_bcknd.f90:40
integer, parameter, public gs_bcknd_dev
Definition gs_bcknd.f90:40
Defines a gather-scatter communication method.
Definition gs_comm.f90:34
integer, parameter, public gs_comm_mpigpu
Definition gs_comm.f90:42
integer, parameter, public gs_comm_mpi
Definition gs_comm.f90:42
integer, parameter, public gs_comm_nvshmem
Definition gs_comm.f90:42
integer, parameter, public gs_comm_nccl
Definition gs_comm.f90:42
Generic Gather-scatter backend for CPUs.
Definition gs_cpu.f90:34
Defines GPU aware MPI gather-scatter communication.
Defines NCCL based gather-scatter communication.
Defines GPU aware MPI gather-scatter communication.
Generic Gather-scatter backend for accelerators.
Definition gs_device.F90:34
Defines MPI gather-scatter communication.
Definition gs_mpi.f90:34
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
integer, parameter, public gs_op_max
Definition gs_ops.f90:36
integer, parameter, public gs_op_min
Definition gs_ops.f90:36
integer, parameter, public gs_op_mul
Definition gs_ops.f90:36
Generic Gather-scatter backend for NEC Vector Engines.
Definition gs_sx.f90:34
Implements a hash table ADT.
Definition htable.f90:36
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:77
integer, parameter, public log_size
Definition log.f90:46
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_hip
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_cuda
integer, parameter, public i2
Definition num_types.f90:5
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:107
Implements a dynamic stack ADT.
Definition stack.f90:35
Utilities.
Definition utils.f90:35
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
Gather-scatter backend.
Definition gs_bcknd.f90:44
Gather-scatter communication method.
Definition gs_comm.f90:46
Gather-scatter backend for CPUs.
Definition gs_cpu.f90:43
Gather-scatter backend for offloading devices.
Definition gs_device.F90:48
Gather-scatter communication using device MPI. The arrays are indexed per PE like send_pe and @ recv_...
Gather-scatter communication using NCCL The arrays are indexed per PE like send_pe and @ recv_pe.
Gather-scatter communication using device SHMEM. The arrays are indexed per PE like send_pe and @ rec...
Gather-scatter communication using MPI.
Definition gs_mpi.f90:62
Gather-scatter backend for NEC SX-Aurora.
Definition gs_sx.f90:43
Integer*8 based hash table.
Definition htable.f90:92
Iterator for an integer*8 based hash table.
Definition htable.f90:175
Integer based stack.
Definition stack.f90:63