Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
gather_scatter.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use neko_config
37 use gs_device, only : gs_device_t
38 use gs_sx, only : gs_sx_t
39 use gs_cpu, only : gs_cpu_t
43 use gs_mpi, only : gs_mpi_t
47 use mesh, only : mesh_t
48 use comm
49 use dofmap, only : dofmap_t
50 use field, only : field_t
51 use num_types, only : rp, dp, i2, i8
53 use stack, only : stack_i4_t
54 use utils, only : neko_error, linear_index
55 use logger, only : neko_log, log_size
57 use device
58 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr
59 implicit none
60 private
61
62 type, public :: gs_t
63 real(kind=rp), allocatable :: local_gs(:)
64 integer, allocatable :: local_dof_gs(:)
65 integer, allocatable :: local_gs_dof(:)
66 integer, allocatable :: local_blk_len(:)
67 real(kind=rp), allocatable :: shared_gs(:)
68 integer, allocatable :: shared_dof_gs(:)
69 integer, allocatable :: shared_gs_dof(:)
70 integer, allocatable :: shared_blk_len(:)
71 type(dofmap_t), pointer ::dofmap
72 type(htable_i8_t) :: shared_dofs
73 integer :: nlocal
74 integer :: nshared
75 integer :: nlocal_blks
76 integer :: nshared_blks
77 integer :: local_facet_offset
78 integer :: shared_facet_offset
79 class(gs_bcknd_t), allocatable :: bcknd
80 class(gs_comm_t), allocatable :: comm
81 contains
82 procedure, private, pass(gs) :: gs_op_fld
83 procedure, private, pass(gs) :: gs_op_r4
84 procedure, pass(gs) :: gs_op_vector
85 procedure, pass(gs) :: init => gs_init
86 procedure, pass(gs) :: free => gs_free
87 generic :: op => gs_op_fld, gs_op_r4, gs_op_vector
88 end type gs_t
89
90 ! Expose available gather-scatter operation
92
93 ! Expose available gather-scatter backends
95
96 ! Expose available gather-scatter comm. backends
98
99
100contains
101
106 subroutine gs_init(gs, dofmap, bcknd, comm_bcknd)
107 class(gs_t), intent(inout) :: gs
108 type(dofmap_t), target, intent(inout) :: dofmap
109 character(len=LOG_SIZE) :: log_buf
110 character(len=20) :: bcknd_str
111 integer, optional :: bcknd, comm_bcknd
112 integer :: i, j, ierr, bcknd_, comm_bcknd_
113 integer(i8) :: glb_nshared, glb_nlocal
114 logical :: use_device_mpi, use_device_nccl, use_device_shmem, use_host_mpi
115 real(kind=rp), allocatable :: tmp(:)
116 type(c_ptr) :: tmp_d = c_null_ptr
117 integer :: strtgy(4) = (/ int(b'00'), int(b'01'), int(b'10'), int(b'11') /)
118 integer :: avg_strtgy, env_len
119 character(len=255) :: env_strtgy, env_gscomm
120 real(kind=dp) :: strtgy_time(4)
121
122 call gs%free()
123
124 call neko_log%section('Gather-Scatter')
125 ! Currently this uses the dofmap which also contains geometric information
126 ! Only connectivity/numbering of points is technically necessary for gs
127 gs%dofmap => dofmap
128
129 use_device_mpi = .false.
130 use_device_nccl = .false.
131 use_device_shmem = .false.
132 use_host_mpi = .false.
133 ! Check if a comm-backend is requested via env. variables
134 call get_environment_variable("NEKO_GS_COMM", env_gscomm, env_len)
135 if (env_len .gt. 0) then
136 if (env_gscomm(1:env_len) .eq. "MPI") then
137 use_host_mpi = .true.
138 else if (env_gscomm(1:env_len) .eq. "MPIGPU") then
139 use_device_mpi = .true.
140 else if (env_gscomm(1:env_len) .eq. "NCCL") then
141 use_device_nccl = .true.
142 else if (env_gscomm(1:env_len) .eq. "SHMEM") then
143 use_device_shmem = .true.
144 else
145 call neko_error('Unknown Gather-scatter comm. backend')
146 end if
147 end if
148
149
150 if (present(comm_bcknd)) then
151 comm_bcknd_ = comm_bcknd
152 else if (use_host_mpi) then
153 comm_bcknd_ = gs_comm_mpi
154 else if (use_device_mpi) then
155 comm_bcknd_ = gs_comm_mpigpu
156 else if (use_device_nccl) then
157 comm_bcknd_ = gs_comm_nccl
158 else if (use_device_shmem) then
159 comm_bcknd_ = gs_comm_nvshmem
160 else
161 if (neko_device_mpi) then
162 comm_bcknd_ = gs_comm_mpigpu
163 use_device_mpi = .true.
164 else
165 comm_bcknd_ = gs_comm_mpi
166 end if
167 end if
168
169 select case (comm_bcknd_)
170 case (gs_comm_mpi)
171 call neko_log%message('Comm : MPI')
172 allocate(gs_mpi_t::gs%comm)
173 case (gs_comm_mpigpu)
174 call neko_log%message('Comm : Device MPI')
175 allocate(gs_device_mpi_t::gs%comm)
176 case (gs_comm_nccl)
177 call neko_log%message('Comm : NCCL')
178 allocate(gs_device_nccl_t::gs%comm)
179 case (gs_comm_nvshmem)
180 call neko_log%message('Comm : NVSHMEM')
181 allocate(gs_device_shmem_t::gs%comm)
182 case default
183 call neko_error('Unknown Gather-scatter comm. backend')
184 end select
185 ! Initialize a stack for each rank containing which dofs to send/recv at
186 ! that rank
187 call gs%comm%init_dofs()
188 ! Initialize mapping between local ids and gather-scatter ids
189 ! based on the global numbering in dofmap
190 call gs_init_mapping(gs)
191 ! Setup buffers and which ranks to send/recv data from based on mapping
192 ! and initializes gs%comm (sets up gs%comm%send_dof and gs%comm%recv_dof and
193 ! recv_pe/send_pe)
194 call gs_schedule(gs)
195 ! Global number of points not needing to be sent over mpi for gs operations
196 ! "Internal points"
197 glb_nlocal = int(gs%nlocal, i8)
198 ! Global number of points needing to be communicated with other pes/ranks
199 ! "external points"
200 glb_nshared = int(gs%nshared, i8)
201 ! Can be thought of a measure of the volume of this rank (glb_nlocal) and
202 ! the surface area (glb_nshared) that is shared with other ranks
203 ! Lots of internal volume compared to surface that needs communication is
204 ! good
205
206 if (pe_rank .eq. 0) then
207 call mpi_reduce(mpi_in_place, glb_nlocal, 1, &
208 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
209
210 call mpi_reduce(mpi_in_place, glb_nshared, 1, &
211 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
212 else
213 call mpi_reduce(glb_nlocal, glb_nlocal, 1, &
214 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
215
216 call mpi_reduce(glb_nshared, glb_nshared, 1, &
217 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
218 end if
219
220 write(log_buf, '(A,I12)') 'Avg. internal: ', glb_nlocal/pe_size
221 call neko_log%message(log_buf)
222 write(log_buf, '(A,I12)') 'Avg. external: ', glb_nshared/pe_size
223 call neko_log%message(log_buf)
224
225 if (present(bcknd)) then
226 bcknd_ = bcknd
227 else
228 if (neko_bcknd_sx .eq. 1) then
229 bcknd_ = gs_bcknd_sx
230 else if (neko_bcknd_device .eq. 1) then
231 bcknd_ = gs_bcknd_dev
232 else
233 bcknd_ = gs_bcknd_cpu
234 end if
235 end if
236
237 ! Setup Gather-scatter backend
238 select case(bcknd_)
239 case(gs_bcknd_cpu)
240 allocate(gs_cpu_t::gs%bcknd)
241 bcknd_str = ' std'
242 case(gs_bcknd_dev)
243 allocate(gs_device_t::gs%bcknd)
244 if (neko_bcknd_hip .eq. 1) then
245 bcknd_str = ' hip'
246 else if (neko_bcknd_cuda .eq. 1) then
247 bcknd_str = ' cuda'
248 else if (neko_bcknd_opencl .eq. 1) then
249 bcknd_str = ' opencl'
250 end if
251 case(gs_bcknd_sx)
252 allocate(gs_sx_t::gs%bcknd)
253 bcknd_str = ' sx'
254 case default
255 call neko_error('Unknown Gather-scatter backend')
256 end select
257
258 write(log_buf, '(A)') 'Backend : ' // trim(bcknd_str)
259 call neko_log%message(log_buf)
260
261
262
263 call gs%bcknd%init(gs%nlocal, gs%nshared, gs%nlocal_blks, gs%nshared_blks)
264
265 if (use_device_mpi .or. use_device_nccl .or. use_device_shmem) then
266 select type(b => gs%bcknd)
267 type is (gs_device_t)
268 b%shared_on_host = .false.
269 end select
270 end if
271
272 if (use_device_mpi) then
273 if(pe_size .gt. 1) then
274 ! Select fastest device MPI strategy at runtime
275 select type(c => gs%comm)
276 type is (gs_device_mpi_t)
277 call get_environment_variable("NEKO_GS_STRTGY", env_strtgy, env_len)
278 if (env_len .eq. 0) then
279 allocate(tmp(dofmap%size()))
280 call device_map(tmp, tmp_d, dofmap%size())
281 tmp = 1.0_rp
282 call device_memcpy(tmp, tmp_d, dofmap%size(), &
283 host_to_device, sync=.false.)
284 call gs_op_vector(gs, tmp, dofmap%size(), gs_op_add)
285
286 do i = 1, size(strtgy)
287 c%nb_strtgy = strtgy(i)
288 call device_sync
289 call mpi_barrier(neko_comm)
290 strtgy_time(i) = mpi_wtime()
291 do j = 1, 100
292 call gs_op_vector(gs, tmp, dofmap%size(), gs_op_add)
293 end do
294 strtgy_time(i) = (mpi_wtime() - strtgy_time(i)) / 100d0
295 end do
296
297 call device_deassociate(tmp)
298 call device_free(tmp_d)
299 deallocate(tmp)
300
301 c%nb_strtgy = strtgy(minloc(strtgy_time, 1))
302
303 avg_strtgy = minloc(strtgy_time, 1)
304 call mpi_allreduce(mpi_in_place, avg_strtgy, 1, &
305 mpi_integer, mpi_sum, neko_comm)
306 avg_strtgy = avg_strtgy / pe_size
307
308 write(log_buf, '(A,B0.2,A)') 'Avg. strtgy : [', &
309 strtgy(avg_strtgy),']'
310
311 else
312 read(env_strtgy(1:env_len), *) i
313
314 if (i .lt. 1 .or. i .gt. 4) then
315 call neko_error('Invalid gs sync strtgy')
316 end if
317
318 c%nb_strtgy = strtgy(i)
319 avg_strtgy = i
320
321 write(log_buf, '(A,B0.2,A)') 'Env. strtgy : [', &
322 strtgy(avg_strtgy),']'
323 end if
324
325 call neko_log%message(log_buf)
326
327 end select
328 end if
329 end if
330
331 call neko_log%end_section()
332
333 end subroutine gs_init
334
336 subroutine gs_free(gs)
337 class(gs_t), intent(inout) :: gs
338
339 nullify(gs%dofmap)
340
341 if (allocated(gs%local_gs)) then
342 deallocate(gs%local_gs)
343 end if
344
345 if (allocated(gs%local_dof_gs)) then
346 deallocate(gs%local_dof_gs)
347 end if
348
349 if (allocated(gs%local_gs_dof)) then
350 deallocate(gs%local_gs_dof)
351 end if
352
353 if (allocated(gs%local_blk_len)) then
354 deallocate(gs%local_blk_len)
355 end if
356
357 if (allocated(gs%shared_gs)) then
358 deallocate(gs%shared_gs)
359 end if
360
361 if (allocated(gs%shared_dof_gs)) then
362 deallocate(gs%shared_dof_gs)
363 end if
364
365 if (allocated(gs%shared_gs_dof)) then
366 deallocate(gs%shared_gs_dof)
367 end if
368
369 if (allocated(gs%shared_blk_len)) then
370 deallocate(gs%shared_blk_len)
371 end if
372
373 gs%nlocal = 0
374 gs%nshared = 0
375 gs%nlocal_blks = 0
376 gs%nshared_blks = 0
377
378 call gs%shared_dofs%free()
379
380 if (allocated(gs%bcknd)) then
381 call gs%bcknd%free()
382 deallocate(gs%bcknd)
383 end if
384
385 if (allocated(gs%comm)) then
386 call gs%comm%free()
387 deallocate(gs%comm)
388 end if
389
390 end subroutine gs_free
391
393 subroutine gs_init_mapping(gs)
394 type(gs_t), target, intent(inout) :: gs
395 type(mesh_t), pointer :: msh
396 type(dofmap_t), pointer :: dofmap
397 type(stack_i4_t), target :: local_dof, dof_local, shared_dof, dof_shared
398 type(stack_i4_t), target :: local_face_dof, face_dof_local
399 type(stack_i4_t), target :: shared_face_dof, face_dof_shared
400 integer :: i, j, k, l, lx, ly, lz, max_id, max_sid, id, lid, dm_size
401 type(htable_i8_t) :: dm
402 type(htable_i8_t), pointer :: sdm
403
404 dofmap => gs%dofmap
405 msh => dofmap%msh
406 sdm => gs%shared_dofs
407
408 lx = dofmap%Xh%lx
409 ly = dofmap%Xh%ly
410 lz = dofmap%Xh%lz
411 dm_size = dofmap%size()/lx
412
413 call dm%init(dm_size, i)
417 call sdm%init(dofmap%size(), i)
418
419
420 call local_dof%init()
421 call dof_local%init()
422
423 call local_face_dof%init()
424 call face_dof_local%init()
425
426 call shared_dof%init()
427 call dof_shared%init()
428
429 call shared_face_dof%init()
430 call face_dof_shared%init()
431
432 !
433 ! Setup mapping for dofs points
434 !
435
436 max_id = 0
437 max_sid = 0
438 do i = 1, msh%nelv
439 ! Local id of vertices
440 lid = linear_index(1, 1, 1, i, lx, ly, lz)
441 ! Check if this dof is shared among ranks or not
442 if (dofmap%shared_dof(1, 1, 1, i)) then
443 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, 1, i), max_sid)
444 !If add unique gather-scatter id to shared_dof stack
445 call shared_dof%push(id)
446 !If add local id to dof_shared stack
447 call dof_shared%push(lid)
448 !Now we have the mapping of local id <-> gather scatter id!
449 else
450 ! Same here, only here we know the point is local
451 ! It will as such not need to be sent to other ranks later
452 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, 1, i), max_id)
453 call local_dof%push(id)
454 call dof_local%push(lid)
455 end if
456 ! This procedure is then repeated for all vertices and edges
457 ! Facets can be treated a little bit differently since they only have one
458 ! neighbor
459
460 lid = linear_index(lx, 1, 1, i, lx, ly, lz)
461 if (dofmap%shared_dof(lx, 1, 1, i)) then
462 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, 1, i), max_sid)
463 call shared_dof%push(id)
464 call dof_shared%push(lid)
465 else
466 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, 1, i), max_id)
467 call local_dof%push(id)
468 call dof_local%push(lid)
469 end if
470
471 lid = linear_index(1, ly, 1, i, lx, ly, lz)
472 if (dofmap%shared_dof(1, ly, 1, i)) then
473 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, 1, i), max_sid)
474 call shared_dof%push(id)
475 call dof_shared%push(lid)
476 else
477 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, 1, i), max_id)
478 call local_dof%push(id)
479 call dof_local%push(lid)
480 end if
481
482 lid = linear_index(lx, ly, 1, i, lx, ly, lz)
483 if (dofmap%shared_dof(lx, ly, 1, i)) then
484 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, 1, i), max_sid)
485 call shared_dof%push(id)
486 call dof_shared%push(lid)
487 else
488 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, 1, i), max_id)
489 call local_dof%push(id)
490 call dof_local%push(lid)
491 end if
492 if (lz .gt. 1) then
493 lid = linear_index(1, 1, lz, i, lx, ly, lz)
494 if (dofmap%shared_dof(1, 1, lz, i)) then
495 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, lz, i), max_sid)
496 call shared_dof%push(id)
497 call dof_shared%push(lid)
498 else
499 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, lz, i), max_id)
500 call local_dof%push(id)
501 call dof_local%push(lid)
502 end if
503
504 lid = linear_index(lx, 1, lz, i, lx, ly, lz)
505 if (dofmap%shared_dof(lx, 1, lz, i)) then
506 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, lz, i), max_sid)
507 call shared_dof%push(id)
508 call dof_shared%push(lid)
509 else
510 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, lz, i), max_id)
511 call local_dof%push(id)
512 call dof_local%push(lid)
513 end if
514
515 lid = linear_index(1, ly, lz, i, lx, ly, lz)
516 if (dofmap%shared_dof(1, ly, lz, i)) then
517 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, lz, i), max_sid)
518 call shared_dof%push(id)
519 call dof_shared%push(lid)
520 else
521 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, lz, i), max_id)
522 call local_dof%push(id)
523 call dof_local%push(lid)
524 end if
525
526 lid = linear_index(lx, ly, lz, i, lx, ly, lz)
527 if (dofmap%shared_dof(lx, ly, lz, i)) then
528 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, lz, i), max_sid)
529 call shared_dof%push(id)
530 call dof_shared%push(lid)
531 else
532 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, lz, i), max_id)
533 call local_dof%push(id)
534 call dof_local%push(lid)
535 end if
536 end if
537 end do
538
539 ! Clear local dofmap table
540 call dm%clear()
541 ! Get gather scatter ids and local ids of edges
542 if (lz .gt. 1) then
543 !
544 ! Setup mapping for dofs on edges
545 !
546 do i = 1, msh%nelv
547
548 !
549 ! dofs on edges in x-direction
550 !
551 if (dofmap%shared_dof(2, 1, 1, i)) then
552 do j = 2, lx - 1
553 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, 1, i), max_sid)
554 call shared_dof%push(id)
555 id = linear_index(j, 1, 1, i, lx, ly, lz)
556 call dof_shared%push(id)
557 end do
558 else
559 do j = 2, lx - 1
560 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, 1, i), max_id)
561 call local_dof%push(id)
562 id = linear_index(j, 1, 1, i, lx, ly, lz)
563 call dof_local%push(id)
564 end do
565 end if
566 if (dofmap%shared_dof(2, 1, lz, i)) then
567 do j = 2, lx - 1
568 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, lz, i), max_sid)
569 call shared_dof%push(id)
570 id = linear_index(j, 1, lz, i, lx, ly, lz)
571 call dof_shared%push(id)
572 end do
573 else
574 do j = 2, lx - 1
575 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, lz, i), max_id)
576 call local_dof%push(id)
577 id = linear_index(j, 1, lz, i, lx, ly, lz)
578 call dof_local%push(id)
579 end do
580 end if
581
582 if (dofmap%shared_dof(2, ly, 1, i)) then
583 do j = 2, lx - 1
584 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, 1, i), max_sid)
585 call shared_dof%push(id)
586 id = linear_index(j, ly, 1, i, lx, ly, lz)
587 call dof_shared%push(id)
588 end do
589
590 else
591 do j = 2, lx - 1
592 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
593 call local_dof%push(id)
594 id = linear_index(j, ly, 1, i, lx, ly, lz)
595 call dof_local%push(id)
596 end do
597 end if
598 if (dofmap%shared_dof(2, ly, lz, i)) then
599 do j = 2, lx - 1
600 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, lz, i), max_sid)
601 call shared_dof%push(id)
602 id = linear_index(j, ly, lz, i, lx, ly, lz)
603 call dof_shared%push(id)
604 end do
605 else
606 do j = 2, lx - 1
607 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, lz, i), max_id)
608 call local_dof%push(id)
609 id = linear_index(j, ly, lz, i, lx, ly, lz)
610 call dof_local%push(id)
611 end do
612 end if
613
614 !
615 ! dofs on edges in y-direction
616 !
617 if (dofmap%shared_dof(1, 2, 1, i)) then
618 do k = 2, ly - 1
619 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, 1, i), max_sid)
620 call shared_dof%push(id)
621 id = linear_index(1, k, 1, i, lx, ly, lz)
622 call dof_shared%push(id)
623 end do
624 else
625 do k = 2, ly - 1
626 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, 1, i), max_id)
627 call local_dof%push(id)
628 id = linear_index(1, k, 1, i, lx, ly, lz)
629 call dof_local%push(id)
630 end do
631 end if
632 if (dofmap%shared_dof(1, 2, lz, i)) then
633 do k = 2, ly - 1
634 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, lz, i), max_sid)
635 call shared_dof%push(id)
636 id = linear_index(1, k, lz, i, lx, ly, lz)
637 call dof_shared%push(id)
638 end do
639 else
640 do k = 2, ly - 1
641 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, lz, i), max_id)
642 call local_dof%push(id)
643 id = linear_index(1, k, lz, i, lx, ly, lz)
644 call dof_local%push(id)
645 end do
646 end if
647
648 if (dofmap%shared_dof(lx, 2, 1, i)) then
649 do k = 2, ly - 1
650 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, 1, i), max_sid)
651 call shared_dof%push(id)
652 id = linear_index(lx, k, 1, i, lx, ly, lz)
653 call dof_shared%push(id)
654 end do
655 else
656 do k = 2, ly - 1
657 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, 1, i), max_id)
658 call local_dof%push(id)
659 id = linear_index(lx, k, 1, i, lx, ly, lz)
660 call dof_local%push(id)
661 end do
662 end if
663 if (dofmap%shared_dof(lx, 2, lz, i)) then
664 do k = 2, ly - 1
665 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, lz, i), max_sid)
666 call shared_dof%push(id)
667 id = linear_index(lx, k, lz, i, lx, ly, lz)
668 call dof_shared%push(id)
669 end do
670 else
671 do k = 2, ly - 1
672 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, lz, i), max_id)
673 call local_dof%push(id)
674 id = linear_index(lx, k, lz, i, lx, ly, lz)
675 call dof_local%push(id)
676 end do
677 end if
678 !
679 ! dofs on edges in z-direction
680 !
681 if (dofmap%shared_dof(1, 1, 2, i)) then
682 do l = 2, lz - 1
683 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, l, i), max_sid)
684 call shared_dof%push(id)
685 id = linear_index(1, 1, l, i, lx, ly, lz)
686 call dof_shared%push(id)
687 end do
688 else
689 do l = 2, lz - 1
690 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, l, i), max_id)
691 call local_dof%push(id)
692 id = linear_index(1, 1, l, i, lx, ly, lz)
693 call dof_local%push(id)
694 end do
695 end if
696
697 if (dofmap%shared_dof(lx, 1, 2, i)) then
698 do l = 2, lz - 1
699 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, l, i), max_sid)
700 call shared_dof%push(id)
701 id = linear_index(lx, 1, l, i, lx, ly, lz)
702 call dof_shared%push(id)
703 end do
704 else
705 do l = 2, lz - 1
706 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, l, i), max_id)
707 call local_dof%push(id)
708 id = linear_index(lx, 1, l, i, lx, ly, lz)
709 call dof_local%push(id)
710 end do
711 end if
712
713 if (dofmap%shared_dof(1, ly, 2, i)) then
714 do l = 2, lz - 1
715 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, l, i), max_sid)
716 call shared_dof%push(id)
717 id = linear_index(1, ly, l, i, lx, ly, lz)
718 call dof_shared%push(id)
719 end do
720 else
721 do l = 2, lz - 1
722 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, l, i), max_id)
723 call local_dof%push(id)
724 id = linear_index(1, ly, l, i, lx, ly, lz)
725 call dof_local%push(id)
726 end do
727 end if
728
729 if (dofmap%shared_dof(lx, ly, 2, i)) then
730 do l = 2, lz - 1
731 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, l, i), max_sid)
732 call shared_dof%push(id)
733 id = linear_index(lx, ly, l, i, lx, ly, lz)
734 call dof_shared%push(id)
735 end do
736 else
737 do l = 2, lz - 1
738 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, l, i), max_id)
739 call local_dof%push(id)
740 id = linear_index(lx, ly, l, i, lx, ly, lz)
741 call dof_local%push(id)
742 end do
743 end if
744 end do
745 end if
746
747 ! Clear local dofmap table
748 call dm%clear()
749
750 !
751 ! Setup mapping for dofs on facets
752 !
753 ! This is for 2d
754 if (lz .eq. 1) then
755 do i = 1, msh%nelv
756
757 !
758 ! dofs on edges in x-direction
759 !
760 if (msh%facet_neigh(3, i) .ne. 0) then
761 if (dofmap%shared_dof(2, 1, 1, i)) then
762 do j = 2, lx - 1
763 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, 1, i), max_sid)
764 call shared_face_dof%push(id)
765 id = linear_index(j, 1, 1, i, lx, ly, lz)
766 call face_dof_shared%push(id)
767 end do
768 else
769 do j = 2, lx - 1
770 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, 1, i), max_id)
771 call local_face_dof%push(id)
772 id = linear_index(j, 1, 1, i, lx, ly, lz)
773 call face_dof_local%push(id)
774 end do
775 end if
776 end if
777
778 if (msh%facet_neigh(4, i) .ne. 0) then
779 if (dofmap%shared_dof(2, ly, 1, i)) then
780 do j = 2, lx - 1
781 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, 1, i), max_sid)
782 call shared_face_dof%push(id)
783 id = linear_index(j, ly, 1, i, lx, ly, lz)
784 call face_dof_shared%push(id)
785 end do
786
787 else
788 do j = 2, lx - 1
789 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
790 call local_face_dof%push(id)
791 id = linear_index(j, ly, 1, i, lx, ly, lz)
792 call face_dof_local%push(id)
793 end do
794 end if
795 end if
796
797 !
798 ! dofs on edges in y-direction
799 !
800 if (msh%facet_neigh(1, i) .ne. 0) then
801 if (dofmap%shared_dof(1, 2, 1, i)) then
802 do k = 2, ly - 1
803 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, 1, i), max_sid)
804 call shared_face_dof%push(id)
805 id = linear_index(1, k, 1, i, lx, ly, lz)
806 call face_dof_shared%push(id)
807 end do
808 else
809 do k = 2, ly - 1
810 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, 1, i), max_id)
811 call local_face_dof%push(id)
812 id = linear_index(1, k, 1, i, lx, ly, lz)
813 call face_dof_local%push(id)
814 end do
815 end if
816 end if
817
818 if (msh%facet_neigh(2, i) .ne. 0) then
819 if (dofmap%shared_dof(lx, 2, 1, i)) then
820 do k = 2, ly - 1
821 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, 1, i), max_sid)
822 call shared_face_dof%push(id)
823 id = linear_index(lx, k, 1, i, lx, ly, lz)
824 call face_dof_shared%push(id)
825 end do
826 else
827 do k = 2, ly - 1
828 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, 1, i), max_id)
829 call local_face_dof%push(id)
830 id = linear_index(lx, k, 1, i, lx, ly, lz)
831 call face_dof_local%push(id)
832 end do
833 end if
834 end if
835 end do
836 else
837 do i = 1, msh%nelv
838
839 ! Facets in x-direction (s, t)-plane
840 if (msh%facet_neigh(1, i) .ne. 0) then
841 if (dofmap%shared_dof(1, 2, 2, i)) then
842 do l = 2, lz - 1
843 do k = 2, ly - 1
844 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, l, i), max_sid)
845 call shared_face_dof%push(id)
846 id = linear_index(1, k, l, i, lx, ly, lz)
847 call face_dof_shared%push(id)
848 end do
849 end do
850 else
851 do l = 2, lz - 1
852 do k = 2, ly - 1
853 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, l, i), max_id)
854 call local_face_dof%push(id)
855 id = linear_index(1, k, l, i, lx, ly, lz)
856 call face_dof_local%push(id)
857 end do
858 end do
859 end if
860 end if
861
862 if (msh%facet_neigh(2, i) .ne. 0) then
863 if (dofmap%shared_dof(lx, 2, 2, i)) then
864 do l = 2, lz - 1
865 do k = 2, ly - 1
866 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, l, i), max_sid)
867 call shared_face_dof%push(id)
868 id = linear_index(lx, k, l, i, lx, ly, lz)
869 call face_dof_shared%push(id)
870 end do
871 end do
872 else
873 do l = 2, lz - 1
874 do k = 2, ly - 1
875 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, l, i), max_id)
876 call local_face_dof%push(id)
877 id = linear_index(lx, k, l, i, lx, ly, lz)
878 call face_dof_local%push(id)
879 end do
880 end do
881 end if
882 end if
883
884 ! Facets in y-direction (r, t)-plane
885 if (msh%facet_neigh(3, i) .ne. 0) then
886 if (dofmap%shared_dof(2, 1, 2, i)) then
887 do l = 2, lz - 1
888 do j = 2, lx - 1
889 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, l, i), max_sid)
890 call shared_face_dof%push(id)
891 id = linear_index(j, 1, l, i, lx, ly, lz)
892 call face_dof_shared%push(id)
893 end do
894 end do
895 else
896 do l = 2, lz - 1
897 do j = 2, lx - 1
898 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, l, i), max_id)
899 call local_face_dof%push(id)
900 id = linear_index(j, 1, l, i, lx, ly, lz)
901 call face_dof_local%push(id)
902 end do
903 end do
904 end if
905 end if
906
907 if (msh%facet_neigh(4, i) .ne. 0) then
908 if (dofmap%shared_dof(2, ly, 2, i)) then
909 do l = 2, lz - 1
910 do j = 2, lx - 1
911 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, l, i), max_sid)
912 call shared_face_dof%push(id)
913 id = linear_index(j, ly, l, i, lx, ly, lz)
914 call face_dof_shared%push(id)
915 end do
916 end do
917 else
918 do l = 2, lz - 1
919 do j = 2, lx - 1
920 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, l, i), max_id)
921 call local_face_dof%push(id)
922 id = linear_index(j, ly, l, i, lx, ly, lz)
923 call face_dof_local%push(id)
924 end do
925 end do
926 end if
927 end if
928
929 ! Facets in z-direction (r, s)-plane
930 if (msh%facet_neigh(5, i) .ne. 0) then
931 if (dofmap%shared_dof(2, 2, 1, i)) then
932 do k = 2, ly - 1
933 do j = 2, lx - 1
934 id = gs_mapping_add_dof(sdm, dofmap%dof(j, k, 1, i), max_sid)
935 call shared_face_dof%push(id)
936 id = linear_index(j, k, 1, i, lx, ly, lz)
937 call face_dof_shared%push(id)
938 end do
939 end do
940 else
941 do k = 2, ly - 1
942 do j = 2, lx - 1
943 id = gs_mapping_add_dof(dm, dofmap%dof(j, k, 1, i), max_id)
944 call local_face_dof%push(id)
945 id = linear_index(j, k, 1, i, lx, ly, lz)
946 call face_dof_local%push(id)
947 end do
948 end do
949 end if
950 end if
951
952 if (msh%facet_neigh(6, i) .ne. 0) then
953 if (dofmap%shared_dof(2, 2, lz, i)) then
954 do k = 2, ly - 1
955 do j = 2, lx - 1
956 id = gs_mapping_add_dof(sdm, dofmap%dof(j, k, lz, i), max_sid)
957 call shared_face_dof%push(id)
958 id = linear_index(j, k, lz, i, lx, ly, lz)
959 call face_dof_shared%push(id)
960 end do
961 end do
962 else
963 do k = 2, ly - 1
964 do j = 2, lx - 1
965 id = gs_mapping_add_dof(dm, dofmap%dof(j, k, lz, i), max_id)
966 call local_face_dof%push(id)
967 id = linear_index(j, k, lz, i, lx, ly, lz)
968 call face_dof_local%push(id)
969 end do
970 end do
971 end if
972 end if
973 end do
974 end if
975
976
977 call dm%free()
978
979 gs%nlocal = local_dof%size() + local_face_dof%size()
980 gs%local_facet_offset = local_dof%size() + 1
981
982 ! Finalize local dof to gather-scatter index
983 allocate(gs%local_dof_gs(gs%nlocal))
984
985 ! Add dofs on points and edges
986
987 ! We should use the %array() procedure, which works great for
988 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
989 ! certain data types
990 select type(dof_array => local_dof%data)
991 type is (integer)
992 j = local_dof%size()
993 do i = 1, j
994 gs%local_dof_gs(i) = dof_array(i)
995 end do
996 end select
997 call local_dof%free()
998
999 ! Add dofs on faces
1000
1001 ! We should use the %array() procedure, which works great for
1002 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1003 ! certain data types
1004 select type(dof_array => local_face_dof%data)
1005 type is (integer)
1006 do i = 1, local_face_dof%size()
1007 gs%local_dof_gs(i + j) = dof_array(i)
1008 end do
1009 end select
1010 call local_face_dof%free()
1011
1012 ! Finalize local gather-scatter index to dof
1013 allocate(gs%local_gs_dof(gs%nlocal))
1014
1015 ! Add gather-scatter index on points and edges
1016
1017 ! We should use the %array() procedure, which works great for
1018 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1019 ! certain data types
1020 select type(dof_array => dof_local%data)
1021 type is (integer)
1022 j = dof_local%size()
1023 do i = 1, j
1024 gs%local_gs_dof(i) = dof_array(i)
1025 end do
1026 end select
1027 call dof_local%free()
1028
1029 ! We should use the %array() procedure, which works great for
1030 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1031 ! certain data types
1032 select type(dof_array => face_dof_local%data)
1033 type is (integer)
1034 do i = 1, face_dof_local%size()
1035 gs%local_gs_dof(i+j) = dof_array(i)
1036 end do
1037 end select
1038 call face_dof_local%free()
1039
1040 call gs_qsort_dofmap(gs%local_dof_gs, gs%local_gs_dof, &
1041 gs%nlocal, 1, gs%nlocal)
1042
1043 call gs_find_blks(gs%local_dof_gs, gs%local_blk_len, &
1044 gs%nlocal_blks, gs%nlocal, gs%local_facet_offset)
1045
1046 ! Allocate buffer for local gs-ops
1047 allocate(gs%local_gs(gs%nlocal))
1048
1049 gs%nshared = shared_dof%size() + shared_face_dof%size()
1050 gs%shared_facet_offset = shared_dof%size() + 1
1051
1052 ! Finalize shared dof to gather-scatter index
1053 allocate(gs%shared_dof_gs(gs%nshared))
1054
1055 ! Add shared dofs on points and edges
1056
1057 ! We should use the %array() procedure, which works great for
1058 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1059 ! certain data types
1060 select type(dof_array => shared_dof%data)
1061 type is (integer)
1062 j = shared_dof%size()
1063 do i = 1, j
1064 gs%shared_dof_gs(i) = dof_array(i)
1065 end do
1066 end select
1067 call shared_dof%free()
1068
1069 ! Add shared dofs on faces
1070
1071 ! We should use the %array() procedure, which works great for
1072 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1073 ! certain data types
1074 select type(dof_array => shared_face_dof%data)
1075 type is (integer)
1076 do i = 1, shared_face_dof%size()
1077 gs%shared_dof_gs(i + j) = dof_array(i)
1078 end do
1079 end select
1080 call shared_face_dof%free()
1081
1082 ! Finalize shared gather-scatter index to dof
1083 allocate(gs%shared_gs_dof(gs%nshared))
1084
1085 ! Add dofs on points and edges
1086
1087 ! We should use the %array() procedure, which works great for
1088 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1089 ! certain data types
1090 select type(dof_array => dof_shared%data)
1091 type is(integer)
1092 j = dof_shared%size()
1093 do i = 1, j
1094 gs%shared_gs_dof(i) = dof_array(i)
1095 end do
1096 end select
1097 call dof_shared%free()
1098
1099 ! We should use the %array() procedure, which works great for
1100 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1101 ! certain data types
1102 select type(dof_array => face_dof_shared%data)
1103 type is (integer)
1104 do i = 1, face_dof_shared%size()
1105 gs%shared_gs_dof(i + j) = dof_array(i)
1106 end do
1107 end select
1108 call face_dof_shared%free()
1109
1110 ! Allocate buffer for shared gs-ops
1111 allocate(gs%shared_gs(gs%nshared))
1112
1113 if (gs%nshared .gt. 0) then
1114 call gs_qsort_dofmap(gs%shared_dof_gs, gs%shared_gs_dof, &
1115 gs%nshared, 1, gs%nshared)
1116
1117 call gs_find_blks(gs%shared_dof_gs, gs%shared_blk_len, &
1118 gs%nshared_blks, gs%nshared, gs%shared_facet_offset)
1119 end if
1120
1121 contains
1122
1131 function gs_mapping_add_dof(map_, dof, max_id) result(id)
1132 type(htable_i8_t), intent(inout) :: map_
1133 integer(kind=i8), intent(inout) :: dof
1134 integer, intent(inout) :: max_id
1135 integer :: id
1136
1137 if (map_%get(dof, id) .gt. 0) then
1138 max_id = max_id + 1
1139 call map_%set(dof, max_id)
1140 id = max_id
1141 end if
1142
1143 end function gs_mapping_add_dof
1144
1146 recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
1147 integer, intent(inout) :: n
1148 integer, dimension(n), intent(inout) :: dg
1149 integer, dimension(n), intent(inout) :: gd
1150 integer :: lo, hi
1151 integer :: tmp, i, j, pivot
1152
1153 i = lo - 1
1154 j = hi + 1
1155 pivot = dg((lo + hi) / 2)
1156 do
1157 do
1158 i = i + 1
1159 if (dg(i) .ge. pivot) exit
1160 end do
1161
1162 do
1163 j = j - 1
1164 if (dg(j) .le. pivot) exit
1165 end do
1166
1167 if (i .lt. j) then
1168 tmp = dg(i)
1169 dg(i) = dg(j)
1170 dg(j) = tmp
1171
1172 tmp = gd(i)
1173 gd(i) = gd(j)
1174 gd(j) = tmp
1175 else if (i .eq. j) then
1176 i = i + 1
1177 exit
1178 else
1179 exit
1180 end if
1181 end do
1182 if (lo .lt. j) call gs_qsort_dofmap(dg, gd, n, lo, j)
1183 if (i .lt. hi) call gs_qsort_dofmap(dg, gd, n, i, hi)
1184
1185 end subroutine gs_qsort_dofmap
1186
1188 subroutine gs_find_blks(dg, blk_len, nblks, n, m)
1189 integer, intent(in) :: n
1190 integer, intent(in) :: m
1191 integer, dimension(n), intent(inout) :: dg
1192 integer, allocatable, intent(inout) :: blk_len(:)
1193 integer, intent(inout) :: nblks
1194 integer :: i, j
1195 integer :: id, count
1196 type(stack_i4_t), target :: blks
1197
1198 call blks%init()
1199 i = 1
1200 do while( i .lt. m)
1201 id = dg(i)
1202 count = 1
1203 j = i
1204 do while ( j+1 .le. n .and. dg(j+1) .eq. id)
1205 j = j + 1
1206 count = count + 1
1207 end do
1208 call blks%push(count)
1209 i = j + 1
1210 end do
1211
1212 select type(blk_array => blks%data)
1213 type is(integer)
1214 nblks = blks%size()
1215 allocate(blk_len(nblks))
1216 do i = 1, nblks
1217 blk_len(i) = blk_array(i)
1218 end do
1219 end select
1220 call blks%free()
1221
1222 end subroutine gs_find_blks
1223
1224 end subroutine gs_init_mapping
1225
1227 subroutine gs_schedule(gs)
1228 type(gs_t), target, intent(inout) :: gs
1229 integer(kind=i8), allocatable :: send_buf(:), recv_buf(:)
1230 integer(kind=i2), allocatable :: shared_flg(:), recv_flg(:)
1231 type(htable_iter_i8_t) :: it
1232 type(stack_i4_t) :: send_pe, recv_pe
1233 type(mpi_status) :: status
1234 type(mpi_request) :: send_req, recv_req
1235 integer :: i, j, max_recv, src, dst, ierr, n_recv
1236 integer :: tmp, shared_gs_id
1237 integer :: nshared_unique
1238
1239 nshared_unique = gs%shared_dofs%num_entries()
1240
1241 call it%init(gs%shared_dofs)
1242 allocate(send_buf(nshared_unique))
1243 i = 1
1244 do while(it%next())
1245 send_buf(i) = it%key()
1246 i = i + 1
1247 end do
1248
1249 call send_pe%init()
1250 call recv_pe%init()
1251
1252
1253 !
1254 ! Schedule exchange of shared dofs
1255 !
1256
1257 call mpi_allreduce(nshared_unique, max_recv, 1, &
1258 mpi_integer, mpi_max, neko_comm, ierr)
1259
1260 allocate(recv_buf(max_recv))
1261 allocate(shared_flg(max_recv))
1262 allocate(recv_flg(max_recv))
1263
1265 do i = 1, size(gs%dofmap%msh%neigh_order)
1266 src = modulo(pe_rank - gs%dofmap%msh%neigh_order(i) + pe_size, pe_size)
1267 dst = modulo(pe_rank + gs%dofmap%msh%neigh_order(i), pe_size)
1268
1269 if (gs%dofmap%msh%neigh(src)) then
1270 call mpi_irecv(recv_buf, max_recv, mpi_integer8, &
1271 src, 0, neko_comm, recv_req, ierr)
1272 end if
1273
1274 if (gs%dofmap%msh%neigh(dst)) then
1275 call mpi_isend(send_buf, nshared_unique, mpi_integer8, &
1276 dst, 0, neko_comm, send_req, ierr)
1277 end if
1278
1279 if (gs%dofmap%msh%neigh(src)) then
1280 call mpi_wait(recv_req, status, ierr)
1281 call mpi_get_count(status, mpi_integer8, n_recv, ierr)
1282
1283 do j = 1, n_recv
1284 shared_flg(j) = gs%shared_dofs%get(recv_buf(j), shared_gs_id)
1285 if (shared_flg(j) .eq. 0) then
1287 call gs%comm%recv_dof(src)%push(shared_gs_id)
1288 end if
1289 end do
1290
1291 if (gs%comm%recv_dof(src)%size() .gt. 0) then
1292 call recv_pe%push(src)
1293 end if
1294 end if
1295
1296 if (gs%dofmap%msh%neigh(dst)) then
1297 call mpi_wait(send_req, mpi_status_ignore, ierr)
1298 call mpi_irecv(recv_flg, max_recv, mpi_integer2, &
1299 dst, 0, neko_comm, recv_req, ierr)
1300 end if
1301
1302 if (gs%dofmap%msh%neigh(src)) then
1303 call mpi_isend(shared_flg, n_recv, mpi_integer2, &
1304 src, 0, neko_comm, send_req, ierr)
1305 end if
1306
1307 if (gs%dofmap%msh%neigh(dst)) then
1308 call mpi_wait(recv_req, status, ierr)
1309 call mpi_get_count(status, mpi_integer2, n_recv, ierr)
1310
1311 do j = 1, n_recv
1312 if (recv_flg(j) .eq. 0) then
1313 tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id)
1315 call gs%comm%send_dof(dst)%push(shared_gs_id)
1316 end if
1317 end do
1318
1319 if (gs%comm%send_dof(dst)%size() .gt. 0) then
1320 call send_pe%push(dst)
1321 end if
1322 end if
1323
1324 if (gs%dofmap%msh%neigh(src)) then
1325 call mpi_wait(send_req, mpi_status_ignore, ierr)
1326 end if
1327
1328 end do
1329
1330 call gs%comm%init(send_pe, recv_pe)
1331
1332 call send_pe%free()
1333 call recv_pe%free()
1334
1335 deallocate(send_buf)
1336 deallocate(recv_flg)
1337 deallocate(shared_flg)
1338 !This arrays seems to take massive amounts of memory...
1339 call gs%shared_dofs%free()
1340
1341 end subroutine gs_schedule
1342
1344 subroutine gs_op_fld(gs, u, op, event)
1345 class(gs_t), intent(inout) :: gs
1346 type(field_t), intent(inout) :: u
1347 type(c_ptr), optional, intent(inout) :: event
1348 integer :: n, op
1349
1350 n = u%msh%nelv * u%Xh%lx * u%Xh%ly * u%Xh%lz
1351 if (present(event)) then
1352 call gs_op_vector(gs, u%x, n, op, event)
1353 else
1354 call gs_op_vector(gs, u%x, n, op)
1355 end if
1356
1357 end subroutine gs_op_fld
1358
1360 subroutine gs_op_r4(gs, u, n, op, event)
1361 class(gs_t), intent(inout) :: gs
1362 integer, intent(in) :: n
1363 real(kind=rp), contiguous, dimension(:,:,:,:), intent(inout) :: u
1364 type(c_ptr), optional, intent(inout) :: event
1365 integer :: op
1366
1367 if (present(event)) then
1368 call gs_op_vector(gs, u, n, op, event)
1369 else
1370 call gs_op_vector(gs, u, n, op)
1371 end if
1372
1373 end subroutine gs_op_r4
1374
1376 subroutine gs_op_vector(gs, u, n, op, event)
1377 class(gs_t), intent(inout) :: gs
1378 integer, intent(in) :: n
1379 real(kind=rp), dimension(n), intent(inout) :: u
1380 type(c_ptr), optional, intent(inout) :: event
1381 integer :: m, l, op, lo, so
1382
1383 lo = gs%local_facet_offset
1384 so = -gs%shared_facet_offset
1385 m = gs%nlocal
1386 l = gs%nshared
1387
1388 call profiler_start_region("gather_scatter", 5)
1389 ! Gather shared dofs
1390 if (pe_size .gt. 1) then
1391 call profiler_start_region("gs_nbrecv", 13)
1392 call gs%comm%nbrecv()
1393 call profiler_end_region("gs_nbrecv", 13)
1394 call profiler_start_region("gs_gather_shared", 14)
1395 call gs%bcknd%gather(gs%shared_gs, l, so, gs%shared_dof_gs, u, n, &
1396 gs%shared_gs_dof, gs%nshared_blks, gs%shared_blk_len, op, .true.)
1397 call profiler_end_region("gs_gather_shared", 14)
1398 call profiler_start_region("gs_nbsend", 6)
1399 call gs%comm%nbsend(gs%shared_gs, l, &
1400 gs%bcknd%gather_event, gs%bcknd%gs_stream)
1401 call profiler_end_region("gs_nbsend", 6)
1402
1403 end if
1404
1405 ! Gather-scatter local dofs
1406 call profiler_start_region("gs_local", 12)
1407 call gs%bcknd%gather(gs%local_gs, m, lo, gs%local_dof_gs, u, n, &
1408 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, op, .false.)
1409 call gs%bcknd%scatter(gs%local_gs, m, gs%local_dof_gs, u, n, &
1410 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, .false., c_null_ptr)
1411 call profiler_end_region("gs_local", 12)
1412 ! Scatter shared dofs
1413 if (pe_size .gt. 1) then
1414 call profiler_start_region("gs_nbwait", 7)
1415 call gs%comm%nbwait(gs%shared_gs, l, op, gs%bcknd%gs_stream)
1416 call profiler_end_region("gs_nbwait", 7)
1417 call profiler_start_region("gs_scatter_shared", 15)
1418 if (present(event)) then
1419 call gs%bcknd%scatter(gs%shared_gs, l,&
1420 gs%shared_dof_gs, u, n, &
1421 gs%shared_gs_dof, gs%nshared_blks, &
1422 gs%shared_blk_len, .true., event)
1423 else
1424 call gs%bcknd%scatter(gs%shared_gs, l,&
1425 gs%shared_dof_gs, u, n, &
1426 gs%shared_gs_dof, gs%nshared_blks, &
1427 gs%shared_blk_len, .true., c_null_ptr)
1428 end if
1429 call profiler_end_region("gs_scatter_shared", 15)
1430 end if
1431
1432 call profiler_end_region("gather_scatter", 5)
1433
1434 end subroutine gs_op_vector
1435
1436end module gather_scatter
subroutine gs_find_blks(dg, blk_len, nblks, n, m)
Find blocks sharing dofs in non-facet data.
recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
Sort the dof lists based on the dof to gather-scatter list.
integer function gs_mapping_add_dof(map_, dof, max_id)
Register a unique dof Takes the unique id dof and checks if it is in the htable map_ If it is we retu...
Deassociate a Fortran array from a device pointer.
Definition device.F90:89
Map a Fortran array to a device (allocate and associate)
Definition device.F90:71
Copy data between host and device (or device and device)
Definition device.F90:65
Synchronize a device or stream.
Definition device.F90:101
Definition comm.F90:1
integer pe_rank
MPI rank.
Definition comm.F90:51
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:38
integer pe_size
MPI size of communicator.
Definition comm.F90:54
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:46
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:200
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Defines a field.
Definition field.f90:34
Gather-scatter.
subroutine gs_init_mapping(gs)
Setup mapping of dofs to gather-scatter operations.
subroutine gs_schedule(gs)
Schedule shared gather-scatter operations.
subroutine gs_free(gs)
Deallocate a gather-scatter kernel.
subroutine gs_op_r4(gs, u, n, op, event)
Gather-scatter operation on a rank 4 array.
subroutine gs_init(gs, dofmap, bcknd, comm_bcknd)
Initialize a gather-scatter kernel.
subroutine gs_op_vector(gs, u, n, op, event)
Gather-scatter operation on a vector u with op op.
subroutine gs_op_fld(gs, u, op, event)
Gather-scatter operation on a field u with op op.
Defines a gather-scatter backend.
Definition gs_bcknd.f90:34
integer, parameter, public gs_bcknd_cpu
Definition gs_bcknd.f90:40
integer, parameter, public gs_bcknd_sx
Definition gs_bcknd.f90:40
integer, parameter, public gs_bcknd_dev
Definition gs_bcknd.f90:40
Defines a gather-scatter communication method.
Definition gs_comm.f90:34
integer, parameter, public gs_comm_mpigpu
Definition gs_comm.f90:42
integer, parameter, public gs_comm_mpi
Definition gs_comm.f90:42
integer, parameter, public gs_comm_nvshmem
Definition gs_comm.f90:42
integer, parameter, public gs_comm_nccl
Definition gs_comm.f90:42
Generic Gather-scatter backend for CPUs.
Definition gs_cpu.f90:34
Defines GPU aware MPI gather-scatter communication.
Defines NCCL based gather-scatter communication.
Defines GPU aware MPI gather-scatter communication.
Generic Gather-scatter backend for accelerators.
Definition gs_device.F90:34
Defines MPI gather-scatter communication.
Definition gs_mpi.f90:34
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
integer, parameter, public gs_op_max
Definition gs_ops.f90:36
integer, parameter, public gs_op_min
Definition gs_ops.f90:36
integer, parameter, public gs_op_mul
Definition gs_ops.f90:36
Generic Gather-scatter backend for NEC Vector Engines.
Definition gs_sx.f90:34
Implements a hash table ADT.
Definition htable.f90:36
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:65
integer, parameter, public log_size
Definition log.f90:42
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_hip
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_cuda
integer, parameter, public i2
Definition num_types.f90:5
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:109
Implements a dynamic stack ADT.
Definition stack.f90:35
Utilities.
Definition utils.f90:35
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:174
Gather-scatter backend.
Definition gs_bcknd.f90:44
Gather-scatter communication method.
Definition gs_comm.f90:46
Gather-scatter backend for CPUs.
Definition gs_cpu.f90:43
Gather-scatter backend for offloading devices.
Definition gs_device.F90:47
Gather-scatter communication using device MPI. The arrays are indexed per PE like send_pe and @ recv_...
Gather-scatter communication using NCCL The arrays are indexed per PE like send_pe and @ recv_pe.
Gather-scatter communication using device SHMEM. The arrays are indexed per PE like send_pe and @ rec...
Gather-scatter communication using MPI.
Definition gs_mpi.f90:59
Gather-scatter backend for NEC SX-Aurora.
Definition gs_sx.f90:43
Integer*8 based hash table.
Definition htable.f90:92
Iterator for an integer*8 based hash table.
Definition htable.f90:175
Integer based stack.
Definition stack.f90:63