Neko 0.9.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gather_scatter.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use neko_config
37 use gs_device, only : gs_device_t
38 use gs_sx, only : gs_sx_t
39 use gs_cpu, only : gs_cpu_t
41 use gs_comm, only : gs_comm_t
42 use gs_mpi, only : gs_mpi_t
44 use mesh, only : mesh_t
45 use comm
46 use dofmap, only : dofmap_t
47 use field, only : field_t
48 use num_types, only : rp, dp, i2
50 use stack, only : stack_i4_t
51 use utils, only : neko_error, linear_index
52 use logger, only : neko_log, log_size
54 use device
55 implicit none
56 private
57
58 type, public :: gs_t
59 real(kind=rp), allocatable :: local_gs(:)
60 integer, allocatable :: local_dof_gs(:)
61 integer, allocatable :: local_gs_dof(:)
62 integer, allocatable :: local_blk_len(:)
63 real(kind=rp), allocatable :: shared_gs(:)
64 integer, allocatable :: shared_dof_gs(:)
65 integer, allocatable :: shared_gs_dof(:)
66 integer, allocatable :: shared_blk_len(:)
67 type(dofmap_t), pointer ::dofmap
68 type(htable_i8_t) :: shared_dofs
69 integer :: nlocal
70 integer :: nshared
71 integer :: nlocal_blks
72 integer :: nshared_blks
73 integer :: local_facet_offset
74 integer :: shared_facet_offset
75 class(gs_bcknd_t), allocatable :: bcknd
76 class(gs_comm_t), allocatable :: comm
77 contains
78 procedure, private, pass(gs) :: gs_op_fld
79 procedure, private, pass(gs) :: gs_op_r4
80 procedure, pass(gs) :: gs_op_vector
81 procedure, pass(gs) :: init => gs_init
82 procedure, pass(gs) :: free => gs_free
83 generic :: op => gs_op_fld, gs_op_r4, gs_op_vector
84 end type gs_t
85
87
88contains
89
91 subroutine gs_init(gs, dofmap, bcknd)
92 class(gs_t), intent(inout) :: gs
93 type(dofmap_t), target, intent(inout) :: dofmap
94 character(len=LOG_SIZE) :: log_buf
95 character(len=20) :: bcknd_str
96 integer, optional :: bcknd
97 integer :: i, j, ierr, bcknd_
98 integer(i8) :: glb_nshared, glb_nlocal
99 logical :: use_device_mpi
100 real(kind=rp), allocatable :: tmp(:)
101 type(c_ptr) :: tmp_d = c_null_ptr
102 integer :: strtgy(4) = (/ int(b'00'), int(b'01'), int(b'10'), int(b'11') /)
103 integer :: avg_strtgy, env_len
104 character(len=255) :: env_strtgy
105 real(kind=dp) :: strtgy_time(4)
106
107 call gs%free()
108
109 call neko_log%section('Gather-Scatter')
110
111 gs%dofmap => dofmap
112
113 ! Here one could use some heuristic or autotuning to select comm method,
114 ! such as only using device MPI when there is enough data.
115 !use_device_mpi = NEKO_DEVICE_MPI .and. gs%nshared .gt. 20000
116 use_device_mpi = neko_device_mpi
117
118 if (use_device_mpi) then
119 call neko_log%message('Comm : Device MPI')
120 allocate(gs_device_mpi_t::gs%comm)
121 else
122 call neko_log%message('Comm : MPI')
123 allocate(gs_mpi_t::gs%comm)
124 end if
125
126 call gs%comm%init_dofs()
127 call gs_init_mapping(gs)
128
129 call gs_schedule(gs)
130
131 glb_nlocal = int(gs%nlocal, i8)
132 glb_nshared = int(gs%nshared, i8)
133
134 if (pe_rank .eq. 0) then
135 call mpi_reduce(mpi_in_place, glb_nlocal, 1, &
136 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
137
138 call mpi_reduce(mpi_in_place, glb_nshared, 1, &
139 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
140 else
141 call mpi_reduce(glb_nlocal, glb_nlocal, 1, &
142 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
143
144 call mpi_reduce(glb_nshared, glb_nshared, 1, &
145 mpi_integer8, mpi_sum, 0, neko_comm, ierr)
146 end if
147
148 write(log_buf, '(A,I12)') 'Avg. internal: ', glb_nlocal/pe_size
149 call neko_log%message(log_buf)
150 write(log_buf, '(A,I12)') 'Avg. external: ', glb_nshared/pe_size
151 call neko_log%message(log_buf)
152
153 if (present(bcknd)) then
154 bcknd_ = bcknd
155 else
156 if (neko_bcknd_sx .eq. 1) then
157 bcknd_ = gs_bcknd_sx
158 else if (neko_bcknd_device .eq. 1) then
159 bcknd_ = gs_bcknd_dev
160 else
161 bcknd_ = gs_bcknd_cpu
162 end if
163 end if
164
165 ! Setup Gather-scatter backend
166 select case(bcknd_)
167 case(gs_bcknd_cpu)
168 allocate(gs_cpu_t::gs%bcknd)
169 bcknd_str = ' std'
170 case(gs_bcknd_dev)
171 allocate(gs_device_t::gs%bcknd)
172 if (neko_bcknd_hip .eq. 1) then
173 bcknd_str = ' hip'
174 else if (neko_bcknd_cuda .eq. 1) then
175 bcknd_str = ' cuda'
176 else if (neko_bcknd_opencl .eq. 1) then
177 bcknd_str = ' opencl'
178 end if
179 case(gs_bcknd_sx)
180 allocate(gs_sx_t::gs%bcknd)
181 bcknd_str = ' sx'
182 case default
183 call neko_error('Unknown Gather-scatter backend')
184 end select
185
186 write(log_buf, '(A)') 'Backend : ' // trim(bcknd_str)
187 call neko_log%message(log_buf)
188
189
190
191 call gs%bcknd%init(gs%nlocal, gs%nshared, gs%nlocal_blks, gs%nshared_blks)
192
193 if (use_device_mpi) then
194 select type(b => gs%bcknd)
195 type is (gs_device_t)
196 b%shared_on_host = .false.
197 end select
198
199 if(pe_size .gt. 1) then
200 ! Select fastest device MPI strategy at runtime
201 select type(c => gs%comm)
202 type is (gs_device_mpi_t)
203 call get_environment_variable("NEKO_GS_STRTGY", env_strtgy, env_len)
204 if (env_len .eq. 0) then
205 allocate(tmp(dofmap%size()))
206 call device_map(tmp, tmp_d, dofmap%size())
207 tmp = 1.0_rp
208 call device_memcpy(tmp, tmp_d, dofmap%size(), &
209 host_to_device, sync=.false.)
210 call gs_op_vector(gs, tmp, dofmap%size(), gs_op_add)
211
212 do i = 1, size(strtgy)
213 c%nb_strtgy = strtgy(i)
214 call device_sync
215 call mpi_barrier(neko_comm)
216 strtgy_time(i) = mpi_wtime()
217 do j = 1, 100
218 call gs_op_vector(gs, tmp, dofmap%size(), gs_op_add)
219 end do
220 strtgy_time(i) = (mpi_wtime() - strtgy_time(i)) / 100d0
221 end do
222
223 call device_deassociate(tmp)
224 call device_free(tmp_d)
225 deallocate(tmp)
226
227 c%nb_strtgy = strtgy(minloc(strtgy_time, 1))
228
229 avg_strtgy = minloc(strtgy_time, 1)
230 call mpi_allreduce(mpi_in_place, avg_strtgy, 1, &
231 mpi_integer, mpi_sum, neko_comm)
232 avg_strtgy = avg_strtgy / pe_size
233
234 write(log_buf, '(A,B0.2,A)') 'Avg. strtgy : [', &
235 strtgy(avg_strtgy),']'
236
237 else
238 read(env_strtgy(1:env_len), *) i
239
240 if (i .lt. 1 .or. i .gt. 4) then
241 call neko_error('Invalid gs sync strtgy')
242 end if
243
244 c%nb_strtgy = strtgy(i)
245 avg_strtgy = i
246
247 write(log_buf, '(A,B0.2,A)') 'Env. strtgy : [', &
248 strtgy(avg_strtgy),']'
249 end if
250
251 call neko_log%message(log_buf)
252
253 end select
254 end if
255 end if
256
257 call neko_log%end_section()
258
259 end subroutine gs_init
260
262 subroutine gs_free(gs)
263 class(gs_t), intent(inout) :: gs
264
265 nullify(gs%dofmap)
266
267 if (allocated(gs%local_gs)) then
268 deallocate(gs%local_gs)
269 end if
270
271 if (allocated(gs%local_dof_gs)) then
272 deallocate(gs%local_dof_gs)
273 end if
274
275 if (allocated(gs%local_gs_dof)) then
276 deallocate(gs%local_gs_dof)
277 end if
278
279 if (allocated(gs%local_blk_len)) then
280 deallocate(gs%local_blk_len)
281 end if
282
283 if (allocated(gs%shared_gs)) then
284 deallocate(gs%shared_gs)
285 end if
286
287 if (allocated(gs%shared_dof_gs)) then
288 deallocate(gs%shared_dof_gs)
289 end if
290
291 if (allocated(gs%shared_gs_dof)) then
292 deallocate(gs%shared_gs_dof)
293 end if
294
295 if (allocated(gs%shared_blk_len)) then
296 deallocate(gs%shared_blk_len)
297 end if
298
299 gs%nlocal = 0
300 gs%nshared = 0
301 gs%nlocal_blks = 0
302 gs%nshared_blks = 0
303
304 call gs%shared_dofs%free()
305
306 if (allocated(gs%bcknd)) then
307 call gs%bcknd%free()
308 deallocate(gs%bcknd)
309 end if
310
311 if (allocated(gs%comm)) then
312 call gs%comm%free()
313 deallocate(gs%comm)
314 end if
315
316 end subroutine gs_free
317
319 subroutine gs_init_mapping(gs)
320 type(gs_t), target, intent(inout) :: gs
321 type(mesh_t), pointer :: msh
322 type(dofmap_t), pointer :: dofmap
323 type(stack_i4_t), target :: local_dof, dof_local, shared_dof, dof_shared
324 type(stack_i4_t), target :: local_face_dof, face_dof_local
325 type(stack_i4_t), target :: shared_face_dof, face_dof_shared
326 integer :: i, j, k, l, lx, ly, lz, max_id, max_sid, id, lid, dm_size
327 type(htable_i8_t) :: dm
328 type(htable_i8_t), pointer :: sdm
329
330 dofmap => gs%dofmap
331 msh => dofmap%msh
332 sdm => gs%shared_dofs
333
334 lx = dofmap%Xh%lx
335 ly = dofmap%Xh%ly
336 lz = dofmap%Xh%lz
337 dm_size = dofmap%size()/lx
338
339 call dm%init(dm_size, i)
342 call sdm%init(dofmap%size(), i)
343
344
345 call local_dof%init()
346 call dof_local%init()
347
348 call local_face_dof%init()
349 call face_dof_local%init()
350
351 call shared_dof%init()
352 call dof_shared%init()
353
354 call shared_face_dof%init()
355 call face_dof_shared%init()
356
357 !
358 ! Setup mapping for dofs points
359 !
360
361 max_id = 0
362 max_sid = 0
363 do i = 1, msh%nelv
364 lid = linear_index(1, 1, 1, i, lx, ly, lz)
365 if (dofmap%shared_dof(1, 1, 1, i)) then
366 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, 1, i), max_sid)
367 call shared_dof%push(id)
368 call dof_shared%push(lid)
369 else
370 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, 1, i), max_id)
371 call local_dof%push(id)
372 call dof_local%push(lid)
373 end if
374
375 lid = linear_index(lx, 1, 1, i, lx, ly, lz)
376 if (dofmap%shared_dof(lx, 1, 1, i)) then
377 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, 1, i), max_sid)
378 call shared_dof%push(id)
379 call dof_shared%push(lid)
380 else
381 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, 1, i), max_id)
382 call local_dof%push(id)
383 call dof_local%push(lid)
384 end if
385
386 lid = linear_index(1, ly, 1, i, lx, ly, lz)
387 if (dofmap%shared_dof(1, ly, 1, i)) then
388 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, 1, i), max_sid)
389 call shared_dof%push(id)
390 call dof_shared%push(lid)
391 else
392 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, 1, i), max_id)
393 call local_dof%push(id)
394 call dof_local%push(lid)
395 end if
396
397 lid = linear_index(lx, ly, 1, i, lx, ly, lz)
398 if (dofmap%shared_dof(lx, ly, 1, i)) then
399 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, 1, i), max_sid)
400 call shared_dof%push(id)
401 call dof_shared%push(lid)
402 else
403 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, 1, i), max_id)
404 call local_dof%push(id)
405 call dof_local%push(lid)
406 end if
407 if (lz .gt. 1) then
408 lid = linear_index(1, 1, lz, i, lx, ly, lz)
409 if (dofmap%shared_dof(1, 1, lz, i)) then
410 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, lz, i), max_sid)
411 call shared_dof%push(id)
412 call dof_shared%push(lid)
413 else
414 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, lz, i), max_id)
415 call local_dof%push(id)
416 call dof_local%push(lid)
417 end if
418
419 lid = linear_index(lx, 1, lz, i, lx, ly, lz)
420 if (dofmap%shared_dof(lx, 1, lz, i)) then
421 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, lz, i), max_sid)
422 call shared_dof%push(id)
423 call dof_shared%push(lid)
424 else
425 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, lz, i), max_id)
426 call local_dof%push(id)
427 call dof_local%push(lid)
428 end if
429
430 lid = linear_index(1, ly, lz, i, lx, ly, lz)
431 if (dofmap%shared_dof(1, ly, lz, i)) then
432 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, lz, i), max_sid)
433 call shared_dof%push(id)
434 call dof_shared%push(lid)
435 else
436 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, lz, i), max_id)
437 call local_dof%push(id)
438 call dof_local%push(lid)
439 end if
440
441 lid = linear_index(lx, ly, lz, i, lx, ly, lz)
442 if (dofmap%shared_dof(lx, ly, lz, i)) then
443 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, lz, i), max_sid)
444 call shared_dof%push(id)
445 call dof_shared%push(lid)
446 else
447 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, lz, i), max_id)
448 call local_dof%push(id)
449 call dof_local%push(lid)
450 end if
451 end if
452 end do
453
454 ! Clear local dofmap table
455 call dm%clear()
456
457 if (lz .gt. 1) then
458 !
459 ! Setup mapping for dofs on edges
460 !
461 do i = 1, msh%nelv
462
463 !
464 ! dofs on edges in x-direction
465 !
466 if (dofmap%shared_dof(2, 1, 1, i)) then
467 do j = 2, lx - 1
468 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, 1, i), max_sid)
469 call shared_dof%push(id)
470 id = linear_index(j, 1, 1, i, lx, ly, lz)
471 call dof_shared%push(id)
472 end do
473 else
474 do j = 2, lx - 1
475 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, 1, i), max_id)
476 call local_dof%push(id)
477 id = linear_index(j, 1, 1, i, lx, ly, lz)
478 call dof_local%push(id)
479 end do
480 end if
481 if (dofmap%shared_dof(2, 1, lz, i)) then
482 do j = 2, lx - 1
483 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, lz, i), max_sid)
484 call shared_dof%push(id)
485 id = linear_index(j, 1, lz, i, lx, ly, lz)
486 call dof_shared%push(id)
487 end do
488 else
489 do j = 2, lx - 1
490 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, lz, i), max_id)
491 call local_dof%push(id)
492 id = linear_index(j, 1, lz, i, lx, ly, lz)
493 call dof_local%push(id)
494 end do
495 end if
496
497 if (dofmap%shared_dof(2, ly, 1, i)) then
498 do j = 2, lx - 1
499 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, 1, i), max_sid)
500 call shared_dof%push(id)
501 id = linear_index(j, ly, 1, i, lx, ly, lz)
502 call dof_shared%push(id)
503 end do
504
505 else
506 do j = 2, lx - 1
507 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
508 call local_dof%push(id)
509 id = linear_index(j, ly, 1, i, lx, ly, lz)
510 call dof_local%push(id)
511 end do
512 end if
513 if (dofmap%shared_dof(2, ly, lz, i)) then
514 do j = 2, lx - 1
515 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, lz, i), max_sid)
516 call shared_dof%push(id)
517 id = linear_index(j, ly, lz, i, lx, ly, lz)
518 call dof_shared%push(id)
519 end do
520 else
521 do j = 2, lx - 1
522 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, lz, i), max_id)
523 call local_dof%push(id)
524 id = linear_index(j, ly, lz, i, lx, ly, lz)
525 call dof_local%push(id)
526 end do
527 end if
528
529 !
530 ! dofs on edges in y-direction
531 !
532 if (dofmap%shared_dof(1, 2, 1, i)) then
533 do k = 2, ly - 1
534 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, 1, i), max_sid)
535 call shared_dof%push(id)
536 id = linear_index(1, k, 1, i, lx, ly, lz)
537 call dof_shared%push(id)
538 end do
539 else
540 do k = 2, ly - 1
541 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, 1, i), max_id)
542 call local_dof%push(id)
543 id = linear_index(1, k, 1, i, lx, ly, lz)
544 call dof_local%push(id)
545 end do
546 end if
547 if (dofmap%shared_dof(1, 2, lz, i)) then
548 do k = 2, ly - 1
549 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, lz, i), max_sid)
550 call shared_dof%push(id)
551 id = linear_index(1, k, lz, i, lx, ly, lz)
552 call dof_shared%push(id)
553 end do
554 else
555 do k = 2, ly - 1
556 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, lz, i), max_id)
557 call local_dof%push(id)
558 id = linear_index(1, k, lz, i, lx, ly, lz)
559 call dof_local%push(id)
560 end do
561 end if
562
563 if (dofmap%shared_dof(lx, 2, 1, i)) then
564 do k = 2, ly - 1
565 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, 1, i), max_sid)
566 call shared_dof%push(id)
567 id = linear_index(lx, k, 1, i, lx, ly, lz)
568 call dof_shared%push(id)
569 end do
570 else
571 do k = 2, ly - 1
572 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, 1, i), max_id)
573 call local_dof%push(id)
574 id = linear_index(lx, k, 1, i, lx, ly, lz)
575 call dof_local%push(id)
576 end do
577 end if
578 if (dofmap%shared_dof(lx, 2, lz, i)) then
579 do k = 2, ly - 1
580 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, lz, i), max_sid)
581 call shared_dof%push(id)
582 id = linear_index(lx, k, lz, i, lx, ly, lz)
583 call dof_shared%push(id)
584 end do
585 else
586 do k = 2, ly - 1
587 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, lz, i), max_id)
588 call local_dof%push(id)
589 id = linear_index(lx, k, lz, i, lx, ly, lz)
590 call dof_local%push(id)
591 end do
592 end if
593 !
594 ! dofs on edges in z-direction
595 !
596 if (dofmap%shared_dof(1, 1, 2, i)) then
597 do l = 2, lz - 1
598 id = gs_mapping_add_dof(sdm, dofmap%dof(1, 1, l, i), max_sid)
599 call shared_dof%push(id)
600 id = linear_index(1, 1, l, i, lx, ly, lz)
601 call dof_shared%push(id)
602 end do
603 else
604 do l = 2, lz - 1
605 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, l, i), max_id)
606 call local_dof%push(id)
607 id = linear_index(1, 1, l, i, lx, ly, lz)
608 call dof_local%push(id)
609 end do
610 end if
611
612 if (dofmap%shared_dof(lx, 1, 2, i)) then
613 do l = 2, lz - 1
614 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, l, i), max_sid)
615 call shared_dof%push(id)
616 id = linear_index(lx, 1, l, i, lx, ly, lz)
617 call dof_shared%push(id)
618 end do
619 else
620 do l = 2, lz - 1
621 id = gs_mapping_add_dof(dm, dofmap%dof(lx, 1, l, i), max_id)
622 call local_dof%push(id)
623 id = linear_index(lx, 1, l, i, lx, ly, lz)
624 call dof_local%push(id)
625 end do
626 end if
627
628 if (dofmap%shared_dof(1, ly, 2, i)) then
629 do l = 2, lz - 1
630 id = gs_mapping_add_dof(sdm, dofmap%dof(1, ly, l, i), max_sid)
631 call shared_dof%push(id)
632 id = linear_index(1, ly, l, i, lx, ly, lz)
633 call dof_shared%push(id)
634 end do
635 else
636 do l = 2, lz - 1
637 id = gs_mapping_add_dof(dm, dofmap%dof(1, ly, l, i), max_id)
638 call local_dof%push(id)
639 id = linear_index(1, ly, l, i, lx, ly, lz)
640 call dof_local%push(id)
641 end do
642 end if
643
644 if (dofmap%shared_dof(lx, ly, 2, i)) then
645 do l = 2, lz - 1
646 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, l, i), max_sid)
647 call shared_dof%push(id)
648 id = linear_index(lx, ly, l, i, lx, ly, lz)
649 call dof_shared%push(id)
650 end do
651 else
652 do l = 2, lz - 1
653 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, l, i), max_id)
654 call local_dof%push(id)
655 id = linear_index(lx, ly, l, i, lx, ly, lz)
656 call dof_local%push(id)
657 end do
658 end if
659 end do
660 end if
661
662 ! Clear local dofmap table
663 call dm%clear()
664
665 !
666 ! Setup mapping for dofs on facets
667 !
668 if (lz .eq. 1) then
669 do i = 1, msh%nelv
670
671 !
672 ! dofs on edges in x-direction
673 !
674 if (msh%facet_neigh(3, i) .ne. 0) then
675 if (dofmap%shared_dof(2, 1, 1, i)) then
676 do j = 2, lx - 1
677 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, 1, i), max_sid)
678 call shared_face_dof%push(id)
679 id = linear_index(j, 1, 1, i, lx, ly, lz)
680 call face_dof_shared%push(id)
681 end do
682 else
683 do j = 2, lx - 1
684 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, 1, i), max_id)
685 call local_face_dof%push(id)
686 id = linear_index(j, 1, 1, i, lx, ly, lz)
687 call face_dof_local%push(id)
688 end do
689 end if
690 end if
691
692 if (msh%facet_neigh(4, i) .ne. 0) then
693 if (dofmap%shared_dof(2, ly, 1, i)) then
694 do j = 2, lx - 1
695 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, 1, i), max_sid)
696 call shared_face_dof%push(id)
697 id = linear_index(j, ly, 1, i, lx, ly, lz)
698 call face_dof_shared%push(id)
699 end do
700
701 else
702 do j = 2, lx - 1
703 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
704 call local_face_dof%push(id)
705 id = linear_index(j, ly, 1, i, lx, ly, lz)
706 call face_dof_local%push(id)
707 end do
708 end if
709 end if
710
711 !
712 ! dofs on edges in y-direction
713 !
714 if (msh%facet_neigh(1, i) .ne. 0) then
715 if (dofmap%shared_dof(1, 2, 1, i)) then
716 do k = 2, ly - 1
717 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, 1, i), max_sid)
718 call shared_face_dof%push(id)
719 id = linear_index(1, k, 1, i, lx, ly, lz)
720 call face_dof_shared%push(id)
721 end do
722 else
723 do k = 2, ly - 1
724 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, 1, i), max_id)
725 call local_face_dof%push(id)
726 id = linear_index(1, k, 1, i, lx, ly, lz)
727 call face_dof_local%push(id)
728 end do
729 end if
730 end if
731
732 if (msh%facet_neigh(2, i) .ne. 0) then
733 if (dofmap%shared_dof(lx, 2, 1, i)) then
734 do k = 2, ly - 1
735 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, 1, i), max_sid)
736 call shared_face_dof%push(id)
737 id = linear_index(lx, k, 1, i, lx, ly, lz)
738 call face_dof_shared%push(id)
739 end do
740 else
741 do k = 2, ly - 1
742 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, 1, i), max_id)
743 call local_face_dof%push(id)
744 id = linear_index(lx, k, 1, i, lx, ly, lz)
745 call face_dof_local%push(id)
746 end do
747 end if
748 end if
749 end do
750 else
751 do i = 1, msh%nelv
752
753 ! Facets in x-direction (s, t)-plane
754 if (msh%facet_neigh(1, i) .ne. 0) then
755 if (dofmap%shared_dof(1, 2, 2, i)) then
756 do l = 2, lz - 1
757 do k = 2, ly - 1
758 id = gs_mapping_add_dof(sdm, dofmap%dof(1, k, l, i), max_sid)
759 call shared_face_dof%push(id)
760 id = linear_index(1, k, l, i, lx, ly, lz)
761 call face_dof_shared%push(id)
762 end do
763 end do
764 else
765 do l = 2, lz - 1
766 do k = 2, ly - 1
767 id = gs_mapping_add_dof(dm, dofmap%dof(1, k, l, i), max_id)
768 call local_face_dof%push(id)
769 id = linear_index(1, k, l, i, lx, ly, lz)
770 call face_dof_local%push(id)
771 end do
772 end do
773 end if
774 end if
775
776 if (msh%facet_neigh(2, i) .ne. 0) then
777 if (dofmap%shared_dof(lx, 2, 2, i)) then
778 do l = 2, lz - 1
779 do k = 2, ly - 1
780 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, k, l, i), max_sid)
781 call shared_face_dof%push(id)
782 id = linear_index(lx, k, l, i, lx, ly, lz)
783 call face_dof_shared%push(id)
784 end do
785 end do
786 else
787 do l = 2, lz - 1
788 do k = 2, ly - 1
789 id = gs_mapping_add_dof(dm, dofmap%dof(lx, k, l, i), max_id)
790 call local_face_dof%push(id)
791 id = linear_index(lx, k, l, i, lx, ly, lz)
792 call face_dof_local%push(id)
793 end do
794 end do
795 end if
796 end if
797
798 ! Facets in y-direction (r, t)-plane
799 if (msh%facet_neigh(3, i) .ne. 0) then
800 if (dofmap%shared_dof(2, 1, 2, i)) then
801 do l = 2, lz - 1
802 do j = 2, lx - 1
803 id = gs_mapping_add_dof(sdm, dofmap%dof(j, 1, l, i), max_sid)
804 call shared_face_dof%push(id)
805 id = linear_index(j, 1, l, i, lx, ly, lz)
806 call face_dof_shared%push(id)
807 end do
808 end do
809 else
810 do l = 2, lz - 1
811 do j = 2, lx - 1
812 id = gs_mapping_add_dof(dm, dofmap%dof(j, 1, l, i), max_id)
813 call local_face_dof%push(id)
814 id = linear_index(j, 1, l, i, lx, ly, lz)
815 call face_dof_local%push(id)
816 end do
817 end do
818 end if
819 end if
820
821 if (msh%facet_neigh(4, i) .ne. 0) then
822 if (dofmap%shared_dof(2, ly, 2, i)) then
823 do l = 2, lz - 1
824 do j = 2, lx - 1
825 id = gs_mapping_add_dof(sdm, dofmap%dof(j, ly, l, i), max_sid)
826 call shared_face_dof%push(id)
827 id = linear_index(j, ly, l, i, lx, ly, lz)
828 call face_dof_shared%push(id)
829 end do
830 end do
831 else
832 do l = 2, lz - 1
833 do j = 2, lx - 1
834 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, l, i), max_id)
835 call local_face_dof%push(id)
836 id = linear_index(j, ly, l, i, lx, ly, lz)
837 call face_dof_local%push(id)
838 end do
839 end do
840 end if
841 end if
842
843 ! Facets in z-direction (r, s)-plane
844 if (msh%facet_neigh(5, i) .ne. 0) then
845 if (dofmap%shared_dof(2, 2, 1, i)) then
846 do k = 2, ly - 1
847 do j = 2, lx - 1
848 id = gs_mapping_add_dof(sdm, dofmap%dof(j, k, 1, i), max_sid)
849 call shared_face_dof%push(id)
850 id = linear_index(j, k, 1, i, lx, ly, lz)
851 call face_dof_shared%push(id)
852 end do
853 end do
854 else
855 do k = 2, ly - 1
856 do j = 2, lx - 1
857 id = gs_mapping_add_dof(dm, dofmap%dof(j, k, 1, i), max_id)
858 call local_face_dof%push(id)
859 id = linear_index(j, k, 1, i, lx, ly, lz)
860 call face_dof_local%push(id)
861 end do
862 end do
863 end if
864 end if
865
866 if (msh%facet_neigh(6, i) .ne. 0) then
867 if (dofmap%shared_dof(2, 2, lz, i)) then
868 do k = 2, ly - 1
869 do j = 2, lx - 1
870 id = gs_mapping_add_dof(sdm, dofmap%dof(j, k, lz, i), max_sid)
871 call shared_face_dof%push(id)
872 id = linear_index(j, k, lz, i, lx, ly, lz)
873 call face_dof_shared%push(id)
874 end do
875 end do
876 else
877 do k = 2, ly - 1
878 do j = 2, lx - 1
879 id = gs_mapping_add_dof(dm, dofmap%dof(j, k, lz, i), max_id)
880 call local_face_dof%push(id)
881 id = linear_index(j, k, lz, i, lx, ly, lz)
882 call face_dof_local%push(id)
883 end do
884 end do
885 end if
886 end if
887 end do
888 end if
889
890
891 call dm%free()
892
893 gs%nlocal = local_dof%size() + local_face_dof%size()
894 gs%local_facet_offset = local_dof%size() + 1
895
896 ! Finalize local dof to gather-scatter index
897 allocate(gs%local_dof_gs(gs%nlocal))
898
899 ! Add dofs on points and edges
900
901 ! We should use the %array() procedure, which works great for
902 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
903 ! certain data types
904 select type(dof_array => local_dof%data)
905 type is (integer)
906 j = local_dof%size()
907 do i = 1, j
908 gs%local_dof_gs(i) = dof_array(i)
909 end do
910 end select
911 call local_dof%free()
912
913 ! Add dofs on faces
914
915 ! We should use the %array() procedure, which works great for
916 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
917 ! certain data types
918 select type(dof_array => local_face_dof%data)
919 type is (integer)
920 do i = 1, local_face_dof%size()
921 gs%local_dof_gs(i + j) = dof_array(i)
922 end do
923 end select
924 call local_face_dof%free()
925
926 ! Finalize local gather-scatter index to dof
927 allocate(gs%local_gs_dof(gs%nlocal))
928
929 ! Add gather-scatter index on points and edges
930
931 ! We should use the %array() procedure, which works great for
932 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
933 ! certain data types
934 select type(dof_array => dof_local%data)
935 type is (integer)
936 j = dof_local%size()
937 do i = 1, j
938 gs%local_gs_dof(i) = dof_array(i)
939 end do
940 end select
941 call dof_local%free()
942
943 ! We should use the %array() procedure, which works great for
944 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
945 ! certain data types
946 select type(dof_array => face_dof_local%data)
947 type is (integer)
948 do i = 1, face_dof_local%size()
949 gs%local_gs_dof(i+j) = dof_array(i)
950 end do
951 end select
952 call face_dof_local%free()
953
954 call gs_qsort_dofmap(gs%local_dof_gs, gs%local_gs_dof, &
955 gs%nlocal, 1, gs%nlocal)
956
957 call gs_find_blks(gs%local_dof_gs, gs%local_blk_len, &
958 gs%nlocal_blks, gs%nlocal, gs%local_facet_offset)
959
960 ! Allocate buffer for local gs-ops
961 allocate(gs%local_gs(gs%nlocal))
962
963 gs%nshared = shared_dof%size() + shared_face_dof%size()
964 gs%shared_facet_offset = shared_dof%size() + 1
965
966 ! Finalize shared dof to gather-scatter index
967 allocate(gs%shared_dof_gs(gs%nshared))
968
969 ! Add shared dofs on points and edges
970
971 ! We should use the %array() procedure, which works great for
972 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
973 ! certain data types
974 select type(dof_array => shared_dof%data)
975 type is (integer)
976 j = shared_dof%size()
977 do i = 1, j
978 gs%shared_dof_gs(i) = dof_array(i)
979 end do
980 end select
981 call shared_dof%free()
982
983 ! Add shared dofs on faces
984
985 ! We should use the %array() procedure, which works great for
986 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
987 ! certain data types
988 select type(dof_array => shared_face_dof%data)
989 type is (integer)
990 do i = 1, shared_face_dof%size()
991 gs%shared_dof_gs(i + j) = dof_array(i)
992 end do
993 end select
994 call shared_face_dof%free()
995
996 ! Finalize shared gather-scatter index to dof
997 allocate(gs%shared_gs_dof(gs%nshared))
998
999 ! Add dofs on points and edges
1000
1001 ! We should use the %array() procedure, which works great for
1002 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1003 ! certain data types
1004 select type(dof_array => dof_shared%data)
1005 type is(integer)
1006 j = dof_shared%size()
1007 do i = 1, j
1008 gs%shared_gs_dof(i) = dof_array(i)
1009 end do
1010 end select
1011 call dof_shared%free()
1012
1013 ! We should use the %array() procedure, which works great for
1014 ! GNU, Intel and NEC, but it breaks horribly on Cray when using
1015 ! certain data types
1016 select type(dof_array => face_dof_shared%data)
1017 type is (integer)
1018 do i = 1, face_dof_shared%size()
1019 gs%shared_gs_dof(i + j) = dof_array(i)
1020 end do
1021 end select
1022 call face_dof_shared%free()
1023
1024 ! Allocate buffer for shared gs-ops
1025 allocate(gs%shared_gs(gs%nshared))
1026
1027 if (gs%nshared .gt. 0) then
1028 call gs_qsort_dofmap(gs%shared_dof_gs, gs%shared_gs_dof, &
1029 gs%nshared, 1, gs%nshared)
1030
1031 call gs_find_blks(gs%shared_dof_gs, gs%shared_blk_len, &
1032 gs%nshared_blks, gs%nshared, gs%shared_facet_offset)
1033 end if
1034
1035 contains
1036
1038 function gs_mapping_add_dof(map_, dof, max_id) result(id)
1039 type(htable_i8_t), intent(inout) :: map_
1040 integer(kind=i8), intent(inout) :: dof
1041 integer, intent(inout) :: max_id
1042 integer :: id
1043
1044 if (map_%get(dof, id) .gt. 0) then
1045 max_id = max_id + 1
1046 call map_%set(dof, max_id)
1047 id = max_id
1048 end if
1049
1050 end function gs_mapping_add_dof
1051
1053 recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
1054 integer, intent(inout) :: n
1055 integer, dimension(n), intent(inout) :: dg
1056 integer, dimension(n), intent(inout) :: gd
1057 integer :: lo, hi
1058 integer :: tmp, i, j, pivot
1059
1060 i = lo - 1
1061 j = hi + 1
1062 pivot = dg((lo + hi) / 2)
1063 do
1064 do
1065 i = i + 1
1066 if (dg(i) .ge. pivot) exit
1067 end do
1068
1069 do
1070 j = j - 1
1071 if (dg(j) .le. pivot) exit
1072 end do
1073
1074 if (i .lt. j) then
1075 tmp = dg(i)
1076 dg(i) = dg(j)
1077 dg(j) = tmp
1078
1079 tmp = gd(i)
1080 gd(i) = gd(j)
1081 gd(j) = tmp
1082 else if (i .eq. j) then
1083 i = i + 1
1084 exit
1085 else
1086 exit
1087 end if
1088 end do
1089 if (lo .lt. j) call gs_qsort_dofmap(dg, gd, n, lo, j)
1090 if (i .lt. hi) call gs_qsort_dofmap(dg, gd, n, i, hi)
1091
1092 end subroutine gs_qsort_dofmap
1093
1095 subroutine gs_find_blks(dg, blk_len, nblks, n, m)
1096 integer, intent(in) :: n
1097 integer, intent(in) :: m
1098 integer, dimension(n), intent(inout) :: dg
1099 integer, allocatable, intent(inout) :: blk_len(:)
1100 integer, intent(inout) :: nblks
1101 integer :: i, j
1102 integer :: id, count
1103 type(stack_i4_t), target :: blks
1104
1105 call blks%init()
1106 i = 1
1107 do while( i .lt. m)
1108 id = dg(i)
1109 count = 1
1110 j = i
1111 do while ( j+1 .le. n .and. dg(j+1) .eq. id)
1112 j = j + 1
1113 count = count + 1
1114 end do
1115 call blks%push(count)
1116 i = j + 1
1117 end do
1118
1119 select type(blk_array => blks%data)
1120 type is(integer)
1121 nblks = blks%size()
1122 allocate(blk_len(nblks))
1123 do i = 1, nblks
1124 blk_len(i) = blk_array(i)
1125 end do
1126 end select
1127 call blks%free()
1128
1129 end subroutine gs_find_blks
1130
1131 end subroutine gs_init_mapping
1132
1134 subroutine gs_schedule(gs)
1135 type(gs_t), target, intent(inout) :: gs
1136 integer(kind=i8), allocatable :: send_buf(:), recv_buf(:)
1137 integer(kind=i2), allocatable :: shared_flg(:), recv_flg(:)
1138 type(htable_iter_i8_t) :: it
1139 type(stack_i4_t) :: send_pe, recv_pe
1140 type(mpi_status) :: status
1141 type(mpi_request) :: send_req, recv_req
1142 integer :: i, j, max_recv, src, dst, ierr, n_recv
1143 integer :: tmp, shared_gs_id
1144 integer :: nshared_unique
1145
1146 nshared_unique = gs%shared_dofs%num_entries()
1147
1148 call it%init(gs%shared_dofs)
1149 allocate(send_buf(nshared_unique))
1150 i = 1
1151 do while(it%next())
1152 send_buf(i) = it%key()
1153 i = i + 1
1154 end do
1155
1156 call send_pe%init()
1157 call recv_pe%init()
1158
1159
1160 !
1161 ! Schedule exchange of shared dofs
1162 !
1163
1164 call mpi_allreduce(nshared_unique, max_recv, 1, &
1165 mpi_integer, mpi_max, neko_comm, ierr)
1166
1167 allocate(recv_buf(max_recv))
1168 allocate(shared_flg(max_recv))
1169 allocate(recv_flg(max_recv))
1170
1172 do i = 1, size(gs%dofmap%msh%neigh_order)
1173 src = modulo(pe_rank - gs%dofmap%msh%neigh_order(i) + pe_size, pe_size)
1174 dst = modulo(pe_rank + gs%dofmap%msh%neigh_order(i), pe_size)
1175
1176 if (gs%dofmap%msh%neigh(src)) then
1177 call mpi_irecv(recv_buf, max_recv, mpi_integer8, &
1178 src, 0, neko_comm, recv_req, ierr)
1179 end if
1180
1181 if (gs%dofmap%msh%neigh(dst)) then
1182 call mpi_isend(send_buf, nshared_unique, mpi_integer8, &
1183 dst, 0, neko_comm, send_req, ierr)
1184 end if
1185
1186 if (gs%dofmap%msh%neigh(src)) then
1187 call mpi_wait(recv_req, status, ierr)
1188 call mpi_get_count(status, mpi_integer8, n_recv, ierr)
1189
1190 do j = 1, n_recv
1191 shared_flg(j) = gs%shared_dofs%get(recv_buf(j), shared_gs_id)
1192 if (shared_flg(j) .eq. 0) then
1194 call gs%comm%recv_dof(src)%push(shared_gs_id)
1195 end if
1196 end do
1197
1198 if (gs%comm%recv_dof(src)%size() .gt. 0) then
1199 call recv_pe%push(src)
1200 end if
1201 end if
1202
1203 if (gs%dofmap%msh%neigh(dst)) then
1204 call mpi_wait(send_req, mpi_status_ignore, ierr)
1205 call mpi_irecv(recv_flg, max_recv, mpi_integer2, &
1206 dst, 0, neko_comm, recv_req, ierr)
1207 end if
1208
1209 if (gs%dofmap%msh%neigh(src)) then
1210 call mpi_isend(shared_flg, n_recv, mpi_integer2, &
1211 src, 0, neko_comm, send_req, ierr)
1212 end if
1213
1214 if (gs%dofmap%msh%neigh(dst)) then
1215 call mpi_wait(recv_req, status, ierr)
1216 call mpi_get_count(status, mpi_integer2, n_recv, ierr)
1217
1218 do j = 1, n_recv
1219 if (recv_flg(j) .eq. 0) then
1220 tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id)
1222 call gs%comm%send_dof(dst)%push(shared_gs_id)
1223 end if
1224 end do
1225
1226 if (gs%comm%send_dof(dst)%size() .gt. 0) then
1227 call send_pe%push(dst)
1228 end if
1229 end if
1230
1231 if (gs%dofmap%msh%neigh(src)) then
1232 call mpi_wait(send_req, mpi_status_ignore, ierr)
1233 end if
1234
1235 end do
1236
1237 call gs%comm%init(send_pe, recv_pe)
1238
1239 call send_pe%free()
1240 call recv_pe%free()
1241
1242 deallocate(send_buf)
1243 deallocate(recv_flg)
1244 deallocate(shared_flg)
1245 !This arrays seems to take massive amounts of memory...
1246 call gs%shared_dofs%free()
1247
1248 end subroutine gs_schedule
1249
1251 subroutine gs_op_fld(gs, u, op, event)
1252 class(gs_t), intent(inout) :: gs
1253 type(field_t), intent(inout) :: u
1254 type(c_ptr), optional, intent(inout) :: event
1255 integer :: n, op
1256
1257 n = u%msh%nelv * u%Xh%lx * u%Xh%ly * u%Xh%lz
1258 if (present(event)) then
1259 call gs_op_vector(gs, u%x, n, op, event)
1260 else
1261 call gs_op_vector(gs, u%x, n, op)
1262 end if
1263
1264 end subroutine gs_op_fld
1265
1267 subroutine gs_op_r4(gs, u, n, op, event)
1268 class(gs_t), intent(inout) :: gs
1269 integer, intent(in) :: n
1270 real(kind=rp), contiguous, dimension(:,:,:,:), intent(inout) :: u
1271 type(c_ptr), optional, intent(inout) :: event
1272 integer :: op
1273
1274 if (present(event)) then
1275 call gs_op_vector(gs, u, n, op, event)
1276 else
1277 call gs_op_vector(gs, u, n, op)
1278 end if
1279
1280 end subroutine gs_op_r4
1281
1283 subroutine gs_op_vector(gs, u, n, op, event)
1284 class(gs_t), intent(inout) :: gs
1285 integer, intent(in) :: n
1286 real(kind=rp), dimension(n), intent(inout) :: u
1287 type(c_ptr), optional, intent(inout) :: event
1288 integer :: m, l, op, lo, so
1289
1290 lo = gs%local_facet_offset
1291 so = -gs%shared_facet_offset
1292 m = gs%nlocal
1293 l = gs%nshared
1294
1295 call profiler_start_region("gather_scatter", 5)
1296 ! Gather shared dofs
1297 if (pe_size .gt. 1) then
1298 call profiler_start_region("gs_nbrecv", 13)
1299 call gs%comm%nbrecv()
1300 call profiler_end_region("gs_nbrecv", 13)
1301 call profiler_start_region("gs_gather_shared", 14)
1302 call gs%bcknd%gather(gs%shared_gs, l, so, gs%shared_dof_gs, u, n, &
1303 gs%shared_gs_dof, gs%nshared_blks, gs%shared_blk_len, op, .true.)
1304 call profiler_end_region("gs_gather_shared", 14)
1305 call profiler_start_region("gs_nbsend", 6)
1306 call gs%comm%nbsend(gs%shared_gs, l, &
1307 gs%bcknd%gather_event, gs%bcknd%gs_stream)
1308 call profiler_end_region("gs_nbsend", 6)
1309
1310 end if
1311
1312 ! Gather-scatter local dofs
1313 call profiler_start_region("gs_local", 12)
1314 call gs%bcknd%gather(gs%local_gs, m, lo, gs%local_dof_gs, u, n, &
1315 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, op, .false.)
1316 call gs%bcknd%scatter(gs%local_gs, m, gs%local_dof_gs, u, n, &
1317 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, .false., c_null_ptr)
1318 call profiler_end_region("gs_local", 12)
1319 ! Scatter shared dofs
1320 if (pe_size .gt. 1) then
1321 call profiler_start_region("gs_nbwait", 7)
1322 call gs%comm%nbwait(gs%shared_gs, l, op, gs%bcknd%gs_stream)
1323 call profiler_end_region("gs_nbwait", 7)
1324 call profiler_start_region("gs_scatter_shared", 15)
1325 if (present(event)) then
1326 call gs%bcknd%scatter(gs%shared_gs, l,&
1327 gs%shared_dof_gs, u, n, &
1328 gs%shared_gs_dof, gs%nshared_blks, &
1329 gs%shared_blk_len, .true., event)
1330 else
1331 call gs%bcknd%scatter(gs%shared_gs, l,&
1332 gs%shared_dof_gs, u, n, &
1333 gs%shared_gs_dof, gs%nshared_blks, &
1334 gs%shared_blk_len, .true., c_null_ptr)
1335 end if
1336 call profiler_end_region("gs_scatter_shared", 15)
1337 end if
1338
1339 call profiler_end_region("gather_scatter", 5)
1340
1341 end subroutine gs_op_vector
1342
1343end module gather_scatter
subroutine gs_find_blks(dg, blk_len, nblks, n, m)
Find blocks sharing dofs in non-facet data.
recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
Sort the dof lists based on the dof to gather-scatter list.
integer function gs_mapping_add_dof(map_, dof, max_id)
Register a unique dof.
Deassociate a Fortran array from a device pointer.
Definition device.F90:75
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Synchronize a device or stream.
Definition device.F90:87
Definition comm.F90:1
integer pe_rank
MPI rank.
Definition comm.F90:28
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
integer pe_size
MPI size of communicator.
Definition comm.F90:31
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Defines a field.
Definition field.f90:34
Gather-scatter.
subroutine gs_init_mapping(gs)
Setup mapping of dofs to gather-scatter operations.
subroutine gs_init(gs, dofmap, bcknd)
Initialize a gather-scatter kernel.
subroutine gs_schedule(gs)
Schedule shared gather-scatter operations.
subroutine gs_free(gs)
Deallocate a gather-scatter kernel.
subroutine gs_op_r4(gs, u, n, op, event)
Gather-scatter operation on a rank 4 array.
subroutine gs_op_vector(gs, u, n, op, event)
Gather-scatter operation on a vector u with op op.
subroutine gs_op_fld(gs, u, op, event)
Gather-scatter operation on a field u with op op.
Defines a gather-scatter backend.
Definition gs_bcknd.f90:34
integer, parameter, public gs_bcknd_cpu
Definition gs_bcknd.f90:40
integer, parameter, public gs_bcknd_sx
Definition gs_bcknd.f90:40
integer, parameter, public gs_bcknd_dev
Definition gs_bcknd.f90:40
Defines a gather-scatter communication method.
Definition gs_comm.f90:34
Generic Gather-scatter backend for CPUs.
Definition gs_cpu.f90:34
Defines GPU aware MPI gather-scatter communication.
Generic Gather-scatter backend for accelerators.
Definition gs_device.F90:34
Defines MPI gather-scatter communication.
Definition gs_mpi.f90:34
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
integer, parameter, public gs_op_max
Definition gs_ops.f90:36
integer, parameter, public gs_op_min
Definition gs_ops.f90:36
integer, parameter, public gs_op_mul
Definition gs_ops.f90:36
Generic Gather-scatter backend for NEC Vector Engines.
Definition gs_sx.f90:34
Implements a hash table ADT.
Definition htable.f90:36
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:65
integer, parameter, public log_size
Definition log.f90:42
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_hip
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_cuda
integer, parameter, public i2
Definition num_types.f90:5
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
Definition profiler.F90:78
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:109
Implements a dynamic stack ADT.
Definition stack.f90:35
Utilities.
Definition utils.f90:35
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:174
Gather-scatter backend.
Definition gs_bcknd.f90:44
Gather-scatter communication method.
Definition gs_comm.f90:45
Gather-scatter backend for CPUs.
Definition gs_cpu.f90:43
Gather-scatter backend for offloading devices.
Definition gs_device.F90:47
Gather-scatter communication using device MPI. The arrays are indexed per PE like send_pe and @ recv_...
Gather-scatter communication using MPI.
Definition gs_mpi.f90:54
Gather-scatter backend for NEC SX-Aurora.
Definition gs_sx.f90:43
Integer*8 based hash table.
Definition htable.f90:92
Iterator for an integer*8 based hash table.
Definition htable.f90:175
Integer based stack.
Definition stack.f90:63