Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gs_caf.F90
Go to the documentation of this file.
1! Copyright (c) 2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module gs_caf
35 use num_types, only : rp
36 use gs_comm, only : gs_comm_t
38 use stack, only : stack_i4_t
39 use comm, only : pe_size
40 use, intrinsic :: iso_c_binding
41#ifdef HAVE_COARRAY_EVENTS
42 use, intrinsic :: iso_fortran_env, only : atomic_int_kind, event_type
43#else
44 use, intrinsic :: iso_fortran_env, only : atomic_int_kind
45#endif
46 use utils, only : neko_error
47 implicit none
48 private
49
! Identifiers for the available signalling back-ends. The active one
! is picked when the first gs_caf_t is initialised, from the
! NEKO_GS_CAF_SIGNALING environment variable
! ("sync", "atomic", or "event").
integer, parameter, public :: gs_caf_signal_sync = 1, &
     gs_caf_signal_atomic = 2, &
     gs_caf_signal_event = 3

#ifdef HAVE_COARRAY
! Receive coarray shared by every gs_caf_t instance. It lives at
! module scope because F2008 does not allow an extended type to add a
! coarray ultimate component when its parent type has none.
!
! The buffer holds two halves of gs_caf_buf_size elements each
! (double buffering), so consecutive rounds land in alternating
! halves:
!  - sync mode: no back-pressure is needed, since a receiver that is
!    still unpacking the previous round reads from the other half;
!  - atomic / event modes: the back-pressure wait is relaxed to a
!    one-round tolerance, because the next overwrite of a half is two
!    rounds away.
!
! Several gs_caf_t instances may coexist, each with its own offset
! bookkeeping, as long as they are used strictly one after another
! (no overlapping nbsend/nbwait rounds between instances). The buffer
! is only ever grown -- to fit the largest gs initialised so far --
! and is kept for the lifetime of the program.
real(kind=rp), allocatable :: gs_caf_recv_buf(:)[:]

! Size of one half of gs_caf_recv_buf, and the active signalling mode.
! The mode is 0 until the first gs_caf_t init binds it from the
! NEKO_GS_CAF_SIGNALING environment variable; the variable is read
! once, so later instances use the same mode.
integer :: gs_caf_buf_size = 0, gs_caf_mode = 0

! Atomic-mode counters, indexed by remote rank and allocated only in
! atomic mode (shared by all instances):
!  - gs_caf_data_ready(s_rank) on image r: rounds image s has put
!    into r so far;
!  - gs_caf_buf_ready(r_rank) on image s: rounds image r has finished
!    unpacking from s so far.
integer(kind=atomic_int_kind), allocatable :: gs_caf_data_ready(:)[:], &
     gs_caf_buf_ready(:)[:]

! Per-image, non-coarray mirrors of "rounds sent to / received from
! each remote rank" (pe_size entries, indexed by remote rank),
! maintained in nbsend / nbwait. Baselining a new gs_caf_t from these
! avoids any remote atomic_ref during init, which Cray CCE has
! historically deadlocked on. For symmetric, lockstep gs traffic they
! agree with the remote atomic counters at quiescent points, i.e.
! between gs ops.
integer, allocatable :: gs_caf_send_count(:), gs_caf_recv_count(:)

#ifdef HAVE_COARRAY_EVENTS
! Event-mode signalling. data_ready collects one post per sender per
! round; buf_ready is the receiver-to-sender back-channel. These are
! scalar event coarrays whose count cannot tell apart posts from
! different gs_caf_t instances, hence event mode allows only one
! instance to be live (between nbsend and nbwait) at a time;
! gs_caf_event_in_use tracks that.
!
! They are allocatable instead of static module-scope coarrays since
! Cray CCE has historically had layout problems when static
! derived-type coarrays are mixed with allocatable coarrays on the
! symmetric heap; allocating them explicitly avoids that.
type(event_type), allocatable :: gs_caf_data_ready_ev[:], &
     gs_caf_buf_ready_ev[:]
logical :: gs_caf_event_in_use = .false.
#endif
#endif
121
!> Gather-scatter communication using Coarray Fortran.
!! Each image puts its packed data directly into the module-level
!! receive coarray of the destination image; this type only carries
!! the per-instance layout and round bookkeeping.
type, public, extends(gs_comm_t) :: gs_caf_t
  !> Packed send buffer (all per-peer slabs, concatenated)
  real(kind=rp), allocatable :: send_buf(:)
  !> Number of values sent to each send peer
  integer, allocatable :: send_len(:)
  !> Number of values received from each receive peer
  integer, allocatable :: recv_len(:)
  !> Zero-based start of each peer's slab in send_buf
  integer, allocatable :: send_offset(:)
  !> Zero-based start of each peer's slab in one half of the local
  !! receive buffer
  integer, allocatable :: recv_offset(:)
  !> Zero-based start of our slab in each destination's receive buffer
  integer, allocatable :: dest_offset(:)
  !> Image index (rank + 1) of each send peer
  integer, allocatable :: send_img(:)
  !> Image index (rank + 1) of each receive peer
  integer, allocatable :: recv_img(:)
  !> Images passed to sync images in sync mode (union of send and
  !! receive peers); only allocated in sync mode
  integer, allocatable :: sync_img(:)
  !> Set once the first nbsend has been issued in event mode, after
  !! which nbsend waits for buffer credits before putting
  logical :: send_started = .false.
  !> Which half (0 or 1) of the double-buffered receive coarray the
  !! current round uses; flipped at the end of every nbwait
  integer :: parity = 0
contains
  procedure, pass(this) :: init => gs_caf_init
  procedure, pass(this) :: free => gs_caf_free
  procedure, pass(this) :: nbsend => gs_nbsend_caf
  procedure, pass(this) :: nbrecv => gs_nbrecv_caf
  procedure, pass(this) :: nbwait => gs_nbwait_caf
end type gs_caf_t
158
159contains
160
!> Initialise Coarray Fortran based communication method.
!! On the first call the signalling mode is bound from the
!! NEKO_GS_CAF_SIGNALING environment variable ("atomic", "event";
!! anything else, including unset, selects "sync"). The routine then
!! builds the local send/receive layout, grows the shared receive
!! coarray if this instance needs more room, and exchanges slab
!! offsets with all peers.
!! @note Contains collective operations (coarray allocation, co_max,
!! sync all), so it must be called by every image.
!! @param send_pe Stack of ranks this image sends to.
!! @param recv_pe Stack of ranks this image receives from.
subroutine gs_caf_init(this, send_pe, recv_pe)
  class(gs_caf_t), intent(inout) :: this
  type(stack_i4_t), intent(inout) :: send_pe
  type(stack_i4_t), intent(inout) :: recv_pe
#ifdef HAVE_COARRAY
  integer, allocatable :: dest_xchg(:)[:]
  logical, allocatable :: in_neigh(:)
  integer :: i, nsend, nrecv, send_total, recv_total, max_total, n_neigh
  integer :: me, env_len, env_stat
  character(len=64) :: env_val

  ! Bind the signaling mode on the first init.
  if (gs_caf_mode .eq. 0) then
    call get_environment_variable("NEKO_GS_CAF_SIGNALING", env_val, &
         env_len, env_stat)
    ! env_len is the length of the variable's value, which may exceed
    ! len(env_val); a non-zero status flags a missing, unsupported or
    ! truncated value. Treat all of those as "not set" so that the
    ! substring references below can never run past env_val.
    if (env_stat .ne. 0 .or. env_len .gt. len(env_val)) env_len = 0

    if (env_len .gt. 0 .and. env_val(1:env_len) .eq. "atomic") then
      gs_caf_mode = gs_caf_signal_atomic
      allocate(gs_caf_data_ready(0:pe_size - 1)[*])
      allocate(gs_caf_buf_ready(0:pe_size - 1)[*])
      allocate(gs_caf_send_count(0:pe_size - 1))
      allocate(gs_caf_recv_count(0:pe_size - 1))
      gs_caf_send_count = 0
      gs_caf_recv_count = 0
      ! F2008 forbids mixing atomic and non-atomic accesses on the
      ! same variable, so initialise via atomic_define rather than
      ! a regular array assignment.
      do i = 0, pe_size - 1
        call atomic_define(gs_caf_data_ready(i), 0_atomic_int_kind)
        call atomic_define(gs_caf_buf_ready(i), 0_atomic_int_kind)
      end do
    else if (env_len .gt. 0 .and. env_val(1:env_len) .eq. "event") then
#ifdef HAVE_COARRAY_EVENTS
      gs_caf_mode = gs_caf_signal_event
#else
      call neko_error("NEKO_GS_CAF_SIGNALING=event requires a Fortran " // &
           "compiler with coarray events support")
#endif
    else
      gs_caf_mode = gs_caf_signal_sync
    end if
  end if

#ifdef HAVE_COARRAY_EVENTS
  ! Allocate the shared event coarrays once, lazily, on the first
  ! event-mode init. The in-use guard against overlapping gs ops is
  ! enforced in nbsend/nbwait, not here -- multiple gs_caf_t instances
  ! may be initialised back-to-back.
  if (gs_caf_mode .eq. gs_caf_signal_event) then
    if (.not. allocated(gs_caf_data_ready_ev)) then
      allocate(gs_caf_data_ready_ev[*])
      allocate(gs_caf_buf_ready_ev[*])
    end if
  end if
#endif

  call this%init_order(send_pe, recv_pe)

  nsend = size(this%send_pe)
  nrecv = size(this%recv_pe)

  allocate(this%send_len(nsend), this%send_offset(nsend), &
       this%send_img(nsend), this%dest_offset(nsend))
  allocate(this%recv_len(nrecv), this%recv_offset(nrecv), &
       this%recv_img(nrecv))

  ! Local receive layout: per-peer slabs laid out back to back, with
  ! zero-based offsets. Image indices are rank + 1.
  recv_total = 0
  do i = 1, nrecv
    this%recv_len(i) = this%recv_dof(this%recv_pe(i))%size()
    this%recv_offset(i) = recv_total
    recv_total = recv_total + this%recv_len(i)
    this%recv_img(i) = this%recv_pe(i) + 1
  end do

  ! Local send layout (concatenated per-peer slabs in one buffer)
  send_total = 0
  do i = 1, nsend
    this%send_len(i) = this%send_dof(this%send_pe(i))%size()
    this%send_offset(i) = send_total
    send_total = send_total + this%send_len(i)
    this%send_img(i) = this%send_pe(i) + 1
  end do
  ! Always allocate at least one element, also when nothing is sent.
  allocate(this%send_buf(max(1, send_total)))

  ! Symmetric coarray sized to twice the global max total receive
  ! count (double buffering). gs_caf_buf_size tracks the size of one
  ! half. Grow the shared buffer on demand; allocate / deallocate of
  ! an allocatable coarray is implicitly collective and acts as a
  ! global sync.
  max_total = recv_total
  call co_max(max_total)
  max_total = max(1, max_total)
  if (max_total .gt. gs_caf_buf_size) then
    if (allocated(gs_caf_recv_buf)) deallocate(gs_caf_recv_buf)
    allocate(gs_caf_recv_buf(2 * max_total)[*])
    gs_caf_buf_size = max_total
  end if

  ! Tell each sender at what offset in our recv_buf to place their slab,
  ! and learn at what offset in each receiver's recv_buf our slab should go.
  ! Each image puts its own offset for each sender into a slot on the
  ! sender's image indexed by our rank; after sync_all, each image reads
  ! the offsets directly from its local copy.
  me = this_image()
  allocate(dest_xchg(0:pe_size - 1)[*])
  do i = 1, nrecv
    dest_xchg(me - 1)[this%recv_img(i)] = this%recv_offset(i)
  end do
  sync all
  do i = 1, nsend
    this%dest_offset(i) = dest_xchg(this%send_pe(i))
  end do
  deallocate(dest_xchg)

  if (gs_caf_mode .eq. gs_caf_signal_sync) then
    ! Sync image set = union of send and recv peers. Both endpoints of
    ! every neighbour pair must include each other so the pairwise
    ! sync images statements match up.
    allocate(in_neigh(0:pe_size - 1))
    in_neigh = .false.
    do i = 1, nsend
      in_neigh(this%send_pe(i)) = .true.
    end do
    do i = 1, nrecv
      in_neigh(this%recv_pe(i)) = .true.
    end do
    n_neigh = count(in_neigh)
    allocate(this%sync_img(n_neigh))
    ! Second pass: store the image index of every flagged rank, in
    ! ascending rank order.
    n_neigh = 0
    do i = 0, pe_size - 1
      if (in_neigh(i)) then
        n_neigh = n_neigh + 1
        this%sync_img(n_neigh) = i + 1
      end if
    end do
    deallocate(in_neigh)
  end if ! atomic & event modes: no per-instance state to allocate

  ! Ensure recv_buf is allocated and (atomic mode) baselines are stable
  ! on every image before any signalling activity begins.
  sync all
#else
  call neko_error("Coarray Fortran support not built; reconfigure with " // &
       "a coarray-capable Fortran compiler")
#endif
end subroutine gs_caf_init
307
!> Deallocate Coarray Fortran based communication method.
!! Releases the per-instance buffers and layout tables, then the
!! ordering and dof data held by the parent type. The module-level
!! receive coarray and signalling state are not touched here.
subroutine gs_caf_free(this)
  class(gs_caf_t), intent(inout) :: this
#ifdef HAVE_COARRAY
  ! Packed send buffer
  if (allocated(this%send_buf)) then
    deallocate(this%send_buf)
  end if

  ! Per-peer slab lengths
  if (allocated(this%send_len)) then
    deallocate(this%send_len)
  end if
  if (allocated(this%recv_len)) then
    deallocate(this%recv_len)
  end if

  ! Per-peer slab offsets (local send, local receive, remote receive)
  if (allocated(this%send_offset)) then
    deallocate(this%send_offset)
  end if
  if (allocated(this%recv_offset)) then
    deallocate(this%recv_offset)
  end if
  if (allocated(this%dest_offset)) then
    deallocate(this%dest_offset)
  end if

  ! Image index tables
  if (allocated(this%send_img)) then
    deallocate(this%send_img)
  end if
  if (allocated(this%recv_img)) then
    deallocate(this%recv_img)
  end if
  if (allocated(this%sync_img)) then
    deallocate(this%sync_img)
  end if

  ! State owned by the parent type
  call this%free_order()
  call this%free_dofs()
#endif
end subroutine gs_caf_free
328
!> Pack @a u into per-peer slabs and put each slab into the remote
!! image's receive coarray.
!! The slab is written into the half of the destination buffer selected
!! by this instance's parity. How the receiver is told that data has
!! arrived depends on the active signalling mode:
!!  - sync: nothing is signalled here; the matching sync images is in
!!    nbwait;
!!  - event: wait for buffer credits from the previous round, then put
!!    and post one data-ready event per peer;
!!  - atomic: wait for the per-peer credit counter, put, then publish
!!    the new round number with atomic_define.
!! @param u Local data to gather from.
!! @param n Size of @a u.
!! @param deps Not referenced by this implementation.
!! @param strm Not referenced by this implementation.
subroutine gs_nbsend_caf(this, u, n, deps, strm)
  class(gs_caf_t), intent(inout) :: this
  integer, intent(in) :: n
  real(kind=rp), dimension(n), intent(inout) :: u
  type(c_ptr), intent(inout) :: deps
  type(c_ptr), intent(inout) :: strm
#ifdef HAVE_COARRAY
  integer :: i, j, dst, off, dimg, ndst, doff, half_off
  integer, pointer :: sp(:)
  integer(kind=atomic_int_kind) :: flag
  integer :: me_rank

  ! Start of the half of the remote receive buffer used this round.
  half_off = this%parity * gs_caf_buf_size

  if (gs_caf_mode .eq. gs_caf_signal_sync) then
    do i = 1, size(this%send_pe)
      dst = this%send_pe(i)
      off = this%send_offset(i)
      ndst = this%send_len(i)
      dimg = this%send_img(i)
      doff = this%dest_offset(i)
      sp => this%send_dof(dst)%array()
      do concurrent(j = 1:ndst)
        this%send_buf(off + j) = u(sp(j))
      end do
      gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
           = this%send_buf(off + 1 : off + ndst)
    end do
#ifdef HAVE_COARRAY_EVENTS
  else if (gs_caf_mode .eq. gs_caf_signal_event) then
    ! Event mode shares one set of module-level event coarrays among
    ! all instances and cannot disambiguate posts from concurrent gs
    ! ops, so we must guarantee non-overlapping nbsend/nbwait windows.
    if (gs_caf_event_in_use) then
      call neko_error("Event-mode coarray gather-scatter does not " // &
           "support overlapping gs ops on different instances")
    end if
    gs_caf_event_in_use = .true.

    ! Wait for all receivers to have credited their buffers (skipped
    ! on the first nbsend; there are no credits posted yet).
    if (this%send_started) then
      if (size(this%send_pe) .gt. 0) then
        event wait(gs_caf_buf_ready_ev, until_count=size(this%send_pe))
      end if
    else
      this%send_started = .true.
    end if

    do i = 1, size(this%send_pe)
      dst = this%send_pe(i)
      off = this%send_offset(i)
      ndst = this%send_len(i)
      dimg = this%send_img(i)
      doff = this%dest_offset(i)
      sp => this%send_dof(dst)%array()
      do concurrent(j = 1:ndst)
        this%send_buf(off + j) = u(sp(j))
      end do
      gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
           = this%send_buf(off + 1 : off + ndst)
      ! event post is meant to act as an image-control statement
      ! that establishes segment ordering with the matching event
      ! wait, but real-world coarray runtimes can let a small event
      ! message race past a still-in-flight RDMA put -- the
      ! receiver's wait then completes before the data has landed.
      ! sync memory forces the put to commit locally before the post.
      sync memory
      event post(gs_caf_data_ready_ev[dimg])
    end do
#endif
  else
    me_rank = this_image() - 1

    ! Pack all peers up front so the subsequent network waits and
    ! puts can overlap with each other rather than serialising
    ! behind per-peer pack work.
    do i = 1, size(this%send_pe)
      dst = this%send_pe(i)
      off = this%send_offset(i)
      ndst = this%send_len(i)
      sp => this%send_dof(dst)%array()
      do concurrent(j = 1:ndst)
        this%send_buf(off + j) = u(sp(j))
      end do
    end do

    ! Back-pressure, put and signal per peer. With double-buffering
    ! the half we are about to write last carried round
    ! (send_count - 2), so we only need the receiver to have
    ! unpacked through (send_count - 1).
    do i = 1, size(this%send_pe)
      off = this%send_offset(i)
      ndst = this%send_len(i)
      dimg = this%send_img(i)
      doff = this%dest_offset(i)

      do
        call atomic_ref(flag, gs_caf_buf_ready(this%send_pe(i)))
        if (int(flag) .ge. gs_caf_send_count(this%send_pe(i)) - 1) exit
      end do
      ! Atomic subroutines are not image control statements, so they
      ! impose no ordering on ordinary coarray accesses. End the
      ! segment here so the put below is ordered after the credit we
      ! just observed.
      sync memory

      gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
           = this%send_buf(off + 1 : off + ndst)

      ! Same hazard as in event mode: without a segment boundary the
      ! small atomic update may become visible on the receiver before
      ! the put has landed, letting it unpack stale data. Complete the
      ! put before publishing the new round number.
      sync memory

      gs_caf_send_count(this%send_pe(i)) = &
           gs_caf_send_count(this%send_pe(i)) + 1
      call atomic_define(gs_caf_data_ready(me_rank)[dimg], &
           int(gs_caf_send_count(this%send_pe(i)), atomic_int_kind))
    end do
  end if
#else
  call neko_error("Coarray Fortran support not built")
#endif
end subroutine gs_nbsend_caf
449
!> No-op for coarrays.
!! Senders push their data straight into the receiver's buffer in
!! nbsend, so there is nothing to post on the receive side.
subroutine gs_nbrecv_caf(this)
  class(gs_caf_t), intent(inout) :: this

  ! Intentionally empty.
end subroutine gs_nbrecv_caf
455
!> Wait for all incoming puts and reduce them into @a u.
!! The wait depends on the active signalling mode: sync images over
!! the neighbour set (sync), an event wait counting one post per
!! sender (event), or a per-sender spin on the data-ready counter
!! (atomic). After unpacking, atomic and event modes credit every
!! sender so it may reuse the buffer, and the double-buffer parity is
!! flipped for the next round.
!! @param u Local data to scatter-reduce into.
!! @param n Size of @a u.
!! @param op Reduction to apply (gs_op_add, gs_op_mul, gs_op_min or
!! gs_op_max); any other value is an error.
!! @param strm Not referenced by this implementation.
subroutine gs_nbwait_caf(this, u, n, op, strm)
  class(gs_caf_t), intent(inout) :: this
  integer, intent(in) :: n
  real(kind=rp), dimension(n), intent(inout) :: u
  type(c_ptr), intent(inout) :: strm
  ! NOTE(review): op carries no intent; presumably this mirrors the
  ! deferred interface in gs_comm -- confirm before adding one.
  integer :: op
#ifdef HAVE_COARRAY
  integer :: i, j, src, off, nsrc, half_off
  integer, pointer :: sp(:)
  integer(kind=atomic_int_kind) :: flag
  integer :: me_rank

  ! Start of the half of the local receive buffer used this round.
  half_off = this%parity * gs_caf_buf_size

  if (gs_caf_mode .eq. gs_caf_signal_sync) then
    if (allocated(this%sync_img)) then
      if (size(this%sync_img) .gt. 0) then
        sync images(this%sync_img)
      end if
    end if
#ifdef HAVE_COARRAY_EVENTS
  else if (gs_caf_mode .eq. gs_caf_signal_event) then
    if (size(this%recv_pe) .gt. 0) then
      event wait(gs_caf_data_ready_ev, until_count=size(this%recv_pe))
    end if
#endif
  else
    ! Atomic mode: spin per-sender on data_ready until the expected
    ! round count is observed.
    do i = 1, size(this%recv_pe)
      gs_caf_recv_count(this%recv_pe(i)) = &
           gs_caf_recv_count(this%recv_pe(i)) + 1
      do
        call atomic_ref(flag, gs_caf_data_ready(this%recv_pe(i)))
        if (int(flag) .ge. gs_caf_recv_count(this%recv_pe(i))) exit
      end do
    end do
    ! Atomic subroutines are not image control statements. End the
    ! segment so the reads of gs_caf_recv_buf below are ordered after
    ! the data-ready counters observed above.
    sync memory
  end if

  ! Unpack every sender's slab and fold it into u.
  do i = 1, size(this%recv_pe)
    src = this%recv_pe(i)
    off = this%recv_offset(i)
    nsrc = this%recv_len(i)
    sp => this%recv_dof(src)%array()
    select case (op)
    case (gs_op_add)
      !NEC$ IVDEP
      do concurrent(j = 1:nsrc)
        u(sp(j)) = u(sp(j)) + gs_caf_recv_buf(half_off + off + j)
      end do
    case (gs_op_mul)
      !NEC$ IVDEP
      do concurrent(j = 1:nsrc)
        u(sp(j)) = u(sp(j)) * gs_caf_recv_buf(half_off + off + j)
      end do
    case (gs_op_min)
      !NEC$ IVDEP
      do concurrent(j = 1:nsrc)
        u(sp(j)) = min(u(sp(j)), gs_caf_recv_buf(half_off + off + j))
      end do
    case (gs_op_max)
      !NEC$ IVDEP
      do concurrent(j = 1:nsrc)
        u(sp(j)) = max(u(sp(j)), gs_caf_recv_buf(half_off + off + j))
      end do
    case default
      call neko_error("Unknown operation in gs_nbwait_caf")
    end select
  end do

  if (gs_caf_mode .eq. gs_caf_signal_atomic) then
    ! Credit each sender that we have unpacked their slab so they
    ! may proceed with their next round.
    me_rank = this_image() - 1
    ! Make sure the unpack above has finished reading the buffer
    ! before any sender can see the credit and start overwriting it.
    sync memory
    do i = 1, size(this%recv_pe)
      call atomic_define(gs_caf_buf_ready(me_rank)[this%recv_img(i)], &
           int(gs_caf_recv_count(this%recv_pe(i)), atomic_int_kind))
    end do
#ifdef HAVE_COARRAY_EVENTS
  else if (gs_caf_mode .eq. gs_caf_signal_event) then
    do i = 1, size(this%recv_pe)
      event post(gs_caf_buf_ready_ev[this%recv_img(i)])
    end do
    ! This gs op is complete; allow the next nbsend to start.
    gs_caf_event_in_use = .false.
#endif
  end if

  ! Flip the double-buffer parity for the next round.
  this%parity = 1 - this%parity
#else
  call neko_error("Coarray Fortran support not built")
#endif
end subroutine gs_nbwait_caf
553
554end module gs_caf
Definition comm.F90:1
integer, public pe_size
MPI size of communicator.
Definition comm.F90:60
Defines Coarray Fortran gather-scatter communication.
Definition gs_caf.F90:34
subroutine gs_caf_init(this, send_pe, recv_pe)
Initialise Coarray Fortran based communication method.
Definition gs_caf.F90:163
subroutine gs_nbwait_caf(this, u, n, op, strm)
Wait for all incoming puts and reduce them into u. In sync mode a sync_images bracket pairs with the ...
Definition gs_caf.F90:461
integer, parameter, public gs_caf_signal_event
Definition gs_caf.F90:55
integer, parameter, public gs_caf_signal_atomic
Definition gs_caf.F90:54
subroutine gs_caf_free(this)
Deallocate Coarray Fortran based communication method. The shared module-level recv coarray is intent...
Definition gs_caf.F90:312
integer, parameter, public gs_caf_signal_sync
Definition gs_caf.F90:53
subroutine gs_nbrecv_caf(this)
No-op for coarrays: senders push into the receiver's buffer, so the receive side does not need to pos...
Definition gs_caf.F90:453
subroutine gs_nbsend_caf(this, u, n, deps, strm)
Pack u into per-peer slabs and put each slab into the remote image's recv_buf. Double buffering means...
Definition gs_caf.F90:335
Defines a gather-scatter communication method.
Definition gs_comm.f90:34
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
integer, parameter, public gs_op_max
Definition gs_ops.f90:36
integer, parameter, public gs_op_min
Definition gs_ops.f90:36
integer, parameter, public gs_op_mul
Definition gs_ops.f90:36
integer, parameter, public sp
Definition num_types.f90:8
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements a dynamic stack ADT.
Definition stack.f90:49
Utilities.
Definition utils.f90:35
Gather-scatter communication using Coarray Fortran (F2008). Each image puts directly into the (module...
Definition gs_caf.F90:129
Gather-scatter communication method.
Definition gs_comm.f90:47
Integer based stack.
Definition stack.f90:77
#define max(a, b)
Definition tensor.cu:40