41  use, 
intrinsic :: iso_c_binding, only : c_ptr, c_int, c_null_ptr, &
 
   48     integer, 
allocatable :: local_blk_off(:)
 
   49     integer, 
allocatable :: shared_blk_off(:)
 
   50     type(c_ptr) :: local_gs_d = c_null_ptr      
 
   51     type(c_ptr) :: local_dof_gs_d = c_null_ptr  
 
   52     type(c_ptr) :: local_gs_dof_d = c_null_ptr  
 
   53     type(c_ptr) :: shared_gs_d = c_null_ptr     
 
   54     type(c_ptr) :: shared_dof_gs_d = c_null_ptr 
 
   55     type(c_ptr) :: shared_gs_dof_d = c_null_ptr 
 
   56     type(c_ptr) :: local_blk_len_d = c_null_ptr 
 
   57     type(c_ptr) :: shared_blk_len_d = c_null_ptr
 
   58     type(c_ptr) :: local_blk_off_d = c_null_ptr 
 
   59     type(c_ptr) :: shared_blk_off_d = c_null_ptr
 
   62     logical :: shared_on_host
 
 
   72     subroutine hip_gather_kernel(v, m, o, dg, u, n, gd, nb, b, bo, op, strm) &
 
   73          bind(c, name=
'hip_gather_kernel')
 
   74       use, 
intrinsic :: iso_c_binding
 
   76       integer(c_int) :: m, n, nb, o, op
 
   77       type(c_ptr), 
value :: v, u, dg, gd, b, bo, strm
 
 
   83          bind(c, name=
'hip_scatter_kernel')
 
   84       use, 
intrinsic :: iso_c_binding
 
   86       integer(c_int) :: m, n, nb
 
   87       type(c_ptr), 
value :: v, u, dg, gd, b, bo, strm
 
 
   93     subroutine cuda_gather_kernel(v, m, o, dg, u, n, gd, nb, b, bo, op, strm) &
 
   94          bind(c, name=
'cuda_gather_kernel')
 
   95       use, 
intrinsic :: iso_c_binding
 
   97       integer(c_int) :: m, n, nb, o, op
 
   98       type(c_ptr), 
value :: v, u, dg, gd, b, bo, strm
 
  104          bind(c, name=
'cuda_scatter_kernel')
 
  105       use, 
intrinsic :: iso_c_binding
 
  107       integer(c_int) :: m, n, nb
 
  108       type(c_ptr), 
value :: v, u, dg, gd, b, bo, strm
 
  113     subroutine opencl_gather_kernel(v, m, o, dg, u, n, gd, nb, b, bo, op) &
 
  114          bind(c, name=
'opencl_gather_kernel')
 
  115       use, 
intrinsic :: iso_c_binding
 
  117       integer(c_int) :: m, n, nb, o, op
 
  118       type(c_ptr), 
value :: v, u, dg, gd, b, bo
 
  124          bind(c, name=
'opencl_scatter_kernel')
 
  125       use, 
intrinsic :: iso_c_binding
 
  127       integer(c_int) :: m, n, nb
 
  128       type(c_ptr), 
value :: v, u, dg, gd, b, bo
 
  138    integer, 
intent(in) :: nlocal
 
  139    integer, 
intent(in) :: nshared
 
  140    integer, 
intent(in) :: nlcl_blks
 
  141    integer, 
intent(in) :: nshrd_blks
 
  146    this%nshared = nshared
 
  148    allocate(this%local_blk_off(nlcl_blks))
 
  149    allocate(this%shared_blk_off(nshrd_blks))
 
  151    this%local_gs_d = c_null_ptr
 
  152    this%local_dof_gs_d = c_null_ptr
 
  153    this%local_gs_dof_d = c_null_ptr
 
  154    this%local_blk_len_d = c_null_ptr
 
  155    this%local_blk_off_d = c_null_ptr
 
  156    this%shared_gs_d = c_null_ptr
 
  157    this%shared_dof_gs_d = c_null_ptr
 
  158    this%shared_gs_dof_d = c_null_ptr
 
  159    this%shared_blk_len_d = c_null_ptr
 
  160    this%shared_blk_off_d = c_null_ptr
 
  162    this%shared_on_host = .true.
 
  167    this%gs_stream = glb_cmd_queue
 
 
  175    if (
allocated(this%local_blk_off)) 
then 
  176       deallocate(this%local_blk_off)
 
  179    if (
allocated(this%shared_blk_off)) 
then 
  180       deallocate(this%shared_blk_off)
 
  183    if (c_associated(this%local_gs_d)) 
then 
  187    if (c_associated(this%local_dof_gs_d)) 
then 
  191    if (c_associated(this%local_gs_dof_d)) 
then 
  195    if (c_associated(this%local_blk_len_d)) 
then 
  199    if (c_associated(this%shared_blk_len_d)) 
then 
  203    if (c_associated(this%local_blk_off_d)) 
then 
  207    if (c_associated(this%shared_blk_off_d)) 
then 
  214    if (c_associated(this%gather_event)) 
then 
  218    if (c_associated(this%scatter_event)) 
then 
  222    if (c_associated(this%gs_stream)) 
then 
  223       this%gs_stream = c_null_ptr
 
 
  229  subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
 
  230    integer, 
intent(in) :: m
 
  231    integer, 
intent(in) :: n
 
  232    integer, 
intent(in) :: nb
 
  234    real(kind=
rp), 
dimension(m), 
intent(inout) :: v
 
  235    integer, 
dimension(m), 
intent(inout) :: dg
 
  236    real(kind=
rp), 
dimension(n), 
intent(inout) :: u
 
  237    integer, 
dimension(m), 
intent(inout) :: gd
 
  238    integer, 
dimension(nb), 
intent(inout) :: b
 
  239    integer, 
intent(in) :: o
 
  240    integer, 
intent(in) :: op
 
  241    logical, 
intent(in) :: shrd
 
  248       associate(v_d=>this%local_gs_d, dg_d=>this%local_dof_gs_d, &
 
  249            gd_d=>this%local_gs_dof_d, b_d=>this%local_blk_len_d, &
 
  250            bo=>this%local_blk_off, bo_d=>this%local_blk_off_d, &
 
  251            strm=>this%gs_stream)
 
  253         if (.not. c_associated(v_d)) 
then 
  257         if (.not. c_associated(dg_d)) 
then 
  260                               sync=.false., strm=strm)
 
  263         if (.not. c_associated(gd_d)) 
then 
  266                               sync=.false., strm=strm)
 
  269         if (.not. c_associated(b_d)) 
then 
  272                               sync=.false., strm=strm)
 
  275         if (.not. c_associated(bo_d)) 
then 
  279               bo(i) = bo(i - 1) + b(i - 1)
 
  282                               sync=.false., strm=strm)
 
  287                                nb, b_d, bo_d, op, strm)
 
  290              nb, b_d, bo_d, op, strm)
 
  295         call neko_error(
'No device backend configured')
 
  300       associate(v_d=>this%shared_gs_d, dg_d=>this%shared_dof_gs_d, &
 
  301            gd_d=>this%shared_gs_dof_d, b_d=>this%shared_blk_len_d, &
 
  302            bo=>this%shared_blk_off, bo_d=>this%shared_blk_off_d, &
 
  303            strm=>this%gs_stream)
 
  305         if (.not. c_associated(v_d)) 
then 
  309         if (.not. c_associated(dg_d)) 
then 
  312                               sync=.false., strm=strm)
 
  315         if (.not. c_associated(gd_d)) 
then 
  318                               sync=.false., strm=strm)
 
  321         if (.not. c_associated(b_d)) 
then 
  324                               sync=.false., strm=strm)
 
  327         if (.not. c_associated(bo_d)) 
then 
  331               bo(i) = bo(i - 1) + b(i - 1)
 
  334                               sync=.false., strm=strm)
 
  340                                nb, b_d, bo_d, op, strm)
 
  343              nb, b_d, bo_d, op, strm)
 
  348         call neko_error(
'No device backend configured')
 
  353         if (this%shared_on_host) 
then 
  354            if (this%nshared .eq. m) 
then 
  356                                  sync=.true., strm=strm)
 
 
  366  subroutine gs_scatter_device(this, v, m, dg, u, n, gd, nb, b, shrd, event)
 
  367    integer, 
intent(in) :: m
 
  368    integer, 
intent(in) :: n
 
  369    integer, 
intent(in) :: nb
 
  371    real(kind=rp), 
dimension(m), 
intent(inout) :: v
 
  372    integer, 
dimension(m), 
intent(inout) :: dg
 
  373    real(kind=rp), 
dimension(n), 
intent(inout) :: u
 
  374    integer, 
dimension(m), 
intent(inout) :: gd
 
  375    integer, 
dimension(nb), 
intent(inout) :: b
 
  376    logical, 
intent(in) :: shrd
 
  380    u_d = device_get_ptr(u)
 
  383       associate(v_d=>this%local_gs_d, dg_d=>this%local_dof_gs_d, &
 
  384            gd_d=>this%local_gs_dof_d, b_d=>this%local_blk_len_d, &
 
  385            bo_d=>this%local_blk_off_d, strm=>this%gs_stream)
 
  393         call neko_error(
'No device backend configured')
 
  397       associate(v_d=>this%shared_gs_d, dg_d=>this%shared_dof_gs_d, &
 
  398            gd_d=>this%shared_gs_dof_d, b_d=>this%shared_blk_len_d, &
 
  399            bo_d=>this%shared_blk_off_d, strm=>this%gs_stream)
 
  401         if (this%shared_on_host) 
then 
  402            call device_memcpy(v, v_d, m, host_to_device, &
 
  403                               sync=.false., strm=strm)
 
  413         call neko_error(
'No device backend configured')
 
  416         if (c_associated(event)) 
then 
  417            call device_event_record(event, strm)
 
  419            call device_sync(strm)
 
 
void opencl_gather_kernel(void *v, int *m, int *o, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, int *op)
 
void opencl_scatter_kernel(void *v, int *m, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo)
 
void cuda_gather_kernel(void *v, int *m, int *o, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, int *op, cudaStream_t stream)
 
void cuda_scatter_kernel(void *v, int *m, void *dg, void *u, int *n, void *gd, int *nb, void *b, void *bo, cudaStream_t stream)
 
Return the device pointer for an associated Fortran array.
 
Map a Fortran array to a device (allocate and associate)
 
Copy data between host and device (or device and device)
 
Device abstraction, common interface for various accelerators.
 
subroutine, public device_event_record(event, stream)
Record a device event.
 
integer, parameter, public host_to_device
 
subroutine, public device_free(x_d)
Deallocate memory on the device.
 
integer, parameter, public device_to_host
 
subroutine, public device_event_destroy(event)
Destroy a device event.
 
subroutine, public device_event_create(event, flags)
Create a device event queue.
 
Defines a gather-scatter backend.
 
Generic Gather-scatter backend for accelerators.
 
subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
Gather kernel.
 
subroutine gs_scatter_device(this, v, m, dg, u, n, gd, nb, b, shrd, event)
Scatter kernel.
 
subroutine gs_device_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
Accelerator backend initialisation.
 
subroutine gs_device_free(this)
Dummy backend deallocation.
 
Defines Gather-scatter operations.
 
integer, parameter, public rp
Global precision used in computations.
 
Gather-scatter backend for offloading devices.