Neko  0.9.0
A portable framework for high-order spectral element flow simulations
gs_cpu.f90
Go to the documentation of this file.
1 ! Copyright (c) 2020-2021, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module gs_cpu
35  use num_types
36  use gs_bcknd
37  use gs_ops
38  use, intrinsic :: iso_c_binding, only : c_ptr
39  implicit none
40  private
41 
43  type, public, extends(gs_bcknd_t) :: gs_cpu_t
44  contains
45  procedure, pass(this) :: init => gs_cpu_init
46  procedure, pass(this) :: free => gs_cpu_free
47  procedure, pass(this) :: gather => gs_gather_cpu
48  procedure, pass(this) :: scatter => gs_scatter_cpu
49  end type gs_cpu_t
50 
51 contains
52 
54  subroutine gs_cpu_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
55  class(gs_cpu_t), intent(inout) :: this
56  integer, intent(in) :: nlocal
57  integer, intent(in) :: nshared
58  integer, intent(in) :: nlcl_blks
59  integer, intent(in) :: nshrd_blks
60  end subroutine gs_cpu_init
61 
63  subroutine gs_cpu_free(this)
64  class(gs_cpu_t), intent(inout) :: this
65  end subroutine gs_cpu_free
66 
68  subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
69  integer, intent(in) :: m
70  integer, intent(in) :: n
71  integer, intent(in) :: nb
72  class(gs_cpu_t), intent(inout) :: this
73  real(kind=rp), dimension(m), intent(inout) :: v
74  integer, dimension(m), intent(inout) :: dg
75  real(kind=rp), dimension(n), intent(inout) :: u
76  integer, dimension(m), intent(inout) :: gd
77  integer, dimension(nb), intent(inout) :: b
78  integer, intent(in) :: o
79  integer, intent(in) :: op
80  logical, intent(in) :: shrd
81 
82  select case(op)
83  case (gs_op_add)
84  call gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
85  case (gs_op_mul)
86  call gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b)
87  case (gs_op_min)
88  call gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
89  case (gs_op_max)
90  call gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
91  end select
92 
93  end subroutine gs_gather_cpu
94 
97  subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
98  integer, intent(in) :: m
99  integer, intent(in) :: n
100  integer, intent(in) :: nb
101  real(kind=rp), dimension(m), intent(inout) :: v
102  integer, dimension(m), intent(inout) :: dg
103  real(kind=rp), dimension(n), intent(inout) :: u
104  integer, dimension(m), intent(inout) :: gd
105  integer, dimension(nb), intent(inout) :: b
106  integer, intent(in) :: o
107  integer :: i, j, k, blk_len
108  real(kind=rp) :: tmp
109 
110  k = 0
111  do i = 1, nb
112  blk_len = b(i)
113  tmp = u(gd(k + 1))
114  do j = 2, blk_len
115  tmp = tmp + u(gd(k + j))
116  end do
117  v(dg(k + 1)) = tmp
118  k = k + blk_len
119  end do
120 
121  if (o .lt. 0) then
122  do i = abs(o), m
123  v(dg(i)) = u(gd(i))
124  end do
125  else
126  do i = o, m, 2
127  tmp = u(gd(i)) + u(gd(i+1))
128  v(dg(i)) = tmp
129  end do
130  end if
131 
132  end subroutine gs_gather_kernel_add
133 
136  subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b)
137  integer, intent(in) :: m
138  integer, intent(in) :: n
139  integer, intent(in) :: nb
140  real(kind=rp), dimension(m), intent(inout) :: v
141  integer, dimension(m), intent(inout) :: dg
142  real(kind=rp), dimension(n), intent(inout) :: u
143  integer, dimension(m), intent(inout) :: gd
144  integer, dimension(nb), intent(inout) :: b
145  integer, intent(in) :: o
146  integer :: i, j, k, blk_len
147  real(kind=rp) :: tmp
148 
149  k = 0
150  do i = 1, nb
151  blk_len = b(i)
152  tmp = u(gd(k + 1))
153  do j = 2, blk_len
154  tmp = tmp * u(gd(k + j))
155  end do
156  v(dg(k + 1)) = tmp
157  k = k + blk_len
158  end do
159 
160  if (o .lt. 0) then
161  do i = abs(o), m
162  v(dg(i)) = u(gd(i))
163  end do
164  else
165  do i = o, m, 2
166  tmp = u(gd(i)) * u(gd(i+1))
167  v(dg(i)) = tmp
168  end do
169  end if
170 
171  end subroutine gs_gather_kernel_mul
172 
175  subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
176  integer, intent(in) :: m
177  integer, intent(in) :: n
178  integer, intent(in) :: nb
179  real(kind=rp), dimension(m), intent(inout) :: v
180  integer, dimension(m), intent(inout) :: dg
181  real(kind=rp), dimension(n), intent(inout) :: u
182  integer, dimension(m), intent(inout) :: gd
183  integer, dimension(nb), intent(inout) :: b
184  integer, intent(in) :: o
185  integer :: i, j, k, blk_len
186  real(kind=rp) :: tmp
187 
188  k = 0
189  do i = 1, nb
190  blk_len = b(i)
191  tmp = u(gd(k + 1))
192  do j = 2, blk_len
193  tmp = min(tmp, u(gd(k + j)))
194  end do
195  v(dg(k + 1)) = tmp
196  k = k + blk_len
197  end do
198 
199  if (o .lt. 0) then
200  do i = abs(o), m
201  v(dg(i)) = u(gd(i))
202  end do
203  else
204  do i = o, m, 2
205  tmp = min(u(gd(i)), u(gd(i+1)))
206  v(dg(i)) = tmp
207  end do
208  end if
209 
210  end subroutine gs_gather_kernel_min
211 
214  subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
215  integer, intent(in) :: m
216  integer, intent(in) :: n
217  integer, intent(in) :: nb
218  real(kind=rp), dimension(m), intent(inout) :: v
219  integer, dimension(m), intent(inout) :: dg
220  real(kind=rp), dimension(n), intent(inout) :: u
221  integer, dimension(m), intent(inout) :: gd
222  integer, dimension(nb), intent(inout) :: b
223  integer, intent(in) :: o
224  integer :: i, j, k, blk_len
225  real(kind=rp) :: tmp
226 
227  k = 0
228  do i = 1, nb
229  blk_len = b(i)
230  tmp = u(gd(k + 1))
231  do j = 2, blk_len
232  tmp = max(tmp, u(gd(k + j)))
233  end do
234  v(dg(k + 1)) = tmp
235  k = k + blk_len
236  end do
237 
238  if (o .lt. 0) then
239  do i = abs(o), m
240  v(dg(i)) = u(gd(i))
241  end do
242  else
243  do i = o, m, 2
244  tmp = max(u(gd(i)), u(gd(i+1)))
245  v(dg(i)) = tmp
246  end do
247  end if
248 
249  end subroutine gs_gather_kernel_max
250 
252  subroutine gs_scatter_cpu(this, v, m, dg, u, n, gd, nb, b, shrd, event)
253  integer, intent(in) :: m
254  integer, intent(in) :: n
255  integer, intent(in) :: nb
256  class(gs_cpu_t), intent(inout) :: this
257  real(kind=rp), dimension(m), intent(inout) :: v
258  integer, dimension(m), intent(inout) :: dg
259  real(kind=rp), dimension(n), intent(inout) :: u
260  integer, dimension(m), intent(inout) :: gd
261  integer, dimension(nb), intent(inout) :: b
262  logical, intent(in) :: shrd
263  type(c_ptr) :: event
264 
265  call gs_scatter_kernel(v, m, dg, u, n, gd, nb, b)
266 
267  end subroutine gs_scatter_cpu
268 
270  subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b)
271  integer, intent(in) :: m
272  integer, intent(in) :: n
273  integer, intent(in) :: nb
274  real(kind=rp), dimension(m), intent(inout) :: v
275  integer, dimension(m), intent(inout) :: dg
276  real(kind=rp), dimension(n), intent(inout) :: u
277  integer, dimension(m), intent(inout) :: gd
278  integer, dimension(nb), intent(inout) :: b
279  integer :: i, j, k, blk_len
280  real(kind=rp) :: tmp
281 
282  k = 0
283  do i = 1, nb
284  blk_len = b(i)
285  tmp = v(dg(k + 1))
286  do j = 1, blk_len
287  u(gd(k + j)) = tmp
288  end do
289  k = k + blk_len
290  end do
291 
292  do i = k + 1, m
293  u(gd(i)) = v(dg(i))
294  end do
295 
296  end subroutine gs_scatter_kernel
297 
298 end module gs_cpu
Defines a gather-scatter backend.
Definition: gs_bcknd.f90:34
Generic Gather-scatter backend for CPUs.
Definition: gs_cpu.f90:34
subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
Gather kernel for addition of data .
Definition: gs_cpu.f90:98
subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b)
Scatter kernel .
Definition: gs_cpu.f90:271
subroutine gs_cpu_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
Dummy backend initialisation.
Definition: gs_cpu.f90:55
subroutine gs_scatter_cpu(this, v, m, dg, u, n, gd, nb, b, shrd, event)
Scatter kernel.
Definition: gs_cpu.f90:253
subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b)
Gather kernel for multiplication of data .
Definition: gs_cpu.f90:137
subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
Gather kernel.
Definition: gs_cpu.f90:69
subroutine gs_cpu_free(this)
Dummy backend deallocation.
Definition: gs_cpu.f90:64
subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
Gather kernel for minimum of data .
Definition: gs_cpu.f90:176
subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
Gather kernel for maximum of data .
Definition: gs_cpu.f90:215
Defines Gather-scatter operations.
Definition: gs_ops.f90:34
integer, parameter, public gs_op_add
Definition: gs_ops.f90:36
integer, parameter, public gs_op_max
Definition: gs_ops.f90:36
integer, parameter, public gs_op_min
Definition: gs_ops.f90:36
integer, parameter, public gs_op_mul
Definition: gs_ops.f90:36
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Gather-scatter backend.
Definition: gs_bcknd.f90:44
Gather-scatter backend for CPUs.
Definition: gs_cpu.f90:43
#define max(a, b)
Definition: tensor.cu:40