Neko  0.8.99
A portable framework for high-order spectral element flow simulations
matrix.f90
Go to the documentation of this file.
1 ! Copyright (c) 2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module matrix
36  use math, only: sub3, chsign, add3, cmult2, cadd2, copy
37  use num_types, only: rp
38  use device, only: device_map, device_free, c_ptr, c_null_ptr
41  use utils, only: neko_error
42  use, intrinsic :: iso_c_binding
43  implicit none
44  private
45 
46  type, public :: matrix_t
47  real(kind=rp), allocatable :: x(:,:)
48  type(c_ptr) :: x_d = c_null_ptr
49  integer :: nrows = 0
50  integer :: ncols = 0
51  integer :: n = 0
52  contains
54  procedure, pass(m) :: init => matrix_init
56  procedure, pass(m) :: free => matrix_free
58  procedure, pass(m) :: size => matrix_size
60  procedure, pass(m) :: matrix_assign_matrix
62  procedure, pass(m) :: matrix_assign_scalar
64  procedure, pass(m) :: matrix_add_matrix
66  procedure, pass(m) :: matrix_add_scalar_left
68  procedure, pass(m) :: matrix_add_scalar_right
70  procedure, pass(m) :: matrix_sub_matrix
72  procedure, pass(m) :: matrix_sub_scalar_left
74  procedure, pass(m) :: matrix_sub_scalar_right
76  procedure, pass(m) :: matrix_cmult_left
78  procedure, pass(m) :: matrix_cmult_right
80  procedure, pass(m) :: inverse => matrix_bcknd_inverse
81 
82  generic :: assignment(=) => matrix_assign_matrix, &
84  generic :: operator(+) => matrix_add_matrix, &
86  generic :: operator(-) => matrix_sub_matrix, &
88  generic :: operator(*) => matrix_cmult_left, matrix_cmult_right
89  end type matrix_t
90 
91 contains
92 
96  subroutine matrix_init(m, nrows, ncols)
97  class(matrix_t), intent(inout) :: m
98  integer, intent(in) :: nrows
99  integer, intent(in) :: ncols
100 
101  call m%free()
102 
103  allocate(m%x(nrows, ncols))
104  m%x = 0.0_rp
105  m%nrows = nrows
106  m%ncols = ncols
107  m%n = nrows*ncols
108 
109  if (neko_bcknd_device .eq. 1) then
110  call device_map(m%x, m%x_d, m%n)
111  call device_cfill(m%x_d, 0.0_rp, m%n)
112  end if
113 
114  end subroutine matrix_init
115 
117  subroutine matrix_free(m)
118  class(matrix_t), intent(inout) :: m
119 
120  if (allocated(m%x)) then
121  deallocate(m%x)
122  end if
123 
124  if (c_associated(m%x_d)) then
125  call device_free(m%x_d)
126  end if
127 
128  m%nrows = 0
129  m%ncols = 0
130  m%n = 0
131 
132  end subroutine matrix_free
133 
135  function matrix_size(m) result(s)
136  class(matrix_t), intent(inout) :: m
137  integer :: s
138  s = m%n
139  end function matrix_size
140 
142  subroutine matrix_assign_matrix(m, w)
143  class(matrix_t), intent(inout) :: m
144  type(matrix_t), intent(in) :: w
145 
146  if (allocated(m%x)) then
147  call m%free()
148  end if
149 
150  if (.not. allocated(m%x)) then
151 
152  m%nrows = w%nrows
153  m%ncols = w%ncols
154  m%n = w%n
155  allocate(m%x(m%nrows, m%ncols))
156 
157  if (neko_bcknd_device .eq. 1) then
158  call device_map(m%x, m%x_d, m%n)
159  end if
160 
161  end if
162 
163  if (neko_bcknd_device .eq. 1) then
164  call device_copy(m%x_d, w%x_d, m%n)
165  else
166  m%x = w%x
167  end if
168 
169  end subroutine matrix_assign_matrix
170 
172  subroutine matrix_assign_scalar(m, s)
173  class(matrix_t), intent(inout) :: m
174  real(kind=rp), intent(in) :: s
175 
176  if (.not. allocated(m%x)) then
177  call neko_error('matrix not allocated')
178  end if
179 
180  if (neko_bcknd_device .eq. 1) then
181  call device_cfill(m%x_d, s, m%n)
182  else
183  m%x = s
184  end if
185 
186  end subroutine matrix_assign_scalar
187 
189  function matrix_add_matrix(m, b) result(v)
190  class(matrix_t), intent(in) :: m, b
191  type(matrix_t) :: v
192 
193  if (m%nrows .ne. b%nrows .or. m%ncols .ne. b%ncols) &
194  call neko_error("Matrices must be the same size!")
195 
196  v%n = m%n
197  v%nrows = m%nrows
198  v%ncols = m%ncols
199  allocate(v%x(v%nrows, v%ncols))
200 
201  if (neko_bcknd_device .eq. 1) then
202  call device_map(v%x, v%x_d, v%n)
203  end if
204 
205  if (neko_bcknd_device .eq. 1) then
206  call device_add3(v%x_d, m%x_d, b%x_d, v%n)
207  else
208  call add3(v%x, m%x, b%x, v%n)
209  end if
210 
211  end function matrix_add_matrix
212 
214  function matrix_add_scalar_left(m, c) result(v)
215  class(matrix_t), intent(in) :: m
216  real(kind=rp), intent(in) :: c
217  type(matrix_t) :: v
218 
219  v%n = m%n
220  v%nrows = m%nrows
221  v%ncols = m%ncols
222  allocate(v%x(v%nrows, v%ncols))
223 
224  if (neko_bcknd_device .eq. 1) then
225  call device_map(v%x, v%x_d, v%n)
226  end if
227 
228  if (neko_bcknd_device .eq. 1) then
229  call device_cadd2(v%x_d, m%x_d, c, v%n)
230  else
231  call cadd2(v%x, m%x, c, v%n)
232  end if
233 
234  end function matrix_add_scalar_left
235 
237  function matrix_add_scalar_right(c, m) result(v)
238  real(kind=rp), intent(in) :: c
239  class(matrix_t), intent(in) :: m
240  type(matrix_t) :: v
241 
242  v = matrix_add_scalar_left(m, c)
243 
244  end function matrix_add_scalar_right
245 
247  function matrix_sub_matrix(m, b) result(v)
248  class(matrix_t), intent(in) :: m, b
249  type(matrix_t) :: v
250 
251  if (m%nrows .ne. b%nrows .or. m%ncols .ne. b%ncols) &
252  call neko_error("Matrices must be the same size!")
253 
254  v%n = m%n
255  v%nrows = m%nrows
256  v%ncols = m%ncols
257  allocate(v%x(v%nrows, v%ncols))
258 
259  if (neko_bcknd_device .eq. 1) then
260  call device_map(v%x, v%x_d, v%n)
261  end if
262 
263  if (neko_bcknd_device .eq. 1) then
264  call device_sub3(v%x_d, m%x_d, b%x_d, v%n)
265  else
266  call sub3(v%x, m%x, b%x, v%n)
267  end if
268 
269  end function matrix_sub_matrix
270 
272  function matrix_sub_scalar_left(m, c) result(v)
273  class(matrix_t), intent(in) :: m
274  real(kind=rp), intent(in) :: c
275  type(matrix_t) :: v
276 
277  v%n = m%n
278  v%nrows = m%nrows
279  v%ncols = m%ncols
280  allocate(v%x(v%nrows, v%ncols))
281 
282  if (neko_bcknd_device .eq. 1) then
283  call device_map(v%x, v%x_d, v%n)
284  end if
285 
286  if (neko_bcknd_device .eq. 1) then
287  call device_cadd2(v%x_d, m%x_d, -1.0_rp*c, v%n)
288  else
289  call cadd2(v%x, m%x, -1.0_rp*c, m%n)
290  end if
291 
292  end function matrix_sub_scalar_left
293 
295  function matrix_sub_scalar_right(c, m) result(v)
296  real(kind=rp), intent(in) :: c
297  class(matrix_t), intent(in) :: m
298  type(matrix_t) :: v
299 
300  v = matrix_sub_scalar_left(m, c)
301 
302  if (neko_bcknd_device .eq. 1) then
303  call device_cmult(v%x_d, -1.0_rp, v%n)
304  else
305  call chsign(v%x, v%n)
306  end if
307 
308  end function matrix_sub_scalar_right
309 
311  function matrix_cmult_left(m, c) result(v)
312  class(matrix_t), intent(in) :: m
313  real(kind=rp), intent(in) :: c
314  type(matrix_t) :: v
315 
316  v%n = m%n
317  v%nrows = m%nrows
318  v%ncols = m%ncols
319  allocate(v%x(v%nrows, v%ncols))
320 
321  if (neko_bcknd_device .eq. 1) then
322  call device_map(v%x, v%x_d, v%n)
323  end if
324 
325  if (neko_bcknd_device .eq. 1) then
326  call device_cmult2(v%x_d, m%x_d, c, v%n)
327  else
328  call cmult2(v%x, m%x, c, v%n)
329  end if
330 
331  end function matrix_cmult_left
332 
334  function matrix_cmult_right(c, m) result(v)
335  real(kind=rp), intent(in) :: c
336  class(matrix_t), intent(in) :: m
337  type(matrix_t) :: v
338 
339  v = matrix_cmult_left(m, c)
340 
341  end function matrix_cmult_right
342 
343  subroutine matrix_bcknd_inverse(m)
344  class(matrix_t), intent(inout) :: m
345  if (neko_bcknd_device .eq. 1) then
346  call neko_error("matrix_bcknd_inverse not implemented on accelarators.")
347  else
348  call cpu_matrix_inverse(m)
349  end if
350  end subroutine matrix_bcknd_inverse
351 
352  subroutine cpu_matrix_inverse(m)
353  ! Gauss-Jordan matrix inversion with full pivoting
354  ! Num. Rec. p. 30, 2nd Ed., Fortran
355  ! m%x is an sqaure matrix
356  ! rmult is m work array of length nrows = ncols
357  class(matrix_t), intent(inout) :: m
358  integer :: indr(m%nrows), indc(m%ncols), ipiv(m%ncols)
359  real(kind=rp) :: rmult(m%nrows), amx, tmp, piv, eps
360  integer :: i, j, k, ir, jc
361 
362  if (.not. (m%ncols .eq. m%nrows)) then
363  call neko_error("Fatal error: trying to invert m matrix that is not &
364 &square")
365  end if
366 
367  eps = 1e-9_rp
368  ipiv = 0
369 
370  do k = 1, m%nrows
371  amx = 0.0_rp
372  do i = 1, m%nrows ! Pivot search
373  if (ipiv(i) .ne. 1) then
374  do j = 1, m%nrows
375  if (ipiv(j) .eq. 0) then
376  if (abs(m%x(i, j)) .ge. amx) then
377  amx = abs(m%x(i, j))
378  ir = i
379  jc = j
380  end if
381  else if (ipiv(j) .gt. 1) then
382  return
383  end if
384  end do
385  end if
386  end do
387  ipiv(jc) = ipiv(jc) + 1
388 
389  ! Swap rows
390  if (ir .ne. jc) then
391  do j = 1, m%ncols
392  tmp = m%x(ir, j)
393  m%x(ir, j) = m%x(jc, j)
394  m%x(jc, j) = tmp
395  end do
396  end if
397  indr(k) = ir
398  indc(k) = jc
399 
400  if (abs(m%x(jc, jc)) .lt. eps) then
401  call neko_error("matrix_inverse error: small Gauss Jordan Piv")
402  end if
403  piv = 1.0_rp/m%x(jc, jc)
404  m%x(jc, jc) = 1.0_rp
405  do j = 1, m%ncols
406  m%x(jc, j) = m%x(jc, j)*piv
407  end do
408 
409  do j = 1, m%ncols
410  tmp = m%x(jc, j)
411  m%x(jc, j) = m%x(1 , j)
412  m%x(1 , j) = tmp
413  end do
414  do i = 2, m%nrows
415  rmult(i) = m%x(i, jc)
416  m%x(i, jc) = 0.0_rp
417  end do
418 
419  do j = 1, m%ncols
420  do i = 2, m%nrows
421  m%x(i, j) = m%x(i, j) - rmult(i)*m%x(1, j)
422  end do
423  end do
424 
425  do j = 1, m%ncols
426  tmp = m%x(jc, j)
427  m%x(jc, j) = m%x(1 , j)
428  m%x(1 , j) = tmp
429  end do
430  end do
431 
432  ! Unscramble matrix
433  do j= m%nrows, 1, -1
434  if (indr(j) .ne. indc(j)) then
435  do i = 1, m%nrows
436  tmp = m%x(i, indr(j))
437  m%x(i, indr(j)) = m%x(i, indc(j))
438  m%x(i, indc(j)) = tmp
439  end do
440  end if
441  end do
442 
443  return
444  end subroutine cpu_matrix_inverse
445 
446 end module matrix
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition .
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:185
Definition: math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition: math.f90:661
subroutine, public cadd2(a, b, s, n)
Add a scalar to vector .
Definition: math.f90:301
subroutine, public add3(a, b, c, n)
Vector addition .
Definition: math.f90:560
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition: math.f90:602
subroutine, public chsign(a, n)
Change sign of vector .
Definition: math.f90:400
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:228
Defines a matrix.
Definition: matrix.f90:34
subroutine matrix_free(m)
Deallocate a matrix.
Definition: matrix.f90:118
type(matrix_t) function matrix_sub_matrix(m, b)
Matrix-matrix subtraction .
Definition: matrix.f90:248
subroutine matrix_assign_matrix(m, w)
Assignment .
Definition: matrix.f90:143
type(matrix_t) function matrix_add_scalar_left(m, c)
Matrix-scalar addition .
Definition: matrix.f90:215
type(matrix_t) function matrix_sub_scalar_right(c, m)
Scalar-matrix subtraction .
Definition: matrix.f90:296
type(matrix_t) function matrix_add_matrix(m, b)
Matrix-matrix addition .
Definition: matrix.f90:190
type(matrix_t) function matrix_add_scalar_right(c, m)
Scalar-matrix addition .
Definition: matrix.f90:238
type(matrix_t) function matrix_cmult_right(c, m)
Scalar-matrix multiplication .
Definition: matrix.f90:335
subroutine matrix_bcknd_inverse(m)
Definition: matrix.f90:344
integer function matrix_size(m)
Returns the number of entries in the matrix.
Definition: matrix.f90:136
subroutine cpu_matrix_inverse(m)
Definition: matrix.f90:353
type(matrix_t) function matrix_cmult_left(m, c)
Matrix-scalar multiplication .
Definition: matrix.f90:312
type(matrix_t) function matrix_sub_scalar_left(m, c)
Matrix-scalar subtraction .
Definition: matrix.f90:273
subroutine matrix_assign_scalar(m, s)
Assignment .
Definition: matrix.f90:173
subroutine matrix_init(m, nrows, ncols)
Initialise a matrix of size nrows*ncols.
Definition: matrix.f90:97
Build configurations.
Definition: neko_config.f90:34
integer, parameter neko_bcknd_device
Definition: neko_config.f90:44
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Utilities.
Definition: utils.f90:35