Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
matrix.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module matrix
36 use math, only: sub3, chsign, add3, cmult2, cadd2
37 use num_types, only: rp, xp
42 use utils, only: neko_error
43 use, intrinsic :: iso_c_binding
44 implicit none
45 private
46
47 type, public :: matrix_t
48 real(kind=rp), allocatable :: x(:,:)
49 type(c_ptr) :: x_d = c_null_ptr
50 integer, private :: nrows = 0
51 integer, private :: ncols = 0
52 integer, private :: n = 0
53 contains
55 procedure, pass(m) :: init => matrix_init
57 procedure, pass(m) :: free => matrix_free
59 procedure, pass(m) :: size => matrix_size
61 procedure, pass(m) :: copy_from => matrix_copy_from
63 procedure, pass(m) :: get_nrows => matrix_nrows
65 procedure, pass(m) :: get_ncols => matrix_ncols
67 procedure, pass(m) :: matrix_assign_matrix
69 procedure, pass(m) :: matrix_assign_scalar
71 procedure, pass(m) :: inverse => matrix_bcknd_inverse
72 procedure, pass(m) :: inverse_on_host => cpu_matrix_inverse
73
74 generic :: assignment(=) => matrix_assign_matrix, &
76
78 procedure, pass(m), private :: alloc => matrix_allocate
79 end type matrix_t
80
81contains
82
86 subroutine matrix_init(m, nrows, ncols)
87 class(matrix_t), intent(inout) :: m
88 integer, intent(in) :: nrows
89 integer, intent(in) :: ncols
90
91 call m%alloc(nrows, ncols)
92 m%x = 0.0_rp
93 if (neko_bcknd_device .eq. 1) then
94 call device_cfill(m%x_d, 0.0_rp, m%n)
95 end if
96
97 end subroutine matrix_init
98
102 subroutine matrix_allocate(m, nrows, ncols)
103 class(matrix_t), intent(inout) :: m
104 integer, intent(in) :: nrows
105 integer, intent(in) :: ncols
106
107 call m%free()
108
109 allocate(m%x(nrows, ncols))
110 m%nrows = nrows
111 m%ncols = ncols
112 m%n = nrows*ncols
113
114 if (neko_bcknd_device .eq. 1) then
115 call device_map(m%x, m%x_d, m%n)
116 call device_cfill(m%x_d, 0.0_rp, m%n)
117 call device_sync()
118 end if
119
120 end subroutine matrix_allocate
121
123 subroutine matrix_free(m)
124 class(matrix_t), intent(inout) :: m
125
126 if (allocated(m%x)) then
127 deallocate(m%x)
128 end if
129
130 if (c_associated(m%x_d)) then
131 call device_deassociate(m%x)
132 call device_free(m%x_d)
133 end if
134
135 m%nrows = 0
136 m%ncols = 0
137 m%n = 0
138
139 end subroutine matrix_free
140
142 pure function matrix_size(m) result(s)
143 class(matrix_t), intent(in) :: m
144 integer :: s
145 s = m%n
146 end function matrix_size
147
148
153 subroutine matrix_copy_from(m, memdir, sync)
154 class(matrix_t), intent(inout) :: m
155 integer, intent(in) :: memdir
156 logical, intent(in) :: sync
157
158 if (neko_bcknd_device .eq. 1) then
159 call device_memcpy(m%x, m%x_d, m%n, memdir, sync)
160 end if
161
162 end subroutine matrix_copy_from
163
165 pure function matrix_nrows(m) result(nr)
166 class(matrix_t), intent(in) :: m
167 integer :: nr
168 nr = m%nrows
169 end function matrix_nrows
170
172 pure function matrix_ncols(m) result(nc)
173 class(matrix_t), intent(in) :: m
174 integer :: nc
175 nc = m%ncols
176 end function matrix_ncols
177
179 subroutine matrix_assign_matrix(m, w)
180 class(matrix_t), intent(inout) :: m
181 type(matrix_t), intent(in) :: w
182
183 if (allocated(m%x)) then
184 call m%free()
185 end if
186
187 if (.not. allocated(m%x)) then
188
189 m%nrows = w%nrows
190 m%ncols = w%ncols
191 m%n = w%n
192 allocate(m%x(m%nrows, m%ncols))
193
194 if (neko_bcknd_device .eq. 1) then
195 call device_map(m%x, m%x_d, m%n)
196 end if
197
198 end if
199
200 if (neko_bcknd_device .eq. 1) then
201 call device_copy(m%x_d, w%x_d, m%n)
202 else
203 m%x = w%x
204 end if
205
206 end subroutine matrix_assign_matrix
207
209 subroutine matrix_assign_scalar(m, s)
210 class(matrix_t), intent(inout) :: m
211 real(kind=rp), intent(in) :: s
212
213 if (.not. allocated(m%x)) then
214 call neko_error('matrix not allocated')
215 end if
216
217 if (neko_bcknd_device .eq. 1) then
218 call device_cfill(m%x_d, s, m%n)
219 else
220 m%x = s
221 end if
222
223 end subroutine matrix_assign_scalar
224
225 subroutine matrix_bcknd_inverse(m, bcknd)
226 class(matrix_t), intent(inout) :: m
227 integer, optional :: bcknd
228
229 if (neko_bcknd_device .eq. 1 .and. &
230 bcknd .eq. neko_bcknd_device) then
231 call neko_error("matrix_bcknd_inverse not &
232 &implemented on accelarators.")
233 else
234 call cpu_matrix_inverse(m)
235 end if
236
237 end subroutine matrix_bcknd_inverse
238
239 subroutine cpu_matrix_inverse(m)
240 ! Gauss-Jordan matrix inversion with full pivoting
241 ! Num. Rec. p. 30, 2nd Ed., Fortran
242 ! m%x is an sqaure matrix
243 ! rmult is m work array of length nrows = ncols
244 class(matrix_t), intent(inout) :: m
245 integer :: indr(m%nrows), indc(m%ncols), ipiv(m%ncols)
246 real(kind=xp) :: rmult(m%nrows), amx, tmp, piv, eps
247 integer :: i, j, k, ir, jc
248
249 if (.not. (m%ncols .eq. m%nrows)) then
250 call neko_error("Fatal error: trying to invert m matrix that is not &
251 &square")
252 end if
253
254 eps = 1e-9_rp
255 ipiv = 0
256
257 do k = 1, m%nrows
258 amx = 0.0_rp
259 do i = 1, m%nrows ! Pivot search
260 if (ipiv(i) .ne. 1) then
261 do j = 1, m%nrows
262 if (ipiv(j) .eq. 0) then
263 if (abs(m%x(i, j)) .ge. amx) then
264 amx = abs(m%x(i, j))
265 ir = i
266 jc = j
267 end if
268 else if (ipiv(j) .gt. 1) then
269 return
270 end if
271 end do
272 end if
273 end do
274 ipiv(jc) = ipiv(jc) + 1
275
276 ! Swap rows
277 if (ir .ne. jc) then
278 do j = 1, m%ncols
279 tmp = m%x(ir, j)
280 m%x(ir, j) = m%x(jc, j)
281 m%x(jc, j) = tmp
282 end do
283 end if
284 indr(k) = ir
285 indc(k) = jc
286
287 if (abs(m%x(jc, jc)) .lt. eps) then
288 call neko_error("matrix_inverse error: small Gauss Jordan Piv")
289 end if
290 piv = 1.0_xp/m%x(jc, jc)
291 m%x(jc, jc) = 1.0_xp
292 do j = 1, m%ncols
293 m%x(jc, j) = m%x(jc, j)*piv
294 end do
295
296 do j = 1, m%ncols
297 tmp = m%x(jc, j)
298 m%x(jc, j) = m%x(1 , j)
299 m%x(1 , j) = tmp
300 end do
301 do i = 2, m%nrows
302 rmult(i) = m%x(i, jc)
303 m%x(i, jc) = 0.0_rp
304 end do
305
306 do j = 1, m%ncols
307 do i = 2, m%nrows
308 m%x(i, j) = m%x(i, j) - rmult(i)*m%x(1, j)
309 end do
310 end do
311
312 do j = 1, m%ncols
313 tmp = m%x(jc, j)
314 m%x(jc, j) = m%x(1 , j)
315 m%x(1 , j) = tmp
316 end do
317 end do
318
319 ! Unscramble matrix
320 do j= m%nrows, 1, -1
321 if (indr(j) .ne. indc(j)) then
322 do i = 1, m%nrows
323 tmp = m%x(i, indr(j))
324 m%x(i, indr(j)) = m%x(i, indc(j))
325 m%x(i, indc(j)) = tmp
326 end do
327 end if
328 end do
329
330 return
331 end subroutine cpu_matrix_inverse
332
333end module matrix
Deassociate a Fortran array from a device pointer.
Definition device.F90:95
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Synchronize a device or stream.
Definition device.F90:107
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_cadd2(a_d, b_d, c, n, strm)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_add3(a_d, b_d, c_d, n, strm)
Vector addition .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:423
subroutine, public cadd2(a, b, s, n)
Add a scalar to vector .
Definition math.f90:474
subroutine, public add3(a, b, c, n)
Vector addition .
Definition math.f90:739
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:781
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:579
Defines a matrix.
Definition matrix.f90:34
subroutine matrix_free(m)
Deallocate a matrix.
Definition matrix.f90:124
subroutine matrix_bcknd_inverse(m, bcknd)
Definition matrix.f90:226
pure integer function matrix_ncols(m)
Returns the number of columns in the matrix.
Definition matrix.f90:173
subroutine matrix_assign_matrix(m, w)
Assignment .
Definition matrix.f90:180
subroutine cpu_matrix_inverse(m)
Definition matrix.f90:240
pure integer function matrix_nrows(m)
Returns the number of rows in the matrix.
Definition matrix.f90:166
pure integer function matrix_size(m)
Returns the number of entries in the matrix.
Definition matrix.f90:143
subroutine matrix_copy_from(m, memdir, sync)
Easy way to copy between host and device.
Definition matrix.f90:154
subroutine matrix_assign_scalar(m, s)
Assignment .
Definition matrix.f90:210
subroutine matrix_init(m, nrows, ncols)
Initialise a matrix of size nrows*ncols.
Definition matrix.f90:87
subroutine matrix_allocate(m, nrows, ncols)
Allocate a matrix of size nrows*ncols.
Definition matrix.f90:103
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35