Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
matrix.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module matrix
36 use math, only: sub3, chsign, add3, cmult2, cadd2
37 use num_types, only: rp, xp
42 use utils, only: neko_error
43 use, intrinsic :: iso_c_binding
44 implicit none
45 private
46
47 type, public :: matrix_t
48 real(kind=rp), allocatable :: x(:,:)
49 type(c_ptr) :: x_d = c_null_ptr
50 integer, private :: nrows = 0
51 integer, private :: ncols = 0
52 integer, private :: n = 0
53 contains
55 procedure, pass(m) :: init => matrix_init
57 procedure, pass(m) :: free => matrix_free
59 procedure, pass(m) :: size => matrix_size
61 procedure, pass(m) :: copy_from => matrix_copy_from
63 procedure, pass(m) :: get_nrows => matrix_nrows
65 procedure, pass(m) :: get_ncols => matrix_ncols
67 procedure, pass(m) :: matrix_assign_matrix
69 procedure, pass(m) :: matrix_assign_scalar
71 procedure, pass(m) :: inverse => matrix_bcknd_inverse
72 procedure, pass(m) :: inverse_on_host => cpu_matrix_inverse
73
74 generic :: assignment(=) => matrix_assign_matrix, &
76
78 procedure, pass(m), private :: alloc => matrix_allocate
79 end type matrix_t
80
81 type, public :: matrix_ptr_t
82 type(matrix_t), pointer :: ptr
83 end type matrix_ptr_t
84
85contains
86
90 subroutine matrix_init(m, nrows, ncols)
91 class(matrix_t), intent(inout) :: m
92 integer, intent(in) :: nrows
93 integer, intent(in) :: ncols
94
95 call m%alloc(nrows, ncols)
96 m%x = 0.0_rp
97 if (neko_bcknd_device .eq. 1) then
98 call device_cfill(m%x_d, 0.0_rp, m%n)
99 end if
100
101 end subroutine matrix_init
102
106 subroutine matrix_allocate(m, nrows, ncols)
107 class(matrix_t), intent(inout) :: m
108 integer, intent(in) :: nrows
109 integer, intent(in) :: ncols
110
111 call m%free()
112
113 allocate(m%x(nrows, ncols))
114 m%nrows = nrows
115 m%ncols = ncols
116 m%n = nrows*ncols
117
118 if (neko_bcknd_device .eq. 1) then
119 call device_map(m%x, m%x_d, m%n)
120 call device_cfill(m%x_d, 0.0_rp, m%n)
121 call device_sync()
122 end if
123
124 end subroutine matrix_allocate
125
127 subroutine matrix_free(m)
128 class(matrix_t), intent(inout) :: m
129
130 if (allocated(m%x)) then
131 deallocate(m%x)
132 end if
133
134 if (c_associated(m%x_d)) then
135 call device_deassociate(m%x)
136 call device_free(m%x_d)
137 end if
138
139 m%nrows = 0
140 m%ncols = 0
141 m%n = 0
142
143 end subroutine matrix_free
144
146 pure function matrix_size(m) result(s)
147 class(matrix_t), intent(in) :: m
148 integer :: s
149 s = m%n
150 end function matrix_size
151
152
157 subroutine matrix_copy_from(m, memdir, sync)
158 class(matrix_t), intent(inout) :: m
159 integer, intent(in) :: memdir
160 logical, intent(in) :: sync
161
162 if (neko_bcknd_device .eq. 1) then
163 call device_memcpy(m%x, m%x_d, m%n, memdir, sync)
164 end if
165
166 end subroutine matrix_copy_from
167
169 pure function matrix_nrows(m) result(nr)
170 class(matrix_t), intent(in) :: m
171 integer :: nr
172 nr = m%nrows
173 end function matrix_nrows
174
176 pure function matrix_ncols(m) result(nc)
177 class(matrix_t), intent(in) :: m
178 integer :: nc
179 nc = m%ncols
180 end function matrix_ncols
181
183 subroutine matrix_assign_matrix(m, w)
184 class(matrix_t), intent(inout) :: m
185 type(matrix_t), intent(in) :: w
186
187 if (allocated(m%x)) then
188 call m%free()
189 end if
190
191 if (.not. allocated(m%x)) then
192
193 m%nrows = w%nrows
194 m%ncols = w%ncols
195 m%n = w%n
196 allocate(m%x(m%nrows, m%ncols))
197
198 if (neko_bcknd_device .eq. 1) then
199 call device_map(m%x, m%x_d, m%n)
200 end if
201
202 end if
203
204 if (neko_bcknd_device .eq. 1) then
205 call device_copy(m%x_d, w%x_d, m%n)
206 else
207 m%x = w%x
208 end if
209
210 end subroutine matrix_assign_matrix
211
213 subroutine matrix_assign_scalar(m, s)
214 class(matrix_t), intent(inout) :: m
215 real(kind=rp), intent(in) :: s
216
217 if (.not. allocated(m%x)) then
218 call neko_error('matrix not allocated')
219 end if
220
221 if (neko_bcknd_device .eq. 1) then
222 call device_cfill(m%x_d, s, m%n)
223 else
224 m%x = s
225 end if
226
227 end subroutine matrix_assign_scalar
228
229 subroutine matrix_bcknd_inverse(m, bcknd)
230 class(matrix_t), intent(inout) :: m
231 integer, optional :: bcknd
232
233 if (neko_bcknd_device .eq. 1 .and. &
234 bcknd .eq. neko_bcknd_device) then
235 call neko_error("matrix_bcknd_inverse not &
236 &implemented on accelarators.")
237 else
238 call cpu_matrix_inverse(m)
239 end if
240
241 end subroutine matrix_bcknd_inverse
242
243 subroutine cpu_matrix_inverse(m)
244 ! Gauss-Jordan matrix inversion with full pivoting
245 ! Num. Rec. p. 30, 2nd Ed., Fortran
246 ! m%x is an sqaure matrix
247 ! rmult is m work array of length nrows = ncols
248 class(matrix_t), intent(inout) :: m
249 integer :: indr(m%nrows), indc(m%ncols), ipiv(m%ncols)
250 real(kind=xp) :: rmult(m%nrows), amx, tmp, piv, eps
251 integer :: i, j, k, ir, jc
252
253 if (.not. (m%ncols .eq. m%nrows)) then
254 call neko_error("Fatal error: trying to invert m matrix that is not &
255 &square")
256 end if
257
258 eps = 1e-9_rp
259 ipiv = 0
260
261 do k = 1, m%nrows
262 amx = 0.0_rp
263 do i = 1, m%nrows ! Pivot search
264 if (ipiv(i) .ne. 1) then
265 do j = 1, m%nrows
266 if (ipiv(j) .eq. 0) then
267 if (abs(m%x(i, j)) .ge. amx) then
268 amx = abs(m%x(i, j))
269 ir = i
270 jc = j
271 end if
272 else if (ipiv(j) .gt. 1) then
273 return
274 end if
275 end do
276 end if
277 end do
278 ipiv(jc) = ipiv(jc) + 1
279
280 ! Swap rows
281 if (ir .ne. jc) then
282 do j = 1, m%ncols
283 tmp = m%x(ir, j)
284 m%x(ir, j) = m%x(jc, j)
285 m%x(jc, j) = tmp
286 end do
287 end if
288 indr(k) = ir
289 indc(k) = jc
290
291 if (abs(m%x(jc, jc)) .lt. eps) then
292 call neko_error("matrix_inverse error: small Gauss Jordan Piv")
293 end if
294 piv = 1.0_xp/m%x(jc, jc)
295 m%x(jc, jc) = 1.0_xp
296 do j = 1, m%ncols
297 m%x(jc, j) = m%x(jc, j)*piv
298 end do
299
300 do j = 1, m%ncols
301 tmp = m%x(jc, j)
302 m%x(jc, j) = m%x(1 , j)
303 m%x(1 , j) = tmp
304 end do
305 do i = 2, m%nrows
306 rmult(i) = m%x(i, jc)
307 m%x(i, jc) = 0.0_rp
308 end do
309
310 do j = 1, m%ncols
311 do i = 2, m%nrows
312 m%x(i, j) = m%x(i, j) - rmult(i)*m%x(1, j)
313 end do
314 end do
315
316 do j = 1, m%ncols
317 tmp = m%x(jc, j)
318 m%x(jc, j) = m%x(1 , j)
319 m%x(1 , j) = tmp
320 end do
321 end do
322
323 ! Unscramble matrix
324 do j= m%nrows, 1, -1
325 if (indr(j) .ne. indc(j)) then
326 do i = 1, m%nrows
327 tmp = m%x(i, indr(j))
328 m%x(i, indr(j)) = m%x(i, indc(j))
329 m%x(i, indc(j)) = tmp
330 end do
331 end if
332 end do
333
334 return
335 end subroutine cpu_matrix_inverse
336
337end module matrix
Deassociate a Fortran array from a device pointer.
Definition device.F90:95
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Synchronize a device or stream.
Definition device.F90:107
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_cadd2(a_d, b_d, c, n, strm)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_add3(a_d, b_d, c_d, n, strm)
Vector addition .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:423
subroutine, public cadd2(a, b, s, n)
Add a scalar to vector .
Definition math.f90:474
subroutine, public add3(a, b, c, n)
Vector addition .
Definition math.f90:739
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:781
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:579
Defines a matrix.
Definition matrix.f90:34
subroutine matrix_free(m)
Deallocate a matrix.
Definition matrix.f90:128
subroutine matrix_bcknd_inverse(m, bcknd)
Definition matrix.f90:230
pure integer function matrix_ncols(m)
Returns the number of columns in the matrix.
Definition matrix.f90:177
subroutine matrix_assign_matrix(m, w)
Assignment .
Definition matrix.f90:184
subroutine cpu_matrix_inverse(m)
Definition matrix.f90:244
pure integer function matrix_nrows(m)
Returns the number of rows in the matrix.
Definition matrix.f90:170
pure integer function matrix_size(m)
Returns the number of entries in the matrix.
Definition matrix.f90:147
subroutine matrix_copy_from(m, memdir, sync)
Easy way to copy between host and device.
Definition matrix.f90:158
subroutine matrix_assign_scalar(m, s)
Assignment .
Definition matrix.f90:214
subroutine matrix_init(m, nrows, ncols)
Initialise a matrix of size nrows*ncols.
Definition matrix.f90:91
subroutine matrix_allocate(m, nrows, ncols)
Allocate a matrix of size nrows*ncols.
Definition matrix.f90:107
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35