Neko 0.9.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
matrix.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module matrix
36 use math, only: sub3, chsign, add3, cmult2, cadd2, copy
37 use num_types, only: rp
38 use device, only: device_map, device_free, c_ptr, c_null_ptr
41 use utils, only: neko_error
42 use, intrinsic :: iso_c_binding
43 implicit none
44 private
45
46 type, public :: matrix_t
47 real(kind=rp), allocatable :: x(:,:)
48 type(c_ptr) :: x_d = c_null_ptr
49 integer :: nrows = 0
50 integer :: ncols = 0
51 integer :: n = 0
52 contains
54 procedure, pass(m) :: init => matrix_init
56 procedure, pass(m) :: free => matrix_free
58 procedure, pass(m) :: size => matrix_size
60 procedure, pass(m) :: matrix_assign_matrix
62 procedure, pass(m) :: matrix_assign_scalar
64 procedure, pass(m) :: matrix_add_matrix
66 procedure, pass(m) :: matrix_add_scalar_left
68 procedure, pass(m) :: matrix_add_scalar_right
70 procedure, pass(m) :: matrix_sub_matrix
72 procedure, pass(m) :: matrix_sub_scalar_left
74 procedure, pass(m) :: matrix_sub_scalar_right
76 procedure, pass(m) :: matrix_cmult_left
78 procedure, pass(m) :: matrix_cmult_right
80 procedure, pass(m) :: inverse => matrix_bcknd_inverse
81
82 generic :: assignment(=) => matrix_assign_matrix, &
84 generic :: operator(+) => matrix_add_matrix, &
86 generic :: operator(-) => matrix_sub_matrix, &
88 generic :: operator(*) => matrix_cmult_left, matrix_cmult_right
89 end type matrix_t
90
91contains
92
96 subroutine matrix_init(m, nrows, ncols)
97 class(matrix_t), intent(inout) :: m
98 integer, intent(in) :: nrows
99 integer, intent(in) :: ncols
100
101 call m%free()
102
103 allocate(m%x(nrows, ncols))
104 m%x = 0.0_rp
105 m%nrows = nrows
106 m%ncols = ncols
107 m%n = nrows*ncols
108
109 if (neko_bcknd_device .eq. 1) then
110 call device_map(m%x, m%x_d, m%n)
111 call device_cfill(m%x_d, 0.0_rp, m%n)
112 end if
113
114 end subroutine matrix_init
115
117 subroutine matrix_free(m)
118 class(matrix_t), intent(inout) :: m
119
120 if (allocated(m%x)) then
121 deallocate(m%x)
122 end if
123
124 if (c_associated(m%x_d)) then
125 call device_free(m%x_d)
126 end if
127
128 m%nrows = 0
129 m%ncols = 0
130 m%n = 0
131
132 end subroutine matrix_free
133
135 function matrix_size(m) result(s)
136 class(matrix_t), intent(inout) :: m
137 integer :: s
138 s = m%n
139 end function matrix_size
140
142 subroutine matrix_assign_matrix(m, w)
143 class(matrix_t), intent(inout) :: m
144 type(matrix_t), intent(in) :: w
145
146 if (allocated(m%x)) then
147 call m%free()
148 end if
149
150 if (.not. allocated(m%x)) then
151
152 m%nrows = w%nrows
153 m%ncols = w%ncols
154 m%n = w%n
155 allocate(m%x(m%nrows, m%ncols))
156
157 if (neko_bcknd_device .eq. 1) then
158 call device_map(m%x, m%x_d, m%n)
159 end if
160
161 end if
162
163 if (neko_bcknd_device .eq. 1) then
164 call device_copy(m%x_d, w%x_d, m%n)
165 else
166 m%x = w%x
167 end if
168
169 end subroutine matrix_assign_matrix
170
172 subroutine matrix_assign_scalar(m, s)
173 class(matrix_t), intent(inout) :: m
174 real(kind=rp), intent(in) :: s
175
176 if (.not. allocated(m%x)) then
177 call neko_error('matrix not allocated')
178 end if
179
180 if (neko_bcknd_device .eq. 1) then
181 call device_cfill(m%x_d, s, m%n)
182 else
183 m%x = s
184 end if
185
186 end subroutine matrix_assign_scalar
187
189 function matrix_add_matrix(m, b) result(v)
190 class(matrix_t), intent(in) :: m, b
191 type(matrix_t) :: v
192
193 if (m%nrows .ne. b%nrows .or. m%ncols .ne. b%ncols) &
194 call neko_error("Matrices must be the same size!")
195
196 v%n = m%n
197 v%nrows = m%nrows
198 v%ncols = m%ncols
199 allocate(v%x(v%nrows, v%ncols))
200
201 if (neko_bcknd_device .eq. 1) then
202 call device_map(v%x, v%x_d, v%n)
203 end if
204
205 if (neko_bcknd_device .eq. 1) then
206 call device_add3(v%x_d, m%x_d, b%x_d, v%n)
207 else
208 call add3(v%x, m%x, b%x, v%n)
209 end if
210
211 end function matrix_add_matrix
212
214 function matrix_add_scalar_left(m, c) result(v)
215 class(matrix_t), intent(in) :: m
216 real(kind=rp), intent(in) :: c
217 type(matrix_t) :: v
218
219 v%n = m%n
220 v%nrows = m%nrows
221 v%ncols = m%ncols
222 allocate(v%x(v%nrows, v%ncols))
223
224 if (neko_bcknd_device .eq. 1) then
225 call device_map(v%x, v%x_d, v%n)
226 end if
227
228 if (neko_bcknd_device .eq. 1) then
229 call device_cadd2(v%x_d, m%x_d, c, v%n)
230 else
231 call cadd2(v%x, m%x, c, v%n)
232 end if
233
234 end function matrix_add_scalar_left
235
237 function matrix_add_scalar_right(c, m) result(v)
238 real(kind=rp), intent(in) :: c
239 class(matrix_t), intent(in) :: m
240 type(matrix_t) :: v
241
242 v = matrix_add_scalar_left(m, c)
243
244 end function matrix_add_scalar_right
245
247 function matrix_sub_matrix(m, b) result(v)
248 class(matrix_t), intent(in) :: m, b
249 type(matrix_t) :: v
250
251 if (m%nrows .ne. b%nrows .or. m%ncols .ne. b%ncols) &
252 call neko_error("Matrices must be the same size!")
253
254 v%n = m%n
255 v%nrows = m%nrows
256 v%ncols = m%ncols
257 allocate(v%x(v%nrows, v%ncols))
258
259 if (neko_bcknd_device .eq. 1) then
260 call device_map(v%x, v%x_d, v%n)
261 end if
262
263 if (neko_bcknd_device .eq. 1) then
264 call device_sub3(v%x_d, m%x_d, b%x_d, v%n)
265 else
266 call sub3(v%x, m%x, b%x, v%n)
267 end if
268
269 end function matrix_sub_matrix
270
272 function matrix_sub_scalar_left(m, c) result(v)
273 class(matrix_t), intent(in) :: m
274 real(kind=rp), intent(in) :: c
275 type(matrix_t) :: v
276
277 v%n = m%n
278 v%nrows = m%nrows
279 v%ncols = m%ncols
280 allocate(v%x(v%nrows, v%ncols))
281
282 if (neko_bcknd_device .eq. 1) then
283 call device_map(v%x, v%x_d, v%n)
284 end if
285
286 if (neko_bcknd_device .eq. 1) then
287 call device_cadd2(v%x_d, m%x_d, -1.0_rp*c, v%n)
288 else
289 call cadd2(v%x, m%x, -1.0_rp*c, m%n)
290 end if
291
292 end function matrix_sub_scalar_left
293
295 function matrix_sub_scalar_right(c, m) result(v)
296 real(kind=rp), intent(in) :: c
297 class(matrix_t), intent(in) :: m
298 type(matrix_t) :: v
299
300 v = matrix_sub_scalar_left(m, c)
301
302 if (neko_bcknd_device .eq. 1) then
303 call device_cmult(v%x_d, -1.0_rp, v%n)
304 else
305 call chsign(v%x, v%n)
306 end if
307
308 end function matrix_sub_scalar_right
309
311 function matrix_cmult_left(m, c) result(v)
312 class(matrix_t), intent(in) :: m
313 real(kind=rp), intent(in) :: c
314 type(matrix_t) :: v
315
316 v%n = m%n
317 v%nrows = m%nrows
318 v%ncols = m%ncols
319 allocate(v%x(v%nrows, v%ncols))
320
321 if (neko_bcknd_device .eq. 1) then
322 call device_map(v%x, v%x_d, v%n)
323 end if
324
325 if (neko_bcknd_device .eq. 1) then
326 call device_cmult2(v%x_d, m%x_d, c, v%n)
327 else
328 call cmult2(v%x, m%x, c, v%n)
329 end if
330
331 end function matrix_cmult_left
332
334 function matrix_cmult_right(c, m) result(v)
335 real(kind=rp), intent(in) :: c
336 class(matrix_t), intent(in) :: m
337 type(matrix_t) :: v
338
339 v = matrix_cmult_left(m, c)
340
341 end function matrix_cmult_right
342
344 class(matrix_t), intent(inout) :: m
345 if (neko_bcknd_device .eq. 1) then
346 call neko_error("matrix_bcknd_inverse not implemented on accelarators.")
347 else
348 call cpu_matrix_inverse(m)
349 end if
350 end subroutine matrix_bcknd_inverse
351
352 subroutine cpu_matrix_inverse(m)
353 ! Gauss-Jordan matrix inversion with full pivoting
354 ! Num. Rec. p. 30, 2nd Ed., Fortran
355 ! m%x is an sqaure matrix
356 ! rmult is m work array of length nrows = ncols
357 class(matrix_t), intent(inout) :: m
358 integer :: indr(m%nrows), indc(m%ncols), ipiv(m%ncols)
359 real(kind=rp) :: rmult(m%nrows), amx, tmp, piv, eps
360 integer :: i, j, k, ir, jc
361
362 if (.not. (m%ncols .eq. m%nrows)) then
363 call neko_error("Fatal error: trying to invert m matrix that is not &
364&square")
365 end if
366
367 eps = 1e-9_rp
368 ipiv = 0
369
370 do k = 1, m%nrows
371 amx = 0.0_rp
372 do i = 1, m%nrows ! Pivot search
373 if (ipiv(i) .ne. 1) then
374 do j = 1, m%nrows
375 if (ipiv(j) .eq. 0) then
376 if (abs(m%x(i, j)) .ge. amx) then
377 amx = abs(m%x(i, j))
378 ir = i
379 jc = j
380 end if
381 else if (ipiv(j) .gt. 1) then
382 return
383 end if
384 end do
385 end if
386 end do
387 ipiv(jc) = ipiv(jc) + 1
388
389 ! Swap rows
390 if (ir .ne. jc) then
391 do j = 1, m%ncols
392 tmp = m%x(ir, j)
393 m%x(ir, j) = m%x(jc, j)
394 m%x(jc, j) = tmp
395 end do
396 end if
397 indr(k) = ir
398 indc(k) = jc
399
400 if (abs(m%x(jc, jc)) .lt. eps) then
401 call neko_error("matrix_inverse error: small Gauss Jordan Piv")
402 end if
403 piv = 1.0_rp/m%x(jc, jc)
404 m%x(jc, jc) = 1.0_rp
405 do j = 1, m%ncols
406 m%x(jc, j) = m%x(jc, j)*piv
407 end do
408
409 do j = 1, m%ncols
410 tmp = m%x(jc, j)
411 m%x(jc, j) = m%x(1 , j)
412 m%x(1 , j) = tmp
413 end do
414 do i = 2, m%nrows
415 rmult(i) = m%x(i, jc)
416 m%x(i, jc) = 0.0_rp
417 end do
418
419 do j = 1, m%ncols
420 do i = 2, m%nrows
421 m%x(i, j) = m%x(i, j) - rmult(i)*m%x(1, j)
422 end do
423 end do
424
425 do j = 1, m%ncols
426 tmp = m%x(jc, j)
427 m%x(jc, j) = m%x(1 , j)
428 m%x(1 , j) = tmp
429 end do
430 end do
431
432 ! Unscramble matrix
433 do j= m%nrows, 1, -1
434 if (indr(j) .ne. indc(j)) then
435 do i = 1, m%nrows
436 tmp = m%x(i, indr(j))
437 m%x(i, indr(j)) = m%x(i, indc(j))
438 m%x(i, indc(j)) = tmp
439 end do
440 end if
441 end do
442
443 return
444 end subroutine cpu_matrix_inverse
445
446end module matrix
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition .
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:701
subroutine, public cadd2(a, b, s, n)
Add a scalar to vector .
Definition math.f90:335
subroutine, public add3(a, b, c, n)
Vector addition .
Definition math.f90:600
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:642
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:440
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:239
Defines a matrix.
Definition matrix.f90:34
subroutine matrix_free(m)
Deallocate a matrix.
Definition matrix.f90:118
type(matrix_t) function matrix_sub_matrix(m, b)
Matrix-matrix subtraction .
Definition matrix.f90:248
subroutine matrix_assign_matrix(m, w)
Assignment .
Definition matrix.f90:143
type(matrix_t) function matrix_add_scalar_left(m, c)
Matrix-scalar addition .
Definition matrix.f90:215
type(matrix_t) function matrix_sub_scalar_right(c, m)
Scalar-matrix subtraction .
Definition matrix.f90:296
type(matrix_t) function matrix_add_matrix(m, b)
Matrix-matrix addition .
Definition matrix.f90:190
type(matrix_t) function matrix_add_scalar_right(c, m)
Scalar-matrix addition .
Definition matrix.f90:238
type(matrix_t) function matrix_cmult_right(c, m)
Scalar-matrix multiplication .
Definition matrix.f90:335
subroutine matrix_bcknd_inverse(m)
Definition matrix.f90:344
integer function matrix_size(m)
Returns the number of entries in the matrix.
Definition matrix.f90:136
subroutine cpu_matrix_inverse(m)
Definition matrix.f90:353
type(matrix_t) function matrix_cmult_left(m, c)
Matrix-scalar multiplication .
Definition matrix.f90:312
type(matrix_t) function matrix_sub_scalar_left(m, c)
Matrix-scalar subtraction .
Definition matrix.f90:273
subroutine matrix_assign_scalar(m, s)
Assignment .
Definition matrix.f90:173
subroutine matrix_init(m, nrows, ncols)
Initialise a matrix of size nrows*ncols.
Definition matrix.f90:97
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35