Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
matrix.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module matrix
36 use math, only: sub3, chsign, add3, cmult2, cadd2
37 use num_types, only: rp
38 use device, only: device_map, device_free, c_ptr, c_null_ptr
41 use utils, only: neko_error
42 use, intrinsic :: iso_c_binding
43 implicit none
44 private
45
46 type, public :: matrix_t
47 real(kind=rp), allocatable :: x(:,:)
48 type(c_ptr) :: x_d = c_null_ptr
49 integer :: nrows = 0
50 integer :: ncols = 0
51 integer :: n = 0
52 contains
54 procedure, pass(m) :: init => matrix_init
56 procedure, pass(m) :: free => matrix_free
58 procedure, pass(m) :: size => matrix_size
60 procedure, pass(m) :: matrix_assign_matrix
62 procedure, pass(m) :: matrix_assign_scalar
64 procedure, pass(m) :: matrix_add_matrix
66 procedure, pass(m) :: matrix_add_scalar_left
68 procedure, pass(m) :: matrix_add_scalar_right
70 procedure, pass(m) :: matrix_sub_matrix
72 procedure, pass(m) :: matrix_sub_scalar_left
74 procedure, pass(m) :: matrix_sub_scalar_right
76 procedure, pass(m) :: matrix_cmult_left
78 procedure, pass(m) :: matrix_cmult_right
80 procedure, pass(m) :: inverse => matrix_bcknd_inverse
81
82 generic :: assignment(=) => matrix_assign_matrix, &
84 generic :: operator(+) => matrix_add_matrix, &
86 generic :: operator(-) => matrix_sub_matrix, &
88 generic :: operator(*) => matrix_cmult_left, matrix_cmult_right
89 end type matrix_t
90
91contains
92
96 subroutine matrix_init(m, nrows, ncols)
97 class(matrix_t), intent(inout) :: m
98 integer, intent(in) :: nrows
99 integer, intent(in) :: ncols
100
101 call m%free()
102
103 allocate(m%x(nrows, ncols))
104 m%x = 0.0_rp
105 m%nrows = nrows
106 m%ncols = ncols
107 m%n = nrows*ncols
108
109 if (neko_bcknd_device .eq. 1) then
110 call device_map(m%x, m%x_d, m%n)
111 call device_cfill(m%x_d, 0.0_rp, m%n)
112 end if
113
114 end subroutine matrix_init
115
117 subroutine matrix_free(m)
118 class(matrix_t), intent(inout) :: m
119
120 if (allocated(m%x)) then
121 deallocate(m%x)
122 end if
123
124 if (c_associated(m%x_d)) then
125 call device_free(m%x_d)
126 end if
127
128 m%nrows = 0
129 m%ncols = 0
130 m%n = 0
131
132 end subroutine matrix_free
133
135 function matrix_size(m) result(s)
136 class(matrix_t), intent(inout) :: m
137 integer :: s
138 s = m%n
139 end function matrix_size
140
142 subroutine matrix_assign_matrix(m, w)
143 class(matrix_t), intent(inout) :: m
144 type(matrix_t), intent(in) :: w
145
146 if (allocated(m%x)) then
147 call m%free()
148 end if
149
150 if (.not. allocated(m%x)) then
151
152 m%nrows = w%nrows
153 m%ncols = w%ncols
154 m%n = w%n
155 allocate(m%x(m%nrows, m%ncols))
156
157 if (neko_bcknd_device .eq. 1) then
158 call device_map(m%x, m%x_d, m%n)
159 end if
160
161 end if
162
163 if (neko_bcknd_device .eq. 1) then
164 call device_copy(m%x_d, w%x_d, m%n)
165 else
166 m%x = w%x
167 end if
168
169 end subroutine matrix_assign_matrix
170
172 subroutine matrix_assign_scalar(m, s)
173 class(matrix_t), intent(inout) :: m
174 real(kind=rp), intent(in) :: s
175
176 if (.not. allocated(m%x)) then
177 call neko_error('matrix not allocated')
178 end if
179
180 if (neko_bcknd_device .eq. 1) then
181 call device_cfill(m%x_d, s, m%n)
182 else
183 m%x = s
184 end if
185
186 end subroutine matrix_assign_scalar
187
189 function matrix_add_matrix(m, b) result(v)
190 class(matrix_t), intent(in) :: m, b
191 type(matrix_t) :: v
192
193 if (m%nrows .ne. b%nrows .or. m%ncols .ne. b%ncols) &
194 call neko_error("Matrices must be the same size!")
195
196 v%n = m%n
197 v%nrows = m%nrows
198 v%ncols = m%ncols
199 allocate(v%x(v%nrows, v%ncols))
200
201 if (neko_bcknd_device .eq. 1) then
202 call device_map(v%x, v%x_d, v%n)
203 end if
204
205 if (neko_bcknd_device .eq. 1) then
206 call device_add3(v%x_d, m%x_d, b%x_d, v%n)
207 else
208 call add3(v%x, m%x, b%x, v%n)
209 end if
210
211 end function matrix_add_matrix
212
214 function matrix_add_scalar_left(m, c) result(v)
215 class(matrix_t), intent(in) :: m
216 real(kind=rp), intent(in) :: c
217 type(matrix_t) :: v
218
219 v%n = m%n
220 v%nrows = m%nrows
221 v%ncols = m%ncols
222 allocate(v%x(v%nrows, v%ncols))
223
224 if (neko_bcknd_device .eq. 1) then
225 call device_map(v%x, v%x_d, v%n)
226 end if
227
228 if (neko_bcknd_device .eq. 1) then
229 call device_cadd2(v%x_d, m%x_d, c, v%n)
230 else
231 call cadd2(v%x, m%x, c, v%n)
232 end if
233
234 end function matrix_add_scalar_left
235
237 function matrix_add_scalar_right(c, m) result(v)
238 real(kind=rp), intent(in) :: c
239 class(matrix_t), intent(in) :: m
240 type(matrix_t) :: v
241
242 v = matrix_add_scalar_left(m, c)
243
244 end function matrix_add_scalar_right
245
247 function matrix_sub_matrix(m, b) result(v)
248 class(matrix_t), intent(in) :: m, b
249 type(matrix_t) :: v
250
251 if (m%nrows .ne. b%nrows .or. m%ncols .ne. b%ncols) &
252 call neko_error("Matrices must be the same size!")
253
254 v%n = m%n
255 v%nrows = m%nrows
256 v%ncols = m%ncols
257 allocate(v%x(v%nrows, v%ncols))
258
259 if (neko_bcknd_device .eq. 1) then
260 call device_map(v%x, v%x_d, v%n)
261 end if
262
263 if (neko_bcknd_device .eq. 1) then
264 call device_sub3(v%x_d, m%x_d, b%x_d, v%n)
265 else
266 call sub3(v%x, m%x, b%x, v%n)
267 end if
268
269 end function matrix_sub_matrix
270
272 function matrix_sub_scalar_left(m, c) result(v)
273 class(matrix_t), intent(in) :: m
274 real(kind=rp), intent(in) :: c
275 type(matrix_t) :: v
276
277 v%n = m%n
278 v%nrows = m%nrows
279 v%ncols = m%ncols
280 allocate(v%x(v%nrows, v%ncols))
281
282 if (neko_bcknd_device .eq. 1) then
283 call device_map(v%x, v%x_d, v%n)
284 end if
285
286 if (neko_bcknd_device .eq. 1) then
287 call device_cadd2(v%x_d, m%x_d, -1.0_rp*c, v%n)
288 else
289 call cadd2(v%x, m%x, -1.0_rp*c, m%n)
290 end if
291
292 end function matrix_sub_scalar_left
293
295 function matrix_sub_scalar_right(c, m) result(v)
296 real(kind=rp), intent(in) :: c
297 class(matrix_t), intent(in) :: m
298 type(matrix_t) :: v
299
300 v = matrix_sub_scalar_left(m, c)
301
302 if (neko_bcknd_device .eq. 1) then
303 call device_cmult(v%x_d, -1.0_rp, v%n)
304 else
305 call chsign(v%x, v%n)
306 end if
307
308 end function matrix_sub_scalar_right
309
311 function matrix_cmult_left(m, c) result(v)
312 class(matrix_t), intent(in) :: m
313 real(kind=rp), intent(in) :: c
314 type(matrix_t) :: v
315
316 v%n = m%n
317 v%nrows = m%nrows
318 v%ncols = m%ncols
319 allocate(v%x(v%nrows, v%ncols))
320
321 if (neko_bcknd_device .eq. 1) then
322 call device_map(v%x, v%x_d, v%n)
323 end if
324
325 if (neko_bcknd_device .eq. 1) then
326 call device_cmult2(v%x_d, m%x_d, c, v%n)
327 else
328 call cmult2(v%x, m%x, c, v%n)
329 end if
330
331 end function matrix_cmult_left
332
334 function matrix_cmult_right(c, m) result(v)
335 real(kind=rp), intent(in) :: c
336 class(matrix_t), intent(in) :: m
337 type(matrix_t) :: v
338
339 v = matrix_cmult_left(m, c)
340
341 end function matrix_cmult_right
342
343 subroutine matrix_bcknd_inverse(m, bcknd)
344 class(matrix_t), intent(inout) :: m
345 integer, optional :: bcknd
346
347 if (neko_bcknd_device .eq. 1 .and. &
348 bcknd .eq. neko_bcknd_device) then
349 call neko_error("matrix_bcknd_inverse not &
350 &implemented on accelarators.")
351 else
352 call cpu_matrix_inverse(m)
353 end if
354
355 end subroutine matrix_bcknd_inverse
356
357 subroutine cpu_matrix_inverse(m)
358 ! Gauss-Jordan matrix inversion with full pivoting
359 ! Num. Rec. p. 30, 2nd Ed., Fortran
360 ! m%x is an sqaure matrix
361 ! rmult is m work array of length nrows = ncols
362 class(matrix_t), intent(inout) :: m
363 integer :: indr(m%nrows), indc(m%ncols), ipiv(m%ncols)
364 real(kind=rp) :: rmult(m%nrows), amx, tmp, piv, eps
365 integer :: i, j, k, ir, jc
366
367 if (.not. (m%ncols .eq. m%nrows)) then
368 call neko_error("Fatal error: trying to invert m matrix that is not &
369&square")
370 end if
371
372 eps = 1e-9_rp
373 ipiv = 0
374
375 do k = 1, m%nrows
376 amx = 0.0_rp
377 do i = 1, m%nrows ! Pivot search
378 if (ipiv(i) .ne. 1) then
379 do j = 1, m%nrows
380 if (ipiv(j) .eq. 0) then
381 if (abs(m%x(i, j)) .ge. amx) then
382 amx = abs(m%x(i, j))
383 ir = i
384 jc = j
385 end if
386 else if (ipiv(j) .gt. 1) then
387 return
388 end if
389 end do
390 end if
391 end do
392 ipiv(jc) = ipiv(jc) + 1
393
394 ! Swap rows
395 if (ir .ne. jc) then
396 do j = 1, m%ncols
397 tmp = m%x(ir, j)
398 m%x(ir, j) = m%x(jc, j)
399 m%x(jc, j) = tmp
400 end do
401 end if
402 indr(k) = ir
403 indc(k) = jc
404
405 if (abs(m%x(jc, jc)) .lt. eps) then
406 call neko_error("matrix_inverse error: small Gauss Jordan Piv")
407 end if
408 piv = 1.0_rp/m%x(jc, jc)
409 m%x(jc, jc) = 1.0_rp
410 do j = 1, m%ncols
411 m%x(jc, j) = m%x(jc, j)*piv
412 end do
413
414 do j = 1, m%ncols
415 tmp = m%x(jc, j)
416 m%x(jc, j) = m%x(1 , j)
417 m%x(1 , j) = tmp
418 end do
419 do i = 2, m%nrows
420 rmult(i) = m%x(i, jc)
421 m%x(i, jc) = 0.0_rp
422 end do
423
424 do j = 1, m%ncols
425 do i = 2, m%nrows
426 m%x(i, j) = m%x(i, j) - rmult(i)*m%x(1, j)
427 end do
428 end do
429
430 do j = 1, m%ncols
431 tmp = m%x(jc, j)
432 m%x(jc, j) = m%x(1 , j)
433 m%x(1 , j) = tmp
434 end do
435 end do
436
437 ! Unscramble matrix
438 do j= m%nrows, 1, -1
439 if (indr(j) .ne. indc(j)) then
440 do i = 1, m%nrows
441 tmp = m%x(i, indr(j))
442 m%x(i, indr(j)) = m%x(i, indc(j))
443 m%x(i, indc(j)) = tmp
444 end do
445 end if
446 end do
447
448 return
449 end subroutine cpu_matrix_inverse
450
451end module matrix
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition .
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:700
subroutine, public cadd2(a, b, s, n)
Add a scalar to vector .
Definition math.f90:334
subroutine, public add3(a, b, c, n)
Vector addition .
Definition math.f90:599
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:641
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:439
Defines a matrix.
Definition matrix.f90:34
subroutine matrix_free(m)
Deallocate a matrix.
Definition matrix.f90:118
type(matrix_t) function matrix_sub_matrix(m, b)
Matrix-matrix subtraction .
Definition matrix.f90:248
subroutine matrix_bcknd_inverse(m, bcknd)
Definition matrix.f90:344
subroutine matrix_assign_matrix(m, w)
Assignment .
Definition matrix.f90:143
type(matrix_t) function matrix_add_scalar_left(m, c)
Matrix-scalar addition .
Definition matrix.f90:215
type(matrix_t) function matrix_sub_scalar_right(c, m)
Scalar-matrix subtraction .
Definition matrix.f90:296
type(matrix_t) function matrix_add_matrix(m, b)
Matrix-matrix addition .
Definition matrix.f90:190
type(matrix_t) function matrix_add_scalar_right(c, m)
Scalar-matrix addition .
Definition matrix.f90:238
type(matrix_t) function matrix_cmult_right(c, m)
Scalar-matrix multiplication .
Definition matrix.f90:335
integer function matrix_size(m)
Returns the number of entries in the matrix.
Definition matrix.f90:136
subroutine cpu_matrix_inverse(m)
Definition matrix.f90:358
type(matrix_t) function matrix_cmult_left(m, c)
Matrix-scalar multiplication .
Definition matrix.f90:312
type(matrix_t) function matrix_sub_scalar_left(m, c)
Matrix-scalar subtraction .
Definition matrix.f90:273
subroutine matrix_assign_scalar(m, s)
Assignment .
Definition matrix.f90:173
subroutine matrix_init(m, nrows, ncols)
Initialise a matrix of size nrows*ncols.
Definition matrix.f90:97
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35