Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
60module math
61 use num_types, only : rp, dp, sp, qp, i4, xp
63 use mpi_f08, only : mpi_min, mpi_max, mpi_sum, mpi_in_place, mpi_integer, &
64 mpi_allreduce
65 use utils, only : nonlinear_index
66 implicit none
67 private
68
70 real(kind=rp), public, parameter :: neko_eps = epsilon(1.0_rp)
71
73 real(kind=rp), public, parameter :: neko_m_ln2 = log(2.0_rp)
74
76 real(kind=rp), public, parameter :: pi = 4._rp*atan(1._rp)
77
78 interface abscmp
79 module procedure sabscmp, dabscmp, qabscmp
80 end interface abscmp
81
82 interface sort
83 module procedure sortrp, sorti4
84 end interface sort
85
86 interface swap
87 module procedure swapdp, swapi4
88 end interface swap
89
90 interface reord
91 module procedure reorddp, reordi4
92 end interface reord
93
94 interface flipv
95 module procedure flipvdp, flipvi4
96 end interface flipv
97
98 interface relcmp
99 module procedure srelcmp, drelcmp, qrelcmp
100 end interface relcmp
101
102 public :: abscmp, rzero, izero, row_zero, rone, copy, cmult, cadd, cfill, &
110 matinv39, &
115
116contains
117
119 pure function sabscmp(x, y, tol)
120 real(kind=sp), intent(in) :: x
121 real(kind=sp), intent(in) :: y
122 real(kind=sp), intent(in), optional :: tol
123 logical :: sabscmp
124
125 if (present(tol)) then
126 sabscmp = abs(x - y) .lt. tol
127 else
128 sabscmp = abs(x - y) .lt. neko_eps
129 end if
130
131 end function sabscmp
132
134 pure function dabscmp(x, y, tol)
135 real(kind=dp), intent(in) :: x
136 real(kind=dp), intent(in) :: y
137 real(kind=dp), intent(in), optional :: tol
138 logical :: dabscmp
139
140 if (present(tol)) then
141 dabscmp = abs(x - y) .lt. tol
142 else
143 dabscmp = abs(x - y) .lt. neko_eps
144 end if
145
146 end function dabscmp
147
149 pure function qabscmp(x, y, tol)
150 real(kind=qp), intent(in) :: x
151 real(kind=qp), intent(in) :: y
152 real(kind=qp), intent(in), optional :: tol
153 logical :: qabscmp
154
155 if (present(tol)) then
156 qabscmp = abs(x - y) .lt. tol
157 else
158 qabscmp = abs(x - y) .lt. neko_eps
159 end if
160
161 end function qabscmp
162
164 pure function srelcmp(x, y, eps)
165 real(kind=sp), intent(in) :: x
166 real(kind=sp), intent(in) :: y
167 real(kind=sp), intent(in), optional :: eps
168 logical :: srelcmp
169 if (present(eps)) then
170 srelcmp = abs(x - y) .le. eps*abs(y)
171 else
172 srelcmp = abs(x - y) .le. neko_eps*abs(y)
173 end if
174
175 end function srelcmp
176
178 pure function drelcmp(x, y, eps)
179 real(kind=dp), intent(in) :: x
180 real(kind=dp), intent(in) :: y
181 real(kind=dp), intent(in), optional :: eps
182 logical :: drelcmp
183 if (present(eps)) then
184 drelcmp = abs(x - y) .le. eps*abs(y)
185 else
186 drelcmp = abs(x - y) .le. neko_eps*abs(y)
187 end if
188
189 end function drelcmp
190
191
193 pure function qrelcmp(x, y, eps)
194 real(kind=qp), intent(in) :: x
195 real(kind=qp), intent(in) :: y
196 real(kind=qp), intent(in), optional :: eps
197 logical :: qrelcmp
198 if (present(eps)) then
199 qrelcmp = abs(x - y)/abs(y) .lt. eps
200 else
201 qrelcmp = abs(x - y)/abs(y) .lt. neko_eps
202 end if
203
204 end function qrelcmp
205
211 pure function lambert_w0(x, niter) result(w)
212 real(kind=rp), intent(in) :: x
213 integer, intent(in) :: niter
214 real(kind=rp) :: w
215 real(kind=rp) :: a
216 integer :: k
217
218 if (x == 0.0_rp) then
219 w = 0.0_rp
220 return
221 end if
222
223 a = 1.0_rp / (1.0_rp + 0.5_rp * log(1.0_rp + x))
224 w = log(1.0_rp + a * x)
225
226 do k = 1, max(niter, 0)
227 w = w / (1.0_rp + w) * (1.0_rp + log(x / w))
228 end do
229 end function lambert_w0
230
232 subroutine rzero(a, n)
233 integer, intent(in) :: n
234 real(kind=rp), dimension(n), intent(inout) :: a
235 integer :: i
236
237 !$omp parallel do
238 do i = 1, n
239 a(i) = 0.0_rp
240 end do
241 !$omp end parallel do
242
243 end subroutine rzero
244
246 subroutine izero(a, n)
247 integer, intent(in) :: n
248 integer, dimension(n), intent(inout) :: a
249 integer :: i
250
251 !$omp parallel do
252 do i = 1, n
253 a(i) = 0
254 end do
255 !$omp end parallel do
256
257 end subroutine izero
258
260 subroutine row_zero(a, m, n, e)
261 integer, intent(in) :: m, n, e
262 real(kind=rp), intent(inout) :: a(m,n)
263 integer :: j
264
265 !$omp parallel do
266 do j = 1, n
267 a(e,j) = 0.0_rp
268 end do
269 !$omp end parallel do
270
271 end subroutine row_zero
272
274 subroutine rone(a, n)
275 integer, intent(in) :: n
276 real(kind=rp), dimension(n), intent(inout) :: a
277 integer :: i
278
279 !$omp parallel do
280 do i = 1, n
281 a(i) = 1.0_rp
282 end do
283 !$omp end parallel do
284
285 end subroutine rone
286
288 subroutine copy(a, b, n)
289 integer, intent(in) :: n
290 real(kind=rp), dimension(n), intent(in) :: b
291 real(kind=rp), dimension(n), intent(inout) :: a
292 integer :: i
293
294 !$omp parallel do
295 do i = 1, n
296 a(i) = b(i)
297 end do
298 !$omp end parallel do
299
300 end subroutine copy
301
309 subroutine masked_copy_0(a, b, mask, n, n_mask)
310 integer, intent(in) :: n, n_mask
311 real(kind=rp), dimension(n), intent(in) :: b
312 real(kind=rp), dimension(n), intent(inout) :: a
313 integer, dimension(0:n_mask) :: mask
314 integer :: i, j
315
316 !$omp parallel do private(i, j)
317 do i = 1, n_mask
318 j = mask(i)
319 a(j) = b(j)
320 end do
321 !$omp end parallel do
322
323 end subroutine masked_copy_0
324
332 subroutine masked_copy(a, b, mask, n, n_mask)
333 integer, intent(in) :: n, n_mask
334 real(kind=rp), dimension(n), intent(in) :: b
335 real(kind=rp), dimension(n), intent(inout) :: a
336 integer, dimension(n_mask) :: mask
337 integer :: i, j
338
339 !$omp parallel do private(i, j)
340 do i = 1, n_mask
341 j = mask(i)
342 a(j) = b(j)
343 end do
344 !$omp end parallel do
345
346 end subroutine masked_copy
347
357 subroutine masked_gather_copy_0(a, b, mask, n, n_mask)
358 integer, intent(in) :: n, n_mask
359 real(kind=rp), dimension(n), intent(in) :: b
360 real(kind=rp), dimension(n_mask), intent(inout) :: a
361 integer, dimension(0:n_mask) :: mask
362 integer :: i, j
363
364 !$omp parallel do private(i, j)
365 do i = 1, n_mask
366 j = mask(i)
367 a(i) = b(j)
368 end do
369 !$omp end parallel do
370
371 end subroutine masked_gather_copy_0
372
382 subroutine face_masked_gather_copy_0(a, b, mask, facet, lx, ly, lz, n_mask)
383 integer, intent(in) :: lx, ly, lz, n_mask
384 real(kind=rp), dimension(n_mask), intent(inout) :: a
385 real(kind=rp), dimension(:, :, :, :), intent(in) :: b
386 integer, dimension(0:n_mask), intent(in) :: mask
387 integer, dimension(0:n_mask), intent(in) :: facet
388 integer :: l
389 integer :: idx(4)
390
391 !$omp parallel do private(l, idx)
392 do l = 1, n_mask
393 idx = nonlinear_index(mask(l), lx, ly, lz)
394
395 select case (facet(l))
396 case (1, 2)
397 a(l) = b(idx(2), idx(3), facet(l), idx(4))
398 case (3, 4)
399 a(l) = b(idx(1), idx(3), facet(l), idx(4))
400 case (5, 6)
401 a(l) = b(idx(1), idx(2), facet(l), idx(4))
402 end select
403 end do
404 !$omp end parallel do
405
406 end subroutine face_masked_gather_copy_0
407
417 subroutine masked_gather_copy(a, b, mask, n, n_mask)
418 integer, intent(in) :: n, n_mask
419 real(kind=rp), dimension(n), intent(in) :: b
420 real(kind=rp), dimension(n_mask), intent(inout) :: a
421 integer, dimension(n_mask) :: mask
422 integer :: i, j
423
424 !$omp parallel do private(i, j)
425 do i = 1, n_mask
426 j = mask(i)
427 a(i) = b(j)
428 end do
429 !$omp end parallel do
430
431 end subroutine masked_gather_copy
432
442 subroutine masked_scatter_copy_0(a, b, mask, n, n_mask)
443 integer, intent(in) :: n, n_mask
444 real(kind=rp), dimension(n_mask), intent(in) :: b
445 real(kind=rp), dimension(n), intent(inout) :: a
446 integer, dimension(0:n_mask) :: mask
447 integer :: i, j
448
449 !$omp parallel do private(i, j)
450 do i = 1, n_mask
451 j = mask(i)
452 a(j) = b(i)
453 end do
454 !$omp end parallel do
455
456 end subroutine masked_scatter_copy_0
457
467 subroutine masked_scatter_copy(a, b, mask, n, n_mask)
468 integer, intent(in) :: n, n_mask
469 real(kind=rp), dimension(n_mask), intent(in) :: b
470 real(kind=rp), dimension(n), intent(inout) :: a
471 integer, dimension(n_mask) :: mask
472 integer :: i, j
473
474 !$omp parallel do private(i, j)
475 do i = 1, n_mask
476 j = mask(i)
477 a(j) = b(i)
478 end do
479 !$omp end parallel do
480
481 end subroutine masked_scatter_copy
482
485 subroutine cfill_mask(a, c, n, mask, n_mask)
486 integer, intent(in) :: n, n_mask
487 real(kind=rp), dimension(n), intent(inout) :: a
488 real(kind=rp), intent(in) :: c
489 integer, dimension(n_mask), intent(in) :: mask
490 integer :: i
491
492 !$omp parallel do
493 do i = 1, n_mask
494 a(mask(i)) = c
495 end do
496 !$omp end parallel do
497
498 end subroutine cfill_mask
499
501 subroutine cmult(a, c, n)
502 integer, intent(in) :: n
503 real(kind=rp), dimension(n), intent(inout) :: a
504 real(kind=rp), intent(in) :: c
505 integer :: i
506
507 !$omp parallel do
508 do i = 1, n
509 a(i) = c * a(i)
510 end do
511 !$omp end parallel do
512
513 end subroutine cmult
514
516 subroutine cmult2(a, b, c, n)
517 integer, intent(in) :: n
518 real(kind=rp), dimension(n), intent(inout) :: a
519 real(kind=rp), dimension(n), intent(in) :: b
520 real(kind=rp), intent(in) :: c
521 integer :: i
522
523 !$omp parallel do
524 do i = 1, n
525 a(i) = c * b(i)
526 end do
527 !$omp end parallel do
528
529 end subroutine cmult2
530
532 subroutine cdiv(a, c, n)
533 integer, intent(in) :: n
534 real(kind=rp), dimension(n), intent(inout) :: a
535 real(kind=rp), intent(in) :: c
536 integer :: i
537
538 !$omp parallel do
539 do i = 1, n
540 a(i) = c / a(i)
541 end do
542 !$omp end parallel do
543
544 end subroutine cdiv
545
547 subroutine cdiv2(a, b, c, n)
548 integer, intent(in) :: n
549 real(kind=rp), dimension(n), intent(inout) :: a
550 real(kind=rp), dimension(n), intent(in) :: b
551 real(kind=rp), intent(in) :: c
552 integer :: i
553
554 !$omp parallel do
555 do i = 1, n
556 a(i) = c / b(i)
557 end do
558 !$omp end parallel do
559
560 end subroutine cdiv2
561
563 subroutine cadd(a, s, n)
564 integer, intent(in) :: n
565 real(kind=rp), dimension(n), intent(inout) :: a
566 real(kind=rp), intent(in) :: s
567 integer :: i
568
569 !$omp parallel do
570 do i = 1, n
571 a(i) = a(i) + s
572 end do
573 !$omp end parallel do
574
575 end subroutine cadd
576
578 subroutine cadd2(a, b, s, n)
579 integer, intent(in) :: n
580 real(kind=rp), dimension(n), intent(inout) :: a
581 real(kind=rp), dimension(n), intent(in) :: b
582 real(kind=rp), intent(in) :: s
583 integer :: i
584
585 !$omp parallel do
586 do i = 1,n
587 a(i) = b(i) + s
588 end do
589 !$omp end parallel do
590
591 end subroutine cadd2
592
594 subroutine cfill(a, c, n)
595 integer, intent(in) :: n
596 real(kind=rp), dimension(n), intent(inout) :: a
597 real(kind=rp), intent(in) :: c
598 integer :: i
599
600 !$omp parallel do
601 do i = 1, n
602 a(i) = c
603 end do
604 !$omp end parallel do
605
606 end subroutine cfill
607
609 subroutine cwrap(a, min_val, max_val, n)
610 integer, intent(in) :: n
611 real(kind=rp), dimension(n), intent(inout) :: a
612 real(kind=rp), intent(in) :: min_val, max_val
613 integer :: i
614
615 if (n .lt. 1 .or. max_val .le. min_val) return
616
617 !$omp parallel do
618 do i = 1, n
619 a(i) = modulo(a(i) - min_val, max_val - min_val) + min_val
620 end do
621 !$omp end parallel do
622
623 end subroutine cwrap
624
626 function glsum(a, n)
627 integer, intent(in) :: n
628 real(kind=rp), dimension(n) :: a
629 real(kind=rp) :: glsum
630 real(kind=xp) :: tmp
631 integer :: i, ierr
632
633 tmp = 0.0_rp
634 !$omp parallel do reduction(+:tmp)
635 do i = 1, n
636 tmp = tmp + a(i)
637 end do
638 !$omp end parallel do
639
640 call mpi_allreduce(mpi_in_place, tmp, 1, &
641 mpi_extra_precision, mpi_sum, neko_comm, ierr)
642 glsum = tmp
643
644 end function glsum
645
647 function glmax(a, n)
648 integer, intent(in) :: n
649 real(kind=rp), dimension(n) :: a
650 real(kind=rp) :: tmp, glmax
651 integer :: i, ierr
652
653 tmp = -huge(0.0_rp)
654 !$omp parallel do reduction(max:tmp)
655 do i = 1, n
656 tmp = max(tmp,a(i))
657 end do
658 !$omp end parallel do
659
660 call mpi_allreduce(tmp, glmax, 1, &
661 mpi_real_precision, mpi_max, neko_comm, ierr)
662
663 end function glmax
664
666 function glimax(a, n)
667 integer, intent(in) :: n
668 integer, dimension(n) :: a
669 integer :: tmp, glimax
670 integer :: i, ierr
671
672 tmp = -huge(0)
673 !$omp parallel do reduction(max:tmp)
674 do i = 1, n
675 tmp = max(tmp,a(i))
676 end do
677 !$omp end parallel do
678
679 call mpi_allreduce(tmp, glimax, 1, &
680 mpi_integer, mpi_max, neko_comm, ierr)
681
682 end function glimax
683
685 function glmin(a, n)
686 integer, intent(in) :: n
687 real(kind=rp), dimension(n) :: a
688 real(kind=rp) :: tmp, glmin
689 integer :: i, ierr
690
691 tmp = huge(0.0_rp)
692 !$omp parallel do reduction(min:tmp)
693 do i = 1, n
694 tmp = min(tmp,a(i))
695 end do
696 !$omp end parallel do
697
698 call mpi_allreduce(tmp, glmin, 1, &
699 mpi_real_precision, mpi_min, neko_comm, ierr)
700
701 end function glmin
702
704 function glimin(a, n)
705 integer, intent(in) :: n
706 integer, dimension(n) :: a
707 integer :: tmp, glimin
708 integer :: i, ierr
709
710 tmp = huge(0)
711 !$omp parallel do reduction(min:tmp)
712 do i = 1, n
713 tmp = min(tmp,a(i))
714 end do
715 !$omp end parallel do
716
717 call mpi_allreduce(tmp, glimin, 1, &
718 mpi_integer, mpi_min, neko_comm, ierr)
719
720 end function glimin
721
723 subroutine chsign(a, n)
724 integer, intent(in) :: n
725 real(kind=rp), dimension(n), intent(inout) :: a
726 integer :: i
727
728 !$omp parallel do
729 do i = 1, n
730 a(i) = -a(i)
731 end do
732 !$omp end parallel do
733
734 end subroutine chsign
735
737 function vlmax(vec,n) result(tmax)
738 integer :: n, i
739 real(kind=rp), intent(in) :: vec(n)
740 real(kind=rp) :: tmax
741
742 tmax = real(-99d20, rp)
743 !$omp parallel do reduction(max:tmax)
744 do i = 1, n
745 tmax = max(tmax, vec(i))
746 end do
747 !$omp end parallel do
748
749 end function vlmax
750
752 function vlmin(vec,n) result(tmin)
753 integer, intent(in) :: n
754 real(kind=rp), intent(in) :: vec(n)
755 real(kind=rp) :: tmin
756 integer :: i
757
758 tmin = real(99.0e20, rp)
759 !$omp parallel do reduction(min:tmin)
760 do i = 1, n
761 tmin = min(tmin, vec(i))
762 end do
763 !$omp end parallel do
764
765 end function vlmin
766
768 subroutine invcol1(a, n)
769 integer, intent(in) :: n
770 real(kind=rp), dimension(n), intent(inout) :: a
771 integer :: i
772
773 !$omp parallel do
774 do i = 1, n
775 a(i) = 1.0_xp / real(a(i), xp)
776 end do
777 !$omp end parallel do
778
779 end subroutine invcol1
780
782 subroutine invcol3(a, b, c, n)
783 integer, intent(in) :: n
784 real(kind=rp), dimension(n), intent(inout) :: a
785 real(kind=rp), dimension(n), intent(in) :: b, c
786 integer :: i
787
788 !$omp parallel do
789 do i = 1, n
790 a(i) = real(b(i), xp) / c(i)
791 end do
792 !$omp end parallel do
793
794 end subroutine invcol3
795
797 subroutine invers2(a, b, n)
798 integer, intent(in) :: n
799 real(kind=rp), dimension(n), intent(inout) :: a
800 real(kind=rp), dimension(n), intent(in) :: b
801 integer :: i
802
803 !$omp parallel do
804 do i = 1, n
805 a(i) = 1.0_xp / real(b(i), xp)
806 end do
807 !$omp end parallel do
808
809 end subroutine invers2
810
813 subroutine vcross(u1, u2, u3, v1, v2, v3, w1, w2, w3, n)
814 integer, intent(in) :: n
815 real(kind=rp), dimension(n), intent(in) :: v1, v2, v3
816 real(kind=rp), dimension(n), intent(in) :: w1, w2, w3
817 real(kind=rp), dimension(n), intent(out) :: u1, u2, u3
818 integer :: i
819
820 !$omp parallel do
821 do i = 1, n
822 u1(i) = v2(i)*w3(i) - v3(i)*w2(i)
823 u2(i) = v3(i)*w1(i) - v1(i)*w3(i)
824 u3(i) = v1(i)*w2(i) - v2(i)*w1(i)
825 end do
826 !$omp end parallel do
827
828 end subroutine vcross
829
832 subroutine vdot2(dot, u1, u2, v1, v2, n)
833 integer, intent(in) :: n
834 real(kind=rp), dimension(n), intent(in) :: u1, u2
835 real(kind=rp), dimension(n), intent(in) :: v1, v2
836 real(kind=rp), dimension(n), intent(out) :: dot
837 integer :: i
838
839 !$omp parallel do
840 do i = 1, n
841 dot(i) = u1(i)*v1(i) + u2(i)*v2(i)
842 end do
843 !$omp end parallel do
844
845 end subroutine vdot2
846
849 subroutine vdot3(dot, u1, u2, u3, v1, v2, v3, n)
850 integer, intent(in) :: n
851 real(kind=rp), dimension(n), intent(in) :: u1, u2, u3
852 real(kind=rp), dimension(n), intent(in) :: v1, v2, v3
853 real(kind=rp), dimension(n), intent(out) :: dot
854 integer :: i
855
856 !$omp parallel do
857 do i = 1, n
858 dot(i) = u1(i)*v1(i) + u2(i)*v2(i) + u3(i)*v3(i)
859 end do
860 !$omp end parallel do
861
862 end subroutine vdot3
863
865 function vlsc3(u, v, w, n) result(s)
866 integer, intent(in) :: n
867 real(kind=rp), dimension(n), intent(in) :: u, v, w
868 real(kind=rp) :: s
869 integer :: i
870
871 s = 0.0_rp
872 !$omp parallel do reduction(+:s)
873 do i = 1, n
874 s = s + u(i)*v(i)*w(i)
875 end do
876 !$omp end parallel do
877
878 end function vlsc3
879
881 function vlsc2(u, v, n) result(s)
882 integer, intent(in) :: n
883 real(kind=rp), dimension(n), intent(in) :: u, v
884 real(kind=rp) :: s
885 integer :: i
886
887 s = 0.0_rp
888 !$omp parallel do reduction(+:s)
889 do i = 1, n
890 s = s + u(i)*v(i)
891 end do
892 !$omp end parallel do
893
894 end function vlsc2
895
897 subroutine add2(a, b, n)
898 integer, intent(in) :: n
899 real(kind=rp), dimension(n), intent(inout) :: a
900 real(kind=rp), dimension(n), intent(in) :: b
901 integer :: i
902
903 !$omp parallel do
904 do i = 1, n
905 a(i) = a(i) + b(i)
906 end do
907 !$omp end parallel do
908
909 end subroutine add2
910
912 subroutine add3(a, b, c, n)
913 integer, intent(in) :: n
914 real(kind=rp), dimension(n), intent(inout) :: a
915 real(kind=rp), dimension(n), intent(in) :: b
916 real(kind=rp), dimension(n), intent(in) :: c
917 integer :: i
918
919 !$omp parallel do
920 do i = 1, n
921 a(i) = b(i) + c(i)
922 end do
923 !$omp end parallel do
924
925 end subroutine add3
926
928 subroutine add4(a, b, c, d, n)
929 integer, intent(in) :: n
930 real(kind=rp), dimension(n), intent(out) :: a
931 real(kind=rp), dimension(n), intent(in) :: d
932 real(kind=rp), dimension(n), intent(in) :: c
933 real(kind=rp), dimension(n), intent(in) :: b
934 integer :: i
935
936 !$omp parallel do
937 do i = 1, n
938 a(i) = b(i) + c(i) + d(i)
939 end do
940 !$omp end parallel do
941
942 end subroutine add4
943
945 subroutine sub2(a, b, n)
946 integer, intent(in) :: n
947 real(kind=rp), dimension(n), intent(inout) :: a
948 real(kind=rp), dimension(n), intent(in) :: b
949 integer :: i
950
951 !$omp parallel do
952 do i = 1, n
953 a(i) = a(i) - b(i)
954 end do
955 !$omp end parallel do
956
957 end subroutine sub2
958
960 subroutine sub3(a, b, c, n)
961 integer, intent(in) :: n
962 real(kind=rp), dimension(n), intent(inout) :: a
963 real(kind=rp), dimension(n), intent(in) :: b
964 real(kind=rp), dimension(n), intent(in) :: c
965 integer :: i
966
967 !$omp parallel do
968 do i = 1, n
969 a(i) = b(i) - c(i)
970 end do
971 !$omp end parallel do
972
973 end subroutine sub3
974
975
978 subroutine add2s1(a, b, c1, n)
979 integer, intent(in) :: n
980 real(kind=rp), dimension(n), intent(inout) :: a
981 real(kind=rp), dimension(n), intent(in) :: b
982 real(kind=rp), intent(in) :: c1
983 integer :: i
984
985 !$omp parallel do
986 do i = 1, n
987 a(i) = c1 * a(i) + b(i)
988 end do
989 !$omp end parallel do
990
991 end subroutine add2s1
992
995 subroutine add2s2(a, b, c1, n)
996 integer, intent(in) :: n
997 real(kind=rp), dimension(n), intent(inout) :: a
998 real(kind=rp), dimension(n), intent(in) :: b
999 real(kind=rp), intent(in) :: c1
1000 integer :: i
1001
1002 !$omp parallel do
1003 do i = 1, n
1004 a(i) = a(i) + c1 * b(i)
1005 end do
1006 !$omp end parallel do
1007
1008 end subroutine add2s2
1009
1011 subroutine addsqr2s2(a, b, c1, n)
1012 integer, intent(in) :: n
1013 real(kind=rp), dimension(n), intent(inout) :: a
1014 real(kind=rp), dimension(n), intent(in) :: b
1015 real(kind=rp), intent(in) :: c1
1016 integer :: i
1017
1018 !$omp parallel do
1019 do i = 1, n
1020 a(i) = a(i) + c1 * ( b(i) * b(i) )
1021 end do
1022 !$omp end parallel do
1023
1024 end subroutine addsqr2s2
1025
1027 subroutine invcol2(a, b, n)
1028 integer, intent(in) :: n
1029 real(kind=rp), dimension(n), intent(inout) :: a
1030 real(kind=rp), dimension(n), intent(in) :: b
1031 integer :: i
1032
1033 !$omp parallel do
1034 do i = 1, n
1035 a(i) = real(a(i), xp) / b(i)
1036 end do
1037 !$omp end parallel do
1038
1039 end subroutine invcol2
1040
1041
1043 subroutine col2(a, b, n)
1044 integer, intent(in) :: n
1045 real(kind=rp), dimension(n), intent(inout) :: a
1046 real(kind=rp), dimension(n), intent(in) :: b
1047 integer :: i
1048
1049 !$omp parallel do
1050 do i = 1, n
1051 a(i) = a(i) * b(i)
1052 end do
1053 !$omp end parallel do
1054
1055 end subroutine col2
1056
1058 subroutine col3(a, b, c, n)
1059 integer, intent(in) :: n
1060 real(kind=rp), dimension(n), intent(inout) :: a
1061 real(kind=rp), dimension(n), intent(in) :: b
1062 real(kind=rp), dimension(n), intent(in) :: c
1063 integer :: i
1064
1065 !$omp parallel do
1066 do i = 1, n
1067 a(i) = b(i) * c(i)
1068 end do
1069 !$omp end parallel do
1070
1071 end subroutine col3
1072
1074 subroutine subcol3(a, b, c, n)
1075 integer, intent(in) :: n
1076 real(kind=rp), dimension(n), intent(inout) :: a
1077 real(kind=rp), dimension(n), intent(in) :: b
1078 real(kind=rp), dimension(n), intent(in) :: c
1079 integer :: i
1080
1081 !$omp parallel do
1082 do i = 1, n
1083 a(i) = a(i) - b(i) * c(i)
1084 end do
1085 !$omp end parallel do
1086
1087 end subroutine subcol3
1088
1090 subroutine add3s2(a, b, c, c1, c2 ,n)
1091 integer, intent(in) :: n
1092 real(kind=rp), dimension(n), intent(inout) :: a
1093 real(kind=rp), dimension(n), intent(in) :: b
1094 real(kind=rp), dimension(n), intent(in) :: c
1095 real(kind=rp), intent(in) :: c1, c2
1096 integer :: i
1097
1098 !$omp parallel do
1099 do i = 1, n
1100 a(i) = c1 * b(i) + c2 * c(i)
1101 end do
1102 !$omp end parallel do
1103
1104 end subroutine add3s2
1105
1107 subroutine add4s3(a, b, c, d, c1, c2, c3, n)
1108 integer, intent(in) :: n
1109 real(kind=rp), dimension(n), intent(inout) :: a
1110 real(kind=rp), dimension(n), intent(in) :: b
1111 real(kind=rp), dimension(n), intent(in) :: c
1112 real(kind=rp), dimension(n), intent(in) :: d
1113 real(kind=rp), intent(in) :: c1, c2, c3
1114 integer :: i
1115
1116 !$omp parallel do
1117 do i = 1, n
1118 a(i) = c1 * b(i) + c2 * c(i) + c3 * d(i)
1119 end do
1120 !$omp end parallel do
1121
1122 end subroutine add4s3
1123
1125 subroutine add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
1126 integer, intent(in) :: n
1127 real(kind=rp), dimension(n), intent(inout) :: a
1128 real(kind=rp), dimension(n), intent(in) :: b
1129 real(kind=rp), dimension(n), intent(in) :: c
1130 real(kind=rp), dimension(n), intent(in) :: d
1131 real(kind=rp), dimension(n), intent(in) :: e
1132 real(kind=rp), intent(in) :: c1, c2, c3, c4
1133 integer :: i
1134
1135 !$omp parallel do
1136 do i = 1, n
1137 a(i) = a(i) + c1 * b(i) + c2 * c(i) + c3 * d(i) + c4 * e(i)
1138 end do
1139 !$omp end parallel do
1140
1141 end subroutine add5s4
1142
1144 subroutine subcol4(a, b, c, d, n)
1145 integer, intent(in) :: n
1146 real(kind=rp), dimension(n), intent(inout) :: a
1147 real(kind=rp), dimension(n), intent(in) :: b
1148 real(kind=rp), dimension(n), intent(in) :: c
1149 real(kind=rp), dimension(n), intent(in) :: d
1150 integer :: i
1151
1152 !$omp parallel do
1153 do i = 1, n
1154 a(i) = a(i) - b(i) * c(i) * d(i)
1155 end do
1156 !$omp end parallel do
1157
1158 end subroutine subcol4
1159
1161 subroutine addcol3(a, b, c, n)
1162 integer, intent(in) :: n
1163 real(kind=rp), dimension(n), intent(inout) :: a
1164 real(kind=rp), dimension(n), intent(in) :: b
1165 real(kind=rp), dimension(n), intent(in) :: c
1166 integer :: i
1167
1168 !$omp parallel do
1169 do i = 1, n
1170 a(i) = a(i) + b(i) * c(i)
1171 end do
1172 !$omp end parallel do
1173
1174 end subroutine addcol3
1175
1177 subroutine addcol4(a, b, c, d, n)
1178 integer, intent(in) :: n
1179 real(kind=rp), dimension(n), intent(inout) :: a
1180 real(kind=rp), dimension(n), intent(in) :: b
1181 real(kind=rp), dimension(n), intent(in) :: c
1182 real(kind=rp), dimension(n), intent(in) :: d
1183 integer :: i
1184
1185 !$omp parallel do
1186 do i = 1, n
1187 a(i) = a(i) + b(i) * c(i) * d(i)
1188 end do
1189 !$omp end parallel do
1190
1191 end subroutine addcol4
1192
1194 subroutine addcol3s2(a, b, c, s, n)
1195 integer, intent(in) :: n
1196 real(kind=rp), dimension(n), intent(inout) :: a
1197 real(kind=rp), dimension(n), intent(in) :: b
1198 real(kind=rp), dimension(n), intent(in) :: c
1199 real(kind=rp), intent(in) :: s
1200 integer :: i
1201
1202 !$omp parallel do
1203 do i = 1, n
1204 a(i) = a(i) + s * b(i) * c(i)
1205 end do
1206 !$omp end parallel do
1207
1208 end subroutine addcol3s2
1209
1211 subroutine ascol5(a, b, c, d, e, n)
1212 integer, intent(in) :: n
1213 real(kind=rp), dimension(n), intent(inout) :: a
1214 real(kind=rp), dimension(n), intent(in) :: b
1215 real(kind=rp), dimension(n), intent(in) :: c
1216 real(kind=rp), dimension(n), intent(in) :: d
1217 real(kind=rp), dimension(n), intent(in) :: e
1218 integer :: i
1219
1220 !$omp parallel do
1221 do i = 1, n
1222 a(i) = b(i)*c(i) - d(i)*e(i)
1223 end do
1224 !$omp end parallel do
1225
1226 end subroutine ascol5
1227
1229 subroutine p_update(a, b, c, c1, c2, n)
1230 integer, intent(in) :: n
1231 real(kind=rp), dimension(n), intent(inout) :: a
1232 real(kind=rp), dimension(n), intent(in) :: b
1233 real(kind=rp), dimension(n), intent(in) :: c
1234 real(kind=rp), intent(in) :: c1, c2
1235 integer :: i
1236
1237 !$omp parallel do
1238 do i = 1, n
1239 a(i) = b(i) + c1*(a(i)-c2*c(i))
1240 end do
1241 !$omp end parallel do
1242
1243 end subroutine p_update
1244
1246 subroutine x_update(a, b, c, c1, c2, n)
1247 integer, intent(in) :: n
1248 real(kind=rp), dimension(n), intent(inout) :: a
1249 real(kind=rp), dimension(n), intent(in) :: b
1250 real(kind=rp), dimension(n), intent(in) :: c
1251 real(kind=rp), intent(in) :: c1, c2
1252 integer :: i
1253
1254 !$omp parallel do
1255 do i = 1, n
1256 a(i) = a(i) + c1*b(i)+c2*c(i)
1257 end do
1258 !$omp end parallel do
1259
1260 end subroutine x_update
1261
1263 function glsc2(a, b, n)
1264 integer, intent(in) :: n
1265 real(kind=rp), dimension(n), intent(in) :: a
1266 real(kind=rp), dimension(n), intent(in) :: b
1267 real(kind=rp) :: glsc2
1268 real(kind=xp) :: tmp
1269 integer :: i, ierr
1270
1271 tmp = 0.0_xp
1272 !$omp parallel do reduction(+:tmp)
1273 do i = 1, n
1274 tmp = tmp + a(i) * b(i)
1275 end do
1276 !$omp end parallel do
1277
1278 call mpi_allreduce(mpi_in_place, tmp, 1, &
1279 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1280 glsc2 = tmp
1281 end function glsc2
1282
1284 function glsc3(a, b, c, n)
1285 integer, intent(in) :: n
1286 real(kind=rp), dimension(n), intent(in) :: a
1287 real(kind=rp), dimension(n), intent(in) :: b
1288 real(kind=rp), dimension(n), intent(in) :: c
1289 real(kind=rp) :: glsc3
1290 real(kind=xp) :: tmp
1291 integer :: i, ierr
1292
1293 tmp = 0.0_xp
1294 !$omp parallel do reduction(+:tmp)
1295 do i = 1, n
1296 tmp = tmp + a(i) * b(i) * c(i)
1297 end do
1298 !$omp end parallel do
1299
1300 call mpi_allreduce(mpi_in_place, tmp, 1, &
1301 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1302 glsc3 = tmp
1303
1304 end function glsc3
1305 function glsc4(a, b, c, d, n)
1306 integer, intent(in) :: n
1307 real(kind=rp), dimension(n), intent(in) :: a
1308 real(kind=rp), dimension(n), intent(in) :: b
1309 real(kind=rp), dimension(n), intent(in) :: c
1310 real(kind=rp), dimension(n), intent(in) :: d
1311 real(kind=rp) :: glsc4
1312 real(kind=xp) :: tmp
1313 integer :: i, ierr
1314
1315 tmp = 0.0_xp
1316 !$omp parallel do reduction(+:tmp)
1317 do i = 1, n
1318 tmp = tmp + a(i) * b(i) * c(i) * d(i)
1319 end do
1320 !$omp end parallel do
1321
1322 call mpi_allreduce(mpi_in_place, tmp, 1, &
1323 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1324 glsc4 = tmp
1325
1326 end function glsc4
1327
1330 function glsubnorm(a, b, n)
1331 integer, intent(in) :: n
1332 real(kind=rp), dimension(n), intent(in) :: a
1333 real(kind=rp), dimension(n), intent(in) :: b
1334 real(kind=rp) :: glsubnorm
1335 real(kind=xp) :: tmp
1336 integer :: i, ierr
1337
1338 tmp = 0.0_xp
1339 !$omp parallel do reduction(+:tmp)
1340 do i = 1, n
1341 tmp = tmp + (a(i) - b(i))**2
1342 end do
1343 !$omp end parallel do
1344
1345 call mpi_allreduce(mpi_in_place, tmp, 1, &
1346 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1347 glsubnorm = sqrt(tmp)
1348
1349 end function glsubnorm
1350
1356 subroutine sortrp(a, ind, n)
1357 integer, intent(in) :: n
1358 real(kind=rp), intent(inout) :: a(n)
1359 integer, intent(out) :: ind(n)
1360 real(kind=rp) :: aa
1361 integer :: j, ir, i, ii, l
1362
1363 do j = 1, n
1364 ind(j) = j
1365 end do
1366
1367 if (n .le. 1) return
1368
1369
1370 l = n/2+1
1371 ir = n
1372 do while (.true.)
1373 if (l .gt. 1) then
1374 l = l-1
1375 aa = a(l)
1376 ii = ind(l)
1377 else
1378 aa = a(ir)
1379 ii = ind(ir)
1380 a(ir) = a(1)
1381 ind(ir) = ind(1)
1382 ir = ir - 1
1383 if (ir .eq. 1) then
1384 a(1) = aa
1385 ind(1) = ii
1386 return
1387 end if
1388 end if
1389 i = l
1390 j = l+l
1391 do while (j .le. ir)
1392 if (j .lt. ir) then
1393 if ( a(j) .lt. a(j+1) ) j = j + 1
1394 end if
1395 if (aa .lt. a(j)) then
1396 a(i) = a(j)
1397 ind(i) = ind(j)
1398 i = j
1399 j = j+j
1400 else
1401 j = ir+1
1402 end if
1403 end do
1404 a(i) = aa
1405 ind(i) = ii
1406 end do
1407 end subroutine sortrp
1408
1414 subroutine sorti4(a, ind, n)
1415 integer, intent(in) :: n
1416 integer(i4), intent(inout) :: a(n)
1417 integer, intent(out) :: ind(n)
1418 integer(i4) :: aa
1419 integer :: j, ir, i, ii, l
1420
1421 do j = 1, n
1422 ind(j) = j
1423 end do
1424
1425 if (n .le. 1) return
1426
1427 l = n/2+1
1428 ir = n
1429 do while (.true.)
1430 if (l .gt. 1) then
1431 l = l - 1
1432 aa = a(l)
1433 ii = ind(l)
1434 else
1435 aa = a(ir)
1436 ii = ind(ir)
1437 a(ir) = a( 1)
1438 ind(ir) = ind( 1)
1439 ir = ir - 1
1440 if (ir .eq. 1) then
1441 a(1) = aa
1442 ind(1) = ii
1443 return
1444 end if
1445 end if
1446 i = l
1447 j = l + l
1448 do while (j .le. ir)
1449 if (j .lt. ir) then
1450 if ( a(j) .lt. a(j + 1) ) j = j + 1
1451 end if
1452 if (aa .lt. a(j)) then
1453 a(i) = a(j)
1454 ind(i) = ind(j)
1455 i = j
1456 j = j + j
1457 else
1458 j = ir + 1
1459 end if
1460 end do
1461 a(i) = aa
1462 ind(i) = ii
1463 end do
1464 end subroutine sorti4
1465
1470 subroutine swapdp(b, ind, n)
1471 integer, intent(in) :: n
1472 real(kind=rp), intent(inout) :: b(n)
1473 integer, intent(in) :: ind(n)
1474 real(kind=rp) :: temp(n)
1475 integer :: i, jj
1476
1477 !$omp parallel private(i, jj)
1478 !$omp do
1479 do i = 1, n
1480 temp(i) = b(i)
1481 end do
1482 !$omp end do
1483 !$omp do
1484 do i = 1, n
1485 jj = ind(i)
1486 b(i) = temp(jj)
1487 end do
1488 !$omp end do
1489 !$omp end parallel
1490
1491 end subroutine swapdp
1492
1497 subroutine swapi4(b, ind, n)
1498 integer, intent(in) :: n
1499 integer(i4), intent(inout) :: b(n)
1500 integer, intent(in) :: ind(n)
1501 integer(i4) :: temp(n)
1502 integer :: i, jj
1503
1504 !$omp parallel private(i, jj)
1505 !$omp do
1506 do i = 1, n
1507 temp(i) = b(i)
1508 end do
1509 !$omp end do
1510 !$omp do
1511 do i = 1, n
1512 jj = ind(i)
1513 b(i) = temp(jj)
1514 end do
1515 !$omp end do
1516 !$omp end parallel
1517
1518 end subroutine swapi4
1519
1524 subroutine reorddp(b, ind, n)
1525 integer, intent(in) :: n
1526 real(kind=rp), intent(inout) :: b(n)
1527 integer, intent(in) :: ind(n)
1528 real(kind=rp) :: temp(n)
1529 integer :: i, jj
1530
1531 !$omp parallel private(i, jj)
1532 !$omp do
1533 do i = 1, n
1534 temp(i) = b(i)
1535 end do
1536 !$omp end do
1537 !$omp do
1538 do i = 1, n
1539 jj = ind(i)
1540 b(jj) = temp(i)
1541 end do
1542 !$omp end do
1543 !$omp end parallel
1544
1545 end subroutine reorddp
1546
1551 subroutine reordi4(b, ind, n)
1552 integer, intent(in) :: n
1553 integer(i4), intent(inout) :: b(n)
1554 integer, intent(in) :: ind(n)
1555 integer(i4) :: temp(n)
1556 integer :: i, jj
1557
1558 !$omp parallel private(i, jj)
1559 !$omp do
1560 do i = 1, n
1561 temp(i) = b(i)
1562 end do
1563 !$omp end do
1564 !$omp do
1565 do i = 1, n
1566 jj = ind(i)
1567 b(jj) = temp(i)
1568 end do
1569 !$omp end do
1570 !$omp end parallel
1571
1572 end subroutine reordi4
1573
1578 subroutine flipvdp(b, ind, n)
1579 integer, intent(in) :: n
1580 real(kind=rp), intent(inout) :: b(n)
1581 integer, intent(inout) :: ind(n)
1582 real(kind=rp) :: temp(n)
1583 integer :: tempind(n)
1584 integer :: i, jj
1585
1586 !$omp parallel private(i, jj)
1587 !$omp do
1588 do i = 1, n
1589 jj = n+1-i
1590 temp(jj) = b(i)
1591 tempind(jj) = ind(i)
1592 end do
1593 !$omp end do
1594 !$omp do
1595 do i = 1,n
1596 b(i) = temp(i)
1597 ind(i) = tempind(i)
1598 end do
1599 !$omp end do
1600 !$omp end parallel
1601
1602 end subroutine flipvdp
1603
1608 subroutine flipvi4(b, ind, n)
1609 integer, intent(in) :: n
1610 integer(i4), intent(inout) :: b(n)
1611 integer, intent(inout) :: ind(n)
1612 integer(i4) :: temp(n)
1613 integer :: tempind(n)
1614 integer :: i, jj
1615
1616 !$omp parallel private(i, jj)
1617 !$omp do
1618 do i = 1, n
1619 jj = n+1-i
1620 temp(jj) = b(i)
1621 tempind(jj) = ind(i)
1622 end do
1623 !$omp end do
1624 !$omp do
1625 do i = 1,n
1626 b(i) = temp(i)
1627 ind(i) = tempind(i)
1628 end do
1629 !$omp end do
1630 !$omp end parallel
1631
1632 end subroutine flipvi4
1633
1637 subroutine absval(a, n)
1638 integer, intent(in) :: n
1639 real(kind=rp), dimension(n), intent(inout) :: a
1640 integer :: i
1641
1642 !$omp parallel do
1643 do i = 1, n
1644 a(i) = abs(a(i))
1645 end do
1646 !$omp end parallel do
1647
1648 end subroutine absval
1649
1650 ! ========================================================================== !
1651 ! Point-wise operations
1652
1654 subroutine pwmax2(a, b, n)
1655 integer, intent(in) :: n
1656 real(kind=rp), dimension(n), intent(inout) :: a
1657 real(kind=rp), dimension(n), intent(in) :: b
1658 integer :: i
1659
1660 !$omp parallel do
1661 do i = 1, n
1662 a(i) = max(a(i), b(i))
1663 end do
1664 !$omp end parallel do
1665
1666 end subroutine pwmax2
1667
1669 subroutine pwmax3(a, b, c, n)
1670 integer, intent(in) :: n
1671 real(kind=rp), dimension(n), intent(inout) :: a
1672 real(kind=rp), dimension(n), intent(in) :: b, c
1673 integer :: i
1674
1675 !$omp parallel do
1676 do i = 1, n
1677 a(i) = max(b(i), c(i))
1678 end do
1679 !$omp end parallel do
1680
1681 end subroutine pwmax3
1682
1684 subroutine cpwmax2(a, b, n)
1685 integer, intent(in) :: n
1686 real(kind=rp), dimension(n), intent(inout) :: a
1687 real(kind=rp), intent(in) :: b
1688 integer :: i
1689
1690 !$omp parallel do
1691 do i = 1, n
1692 a(i) = max(a(i), b)
1693 end do
1694 !$omp end parallel do
1695
1696 end subroutine cpwmax2
1697
1699 subroutine cpwmax3(a, b, c, n)
1700 integer, intent(in) :: n
1701 real(kind=rp), dimension(n), intent(inout) :: a
1702 real(kind=rp), dimension(n), intent(in) :: b
1703 real(kind=rp), intent(in) :: c
1704 integer :: i
1705
1706 !$omp parallel do
1707 do i = 1, n
1708 a(i) = max(b(i), c)
1709 end do
1710 !$omp end parallel do
1711
1712 end subroutine cpwmax3
1713
1715 subroutine pwmin2(a, b, n)
1716 integer, intent(in) :: n
1717 real(kind=rp), dimension(n), intent(inout) :: a
1718 real(kind=rp), dimension(n), intent(in) :: b
1719 integer :: i
1720
1721 !$omp parallel do
1722 do i = 1, n
1723 a(i) = min(a(i), b(i))
1724 end do
1725 !$omp end parallel do
1726
1727 end subroutine pwmin2
1728
1730 subroutine pwmin3(a, b, c, n)
1731 integer, intent(in) :: n
1732 real(kind=rp), dimension(n), intent(inout) :: a
1733 real(kind=rp), dimension(n), intent(in) :: b, c
1734 integer :: i
1735
1736 !$omp parallel do
1737 do i = 1, n
1738 a(i) = min(b(i), c(i))
1739 end do
1740 !$omp end parallel do
1741
1742 end subroutine pwmin3
1743
1745 subroutine cpwmin2(a, b, n)
1746 integer, intent(in) :: n
1747 real(kind=rp), dimension(n), intent(inout) :: a
1748 real(kind=rp), intent(in) :: b
1749 integer :: i
1750
1751 !$omp parallel do
1752 do i = 1, n
1753 a(i) = min(a(i), b)
1754 end do
1755 !$omp end parallel do
1756
1757 end subroutine cpwmin2
1758
1760 subroutine cpwmin3(a, b, c, n)
1761 integer, intent(in) :: n
1762 real(kind=rp), dimension(n), intent(inout) :: a
1763 real(kind=rp), dimension(n), intent(in) :: b
1764 real(kind=rp), intent(in) :: c
1765 integer :: i
1766
1767 !$omp parallel do
1768 do i = 1, n
1769 a(i) = min(b(i), c)
1770 end do
1771 !$omp end parallel do
1772
1773 end subroutine cpwmin3
1774
1775 ! M33INV and M44INV by David G. Simpson pure function version from
1776 ! https://fortranwiki.org/fortran/show/Matrix+inversion
1777 ! Invert 3x3 matrix
1778 function matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33) &
1779 result(b)
1780 real(kind=rp), intent(in) :: a11, a12, a13, a21, a22, a23, a31, a32, a33
1781 real(xp) :: a(3,3) !! Matrix
1782 real(rp) :: b(3,3) !! Inverse matrix
1783 a(1,1) = a11
1784 a(1,2) = a12
1785 a(1,3) = a13
1786 a(2,1) = a21
1787 a(2,2) = a22
1788 a(2,3) = a23
1789 a(3,1) = a31
1790 a(3,2) = a32
1791 a(3,3) = a33
1792 b = matinv3(a)
1793 end function matinv39
1794
1799 function matinv3(A) result(B)
1800 !! Performs a direct calculation of the inverse of a 3×3 matrix.
1801 real(kind=xp), intent(in) :: a(3,3) !! Matrix
1802 real(kind=xp) :: b(3,3) !! Inverse matrix
1803 real(kind=xp) :: detinv
1804
1805 ! Calculate the inverse determinant of the matrix
1806 ! first index x,y,z, second r, s, t
1807 detinv = 1.0_xp / real(a(1,1)*a(2,2)*a(3,3) - a(1,1)*a(2,3)*a(3,2) &
1808 - a(1,2)*a(2,1)*a(3,3) + a(1,2)*a(2,3)*a(3,1)&
1809 + a(1,3)*a(2,1)*a(3,2) - a(1,3)*a(2,2)*a(3,1), xp)
1810 ! Calculate the inverse of the matrix
1811 ! first index r, s, t, second x, y, z
1812 b(1,1) = +detinv * (a(2,2)*a(3,3) - a(2,3)*a(3,2))
1813 b(2,1) = -detinv * (a(2,1)*a(3,3) - a(2,3)*a(3,1))
1814 b(3,1) = +detinv * (a(2,1)*a(3,2) - a(2,2)*a(3,1))
1815 b(1,2) = -detinv * (a(1,2)*a(3,3) - a(1,3)*a(3,2))
1816 b(2,2) = +detinv * (a(1,1)*a(3,3) - a(1,3)*a(3,1))
1817 b(3,2) = -detinv * (a(1,1)*a(3,2) - a(1,2)*a(3,1))
1818 b(1,3) = +detinv * (a(1,2)*a(2,3) - a(1,3)*a(2,2))
1819 b(2,3) = -detinv * (a(1,1)*a(2,3) - a(1,3)*a(2,1))
1820 b(3,3) = +detinv * (a(1,1)*a(2,2) - a(1,2)*a(2,1))
1821 end function matinv3
1822
1825 function math_stepf(x) result(val)
1826 real(kind=rp), intent(in) :: x
1827 real(kind=rp) :: val
1828 real(kind=rp), parameter :: xdmin = 0.0001_rp
1829 real(kind=rp), parameter :: xdmax = 0.9999_rp
1830 real(kind=rp) :: g
1831
1832 if (x <= xdmin) then
1833 ! Below the lower bound, the function is 0
1834 val = 0.0_rp
1835 else if (x >= xdmax) then
1836 ! Above the upper bound, the function is 1
1837 val = 1.0_rp
1838 else
1839 ! g(x) = 1/(x-1) + 1/x
1840 g = (1.0_rp / (x - 1.0_rp)) + (1.0_rp / x)
1841
1842 ! The sigmoid: S(x) = 1 / (1 + exp(g))
1843 val = 1.0_rp / (1.0_rp + exp(g))
1844 end if
1845 end function math_stepf
1846
1848 function math_dstepf(x) result(val)
1849 real(kind=rp), intent(in) :: x
1850 real(kind=rp) :: val
1851 real(kind=rp), parameter :: xdmin = 0.0001_rp
1852 real(kind=rp), parameter :: xdmax = 0.9999_rp
1853 real(kind=rp) :: arg, g, dg, s_val
1854
1855 if (x <= xdmin .or. x >= xdmax) then
1856 val = 0.0_rp
1857 else
1858 ! The step function is S(x) = 1 / (1 + exp(g(x)))
1859 ! where g(x) = 1/(x-1) + 1/x
1860 ! S'(x) = -S(x) * (1 - S(x)) * g'(x)
1861
1862 g = (1.0_rp / (x - 1.0_rp)) + (1.0_rp / x)
1863
1864 ! Derivative of g(x)
1865 dg = -(1.0_rp / ((x - 1.0_rp)**2)) - (1.0_rp / (x**2))
1866
1867 ! Recompute S(x) locally
1868 s_val = 1.0_rp / (1.0_rp + exp(g))
1869
1870 val = -s_val * (1.0_rp - s_val) * dg
1871 end if
1872 end function math_dstepf
1873
1874end module math
__inline__ __device__ void nonlinear_index(const int idx, const int lx, int *index)
Definition bc_utils.h:44
double real
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:52
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:44
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:53
Object for handling masks in Neko.
Definition mask.f90:34
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:502
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:517
subroutine, public row_zero(a, m, n, e)
Sets row e to 0 in matrix a.
Definition math.f90:261
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:1028
real(kind=rp) function, public vlsc2(u, v, n)
Compute multiplication sum .
Definition math.f90:882
pure logical function, public dabscmp(x, y, tol)
Return double precision absolute comparison .
Definition math.f90:135
real(kind=rp), parameter, public pi
Definition math.f90:76
pure logical function qabscmp(x, y, tol)
Return double precision absolute comparison .
Definition math.f90:150
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1285
subroutine, public ascol5(a, b, c, d, e, n)
Returns .
Definition math.f90:1212
subroutine, public addcol3s2(a, b, c, s, n)
Returns .
Definition math.f90:1195
subroutine, public masked_scatter_copy(a, b, mask, n, n_mask)
Scatter a contigous vector to masked positions in a target array .
Definition math.f90:468
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:798
subroutine, public cadd2(a, b, s, n)
Add a scalar to vector .
Definition math.f90:579
subroutine, public face_masked_gather_copy_0(a, b, mask, facet, lx, ly, lz, n_mask)
Gather values from a face-local SEM field to a reduced contiguous vector.
Definition math.f90:383
real(rp) function, dimension(3, 3), public matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33)
Definition math.f90:1780
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition math.f90:564
subroutine, public masked_copy(a, b, mask, n, n_mask)
Copy a masked vector .
Definition math.f90:333
subroutine reorddp(b, ind, n)
reorder double precision array - inverse of swap
Definition math.f90:1525
subroutine, public addsqr2s2(a, b, c1, n)
Returns .
Definition math.f90:1012
subroutine, public cwrap(a, min_val, max_val, n)
Wrap value around a range [min, max)
Definition math.f90:610
real(kind=rp) function, public glsc4(a, b, c, d, n)
Definition math.f90:1306
subroutine, public cdiv2(a, b, c, n)
Division of constant c by elements of a .
Definition math.f90:548
real(kind=rp) function, public math_stepf(x)
Smooth step function S(x) Returns 0 for x <= 0, 1 for x >= 1, and smooth transition in between.
Definition math.f90:1826
subroutine swapdp(b, ind, n)
sort double precision array acording to ind vector
Definition math.f90:1471
subroutine flipvi4(b, ind, n)
Flip single integer vector b and ind.
Definition math.f90:1609
subroutine, public add2s1(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
Definition math.f90:979
subroutine, public cpwmin2(a, b, n)
Point-wise minimum of scalar and vector .
Definition math.f90:1746
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1264
subroutine, public masked_scatter_copy_0(a, b, mask, n, n_mask)
Scatter a contigous vector to masked positions in a target array .
Definition math.f90:443
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:1075
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:275
subroutine flipvdp(b, ind, n)
Flip double precision vector b and ind.
Definition math.f90:1579
subroutine, public cpwmin3(a, b, c, n)
Point-wise minimum of scalar and vector .
Definition math.f90:1761
subroutine, public pwmax3(a, b, c, n)
Point-wise maximum of two vectors .
Definition math.f90:1670
subroutine, public masked_gather_copy(a, b, mask, n, n_mask)
Gather a masked vector to reduced contigous vector .
Definition math.f90:418
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:1247
subroutine, public add3(a, b, c, n)
Vector addition .
Definition math.f90:913
subroutine swapi4(b, ind, n)
sort single integer array acording to ind vector
Definition math.f90:1498
integer function, public glimin(a, n)
Min of an integer vector of length n.
Definition math.f90:705
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:627
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:961
subroutine, public addcol4(a, b, c, d, n)
Returns .
Definition math.f90:1178
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:898
subroutine, public cfill(a, c, n)
Set all elements to a constant c .
Definition math.f90:595
subroutine, public absval(a, n)
Take the absolute value of an array.
Definition math.f90:1638
subroutine, public invcol3(a, b, c, n)
Invert a vector .
Definition math.f90:783
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:1091
real(kind=xp) function, dimension(3, 3), public matinv3(a)
Performs a direct calculation of the inverse of a 3×3 matrix. M33INV and M44INV by David G....
Definition math.f90:1800
subroutine, public pwmax2(a, b, n)
Point-wise maximum of two vectors .
Definition math.f90:1655
subroutine, public pwmin2(a, b, n)
Point-wise minimum of two vectors .
Definition math.f90:1716
subroutine, public masked_gather_copy_0(a, b, mask, n, n_mask)
Gather a masked vector to reduced contigous vector .
Definition math.f90:358
subroutine, public subcol4(a, b, c, d, n)
Returns .
Definition math.f90:1145
subroutine sorti4(a, ind, n)
Heap Sort for single integer arrays.
Definition math.f90:1415
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:1162
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:769
subroutine, public cdiv(a, c, n)
Division of constant c by elements of a .
Definition math.f90:533
real(kind=rp), parameter, public neko_m_ln2
Definition math.f90:73
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:724
subroutine, public cpwmax3(a, b, c, n)
Point-wise maximum of scalar and vector .
Definition math.f90:1700
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:1044
subroutine, public izero(a, n)
Zero an integer vector.
Definition math.f90:247
real(kind=rp) function, public glmax(a, n)
Max of a vector of length n.
Definition math.f90:648
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:289
subroutine, public add4s3(a, b, c, d, c1, c2, c3, n)
Returns .
Definition math.f90:1108
subroutine, public add4(a, b, c, d, n)
Vector addition .
Definition math.f90:929
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
Definition math.f90:1059
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns .
Definition math.f90:1126
real(kind=rp) function, public math_dstepf(x)
Derivative of math_stepf with respect to x: d(stepf)/dx.
Definition math.f90:1849
pure logical function drelcmp(x, y, eps)
Return double precision relative comparison .
Definition math.f90:179
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:70
subroutine, public vdot3(dot, u1, u2, u3, v1, v2, v3, n)
Compute a dot product (3-d version) assuming vector components etc.
Definition math.f90:850
pure logical function, public sabscmp(x, y, tol)
Return single precision absolute comparison .
Definition math.f90:120
pure logical function qrelcmp(x, y, eps)
Return quad precision relative comparison .
Definition math.f90:194
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
real(kind=rp) function, public glsubnorm(a, b, n)
Returns the norm of the difference of two vectors .
Definition math.f90:1331
subroutine, public vdot2(dot, u1, u2, v1, v2, n)
Compute a dot product (2-d version) assuming vector components etc.
Definition math.f90:833
subroutine, public cpwmax2(a, b, n)
Point-wise maximum of scalar and vector .
Definition math.f90:1685
real(kind=rp) function, public vlmin(vec, n)
minimun value of a vector of length n
Definition math.f90:753
subroutine, public cfill_mask(a, c, n, mask, n_mask)
Fill a constant to a masked vector. .
Definition math.f90:486
real(kind=rp) function, public vlmax(vec, n)
maximum value of a vector of length n
Definition math.f90:738
integer function, public glimax(a, n)
Max of an integer vector of length n.
Definition math.f90:667
subroutine sortrp(a, ind, n)
Heap Sort for double precision arrays.
Definition math.f90:1357
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:946
subroutine, public pwmin3(a, b, c, n)
Point-wise minimum of two vectors .
Definition math.f90:1731
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:996
real(kind=rp) function, public glmin(a, n)
Min of a vector of length n.
Definition math.f90:686
subroutine, public masked_copy_0(a, b, mask, n, n_mask)
Copy a masked vector .
Definition math.f90:310
subroutine, public vcross(u1, u2, u3, v1, v2, v3, w1, w2, w3, n)
Compute a cross product assuming vector components etc.
Definition math.f90:814
pure logical function srelcmp(x, y, eps)
Return single precision relative comparison .
Definition math.f90:165
pure real(kind=rp) function, public lambert_w0(x, niter)
Approximate the principal real branch of the Lambert W function for non-negative real x.
Definition math.f90:212
real(kind=rp) function, public vlsc3(u, v, w, n)
Compute multiplication sum .
Definition math.f90:866
subroutine reordi4(b, ind, n)
reorder single integer array - inverse of swap
Definition math.f90:1552
subroutine, public p_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:1230
integer, parameter, public qp
Definition num_types.f90:10
integer, parameter, public i4
Definition num_types.f90:6
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public sp
Definition num_types.f90:8
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35
#define max(a, b)
Definition tensor.cu:40