Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
60module math
61 use num_types, only : rp, dp, sp, qp, i4, xp
63 use mpi_f08, only : mpi_min, mpi_max, mpi_sum, mpi_in_place, mpi_integer, &
64 mpi_allreduce
65 use utils, only : nonlinear_index
66 implicit none
67 private
68
70 real(kind=rp), public, parameter :: neko_eps = epsilon(1.0_rp)
71
73 real(kind=rp), public, parameter :: neko_m_ln2 = log(2.0_rp)
74
76 real(kind=rp), public, parameter :: pi = 4._rp*atan(1._rp)
77
78 interface abscmp
79 module procedure sabscmp, dabscmp, qabscmp
80 end interface abscmp
81
82 interface sort
83 module procedure sortrp, sorti4
84 end interface sort
85
86 interface swap
87 module procedure swapdp, swapi4
88 end interface swap
89
90 interface reord
91 module procedure reorddp, reordi4
92 end interface reord
93
94 interface flipv
95 module procedure flipvdp, flipvi4
96 end interface flipv
97
98 interface relcmp
99 module procedure srelcmp, drelcmp, qrelcmp
100 end interface relcmp
101
102 public :: abscmp, rzero, izero, row_zero, rone, copy, cmult, cadd, cfill, &
110 matinv39, &
115
116contains
117
119 pure function sabscmp(x, y, tol)
120 real(kind=sp), intent(in) :: x
121 real(kind=sp), intent(in) :: y
122 real(kind=sp), intent(in), optional :: tol
123 logical :: sabscmp
124
125 if (present(tol)) then
126 sabscmp = abs(x - y) .lt. tol
127 else
128 sabscmp = abs(x - y) .lt. neko_eps
129 end if
130
131 end function sabscmp
132
134 pure function dabscmp(x, y, tol)
135 real(kind=dp), intent(in) :: x
136 real(kind=dp), intent(in) :: y
137 real(kind=dp), intent(in), optional :: tol
138 logical :: dabscmp
139
140 if (present(tol)) then
141 dabscmp = abs(x - y) .lt. tol
142 else
143 dabscmp = abs(x - y) .lt. neko_eps
144 end if
145
146 end function dabscmp
147
149 pure function qabscmp(x, y, tol)
150 real(kind=qp), intent(in) :: x
151 real(kind=qp), intent(in) :: y
152 real(kind=qp), intent(in), optional :: tol
153 logical :: qabscmp
154
155 if (present(tol)) then
156 qabscmp = abs(x - y) .lt. tol
157 else
158 qabscmp = abs(x - y) .lt. neko_eps
159 end if
160
161 end function qabscmp
162
164 pure function srelcmp(x, y, eps)
165 real(kind=sp), intent(in) :: x
166 real(kind=sp), intent(in) :: y
167 real(kind=sp), intent(in), optional :: eps
168 logical :: srelcmp
169 if (present(eps)) then
170 srelcmp = abs(x - y) .le. eps*abs(y)
171 else
172 srelcmp = abs(x - y) .le. neko_eps*abs(y)
173 end if
174
175 end function srelcmp
176
178 pure function drelcmp(x, y, eps)
179 real(kind=dp), intent(in) :: x
180 real(kind=dp), intent(in) :: y
181 real(kind=dp), intent(in), optional :: eps
182 logical :: drelcmp
183 if (present(eps)) then
184 drelcmp = abs(x - y) .le. eps*abs(y)
185 else
186 drelcmp = abs(x - y) .le. neko_eps*abs(y)
187 end if
188
189 end function drelcmp
190
191
193 pure function qrelcmp(x, y, eps)
194 real(kind=qp), intent(in) :: x
195 real(kind=qp), intent(in) :: y
196 real(kind=qp), intent(in), optional :: eps
197 logical :: qrelcmp
198 if (present(eps)) then
199 qrelcmp = abs(x - y)/abs(y) .lt. eps
200 else
201 qrelcmp = abs(x - y)/abs(y) .lt. neko_eps
202 end if
203
204 end function qrelcmp
205
211 pure function lambert_w0(x, niter) result(w)
212 real(kind=rp), intent(in) :: x
213 integer, intent(in) :: niter
214 real(kind=rp) :: w
215 real(kind=rp) :: a
216 integer :: k
217
218 if (x == 0.0_rp) then
219 w = 0.0_rp
220 return
221 end if
222
223 a = 1.0_rp / (1.0_rp + 0.5_rp * log(1.0_rp + x))
224 w = log(1.0_rp + a * x)
225
226 do k = 1, max(niter, 0)
227 w = w / (1.0_rp + w) * (1.0_rp + log(x / w))
228 end do
229 end function lambert_w0
230
232 subroutine rzero(a, n)
233 integer, intent(in) :: n
234 real(kind=rp), dimension(n), intent(inout) :: a
235 integer :: i
236
237 do concurrent(i = 1:n)
238 a(i) = 0.0_rp
239 end do
240 end subroutine rzero
241
243 subroutine izero(a, n)
244 integer, intent(in) :: n
245 integer, dimension(n), intent(inout) :: a
246 integer :: i
247
248 do concurrent(i = 1:n)
249 a(i) = 0
250 end do
251 end subroutine izero
252
254 subroutine row_zero(a, m, n, e)
255 integer, intent(in) :: m, n, e
256 real(kind=rp), intent(inout) :: a(m,n)
257 integer :: j
258
259 do concurrent(j = 1:n)
260 a(e,j) = 0.0_rp
261 end do
262 end subroutine row_zero
263
265 subroutine rone(a, n)
266 integer, intent(in) :: n
267 real(kind=rp), dimension(n), intent(inout) :: a
268 integer :: i
269
270 do concurrent(i = 1:n)
271 a(i) = 1.0_rp
272 end do
273 end subroutine rone
274
276 subroutine copy(a, b, n)
277 integer, intent(in) :: n
278 real(kind=rp), dimension(n), intent(in) :: b
279 real(kind=rp), dimension(n), intent(inout) :: a
280 integer :: i
281
282 do concurrent(i = 1:n)
283 a(i) = b(i)
284 end do
285
286 end subroutine copy
287
295 subroutine masked_copy_0(a, b, mask, n, n_mask)
296 integer, intent(in) :: n, n_mask
297 real(kind=rp), dimension(n), intent(in) :: b
298 real(kind=rp), dimension(n), intent(inout) :: a
299 integer, dimension(0:n_mask) :: mask
300 integer :: i, j
301
302 do concurrent(i = 1:n_mask)
303 j = mask(i)
304 a(j) = b(j)
305 end do
306
307 end subroutine masked_copy_0
308
316 subroutine masked_copy(a, b, mask, n, n_mask)
317 integer, intent(in) :: n, n_mask
318 real(kind=rp), dimension(n), intent(in) :: b
319 real(kind=rp), dimension(n), intent(inout) :: a
320 integer, dimension(n_mask) :: mask
321 integer :: i, j
322
323 do concurrent(i = 1:n_mask)
324 j = mask(i)
325 a(j) = b(j)
326 end do
327
328 end subroutine masked_copy
329
339 subroutine masked_gather_copy_0(a, b, mask, n, n_mask)
340 integer, intent(in) :: n, n_mask
341 real(kind=rp), dimension(n), intent(in) :: b
342 real(kind=rp), dimension(n_mask), intent(inout) :: a
343 integer, dimension(0:n_mask) :: mask
344 integer :: i, j
345
346 do concurrent(i = 1:n_mask)
347 j = mask(i)
348 a(i) = b(j)
349 end do
350
351 end subroutine masked_gather_copy_0
352
362 subroutine face_masked_gather_copy_0(a, b, mask, facet, lx, ly, lz, n_mask)
363 integer, intent(in) :: lx, ly, lz, n_mask
364 real(kind=rp), dimension(n_mask), intent(inout) :: a
365 real(kind=rp), dimension(:, :, :, :), intent(in) :: b
366 integer, dimension(0:n_mask), intent(in) :: mask
367 integer, dimension(0:n_mask), intent(in) :: facet
368 integer :: l
369 integer :: idx(4)
370
371 do l = 1, n_mask
372 idx = nonlinear_index(mask(l), lx, ly, lz)
373
374 select case (facet(l))
375 case (1, 2)
376 a(l) = b(idx(2), idx(3), facet(l), idx(4))
377 case (3, 4)
378 a(l) = b(idx(1), idx(3), facet(l), idx(4))
379 case (5, 6)
380 a(l) = b(idx(1), idx(2), facet(l), idx(4))
381 end select
382 end do
383
384 end subroutine face_masked_gather_copy_0
385
395 subroutine masked_gather_copy(a, b, mask, n, n_mask)
396 integer, intent(in) :: n, n_mask
397 real(kind=rp), dimension(n), intent(in) :: b
398 real(kind=rp), dimension(n_mask), intent(inout) :: a
399 integer, dimension(n_mask) :: mask
400 integer :: i, j
401
402 do concurrent(i = 1:n_mask)
403 j = mask(i)
404 a(i) = b(j)
405 end do
406
407 end subroutine masked_gather_copy
408
418 subroutine masked_scatter_copy_0(a, b, mask, n, n_mask)
419 integer, intent(in) :: n, n_mask
420 real(kind=rp), dimension(n_mask), intent(in) :: b
421 real(kind=rp), dimension(n), intent(inout) :: a
422 integer, dimension(0:n_mask) :: mask
423 integer :: i, j
424
425 do concurrent(i = 1:n_mask)
426 j = mask(i)
427 a(j) = b(i)
428 end do
429
430 end subroutine masked_scatter_copy_0
431
441 subroutine masked_scatter_copy(a, b, mask, n, n_mask)
442 integer, intent(in) :: n, n_mask
443 real(kind=rp), dimension(n_mask), intent(in) :: b
444 real(kind=rp), dimension(n), intent(inout) :: a
445 integer, dimension(n_mask) :: mask
446 integer :: i, j
447
448 do concurrent(i = 1:n_mask)
449 j = mask(i)
450 a(j) = b(i)
451 end do
452
453 end subroutine masked_scatter_copy
454
457 subroutine cfill_mask(a, c, n, mask, n_mask)
458 integer, intent(in) :: n, n_mask
459 real(kind=rp), dimension(n), intent(inout) :: a
460 real(kind=rp), intent(in) :: c
461 integer, dimension(n_mask), intent(in) :: mask
462 integer :: i
463
464 do concurrent(i = 1:n_mask)
465 a(mask(i)) = c
466 end do
467
468 end subroutine cfill_mask
469
471 subroutine cmult(a, c, n)
472 integer, intent(in) :: n
473 real(kind=rp), dimension(n), intent(inout) :: a
474 real(kind=rp), intent(in) :: c
475 integer :: i
476
477 do concurrent(i = 1:n)
478 a(i) = c * a(i)
479 end do
480 end subroutine cmult
481
483 subroutine cmult2(a, b, c, n)
484 integer, intent(in) :: n
485 real(kind=rp), dimension(n), intent(inout) :: a
486 real(kind=rp), dimension(n), intent(in) :: b
487 real(kind=rp), intent(in) :: c
488 integer :: i
489
490 do concurrent(i = 1:n)
491 a(i) = c * b(i)
492 end do
493
494 end subroutine cmult2
495
497 subroutine cdiv(a, c, n)
498 integer, intent(in) :: n
499 real(kind=rp), dimension(n), intent(inout) :: a
500 real(kind=rp), intent(in) :: c
501 integer :: i
502
503 do concurrent(i = 1:n)
504 a(i) = c / a(i)
505 end do
506 end subroutine cdiv
507
509 subroutine cdiv2(a, b, c, n)
510 integer, intent(in) :: n
511 real(kind=rp), dimension(n), intent(inout) :: a
512 real(kind=rp), dimension(n), intent(in) :: b
513 real(kind=rp), intent(in) :: c
514 integer :: i
515
516 do concurrent(i = 1:n)
517 a(i) = c / b(i)
518 end do
519 end subroutine cdiv2
520
522 subroutine cadd(a, s, n)
523 integer, intent(in) :: n
524 real(kind=rp), dimension(n), intent(inout) :: a
525 real(kind=rp), intent(in) :: s
526 integer :: i
527
528 do concurrent(i = 1:n)
529 a(i) = a(i) + s
530 end do
531 end subroutine cadd
532
534 subroutine cadd2(a, b, s, n)
535 integer, intent(in) :: n
536 real(kind=rp), dimension(n), intent(inout) :: a
537 real(kind=rp), dimension(n), intent(in) :: b
538 real(kind=rp), intent(in) :: s
539 integer :: i
540
541 do concurrent(i = 1:n)
542 a(i) = b(i) + s
543 end do
544 end subroutine cadd2
545
547 subroutine cfill(a, c, n)
548 integer, intent(in) :: n
549 real(kind=rp), dimension(n), intent(inout) :: a
550 real(kind=rp), intent(in) :: c
551 integer :: i
552
553 do concurrent(i = 1:n)
554 a(i) = c
555 end do
556 end subroutine cfill
557
559 subroutine cwrap(a, min_val, max_val, n)
560 integer, intent(in) :: n
561 real(kind=rp), dimension(n), intent(inout) :: a
562 real(kind=rp), intent(in) :: min_val, max_val
563 integer :: i
564
565 if (n .lt. 1 .or. max_val .le. min_val) return
566
567 do concurrent(i = 1:n)
568 a(i) = modulo(a(i) - min_val, max_val - min_val) + min_val
569 end do
570 end subroutine cwrap
571
573 function glsum(a, n)
574 integer, intent(in) :: n
575 real(kind=rp), dimension(n) :: a
576 real(kind=rp) :: glsum
577 real(kind=xp) :: tmp
578 integer :: i, ierr
579 tmp = 0.0_rp
580 do i = 1, n
581 tmp = tmp + a(i)
582 end do
583 call mpi_allreduce(mpi_in_place, tmp, 1, &
584 mpi_extra_precision, mpi_sum, neko_comm, ierr)
585 glsum = tmp
586
587 end function glsum
588
590 function glmax(a, n)
591 integer, intent(in) :: n
592 real(kind=rp), dimension(n) :: a
593 real(kind=rp) :: tmp, glmax
594 integer :: i, ierr
595
596 tmp = -huge(0.0_rp)
597 do i = 1, n
598 tmp = max(tmp,a(i))
599 end do
600 call mpi_allreduce(tmp, glmax, 1, &
601 mpi_real_precision, mpi_max, neko_comm, ierr)
602 end function glmax
603
605 function glimax(a, n)
606 integer, intent(in) :: n
607 integer, dimension(n) :: a
608 integer :: tmp, glimax
609 integer :: i, ierr
610
611 tmp = -huge(0)
612 do i = 1, n
613 tmp = max(tmp,a(i))
614 end do
615 call mpi_allreduce(tmp, glimax, 1, &
616 mpi_integer, mpi_max, neko_comm, ierr)
617 end function glimax
618
620 function glmin(a, n)
621 integer, intent(in) :: n
622 real(kind=rp), dimension(n) :: a
623 real(kind=rp) :: tmp, glmin
624 integer :: i, ierr
625
626 tmp = huge(0.0_rp)
627 do i = 1, n
628 tmp = min(tmp,a(i))
629 end do
630 call mpi_allreduce(tmp, glmin, 1, &
631 mpi_real_precision, mpi_min, neko_comm, ierr)
632 end function glmin
633
635 function glimin(a, n)
636 integer, intent(in) :: n
637 integer, dimension(n) :: a
638 integer :: tmp, glimin
639 integer :: i, ierr
640
641 tmp = huge(0)
642 do i = 1, n
643 tmp = min(tmp,a(i))
644 end do
645 call mpi_allreduce(tmp, glimin, 1, &
646 mpi_integer, mpi_min, neko_comm, ierr)
647 end function glimin
648
649
650
651
653 subroutine chsign(a, n)
654 integer, intent(in) :: n
655 real(kind=rp), dimension(n), intent(inout) :: a
656 integer :: i
657
658 do concurrent(i = 1:n)
659 a(i) = -a(i)
660 end do
661
662 end subroutine chsign
663
665 function vlmax(vec,n) result(tmax)
666 integer :: n, i
667 real(kind=rp), intent(in) :: vec(n)
668 real(kind=rp) :: tmax
669 tmax = real(-99d20, rp)
670 do i = 1, n
671 tmax = max(tmax, vec(i))
672 end do
673 end function vlmax
674
676 function vlmin(vec,n) result(tmin)
677 integer, intent(in) :: n
678 real(kind=rp), intent(in) :: vec(n)
679 real(kind=rp) :: tmin
680 integer :: i
681 tmin = real(99.0e20, rp)
682 do i = 1, n
683 tmin = min(tmin, vec(i))
684 end do
685 end function vlmin
686
688 subroutine invcol1(a, n)
689 integer, intent(in) :: n
690 real(kind=rp), dimension(n), intent(inout) :: a
691 integer :: i
692
693 do concurrent(i = 1:n)
694 a(i) = 1.0_xp / real(a(i), xp)
695 end do
696
697 end subroutine invcol1
698
700 subroutine invcol3(a, b, c, n)
701 integer, intent(in) :: n
702 real(kind=rp), dimension(n), intent(inout) :: a
703 real(kind=rp), dimension(n), intent(in) :: b, c
704 integer :: i
705
706 do concurrent(i = 1:n)
707 a(i) = real(b(i), xp) / c(i)
708 end do
709
710 end subroutine invcol3
711
713 subroutine invers2(a, b, n)
714 integer, intent(in) :: n
715 real(kind=rp), dimension(n), intent(inout) :: a
716 real(kind=rp), dimension(n), intent(in) :: b
717 integer :: i
718
719 do concurrent(i = 1:n)
720 a(i) = 1.0_xp / real(b(i), xp)
721 end do
722
723 end subroutine invers2
724
727 subroutine vcross(u1, u2, u3, v1, v2, v3, w1, w2, w3, n)
728 integer, intent(in) :: n
729 real(kind=rp), dimension(n), intent(in) :: v1, v2, v3
730 real(kind=rp), dimension(n), intent(in) :: w1, w2, w3
731 real(kind=rp), dimension(n), intent(out) :: u1, u2, u3
732 integer :: i
733
734 do concurrent(i = 1:n)
735 u1(i) = v2(i)*w3(i) - v3(i)*w2(i)
736 u2(i) = v3(i)*w1(i) - v1(i)*w3(i)
737 u3(i) = v1(i)*w2(i) - v2(i)*w1(i)
738 end do
739
740 end subroutine vcross
741
744 subroutine vdot2(dot, u1, u2, v1, v2, n)
745 integer, intent(in) :: n
746 real(kind=rp), dimension(n), intent(in) :: u1, u2
747 real(kind=rp), dimension(n), intent(in) :: v1, v2
748 real(kind=rp), dimension(n), intent(out) :: dot
749 integer :: i
750 do concurrent(i = 1:n)
751 dot(i) = u1(i)*v1(i) + u2(i)*v2(i)
752 end do
753
754 end subroutine vdot2
755
758 subroutine vdot3(dot, u1, u2, u3, v1, v2, v3, n)
759 integer, intent(in) :: n
760 real(kind=rp), dimension(n), intent(in) :: u1, u2, u3
761 real(kind=rp), dimension(n), intent(in) :: v1, v2, v3
762 real(kind=rp), dimension(n), intent(out) :: dot
763 integer :: i
764
765 do concurrent(i = 1:n)
766 dot(i) = u1(i)*v1(i) + u2(i)*v2(i) + u3(i)*v3(i)
767 end do
768
769 end subroutine vdot3
770
772 function vlsc3(u, v, w, n) result(s)
773 integer, intent(in) :: n
774 real(kind=rp), dimension(n), intent(in) :: u, v, w
775 real(kind=rp) :: s
776 integer :: i
777
778 s = 0.0_rp
779 do i = 1, n
780 s = s + u(i)*v(i)*w(i)
781 end do
782
783 end function vlsc3
784
786 function vlsc2(u, v, n) result(s)
787 integer, intent(in) :: n
788 real(kind=rp), dimension(n), intent(in) :: u, v
789 real(kind=rp) :: s
790 integer :: i
791
792 s = 0.0_rp
793 do i = 1, n
794 s = s + u(i)*v(i)
795 end do
796
797 end function vlsc2
798
800 subroutine add2(a, b, n)
801 integer, intent(in) :: n
802 real(kind=rp), dimension(n), intent(inout) :: a
803 real(kind=rp), dimension(n), intent(in) :: b
804 integer :: i
805
806 do concurrent(i = 1:n)
807 a(i) = a(i) + b(i)
808 end do
809
810 end subroutine add2
811
813 subroutine add3(a, b, c, n)
814 integer, intent(in) :: n
815 real(kind=rp), dimension(n), intent(inout) :: a
816 real(kind=rp), dimension(n), intent(in) :: b
817 real(kind=rp), dimension(n), intent(in) :: c
818 integer :: i
819
820 do concurrent(i = 1:n)
821 a(i) = b(i) + c(i)
822 end do
823
824 end subroutine add3
825
827 subroutine add4(a, b, c, d, n)
828 integer, intent(in) :: n
829 real(kind=rp), dimension(n), intent(out) :: a
830 real(kind=rp), dimension(n), intent(in) :: d
831 real(kind=rp), dimension(n), intent(in) :: c
832 real(kind=rp), dimension(n), intent(in) :: b
833 integer :: i
834
835 do concurrent(i = 1:n)
836 a(i) = b(i) + c(i) + d(i)
837 end do
838
839 end subroutine add4
840
842 subroutine sub2(a, b, n)
843 integer, intent(in) :: n
844 real(kind=rp), dimension(n), intent(inout) :: a
845 real(kind=rp), dimension(n), intent(in) :: b
846 integer :: i
847
848 do concurrent(i = 1:n)
849 a(i) = a(i) - b(i)
850 end do
851
852 end subroutine sub2
853
855 subroutine sub3(a, b, c, n)
856 integer, intent(in) :: n
857 real(kind=rp), dimension(n), intent(inout) :: a
858 real(kind=rp), dimension(n), intent(in) :: b
859 real(kind=rp), dimension(n), intent(in) :: c
860 integer :: i
861
862 do concurrent(i = 1:n)
863 a(i) = b(i) - c(i)
864 end do
865
866 end subroutine sub3
867
868
871 subroutine add2s1(a, b, c1, n)
872 integer, intent(in) :: n
873 real(kind=rp), dimension(n), intent(inout) :: a
874 real(kind=rp), dimension(n), intent(in) :: b
875 real(kind=rp), intent(in) :: c1
876 integer :: i
877
878 do concurrent(i = 1:n)
879 a(i) = c1 * a(i) + b(i)
880 end do
881
882 end subroutine add2s1
883
886 subroutine add2s2(a, b, c1, n)
887 integer, intent(in) :: n
888 real(kind=rp), dimension(n), intent(inout) :: a
889 real(kind=rp), dimension(n), intent(in) :: b
890 real(kind=rp), intent(in) :: c1
891 integer :: i
892
893 do concurrent(i = 1:n)
894 a(i) = a(i) + c1 * b(i)
895 end do
896
897 end subroutine add2s2
898
900 subroutine addsqr2s2(a, b, c1, n)
901 integer, intent(in) :: n
902 real(kind=rp), dimension(n), intent(inout) :: a
903 real(kind=rp), dimension(n), intent(in) :: b
904 real(kind=rp), intent(in) :: c1
905 integer :: i
906
907 do concurrent(i = 1:n)
908 a(i) = a(i) + c1 * ( b(i) * b(i) )
909 end do
910
911 end subroutine addsqr2s2
912
914 subroutine invcol2(a, b, n)
915 integer, intent(in) :: n
916 real(kind=rp), dimension(n), intent(inout) :: a
917 real(kind=rp), dimension(n), intent(in) :: b
918 integer :: i
919
920 do concurrent(i = 1:n)
921 a(i) = real(a(i), xp) / b(i)
922 end do
923
924 end subroutine invcol2
925
926
928 subroutine col2(a, b, n)
929 integer, intent(in) :: n
930 real(kind=rp), dimension(n), intent(inout) :: a
931 real(kind=rp), dimension(n), intent(in) :: b
932 integer :: i
933
934 do concurrent(i = 1:n)
935 a(i) = a(i) * b(i)
936 end do
937
938 end subroutine col2
939
941 subroutine col3(a, b, c, n)
942 integer, intent(in) :: n
943 real(kind=rp), dimension(n), intent(inout) :: a
944 real(kind=rp), dimension(n), intent(in) :: b
945 real(kind=rp), dimension(n), intent(in) :: c
946 integer :: i
947
948 do concurrent(i = 1:n)
949 a(i) = b(i) * c(i)
950 end do
951
952 end subroutine col3
953
955 subroutine subcol3(a, b, c, n)
956 integer, intent(in) :: n
957 real(kind=rp), dimension(n), intent(inout) :: a
958 real(kind=rp), dimension(n), intent(in) :: b
959 real(kind=rp), dimension(n), intent(in) :: c
960 integer :: i
961
962 do concurrent(i = 1:n)
963 a(i) = a(i) - b(i) * c(i)
964 end do
965
966 end subroutine subcol3
967
969 subroutine add3s2(a, b, c, c1, c2 ,n)
970 integer, intent(in) :: n
971 real(kind=rp), dimension(n), intent(inout) :: a
972 real(kind=rp), dimension(n), intent(in) :: b
973 real(kind=rp), dimension(n), intent(in) :: c
974 real(kind=rp), intent(in) :: c1, c2
975 integer :: i
976
977 do concurrent(i = 1:n)
978 a(i) = c1 * b(i) + c2 * c(i)
979 end do
980
981 end subroutine add3s2
982
984 subroutine add4s3(a, b, c, d, c1, c2, c3, n)
985 integer, intent(in) :: n
986 real(kind=rp), dimension(n), intent(inout) :: a
987 real(kind=rp), dimension(n), intent(in) :: b
988 real(kind=rp), dimension(n), intent(in) :: c
989 real(kind=rp), dimension(n), intent(in) :: d
990 real(kind=rp), intent(in) :: c1, c2, c3
991 integer :: i
992
993 do concurrent(i = 1:n)
994 a(i) = c1 * b(i) + c2 * c(i) + c3 * d(i)
995 end do
996
997 end subroutine add4s3
998
1000 subroutine add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
1001 integer, intent(in) :: n
1002 real(kind=rp), dimension(n), intent(inout) :: a
1003 real(kind=rp), dimension(n), intent(in) :: b
1004 real(kind=rp), dimension(n), intent(in) :: c
1005 real(kind=rp), dimension(n), intent(in) :: d
1006 real(kind=rp), dimension(n), intent(in) :: e
1007 real(kind=rp), intent(in) :: c1, c2, c3, c4
1008 integer :: i
1009
1010 do concurrent(i = 1:n)
1011 a(i) = a(i) + c1 * b(i) + c2 * c(i) + c3 * d(i) + c4 * e(i)
1012 end do
1013
1014 end subroutine add5s4
1015
1017 subroutine subcol4(a, b, c, d, n)
1018 integer, intent(in) :: n
1019 real(kind=rp), dimension(n), intent(inout) :: a
1020 real(kind=rp), dimension(n), intent(in) :: b
1021 real(kind=rp), dimension(n), intent(in) :: c
1022 real(kind=rp), dimension(n), intent(in) :: d
1023 integer :: i
1024
1025 do concurrent(i = 1:n)
1026 a(i) = a(i) - b(i) * c(i) * d(i)
1027 end do
1028
1029 end subroutine subcol4
1030
1032 subroutine addcol3(a, b, c, n)
1033 integer, intent(in) :: n
1034 real(kind=rp), dimension(n), intent(inout) :: a
1035 real(kind=rp), dimension(n), intent(in) :: b
1036 real(kind=rp), dimension(n), intent(in) :: c
1037 integer :: i
1038
1039 do concurrent(i = 1:n)
1040 a(i) = a(i) + b(i) * c(i)
1041 end do
1042
1043 end subroutine addcol3
1044
1046 subroutine addcol4(a, b, c, d, n)
1047 integer, intent(in) :: n
1048 real(kind=rp), dimension(n), intent(inout) :: a
1049 real(kind=rp), dimension(n), intent(in) :: b
1050 real(kind=rp), dimension(n), intent(in) :: c
1051 real(kind=rp), dimension(n), intent(in) :: d
1052 integer :: i
1053
1054 do concurrent(i = 1:n)
1055 a(i) = a(i) + b(i) * c(i) * d(i)
1056 end do
1057
1058 end subroutine addcol4
1059
1061 subroutine addcol3s2(a, b, c, s, n)
1062 integer, intent(in) :: n
1063 real(kind=rp), dimension(n), intent(inout) :: a
1064 real(kind=rp), dimension(n), intent(in) :: b
1065 real(kind=rp), dimension(n), intent(in) :: c
1066 real(kind=rp), intent(in) :: s
1067 integer :: i
1068
1069 do concurrent(i = 1:n)
1070 a(i) = a(i) + s * b(i) * c(i)
1071 end do
1072
1073 end subroutine addcol3s2
1074
1076 subroutine ascol5(a, b, c, d, e, n)
1077 integer, intent(in) :: n
1078 real(kind=rp), dimension(n), intent(inout) :: a
1079 real(kind=rp), dimension(n), intent(in) :: b
1080 real(kind=rp), dimension(n), intent(in) :: c
1081 real(kind=rp), dimension(n), intent(in) :: d
1082 real(kind=rp), dimension(n), intent(in) :: e
1083 integer :: i
1084
1085 do concurrent(i = 1:n)
1086 a(i) = b(i)*c(i) - d(i)*e(i)
1087 end do
1088
1089 end subroutine ascol5
1090
1092 subroutine p_update(a, b, c, c1, c2, n)
1093 integer, intent(in) :: n
1094 real(kind=rp), dimension(n), intent(inout) :: a
1095 real(kind=rp), dimension(n), intent(in) :: b
1096 real(kind=rp), dimension(n), intent(in) :: c
1097 real(kind=rp), intent(in) :: c1, c2
1098 integer :: i
1099
1100 do concurrent(i = 1:n)
1101 a(i) = b(i) + c1*(a(i)-c2*c(i))
1102 end do
1103
1104 end subroutine p_update
1105
1107 subroutine x_update(a, b, c, c1, c2, n)
1108 integer, intent(in) :: n
1109 real(kind=rp), dimension(n), intent(inout) :: a
1110 real(kind=rp), dimension(n), intent(in) :: b
1111 real(kind=rp), dimension(n), intent(in) :: c
1112 real(kind=rp), intent(in) :: c1, c2
1113 integer :: i
1114
1115 do concurrent(i = 1:n)
1116 a(i) = a(i) + c1*b(i)+c2*c(i)
1117 end do
1118
1119 end subroutine x_update
1120
1122 function glsc2(a, b, n)
1123 integer, intent(in) :: n
1124 real(kind=rp), dimension(n), intent(in) :: a
1125 real(kind=rp), dimension(n), intent(in) :: b
1126 real(kind=rp) :: glsc2
1127 real(kind=xp) :: tmp
1128 integer :: i, ierr
1129
1130 tmp = 0.0_xp
1131 do i = 1, n
1132 tmp = tmp + a(i) * b(i)
1133 end do
1134
1135 call mpi_allreduce(mpi_in_place, tmp, 1, &
1136 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1137 glsc2 = tmp
1138 end function glsc2
1139
1141 function glsc3(a, b, c, n)
1142 integer, intent(in) :: n
1143 real(kind=rp), dimension(n), intent(in) :: a
1144 real(kind=rp), dimension(n), intent(in) :: b
1145 real(kind=rp), dimension(n), intent(in) :: c
1146 real(kind=rp) :: glsc3
1147 real(kind=xp) :: tmp
1148 integer :: i, ierr
1149
1150 tmp = 0.0_xp
1151 do i = 1, n
1152 tmp = tmp + a(i) * b(i) * c(i)
1153 end do
1154
1155 call mpi_allreduce(mpi_in_place, tmp, 1, &
1156 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1157 glsc3 = tmp
1158
1159 end function glsc3
1160 function glsc4(a, b, c, d, n)
1161 integer, intent(in) :: n
1162 real(kind=rp), dimension(n), intent(in) :: a
1163 real(kind=rp), dimension(n), intent(in) :: b
1164 real(kind=rp), dimension(n), intent(in) :: c
1165 real(kind=rp), dimension(n), intent(in) :: d
1166 real(kind=rp) :: glsc4
1167 real(kind=xp) :: tmp
1168 integer :: i, ierr
1169
1170 tmp = 0.0_xp
1171 do i = 1, n
1172 tmp = tmp + a(i) * b(i) * c(i) * d(i)
1173 end do
1174
1175 call mpi_allreduce(mpi_in_place, tmp, 1, &
1176 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1177 glsc4 = tmp
1178
1179 end function glsc4
1180
1183 function glsubnorm(a, b, n)
1184 integer, intent(in) :: n
1185 real(kind=rp), dimension(n), intent(in) :: a
1186 real(kind=rp), dimension(n), intent(in) :: b
1187 real(kind=rp) :: glsubnorm
1188 real(kind=xp) :: tmp
1189 integer :: i, ierr
1190
1191 tmp = 0.0_xp
1192 do i = 1, n
1193 tmp = tmp + (a(i) - b(i))**2
1194 end do
1195
1196 call mpi_allreduce(mpi_in_place, tmp, 1, &
1197 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1198 glsubnorm = sqrt(tmp)
1199
1200 end function glsubnorm
1201
1207 subroutine sortrp(a, ind, n)
1208 integer, intent(in) :: n
1209 real(kind=rp), intent(inout) :: a(n)
1210 integer, intent(out) :: ind(n)
1211 real(kind=rp) :: aa
1212 integer :: j, ir, i, ii, l
1213
1214 do j = 1, n
1215 ind(j) = j
1216 end do
1217
1218 if (n .le. 1) return
1219
1220
1221 l = n/2+1
1222 ir = n
1223 do while (.true.)
1224 if (l .gt. 1) then
1225 l = l-1
1226 aa = a(l)
1227 ii = ind(l)
1228 else
1229 aa = a(ir)
1230 ii = ind(ir)
1231 a(ir) = a(1)
1232 ind(ir) = ind(1)
1233 ir = ir - 1
1234 if (ir .eq. 1) then
1235 a(1) = aa
1236 ind(1) = ii
1237 return
1238 end if
1239 end if
1240 i = l
1241 j = l+l
1242 do while (j .le. ir)
1243 if (j .lt. ir) then
1244 if ( a(j) .lt. a(j+1) ) j = j + 1
1245 end if
1246 if (aa .lt. a(j)) then
1247 a(i) = a(j)
1248 ind(i) = ind(j)
1249 i = j
1250 j = j+j
1251 else
1252 j = ir+1
1253 end if
1254 end do
1255 a(i) = aa
1256 ind(i) = ii
1257 end do
1258 end subroutine sortrp
1259
1265 subroutine sorti4(a, ind, n)
1266 integer, intent(in) :: n
1267 integer(i4), intent(inout) :: a(n)
1268 integer, intent(out) :: ind(n)
1269 integer(i4) :: aa
1270 integer :: j, ir, i, ii, l
1271
1272 do j = 1, n
1273 ind(j) = j
1274 end do
1275
1276 if (n .le. 1) return
1277
1278 l = n/2+1
1279 ir = n
1280 do while (.true.)
1281 if (l .gt. 1) then
1282 l = l - 1
1283 aa = a(l)
1284 ii = ind(l)
1285 else
1286 aa = a(ir)
1287 ii = ind(ir)
1288 a(ir) = a( 1)
1289 ind(ir) = ind( 1)
1290 ir = ir - 1
1291 if (ir .eq. 1) then
1292 a(1) = aa
1293 ind(1) = ii
1294 return
1295 end if
1296 end if
1297 i = l
1298 j = l + l
1299 do while (j .le. ir)
1300 if (j .lt. ir) then
1301 if ( a(j) .lt. a(j + 1) ) j = j + 1
1302 end if
1303 if (aa .lt. a(j)) then
1304 a(i) = a(j)
1305 ind(i) = ind(j)
1306 i = j
1307 j = j + j
1308 else
1309 j = ir + 1
1310 end if
1311 end do
1312 a(i) = aa
1313 ind(i) = ii
1314 end do
1315 end subroutine sorti4
1316
1321 subroutine swapdp(b, ind, n)
1322 integer, intent(in) :: n
1323 real(kind=rp), intent(inout) :: b(n)
1324 integer, intent(in) :: ind(n)
1325 real(kind=rp) :: temp(n)
1326 integer :: i, jj
1327
1328 do i = 1, n
1329 temp(i) = b(i)
1330 end do
1331 do i = 1, n
1332 jj = ind(i)
1333 b(i) = temp(jj)
1334 end do
1335 end subroutine swapdp
1336
1341 subroutine swapi4(b, ind, n)
1342 integer, intent(in) :: n
1343 integer(i4), intent(inout) :: b(n)
1344 integer, intent(in) :: ind(n)
1345 integer(i4) :: temp(n)
1346 integer :: i, jj
1347
1348 do i = 1, n
1349 temp(i) = b(i)
1350 end do
1351 do i = 1, n
1352 jj = ind(i)
1353 b(i) = temp(jj)
1354 end do
1355 end subroutine swapi4
1356
1361 subroutine reorddp(b, ind, n)
1362 integer, intent(in) :: n
1363 real(kind=rp), intent(inout) :: b(n)
1364 integer, intent(in) :: ind(n)
1365 real(kind=rp) :: temp(n)
1366 integer :: i, jj
1367
1368 do i = 1, n
1369 temp(i) = b(i)
1370 end do
1371 do i = 1, n
1372 jj = ind(i)
1373 b(jj) = temp(i)
1374 end do
1375 end subroutine reorddp
1376
1381 subroutine reordi4(b, ind, n)
1382 integer, intent(in) :: n
1383 integer(i4), intent(inout) :: b(n)
1384 integer, intent(in) :: ind(n)
1385 integer(i4) :: temp(n)
1386 integer :: i, jj
1387
1388 do i = 1, n
1389 temp(i) = b(i)
1390 end do
1391 do i = 1, n
1392 jj = ind(i)
1393 b(jj) = temp(i)
1394 end do
1395 end subroutine reordi4
1396
1401 subroutine flipvdp(b, ind, n)
1402 integer, intent(in) :: n
1403 real(kind=rp), intent(inout) :: b(n)
1404 integer, intent(inout) :: ind(n)
1405 real(kind=rp) :: temp(n)
1406 integer :: tempind(n)
1407 integer :: i, jj
1408
1409 do i = 1, n
1410 jj = n+1-i
1411 temp(jj) = b(i)
1412 tempind(jj) = ind(i)
1413 end do
1414 do i = 1,n
1415 b(i) = temp(i)
1416 ind(i) = tempind(i)
1417 end do
1418 end subroutine flipvdp
1419
1424 subroutine flipvi4(b, ind, n)
1425 integer, intent(in) :: n
1426 integer(i4), intent(inout) :: b(n)
1427 integer, intent(inout) :: ind(n)
1428 integer(i4) :: temp(n)
1429 integer :: tempind(n)
1430 integer :: i, jj
1431
1432 do i = 1, n
1433 jj = n+1-i
1434 temp(jj) = b(i)
1435 tempind(jj) = ind(i)
1436 end do
1437 do i = 1,n
1438 b(i) = temp(i)
1439 ind(i) = tempind(i)
1440 end do
1441 end subroutine flipvi4
1442
1446 subroutine absval(a, n)
1447 integer, intent(in) :: n
1448 real(kind=rp), dimension(n), intent(inout) :: a
1449 integer :: i
1450 do i = 1, n
1451 a(i) = abs(a(i))
1452 end do
1453 end subroutine absval
1454
1455 ! ========================================================================== !
1456 ! Point-wise operations
1457
1459 subroutine pwmax2(a, b, n)
1460 integer, intent(in) :: n
1461 real(kind=rp), dimension(n), intent(inout) :: a
1462 real(kind=rp), dimension(n), intent(in) :: b
1463 integer :: i
1464
1465 do i = 1, n
1466 a(i) = max(a(i), b(i))
1467 end do
1468 end subroutine pwmax2
1469
1471 subroutine pwmax3(a, b, c, n)
1472 integer, intent(in) :: n
1473 real(kind=rp), dimension(n), intent(inout) :: a
1474 real(kind=rp), dimension(n), intent(in) :: b, c
1475 integer :: i
1476
1477 do i = 1, n
1478 a(i) = max(b(i), c(i))
1479 end do
1480 end subroutine pwmax3
1481
1483 subroutine cpwmax2(a, b, n)
1484 integer, intent(in) :: n
1485 real(kind=rp), dimension(n), intent(inout) :: a
1486 real(kind=rp), intent(in) :: b
1487 integer :: i
1488
1489 do i = 1, n
1490 a(i) = max(a(i), b)
1491 end do
1492 end subroutine cpwmax2
1493
1495 subroutine cpwmax3(a, b, c, n)
1496 integer, intent(in) :: n
1497 real(kind=rp), dimension(n), intent(inout) :: a
1498 real(kind=rp), dimension(n), intent(in) :: b
1499 real(kind=rp), intent(in) :: c
1500 integer :: i
1501
1502 do i = 1, n
1503 a(i) = max(b(i), c)
1504 end do
1505 end subroutine cpwmax3
1506
1508 subroutine pwmin2(a, b, n)
1509 integer, intent(in) :: n
1510 real(kind=rp), dimension(n), intent(inout) :: a
1511 real(kind=rp), dimension(n), intent(in) :: b
1512 integer :: i
1513
1514 do i = 1, n
1515 a(i) = min(a(i), b(i))
1516 end do
1517 end subroutine pwmin2
1518
1520 subroutine pwmin3(a, b, c, n)
1521 integer, intent(in) :: n
1522 real(kind=rp), dimension(n), intent(inout) :: a
1523 real(kind=rp), dimension(n), intent(in) :: b, c
1524 integer :: i
1525
1526 do i = 1, n
1527 a(i) = min(b(i), c(i))
1528 end do
1529 end subroutine pwmin3
1530
1532 subroutine cpwmin2(a, b, n)
1533 integer, intent(in) :: n
1534 real(kind=rp), dimension(n), intent(inout) :: a
1535 real(kind=rp), intent(in) :: b
1536 integer :: i
1537
1538 do i = 1, n
1539 a(i) = min(a(i), b)
1540 end do
1541 end subroutine cpwmin2
1542
1544 subroutine cpwmin3(a, b, c, n)
1545 integer, intent(in) :: n
1546 real(kind=rp), dimension(n), intent(inout) :: a
1547 real(kind=rp), dimension(n), intent(in) :: b
1548 real(kind=rp), intent(in) :: c
1549 integer :: i
1550
1551 do i = 1, n
1552 a(i) = min(b(i), c)
1553 end do
1554 end subroutine cpwmin3
1555
1556 ! M33INV and M44INV by David G. Simpson pure function version from
1557 ! https://fortranwiki.org/fortran/show/Matrix+inversion
1558 ! Invert 3x3 matrix
1559 function matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33) &
1560 result(b)
1561 real(kind=rp), intent(in) :: a11, a12, a13, a21, a22, a23, a31, a32, a33
1562 real(xp) :: a(3,3) !! Matrix
1563 real(rp) :: b(3,3) !! Inverse matrix
1564 a(1,1) = a11
1565 a(1,2) = a12
1566 a(1,3) = a13
1567 a(2,1) = a21
1568 a(2,2) = a22
1569 a(2,3) = a23
1570 a(3,1) = a31
1571 a(3,2) = a32
1572 a(3,3) = a33
1573 b = matinv3(a)
1574 end function matinv39
1575
1580 function matinv3(A) result(B)
1581 !! Performs a direct calculation of the inverse of a 3×3 matrix.
1582 real(kind=xp), intent(in) :: a(3,3) !! Matrix
1583 real(kind=xp) :: b(3,3) !! Inverse matrix
1584 real(kind=xp) :: detinv
1585
1586 ! Calculate the inverse determinant of the matrix
1587 ! first index x,y,z, second r, s, t
1588 detinv = 1.0_xp / real(a(1,1)*a(2,2)*a(3,3) - a(1,1)*a(2,3)*a(3,2) &
1589 - a(1,2)*a(2,1)*a(3,3) + a(1,2)*a(2,3)*a(3,1)&
1590 + a(1,3)*a(2,1)*a(3,2) - a(1,3)*a(2,2)*a(3,1), xp)
1591 ! Calculate the inverse of the matrix
1592 ! first index r, s, t, second x, y, z
1593 b(1,1) = +detinv * (a(2,2)*a(3,3) - a(2,3)*a(3,2))
1594 b(2,1) = -detinv * (a(2,1)*a(3,3) - a(2,3)*a(3,1))
1595 b(3,1) = +detinv * (a(2,1)*a(3,2) - a(2,2)*a(3,1))
1596 b(1,2) = -detinv * (a(1,2)*a(3,3) - a(1,3)*a(3,2))
1597 b(2,2) = +detinv * (a(1,1)*a(3,3) - a(1,3)*a(3,1))
1598 b(3,2) = -detinv * (a(1,1)*a(3,2) - a(1,2)*a(3,1))
1599 b(1,3) = +detinv * (a(1,2)*a(2,3) - a(1,3)*a(2,2))
1600 b(2,3) = -detinv * (a(1,1)*a(2,3) - a(1,3)*a(2,1))
1601 b(3,3) = +detinv * (a(1,1)*a(2,2) - a(1,2)*a(2,1))
1602 end function matinv3
1603
1606 function math_stepf(x) result(val)
1607 real(kind=rp), intent(in) :: x
1608 real(kind=rp) :: val
1609 real(kind=rp), parameter :: xdmin = 0.0001_rp
1610 real(kind=rp), parameter :: xdmax = 0.9999_rp
1611 real(kind=rp) :: g
1612
1613 if (x <= xdmin) then
1614 ! Below the lower bound, the function is 0
1615 val = 0.0_rp
1616 else if (x >= xdmax) then
1617 ! Above the upper bound, the function is 1
1618 val = 1.0_rp
1619 else
1620 ! g(x) = 1/(x-1) + 1/x
1621 g = (1.0_rp / (x - 1.0_rp)) + (1.0_rp / x)
1622
1623 ! The sigmoid: S(x) = 1 / (1 + exp(g))
1624 val = 1.0_rp / (1.0_rp + exp(g))
1625 end if
1626 end function math_stepf
1627
1629 function math_dstepf(x) result(val)
1630 real(kind=rp), intent(in) :: x
1631 real(kind=rp) :: val
1632 real(kind=rp), parameter :: xdmin = 0.0001_rp
1633 real(kind=rp), parameter :: xdmax = 0.9999_rp
1634 real(kind=rp) :: arg, g, dg, s_val
1635
1636 if (x <= xdmin .or. x >= xdmax) then
1637 val = 0.0_rp
1638 else
1639 ! The step function is S(x) = 1 / (1 + exp(g(x)))
1640 ! where g(x) = 1/(x-1) + 1/x
1641 ! S'(x) = -S(x) * (1 - S(x)) * g'(x)
1642
1643 g = (1.0_rp / (x - 1.0_rp)) + (1.0_rp / x)
1644
1645 ! Derivative of g(x)
1646 dg = -(1.0_rp / ((x - 1.0_rp)**2)) - (1.0_rp / (x**2))
1647
1648 ! Recompute S(x) locally
1649 s_val = 1.0_rp / (1.0_rp + exp(g))
1650
1651 val = -s_val * (1.0_rp - s_val) * dg
1652 end if
1653 end function math_dstepf
1654
1655end module math
__inline__ __device__ void nonlinear_index(const int idx, const int lx, int *index)
Definition bc_utils.h:44
double real
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:52
Object for handling masks in Neko.
Definition mask.f90:34
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:472
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:484
subroutine, public row_zero(a, m, n, e)
Sets row e to 0 in matrix a.
Definition math.f90:255
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:915
real(kind=rp) function, public vlsc2(u, v, n)
Compute multiplication sum .
Definition math.f90:787
pure logical function, public dabscmp(x, y, tol)
Return double precision absolute comparison .
Definition math.f90:135
real(kind=rp), parameter, public pi
Definition math.f90:76
pure logical function qabscmp(x, y, tol)
Return double precision absolute comparison .
Definition math.f90:150
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1142
subroutine, public ascol5(a, b, c, d, e, n)
Returns .
Definition math.f90:1077
subroutine, public addcol3s2(a, b, c, s, n)
Returns .
Definition math.f90:1062
subroutine, public masked_scatter_copy(a, b, mask, n, n_mask)
Scatter a contigous vector to masked positions in a target array .
Definition math.f90:442
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:714
subroutine, public cadd2(a, b, s, n)
Add a scalar to vector .
Definition math.f90:535
subroutine, public face_masked_gather_copy_0(a, b, mask, facet, lx, ly, lz, n_mask)
Gather values from a face-local SEM field to a reduced contiguous vector.
Definition math.f90:363
real(rp) function, dimension(3, 3), public matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33)
Definition math.f90:1561
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition math.f90:523
subroutine, public masked_copy(a, b, mask, n, n_mask)
Copy a masked vector .
Definition math.f90:317
subroutine reorddp(b, ind, n)
reorder double precision array - inverse of swap
Definition math.f90:1362
subroutine, public addsqr2s2(a, b, c1, n)
Returns .
Definition math.f90:901
subroutine, public cwrap(a, min_val, max_val, n)
Wrap value around a range [min, max)
Definition math.f90:560
real(kind=rp) function, public glsc4(a, b, c, d, n)
Definition math.f90:1161
subroutine, public cdiv2(a, b, c, n)
Division of constant c by elements of a .
Definition math.f90:510
real(kind=rp) function, public math_stepf(x)
Smooth step function S(x) Returns 0 for x <= 0, 1 for x >= 1, and smooth transition in between.
Definition math.f90:1607
subroutine swapdp(b, ind, n)
sort double precision array acording to ind vector
Definition math.f90:1322
subroutine flipvi4(b, ind, n)
Flip single integer vector b and ind.
Definition math.f90:1425
subroutine, public add2s1(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
Definition math.f90:872
subroutine, public cpwmin2(a, b, n)
Point-wise minimum of scalar and vector .
Definition math.f90:1533
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1123
subroutine, public masked_scatter_copy_0(a, b, mask, n, n_mask)
Scatter a contigous vector to masked positions in a target array .
Definition math.f90:419
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:956
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:266
subroutine flipvdp(b, ind, n)
Flip double precision vector b and ind.
Definition math.f90:1402
subroutine, public cpwmin3(a, b, c, n)
Point-wise minimum of scalar and vector .
Definition math.f90:1545
subroutine, public pwmax3(a, b, c, n)
Point-wise maximum of two vectors .
Definition math.f90:1472
subroutine, public masked_gather_copy(a, b, mask, n, n_mask)
Gather a masked vector to reduced contigous vector .
Definition math.f90:396
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:1108
subroutine, public add3(a, b, c, n)
Vector addition .
Definition math.f90:814
subroutine swapi4(b, ind, n)
sort single integer array acording to ind vector
Definition math.f90:1342
integer function, public glimin(a, n)
Min of an integer vector of length n.
Definition math.f90:636
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:574
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:856
subroutine, public addcol4(a, b, c, d, n)
Returns .
Definition math.f90:1047
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:801
subroutine, public cfill(a, c, n)
Set all elements to a constant c .
Definition math.f90:548
subroutine, public absval(a, n)
Take the absolute value of an array.
Definition math.f90:1447
subroutine, public invcol3(a, b, c, n)
Invert a vector .
Definition math.f90:701
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:970
real(kind=xp) function, dimension(3, 3), public matinv3(a)
Performs a direct calculation of the inverse of a 3×3 matrix. M33INV and M44INV by David G....
Definition math.f90:1581
subroutine, public pwmax2(a, b, n)
Point-wise maximum of two vectors .
Definition math.f90:1460
subroutine, public pwmin2(a, b, n)
Point-wise minimum of two vectors .
Definition math.f90:1509
subroutine, public masked_gather_copy_0(a, b, mask, n, n_mask)
Gather a masked vector to reduced contigous vector .
Definition math.f90:340
subroutine, public subcol4(a, b, c, d, n)
Returns .
Definition math.f90:1018
subroutine sorti4(a, ind, n)
Heap Sort for single integer arrays.
Definition math.f90:1266
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:1033
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:689
subroutine, public cdiv(a, c, n)
Division of constant c by elements of a .
Definition math.f90:498
real(kind=rp), parameter, public neko_m_ln2
Definition math.f90:73
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:654
subroutine, public cpwmax3(a, b, c, n)
Point-wise maximum of scalar and vector .
Definition math.f90:1496
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:929
subroutine, public izero(a, n)
Zero an integer vector.
Definition math.f90:244
real(kind=rp) function, public glmax(a, n)
Max of a vector of length n.
Definition math.f90:591
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:277
subroutine, public add4s3(a, b, c, d, c1, c2, c3, n)
Returns .
Definition math.f90:985
subroutine, public add4(a, b, c, d, n)
Vector addition .
Definition math.f90:828
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
Definition math.f90:942
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns .
Definition math.f90:1001
real(kind=rp) function, public math_dstepf(x)
Derivative of math_stepf with respect to x: d(stepf)/dx.
Definition math.f90:1630
pure logical function drelcmp(x, y, eps)
Return double precision relative comparison .
Definition math.f90:179
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:70
subroutine, public vdot3(dot, u1, u2, u3, v1, v2, v3, n)
Compute a dot product (3-d version) assuming vector components etc.
Definition math.f90:759
pure logical function, public sabscmp(x, y, tol)
Return single precision absolute comparison .
Definition math.f90:120
pure logical function qrelcmp(x, y, eps)
Return quad precision relative comparison .
Definition math.f90:194
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
real(kind=rp) function, public glsubnorm(a, b, n)
Returns the norm of the difference of two vectors .
Definition math.f90:1184
subroutine, public vdot2(dot, u1, u2, v1, v2, n)
Compute a dot product (2-d version) assuming vector components etc.
Definition math.f90:745
subroutine, public cpwmax2(a, b, n)
Point-wise maximum of scalar and vector .
Definition math.f90:1484
real(kind=rp) function, public vlmin(vec, n)
minimun value of a vector of length n
Definition math.f90:677
subroutine, public cfill_mask(a, c, n, mask, n_mask)
Fill a constant to a masked vector. .
Definition math.f90:458
real(kind=rp) function, public vlmax(vec, n)
maximum value of a vector of length n
Definition math.f90:666
integer function, public glimax(a, n)
Max of an integer vector of length n.
Definition math.f90:606
subroutine sortrp(a, ind, n)
Heap Sort for double precision arrays.
Definition math.f90:1208
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:843
subroutine, public pwmin3(a, b, c, n)
Point-wise minimum of two vectors .
Definition math.f90:1521
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:887
real(kind=rp) function, public glmin(a, n)
Min of a vector of length n.
Definition math.f90:621
subroutine, public masked_copy_0(a, b, mask, n, n_mask)
Copy a masked vector .
Definition math.f90:296
subroutine, public vcross(u1, u2, u3, v1, v2, v3, w1, w2, w3, n)
Compute a cross product assuming vector components etc.
Definition math.f90:728
pure logical function srelcmp(x, y, eps)
Return single precision relative comparison .
Definition math.f90:165
pure real(kind=rp) function, public lambert_w0(x, niter)
Approximate the principal real branch of the Lambert W function for non-negative real x.
Definition math.f90:212
real(kind=rp) function, public vlsc3(u, v, w, n)
Compute multiplication sum .
Definition math.f90:773
subroutine reordi4(b, ind, n)
reorder single integer array - inverse of swap
Definition math.f90:1382
subroutine, public p_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:1093
integer, parameter, public qp
Definition num_types.f90:10
integer, parameter, public i4
Definition num_types.f90:6
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public sp
Definition num_types.f90:8
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35
#define max(a, b)
Definition tensor.cu:40