Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
math.f90
Go to the documentation of this file.
1! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
2!
3! The UChicago Argonne, LLC as Operator of Argonne National
4! Laboratory holds copyright in the Software. The copyright holder
5! reserves all rights except those expressly granted to licensees,
6! and U.S. Government license rights.
7!
8! Redistribution and use in source and binary forms, with or without
9! modification, are permitted provided that the following conditions
10! are met:
11!
12! 1. Redistributions of source code must retain the above copyright
13! notice, this list of conditions and the disclaimer below.
14!
15! 2. Redistributions in binary form must reproduce the above copyright
16! notice, this list of conditions and the disclaimer (as noted below)
17! in the documentation and/or other materials provided with the
18! distribution.
19!
20! 3. Neither the name of ANL nor the names of its contributors
21! may be used to endorse or promote products derived from this software
22! without specific prior written permission.
23!
24! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
29! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36!
37! Additional BSD Notice
38! ---------------------
39! 1. This notice is required to be provided under our contract with
40! the U.S. Department of Energy (DOE). This work was produced at
41! Argonne National Laboratory under Contract
42! No. DE-AC02-06CH11357 with the DOE.
43!
44! 2. Neither the United States Government nor UCHICAGO ARGONNE,
45! LLC nor any of their employees, makes any warranty,
46! express or implied, or assumes any liability or responsibility for the
47! accuracy, completeness, or usefulness of any information, apparatus,
48! product, or process disclosed, or represents that its use would not
49! infringe privately-owned rights.
50!
51! 3. Also, reference herein to any specific commercial products, process,
52! or services by trade name, trademark, manufacturer or otherwise does
53! not necessarily constitute or imply its endorsement, recommendation,
54! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
55! The views and opinions of authors expressed
56! herein do not necessarily state or reflect those of the United States
57! Government or UCHICAGO ARGONNE, LLC, and shall
58! not be used for advertising or product endorsement purposes.
59!
60module math
61 use num_types, only : rp, dp, sp, qp, i4, xp
63 use mpi_f08, only : mpi_min, mpi_max, mpi_sum, mpi_in_place, mpi_integer, &
64 mpi_allreduce
65 use utils, only : nonlinear_index
66 implicit none
67 private
68
70 real(kind=rp), public, parameter :: neko_eps = epsilon(1.0_rp)
71
73 real(kind=rp), public, parameter :: neko_m_ln2 = log(2.0_rp)
74
76 real(kind=rp), public, parameter :: pi = 4._rp*atan(1._rp)
77
78 interface abscmp
79 module procedure sabscmp, dabscmp, qabscmp
80 end interface abscmp
81
82 interface sort
83 module procedure sortrp, sorti4
84 end interface sort
85
86 interface swap
87 module procedure swapdp, swapi4
88 end interface swap
89
90 interface reord
91 module procedure reorddp, reordi4
92 end interface reord
93
94 interface flipv
95 module procedure flipvdp, flipvi4
96 end interface flipv
97
98 interface relcmp
99 module procedure srelcmp, drelcmp, qrelcmp
100 end interface relcmp
101
102 public :: abscmp, rzero, izero, row_zero, rone, copy, cmult, cadd, cfill, &
110 matinv39, &
115
116contains
117
119 pure function sabscmp(x, y, tol)
120 real(kind=sp), intent(in) :: x
121 real(kind=sp), intent(in) :: y
122 real(kind=sp), intent(in), optional :: tol
123 logical :: sabscmp
124
125 if (present(tol)) then
126 sabscmp = abs(x - y) .lt. tol
127 else
128 sabscmp = abs(x - y) .lt. neko_eps
129 end if
130
131 end function sabscmp
132
134 pure function dabscmp(x, y, tol)
135 real(kind=dp), intent(in) :: x
136 real(kind=dp), intent(in) :: y
137 real(kind=dp), intent(in), optional :: tol
138 logical :: dabscmp
139
140 if (present(tol)) then
141 dabscmp = abs(x - y) .lt. tol
142 else
143 dabscmp = abs(x - y) .lt. neko_eps
144 end if
145
146 end function dabscmp
147
149 pure function qabscmp(x, y, tol)
150 real(kind=qp), intent(in) :: x
151 real(kind=qp), intent(in) :: y
152 real(kind=qp), intent(in), optional :: tol
153 logical :: qabscmp
154
155 if (present(tol)) then
156 qabscmp = abs(x - y) .lt. tol
157 else
158 qabscmp = abs(x - y) .lt. neko_eps
159 end if
160
161 end function qabscmp
162
164 pure function srelcmp(x, y, eps)
165 real(kind=sp), intent(in) :: x
166 real(kind=sp), intent(in) :: y
167 real(kind=sp), intent(in), optional :: eps
168 logical :: srelcmp
169 if (present(eps)) then
170 srelcmp = abs(x - y) .le. eps*abs(y)
171 else
172 srelcmp = abs(x - y) .le. neko_eps*abs(y)
173 end if
174
175 end function srelcmp
176
178 pure function drelcmp(x, y, eps)
179 real(kind=dp), intent(in) :: x
180 real(kind=dp), intent(in) :: y
181 real(kind=dp), intent(in), optional :: eps
182 logical :: drelcmp
183 if (present(eps)) then
184 drelcmp = abs(x - y) .le. eps*abs(y)
185 else
186 drelcmp = abs(x - y) .le. neko_eps*abs(y)
187 end if
188
189 end function drelcmp
190
191
193 pure function qrelcmp(x, y, eps)
194 real(kind=qp), intent(in) :: x
195 real(kind=qp), intent(in) :: y
196 real(kind=qp), intent(in), optional :: eps
197 logical :: qrelcmp
198 if (present(eps)) then
199 qrelcmp = abs(x - y)/abs(y) .lt. eps
200 else
201 qrelcmp = abs(x - y)/abs(y) .lt. neko_eps
202 end if
203
204 end function qrelcmp
205
207 subroutine rzero(a, n)
208 integer, intent(in) :: n
209 real(kind=rp), dimension(n), intent(inout) :: a
210 integer :: i
211
212 do concurrent(i = 1:n)
213 a(i) = 0.0_rp
214 end do
215 end subroutine rzero
216
218 subroutine izero(a, n)
219 integer, intent(in) :: n
220 integer, dimension(n), intent(inout) :: a
221 integer :: i
222
223 do concurrent(i = 1:n)
224 a(i) = 0
225 end do
226 end subroutine izero
227
229 subroutine row_zero(a, m, n, e)
230 integer, intent(in) :: m, n, e
231 real(kind=rp), intent(inout) :: a(m,n)
232 integer :: j
233
234 do concurrent(j = 1:n)
235 a(e,j) = 0.0_rp
236 end do
237 end subroutine row_zero
238
240 subroutine rone(a, n)
241 integer, intent(in) :: n
242 real(kind=rp), dimension(n), intent(inout) :: a
243 integer :: i
244
245 do concurrent(i = 1:n)
246 a(i) = 1.0_rp
247 end do
248 end subroutine rone
249
251 subroutine copy(a, b, n)
252 integer, intent(in) :: n
253 real(kind=rp), dimension(n), intent(in) :: b
254 real(kind=rp), dimension(n), intent(inout) :: a
255 integer :: i
256
257 do concurrent(i = 1:n)
258 a(i) = b(i)
259 end do
260
261 end subroutine copy
262
270 subroutine masked_copy_0(a, b, mask, n, n_mask)
271 integer, intent(in) :: n, n_mask
272 real(kind=rp), dimension(n), intent(in) :: b
273 real(kind=rp), dimension(n), intent(inout) :: a
274 integer, dimension(0:n_mask) :: mask
275 integer :: i, j
276
277 do concurrent(i = 1:n_mask)
278 j = mask(i)
279 a(j) = b(j)
280 end do
281
282 end subroutine masked_copy_0
283
291 subroutine masked_copy(a, b, mask, n, n_mask)
292 integer, intent(in) :: n, n_mask
293 real(kind=rp), dimension(n), intent(in) :: b
294 real(kind=rp), dimension(n), intent(inout) :: a
295 integer, dimension(n_mask) :: mask
296 integer :: i, j
297
298 do concurrent(i = 1:n_mask)
299 j = mask(i)
300 a(j) = b(j)
301 end do
302
303 end subroutine masked_copy
304
314 subroutine masked_gather_copy_0(a, b, mask, n, n_mask)
315 integer, intent(in) :: n, n_mask
316 real(kind=rp), dimension(n), intent(in) :: b
317 real(kind=rp), dimension(n_mask), intent(inout) :: a
318 integer, dimension(0:n_mask) :: mask
319 integer :: i, j
320
321 do concurrent(i = 1:n_mask)
322 j = mask(i)
323 a(i) = b(j)
324 end do
325
326 end subroutine masked_gather_copy_0
327
337 subroutine face_masked_gather_copy_0(a, b, mask, facet, lx, ly, lz, n_mask)
338 integer, intent(in) :: lx, ly, lz, n_mask
339 real(kind=rp), dimension(n_mask), intent(inout) :: a
340 real(kind=rp), dimension(:, :, :, :), intent(in) :: b
341 integer, dimension(0:n_mask), intent(in) :: mask
342 integer, dimension(0:n_mask), intent(in) :: facet
343 integer :: l
344 integer :: idx(4)
345
346 do l = 1, n_mask
347 idx = nonlinear_index(mask(l), lx, ly, lz)
348
349 select case (facet(l))
350 case (1, 2)
351 a(l) = b(idx(2), idx(3), facet(l), idx(4))
352 case (3, 4)
353 a(l) = b(idx(1), idx(3), facet(l), idx(4))
354 case (5, 6)
355 a(l) = b(idx(1), idx(2), facet(l), idx(4))
356 end select
357 end do
358
359 end subroutine face_masked_gather_copy_0
360
370 subroutine masked_gather_copy(a, b, mask, n, n_mask)
371 integer, intent(in) :: n, n_mask
372 real(kind=rp), dimension(n), intent(in) :: b
373 real(kind=rp), dimension(n_mask), intent(inout) :: a
374 integer, dimension(n_mask) :: mask
375 integer :: i, j
376
377 do concurrent(i = 1:n_mask)
378 j = mask(i)
379 a(i) = b(j)
380 end do
381
382 end subroutine masked_gather_copy
383
393 subroutine masked_scatter_copy_0(a, b, mask, n, n_mask)
394 integer, intent(in) :: n, n_mask
395 real(kind=rp), dimension(n_mask), intent(in) :: b
396 real(kind=rp), dimension(n), intent(inout) :: a
397 integer, dimension(0:n_mask) :: mask
398 integer :: i, j
399
400 do concurrent(i = 1:n_mask)
401 j = mask(i)
402 a(j) = b(i)
403 end do
404
405 end subroutine masked_scatter_copy_0
406
416 subroutine masked_scatter_copy(a, b, mask, n, n_mask)
417 integer, intent(in) :: n, n_mask
418 real(kind=rp), dimension(n_mask), intent(in) :: b
419 real(kind=rp), dimension(n), intent(inout) :: a
420 integer, dimension(n_mask) :: mask
421 integer :: i, j
422
423 do concurrent(i = 1:n_mask)
424 j = mask(i)
425 a(j) = b(i)
426 end do
427
428 end subroutine masked_scatter_copy
429
432 subroutine cfill_mask(a, c, n, mask, n_mask)
433 integer, intent(in) :: n, n_mask
434 real(kind=rp), dimension(n), intent(inout) :: a
435 real(kind=rp), intent(in) :: c
436 integer, dimension(n_mask), intent(in) :: mask
437 integer :: i
438
439 do concurrent(i = 1:n_mask)
440 a(mask(i)) = c
441 end do
442
443 end subroutine cfill_mask
444
446 subroutine cmult(a, c, n)
447 integer, intent(in) :: n
448 real(kind=rp), dimension(n), intent(inout) :: a
449 real(kind=rp), intent(in) :: c
450 integer :: i
451
452 do concurrent(i = 1:n)
453 a(i) = c * a(i)
454 end do
455 end subroutine cmult
456
458 subroutine cmult2(a, b, c, n)
459 integer, intent(in) :: n
460 real(kind=rp), dimension(n), intent(inout) :: a
461 real(kind=rp), dimension(n), intent(in) :: b
462 real(kind=rp), intent(in) :: c
463 integer :: i
464
465 do concurrent(i = 1:n)
466 a(i) = c * b(i)
467 end do
468
469 end subroutine cmult2
470
472 subroutine cdiv(a, c, n)
473 integer, intent(in) :: n
474 real(kind=rp), dimension(n), intent(inout) :: a
475 real(kind=rp), intent(in) :: c
476 integer :: i
477
478 do concurrent(i = 1:n)
479 a(i) = c / a(i)
480 end do
481 end subroutine cdiv
482
484 subroutine cdiv2(a, b, c, n)
485 integer, intent(in) :: n
486 real(kind=rp), dimension(n), intent(inout) :: a
487 real(kind=rp), dimension(n), intent(in) :: b
488 real(kind=rp), intent(in) :: c
489 integer :: i
490
491 do concurrent(i = 1:n)
492 a(i) = c / b(i)
493 end do
494 end subroutine cdiv2
495
497 subroutine cadd(a, s, n)
498 integer, intent(in) :: n
499 real(kind=rp), dimension(n), intent(inout) :: a
500 real(kind=rp), intent(in) :: s
501 integer :: i
502
503 do concurrent(i = 1:n)
504 a(i) = a(i) + s
505 end do
506 end subroutine cadd
507
509 subroutine cadd2(a, b, s, n)
510 integer, intent(in) :: n
511 real(kind=rp), dimension(n), intent(inout) :: a
512 real(kind=rp), dimension(n), intent(in) :: b
513 real(kind=rp), intent(in) :: s
514 integer :: i
515
516 do concurrent(i = 1:n)
517 a(i) = b(i) + s
518 end do
519 end subroutine cadd2
520
522 subroutine cfill(a, c, n)
523 integer, intent(in) :: n
524 real(kind=rp), dimension(n), intent(inout) :: a
525 real(kind=rp), intent(in) :: c
526 integer :: i
527
528 do concurrent(i = 1:n)
529 a(i) = c
530 end do
531 end subroutine cfill
532
534 subroutine cwrap(a, min_val, max_val, n)
535 integer, intent(in) :: n
536 real(kind=rp), dimension(n), intent(inout) :: a
537 real(kind=rp), intent(in) :: min_val, max_val
538 integer :: i
539
540 if (n .lt. 1 .or. max_val .le. min_val) return
541
542 do concurrent(i = 1:n)
543 a(i) = modulo(a(i) - min_val, max_val - min_val) + min_val
544 end do
545 end subroutine cwrap
546
548 function glsum(a, n)
549 integer, intent(in) :: n
550 real(kind=rp), dimension(n) :: a
551 real(kind=rp) :: glsum
552 real(kind=xp) :: tmp
553 integer :: i, ierr
554 tmp = 0.0_rp
555 do i = 1, n
556 tmp = tmp + a(i)
557 end do
558 call mpi_allreduce(mpi_in_place, tmp, 1, &
559 mpi_extra_precision, mpi_sum, neko_comm, ierr)
560 glsum = tmp
561
562 end function glsum
563
565 function glmax(a, n)
566 integer, intent(in) :: n
567 real(kind=rp), dimension(n) :: a
568 real(kind=rp) :: tmp, glmax
569 integer :: i, ierr
570
571 tmp = -huge(0.0_rp)
572 do i = 1, n
573 tmp = max(tmp,a(i))
574 end do
575 call mpi_allreduce(tmp, glmax, 1, &
576 mpi_real_precision, mpi_max, neko_comm, ierr)
577 end function glmax
578
580 function glimax(a, n)
581 integer, intent(in) :: n
582 integer, dimension(n) :: a
583 integer :: tmp, glimax
584 integer :: i, ierr
585
586 tmp = -huge(0)
587 do i = 1, n
588 tmp = max(tmp,a(i))
589 end do
590 call mpi_allreduce(tmp, glimax, 1, &
591 mpi_integer, mpi_max, neko_comm, ierr)
592 end function glimax
593
595 function glmin(a, n)
596 integer, intent(in) :: n
597 real(kind=rp), dimension(n) :: a
598 real(kind=rp) :: tmp, glmin
599 integer :: i, ierr
600
601 tmp = huge(0.0_rp)
602 do i = 1, n
603 tmp = min(tmp,a(i))
604 end do
605 call mpi_allreduce(tmp, glmin, 1, &
606 mpi_real_precision, mpi_min, neko_comm, ierr)
607 end function glmin
608
610 function glimin(a, n)
611 integer, intent(in) :: n
612 integer, dimension(n) :: a
613 integer :: tmp, glimin
614 integer :: i, ierr
615
616 tmp = huge(0)
617 do i = 1, n
618 tmp = min(tmp,a(i))
619 end do
620 call mpi_allreduce(tmp, glimin, 1, &
621 mpi_integer, mpi_min, neko_comm, ierr)
622 end function glimin
623
624
625
626
628 subroutine chsign(a, n)
629 integer, intent(in) :: n
630 real(kind=rp), dimension(n), intent(inout) :: a
631 integer :: i
632
633 do concurrent(i = 1:n)
634 a(i) = -a(i)
635 end do
636
637 end subroutine chsign
638
640 function vlmax(vec,n) result(tmax)
641 integer :: n, i
642 real(kind=rp), intent(in) :: vec(n)
643 real(kind=rp) :: tmax
644 tmax = real(-99d20, rp)
645 do i = 1, n
646 tmax = max(tmax, vec(i))
647 end do
648 end function vlmax
649
651 function vlmin(vec,n) result(tmin)
652 integer, intent(in) :: n
653 real(kind=rp), intent(in) :: vec(n)
654 real(kind=rp) :: tmin
655 integer :: i
656 tmin = real(99.0e20, rp)
657 do i = 1, n
658 tmin = min(tmin, vec(i))
659 end do
660 end function vlmin
661
663 subroutine invcol1(a, n)
664 integer, intent(in) :: n
665 real(kind=rp), dimension(n), intent(inout) :: a
666 integer :: i
667
668 do concurrent(i = 1:n)
669 a(i) = 1.0_xp / real(a(i), xp)
670 end do
671
672 end subroutine invcol1
673
675 subroutine invcol3(a, b, c, n)
676 integer, intent(in) :: n
677 real(kind=rp), dimension(n), intent(inout) :: a
678 real(kind=rp), dimension(n), intent(in) :: b, c
679 integer :: i
680
681 do concurrent(i = 1:n)
682 a(i) = real(b(i), xp) / c(i)
683 end do
684
685 end subroutine invcol3
686
688 subroutine invers2(a, b, n)
689 integer, intent(in) :: n
690 real(kind=rp), dimension(n), intent(inout) :: a
691 real(kind=rp), dimension(n), intent(in) :: b
692 integer :: i
693
694 do concurrent(i = 1:n)
695 a(i) = 1.0_xp / real(b(i), xp)
696 end do
697
698 end subroutine invers2
699
702 subroutine vcross(u1, u2, u3, v1, v2, v3, w1, w2, w3, n)
703 integer, intent(in) :: n
704 real(kind=rp), dimension(n), intent(in) :: v1, v2, v3
705 real(kind=rp), dimension(n), intent(in) :: w1, w2, w3
706 real(kind=rp), dimension(n), intent(out) :: u1, u2, u3
707 integer :: i
708
709 do concurrent(i = 1:n)
710 u1(i) = v2(i)*w3(i) - v3(i)*w2(i)
711 u2(i) = v3(i)*w1(i) - v1(i)*w3(i)
712 u3(i) = v1(i)*w2(i) - v2(i)*w1(i)
713 end do
714
715 end subroutine vcross
716
719 subroutine vdot2(dot, u1, u2, v1, v2, n)
720 integer, intent(in) :: n
721 real(kind=rp), dimension(n), intent(in) :: u1, u2
722 real(kind=rp), dimension(n), intent(in) :: v1, v2
723 real(kind=rp), dimension(n), intent(out) :: dot
724 integer :: i
725 do concurrent(i = 1:n)
726 dot(i) = u1(i)*v1(i) + u2(i)*v2(i)
727 end do
728
729 end subroutine vdot2
730
733 subroutine vdot3(dot, u1, u2, u3, v1, v2, v3, n)
734 integer, intent(in) :: n
735 real(kind=rp), dimension(n), intent(in) :: u1, u2, u3
736 real(kind=rp), dimension(n), intent(in) :: v1, v2, v3
737 real(kind=rp), dimension(n), intent(out) :: dot
738 integer :: i
739
740 do concurrent(i = 1:n)
741 dot(i) = u1(i)*v1(i) + u2(i)*v2(i) + u3(i)*v3(i)
742 end do
743
744 end subroutine vdot3
745
747 function vlsc3(u, v, w, n) result(s)
748 integer, intent(in) :: n
749 real(kind=rp), dimension(n), intent(in) :: u, v, w
750 real(kind=rp) :: s
751 integer :: i
752
753 s = 0.0_rp
754 do i = 1, n
755 s = s + u(i)*v(i)*w(i)
756 end do
757
758 end function vlsc3
759
761 function vlsc2(u, v, n) result(s)
762 integer, intent(in) :: n
763 real(kind=rp), dimension(n), intent(in) :: u, v
764 real(kind=rp) :: s
765 integer :: i
766
767 s = 0.0_rp
768 do i = 1, n
769 s = s + u(i)*v(i)
770 end do
771
772 end function vlsc2
773
775 subroutine add2(a, b, n)
776 integer, intent(in) :: n
777 real(kind=rp), dimension(n), intent(inout) :: a
778 real(kind=rp), dimension(n), intent(in) :: b
779 integer :: i
780
781 do concurrent(i = 1:n)
782 a(i) = a(i) + b(i)
783 end do
784
785 end subroutine add2
786
788 subroutine add3(a, b, c, n)
789 integer, intent(in) :: n
790 real(kind=rp), dimension(n), intent(inout) :: a
791 real(kind=rp), dimension(n), intent(in) :: b
792 real(kind=rp), dimension(n), intent(in) :: c
793 integer :: i
794
795 do concurrent(i = 1:n)
796 a(i) = b(i) + c(i)
797 end do
798
799 end subroutine add3
800
802 subroutine add4(a, b, c, d, n)
803 integer, intent(in) :: n
804 real(kind=rp), dimension(n), intent(out) :: a
805 real(kind=rp), dimension(n), intent(in) :: d
806 real(kind=rp), dimension(n), intent(in) :: c
807 real(kind=rp), dimension(n), intent(in) :: b
808 integer :: i
809
810 do concurrent(i = 1:n)
811 a(i) = b(i) + c(i) + d(i)
812 end do
813
814 end subroutine add4
815
817 subroutine sub2(a, b, n)
818 integer, intent(in) :: n
819 real(kind=rp), dimension(n), intent(inout) :: a
820 real(kind=rp), dimension(n), intent(in) :: b
821 integer :: i
822
823 do concurrent(i = 1:n)
824 a(i) = a(i) - b(i)
825 end do
826
827 end subroutine sub2
828
830 subroutine sub3(a, b, c, n)
831 integer, intent(in) :: n
832 real(kind=rp), dimension(n), intent(inout) :: a
833 real(kind=rp), dimension(n), intent(in) :: b
834 real(kind=rp), dimension(n), intent(in) :: c
835 integer :: i
836
837 do concurrent(i = 1:n)
838 a(i) = b(i) - c(i)
839 end do
840
841 end subroutine sub3
842
843
846 subroutine add2s1(a, b, c1, n)
847 integer, intent(in) :: n
848 real(kind=rp), dimension(n), intent(inout) :: a
849 real(kind=rp), dimension(n), intent(in) :: b
850 real(kind=rp), intent(in) :: c1
851 integer :: i
852
853 do concurrent(i = 1:n)
854 a(i) = c1 * a(i) + b(i)
855 end do
856
857 end subroutine add2s1
858
861 subroutine add2s2(a, b, c1, n)
862 integer, intent(in) :: n
863 real(kind=rp), dimension(n), intent(inout) :: a
864 real(kind=rp), dimension(n), intent(in) :: b
865 real(kind=rp), intent(in) :: c1
866 integer :: i
867
868 do concurrent(i = 1:n)
869 a(i) = a(i) + c1 * b(i)
870 end do
871
872 end subroutine add2s2
873
875 subroutine addsqr2s2(a, b, c1, n)
876 integer, intent(in) :: n
877 real(kind=rp), dimension(n), intent(inout) :: a
878 real(kind=rp), dimension(n), intent(in) :: b
879 real(kind=rp), intent(in) :: c1
880 integer :: i
881
882 do concurrent(i = 1:n)
883 a(i) = a(i) + c1 * ( b(i) * b(i) )
884 end do
885
886 end subroutine addsqr2s2
887
889 subroutine invcol2(a, b, n)
890 integer, intent(in) :: n
891 real(kind=rp), dimension(n), intent(inout) :: a
892 real(kind=rp), dimension(n), intent(in) :: b
893 integer :: i
894
895 do concurrent(i = 1:n)
896 a(i) = real(a(i), xp) / b(i)
897 end do
898
899 end subroutine invcol2
900
901
903 subroutine col2(a, b, n)
904 integer, intent(in) :: n
905 real(kind=rp), dimension(n), intent(inout) :: a
906 real(kind=rp), dimension(n), intent(in) :: b
907 integer :: i
908
909 do concurrent(i = 1:n)
910 a(i) = a(i) * b(i)
911 end do
912
913 end subroutine col2
914
916 subroutine col3(a, b, c, n)
917 integer, intent(in) :: n
918 real(kind=rp), dimension(n), intent(inout) :: a
919 real(kind=rp), dimension(n), intent(in) :: b
920 real(kind=rp), dimension(n), intent(in) :: c
921 integer :: i
922
923 do concurrent(i = 1:n)
924 a(i) = b(i) * c(i)
925 end do
926
927 end subroutine col3
928
930 subroutine subcol3(a, b, c, n)
931 integer, intent(in) :: n
932 real(kind=rp), dimension(n), intent(inout) :: a
933 real(kind=rp), dimension(n), intent(in) :: b
934 real(kind=rp), dimension(n), intent(in) :: c
935 integer :: i
936
937 do concurrent(i = 1:n)
938 a(i) = a(i) - b(i) * c(i)
939 end do
940
941 end subroutine subcol3
942
944 subroutine add3s2(a, b, c, c1, c2 ,n)
945 integer, intent(in) :: n
946 real(kind=rp), dimension(n), intent(inout) :: a
947 real(kind=rp), dimension(n), intent(in) :: b
948 real(kind=rp), dimension(n), intent(in) :: c
949 real(kind=rp), intent(in) :: c1, c2
950 integer :: i
951
952 do concurrent(i = 1:n)
953 a(i) = c1 * b(i) + c2 * c(i)
954 end do
955
956 end subroutine add3s2
957
959 subroutine add4s3(a, b, c, d, c1, c2, c3, n)
960 integer, intent(in) :: n
961 real(kind=rp), dimension(n), intent(inout) :: a
962 real(kind=rp), dimension(n), intent(in) :: b
963 real(kind=rp), dimension(n), intent(in) :: c
964 real(kind=rp), dimension(n), intent(in) :: d
965 real(kind=rp), intent(in) :: c1, c2, c3
966 integer :: i
967
968 do concurrent(i = 1:n)
969 a(i) = c1 * b(i) + c2 * c(i) + c3 * d(i)
970 end do
971
972 end subroutine add4s3
973
975 subroutine add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
976 integer, intent(in) :: n
977 real(kind=rp), dimension(n), intent(inout) :: a
978 real(kind=rp), dimension(n), intent(in) :: b
979 real(kind=rp), dimension(n), intent(in) :: c
980 real(kind=rp), dimension(n), intent(in) :: d
981 real(kind=rp), dimension(n), intent(in) :: e
982 real(kind=rp), intent(in) :: c1, c2, c3, c4
983 integer :: i
984
985 do concurrent(i = 1:n)
986 a(i) = a(i) + c1 * b(i) + c2 * c(i) + c3 * d(i) + c4 * e(i)
987 end do
988
989 end subroutine add5s4
990
992 subroutine subcol4(a, b, c, d, n)
993 integer, intent(in) :: n
994 real(kind=rp), dimension(n), intent(inout) :: a
995 real(kind=rp), dimension(n), intent(in) :: b
996 real(kind=rp), dimension(n), intent(in) :: c
997 real(kind=rp), dimension(n), intent(in) :: d
998 integer :: i
999
1000 do concurrent(i = 1:n)
1001 a(i) = a(i) - b(i) * c(i) * d(i)
1002 end do
1003
1004 end subroutine subcol4
1005
1007 subroutine addcol3(a, b, c, n)
1008 integer, intent(in) :: n
1009 real(kind=rp), dimension(n), intent(inout) :: a
1010 real(kind=rp), dimension(n), intent(in) :: b
1011 real(kind=rp), dimension(n), intent(in) :: c
1012 integer :: i
1013
1014 do concurrent(i = 1:n)
1015 a(i) = a(i) + b(i) * c(i)
1016 end do
1017
1018 end subroutine addcol3
1019
1021 subroutine addcol4(a, b, c, d, n)
1022 integer, intent(in) :: n
1023 real(kind=rp), dimension(n), intent(inout) :: a
1024 real(kind=rp), dimension(n), intent(in) :: b
1025 real(kind=rp), dimension(n), intent(in) :: c
1026 real(kind=rp), dimension(n), intent(in) :: d
1027 integer :: i
1028
1029 do concurrent(i = 1:n)
1030 a(i) = a(i) + b(i) * c(i) * d(i)
1031 end do
1032
1033 end subroutine addcol4
1034
1036 subroutine addcol3s2(a, b, c, s, n)
1037 integer, intent(in) :: n
1038 real(kind=rp), dimension(n), intent(inout) :: a
1039 real(kind=rp), dimension(n), intent(in) :: b
1040 real(kind=rp), dimension(n), intent(in) :: c
1041 real(kind=rp), intent(in) :: s
1042 integer :: i
1043
1044 do concurrent(i = 1:n)
1045 a(i) = a(i) + s * b(i) * c(i)
1046 end do
1047
1048 end subroutine addcol3s2
1049
1051 subroutine ascol5(a, b, c, d, e, n)
1052 integer, intent(in) :: n
1053 real(kind=rp), dimension(n), intent(inout) :: a
1054 real(kind=rp), dimension(n), intent(in) :: b
1055 real(kind=rp), dimension(n), intent(in) :: c
1056 real(kind=rp), dimension(n), intent(in) :: d
1057 real(kind=rp), dimension(n), intent(in) :: e
1058 integer :: i
1059
1060 do concurrent(i = 1:n)
1061 a(i) = b(i)*c(i) - d(i)*e(i)
1062 end do
1063
1064 end subroutine ascol5
1065
1067 subroutine p_update(a, b, c, c1, c2, n)
1068 integer, intent(in) :: n
1069 real(kind=rp), dimension(n), intent(inout) :: a
1070 real(kind=rp), dimension(n), intent(in) :: b
1071 real(kind=rp), dimension(n), intent(in) :: c
1072 real(kind=rp), intent(in) :: c1, c2
1073 integer :: i
1074
1075 do concurrent(i = 1:n)
1076 a(i) = b(i) + c1*(a(i)-c2*c(i))
1077 end do
1078
1079 end subroutine p_update
1080
1082 subroutine x_update(a, b, c, c1, c2, n)
1083 integer, intent(in) :: n
1084 real(kind=rp), dimension(n), intent(inout) :: a
1085 real(kind=rp), dimension(n), intent(in) :: b
1086 real(kind=rp), dimension(n), intent(in) :: c
1087 real(kind=rp), intent(in) :: c1, c2
1088 integer :: i
1089
1090 do concurrent(i = 1:n)
1091 a(i) = a(i) + c1*b(i)+c2*c(i)
1092 end do
1093
1094 end subroutine x_update
1095
1097 function glsc2(a, b, n)
1098 integer, intent(in) :: n
1099 real(kind=rp), dimension(n), intent(in) :: a
1100 real(kind=rp), dimension(n), intent(in) :: b
1101 real(kind=rp) :: glsc2
1102 real(kind=xp) :: tmp
1103 integer :: i, ierr
1104
1105 tmp = 0.0_xp
1106 do i = 1, n
1107 tmp = tmp + a(i) * b(i)
1108 end do
1109
1110 call mpi_allreduce(mpi_in_place, tmp, 1, &
1111 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1112 glsc2 = tmp
1113 end function glsc2
1114
1116 function glsc3(a, b, c, n)
1117 integer, intent(in) :: n
1118 real(kind=rp), dimension(n), intent(in) :: a
1119 real(kind=rp), dimension(n), intent(in) :: b
1120 real(kind=rp), dimension(n), intent(in) :: c
1121 real(kind=rp) :: glsc3
1122 real(kind=xp) :: tmp
1123 integer :: i, ierr
1124
1125 tmp = 0.0_xp
1126 do i = 1, n
1127 tmp = tmp + a(i) * b(i) * c(i)
1128 end do
1129
1130 call mpi_allreduce(mpi_in_place, tmp, 1, &
1131 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1132 glsc3 = tmp
1133
1134 end function glsc3
1135 function glsc4(a, b, c, d, n)
1136 integer, intent(in) :: n
1137 real(kind=rp), dimension(n), intent(in) :: a
1138 real(kind=rp), dimension(n), intent(in) :: b
1139 real(kind=rp), dimension(n), intent(in) :: c
1140 real(kind=rp), dimension(n), intent(in) :: d
1141 real(kind=rp) :: glsc4
1142 real(kind=xp) :: tmp
1143 integer :: i, ierr
1144
1145 tmp = 0.0_xp
1146 do i = 1, n
1147 tmp = tmp + a(i) * b(i) * c(i) * d(i)
1148 end do
1149
1150 call mpi_allreduce(mpi_in_place, tmp, 1, &
1151 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1152 glsc4 = tmp
1153
1154 end function glsc4
1155
1158 function glsubnorm(a, b, n)
1159 integer, intent(in) :: n
1160 real(kind=rp), dimension(n), intent(in) :: a
1161 real(kind=rp), dimension(n), intent(in) :: b
1162 real(kind=rp) :: glsubnorm
1163 real(kind=xp) :: tmp
1164 integer :: i, ierr
1165
1166 tmp = 0.0_xp
1167 do i = 1, n
1168 tmp = tmp + (a(i) - b(i))**2
1169 end do
1170
1171 call mpi_allreduce(mpi_in_place, tmp, 1, &
1172 mpi_extra_precision, mpi_sum, neko_comm, ierr)
1173 glsubnorm = sqrt(tmp)
1174
1175 end function glsubnorm
1176
1182 subroutine sortrp(a, ind, n)
1183 integer, intent(in) :: n
1184 real(kind=rp), intent(inout) :: a(n)
1185 integer, intent(out) :: ind(n)
1186 real(kind=rp) :: aa
1187 integer :: j, ir, i, ii, l
1188
1189 do j = 1, n
1190 ind(j) = j
1191 end do
1192
1193 if (n .le. 1) return
1194
1195
1196 l = n/2+1
1197 ir = n
1198 do while (.true.)
1199 if (l .gt. 1) then
1200 l = l-1
1201 aa = a(l)
1202 ii = ind(l)
1203 else
1204 aa = a(ir)
1205 ii = ind(ir)
1206 a(ir) = a(1)
1207 ind(ir) = ind(1)
1208 ir = ir - 1
1209 if (ir .eq. 1) then
1210 a(1) = aa
1211 ind(1) = ii
1212 return
1213 end if
1214 end if
1215 i = l
1216 j = l+l
1217 do while (j .le. ir)
1218 if (j .lt. ir) then
1219 if ( a(j) .lt. a(j+1) ) j = j + 1
1220 end if
1221 if (aa .lt. a(j)) then
1222 a(i) = a(j)
1223 ind(i) = ind(j)
1224 i = j
1225 j = j+j
1226 else
1227 j = ir+1
1228 end if
1229 end do
1230 a(i) = aa
1231 ind(i) = ii
1232 end do
1233 end subroutine sortrp
1234
1240 subroutine sorti4(a, ind, n)
1241 integer, intent(in) :: n
1242 integer(i4), intent(inout) :: a(n)
1243 integer, intent(out) :: ind(n)
1244 integer(i4) :: aa
1245 integer :: j, ir, i, ii, l
1246
1247 do j = 1, n
1248 ind(j) = j
1249 end do
1250
1251 if (n .le. 1) return
1252
1253 l = n/2+1
1254 ir = n
1255 do while (.true.)
1256 if (l .gt. 1) then
1257 l = l - 1
1258 aa = a(l)
1259 ii = ind(l)
1260 else
1261 aa = a(ir)
1262 ii = ind(ir)
1263 a(ir) = a( 1)
1264 ind(ir) = ind( 1)
1265 ir = ir - 1
1266 if (ir .eq. 1) then
1267 a(1) = aa
1268 ind(1) = ii
1269 return
1270 end if
1271 end if
1272 i = l
1273 j = l + l
1274 do while (j .le. ir)
1275 if (j .lt. ir) then
1276 if ( a(j) .lt. a(j + 1) ) j = j + 1
1277 end if
1278 if (aa .lt. a(j)) then
1279 a(i) = a(j)
1280 ind(i) = ind(j)
1281 i = j
1282 j = j + j
1283 else
1284 j = ir + 1
1285 end if
1286 end do
1287 a(i) = aa
1288 ind(i) = ii
1289 end do
1290 end subroutine sorti4
1291
1296 subroutine swapdp(b, ind, n)
1297 integer, intent(in) :: n
1298 real(kind=rp), intent(inout) :: b(n)
1299 integer, intent(in) :: ind(n)
1300 real(kind=rp) :: temp(n)
1301 integer :: i, jj
1302
1303 do i = 1, n
1304 temp(i) = b(i)
1305 end do
1306 do i = 1, n
1307 jj = ind(i)
1308 b(i) = temp(jj)
1309 end do
1310 end subroutine swapdp
1311
1316 subroutine swapi4(b, ind, n)
1317 integer, intent(in) :: n
1318 integer(i4), intent(inout) :: b(n)
1319 integer, intent(in) :: ind(n)
1320 integer(i4) :: temp(n)
1321 integer :: i, jj
1322
1323 do i = 1, n
1324 temp(i) = b(i)
1325 end do
1326 do i = 1, n
1327 jj = ind(i)
1328 b(i) = temp(jj)
1329 end do
1330 end subroutine swapi4
1331
1336 subroutine reorddp(b, ind, n)
1337 integer, intent(in) :: n
1338 real(kind=rp), intent(inout) :: b(n)
1339 integer, intent(in) :: ind(n)
1340 real(kind=rp) :: temp(n)
1341 integer :: i, jj
1342
1343 do i = 1, n
1344 temp(i) = b(i)
1345 end do
1346 do i = 1, n
1347 jj = ind(i)
1348 b(jj) = temp(i)
1349 end do
1350 end subroutine reorddp
1351
1356 subroutine reordi4(b, ind, n)
1357 integer, intent(in) :: n
1358 integer(i4), intent(inout) :: b(n)
1359 integer, intent(in) :: ind(n)
1360 integer(i4) :: temp(n)
1361 integer :: i, jj
1362
1363 do i = 1, n
1364 temp(i) = b(i)
1365 end do
1366 do i = 1, n
1367 jj = ind(i)
1368 b(jj) = temp(i)
1369 end do
1370 end subroutine reordi4
1371
1376 subroutine flipvdp(b, ind, n)
1377 integer, intent(in) :: n
1378 real(kind=rp), intent(inout) :: b(n)
1379 integer, intent(inout) :: ind(n)
1380 real(kind=rp) :: temp(n)
1381 integer :: tempind(n)
1382 integer :: i, jj
1383
1384 do i = 1, n
1385 jj = n+1-i
1386 temp(jj) = b(i)
1387 tempind(jj) = ind(i)
1388 end do
1389 do i = 1,n
1390 b(i) = temp(i)
1391 ind(i) = tempind(i)
1392 end do
1393 end subroutine flipvdp
1394
1399 subroutine flipvi4(b, ind, n)
1400 integer, intent(in) :: n
1401 integer(i4), intent(inout) :: b(n)
1402 integer, intent(inout) :: ind(n)
1403 integer(i4) :: temp(n)
1404 integer :: tempind(n)
1405 integer :: i, jj
1406
1407 do i = 1, n
1408 jj = n+1-i
1409 temp(jj) = b(i)
1410 tempind(jj) = ind(i)
1411 end do
1412 do i = 1,n
1413 b(i) = temp(i)
1414 ind(i) = tempind(i)
1415 end do
1416 end subroutine flipvi4
1417
1421 subroutine absval(a, n)
1422 integer, intent(in) :: n
1423 real(kind=rp), dimension(n), intent(inout) :: a
1424 integer :: i
1425 do i = 1, n
1426 a(i) = abs(a(i))
1427 end do
1428 end subroutine absval
1429
1430 ! ========================================================================== !
1431 ! Point-wise operations
1432
1434 subroutine pwmax2(a, b, n)
1435 integer, intent(in) :: n
1436 real(kind=rp), dimension(n), intent(inout) :: a
1437 real(kind=rp), dimension(n), intent(in) :: b
1438 integer :: i
1439
1440 do i = 1, n
1441 a(i) = max(a(i), b(i))
1442 end do
1443 end subroutine pwmax2
1444
1446 subroutine pwmax3(a, b, c, n)
1447 integer, intent(in) :: n
1448 real(kind=rp), dimension(n), intent(inout) :: a
1449 real(kind=rp), dimension(n), intent(in) :: b, c
1450 integer :: i
1451
1452 do i = 1, n
1453 a(i) = max(b(i), c(i))
1454 end do
1455 end subroutine pwmax3
1456
1458 subroutine cpwmax2(a, b, n)
1459 integer, intent(in) :: n
1460 real(kind=rp), dimension(n), intent(inout) :: a
1461 real(kind=rp), intent(in) :: b
1462 integer :: i
1463
1464 do i = 1, n
1465 a(i) = max(a(i), b)
1466 end do
1467 end subroutine cpwmax2
1468
1470 subroutine cpwmax3(a, b, c, n)
1471 integer, intent(in) :: n
1472 real(kind=rp), dimension(n), intent(inout) :: a
1473 real(kind=rp), dimension(n), intent(in) :: b
1474 real(kind=rp), intent(in) :: c
1475 integer :: i
1476
1477 do i = 1, n
1478 a(i) = max(b(i), c)
1479 end do
1480 end subroutine cpwmax3
1481
1483 subroutine pwmin2(a, b, n)
1484 integer, intent(in) :: n
1485 real(kind=rp), dimension(n), intent(inout) :: a
1486 real(kind=rp), dimension(n), intent(in) :: b
1487 integer :: i
1488
1489 do i = 1, n
1490 a(i) = min(a(i), b(i))
1491 end do
1492 end subroutine pwmin2
1493
1495 subroutine pwmin3(a, b, c, n)
1496 integer, intent(in) :: n
1497 real(kind=rp), dimension(n), intent(inout) :: a
1498 real(kind=rp), dimension(n), intent(in) :: b, c
1499 integer :: i
1500
1501 do i = 1, n
1502 a(i) = min(b(i), c(i))
1503 end do
1504 end subroutine pwmin3
1505
1507 subroutine cpwmin2(a, b, n)
1508 integer, intent(in) :: n
1509 real(kind=rp), dimension(n), intent(inout) :: a
1510 real(kind=rp), intent(in) :: b
1511 integer :: i
1512
1513 do i = 1, n
1514 a(i) = min(a(i), b)
1515 end do
1516 end subroutine cpwmin2
1517
1519 subroutine cpwmin3(a, b, c, n)
1520 integer, intent(in) :: n
1521 real(kind=rp), dimension(n), intent(inout) :: a
1522 real(kind=rp), dimension(n), intent(in) :: b
1523 real(kind=rp), intent(in) :: c
1524 integer :: i
1525
1526 do i = 1, n
1527 a(i) = min(b(i), c)
1528 end do
1529 end subroutine cpwmin3
1530
1531 ! M33INV and M44INV by David G. Simpson pure function version from
1532 ! https://fortranwiki.org/fortran/show/Matrix+inversion
1533 ! Invert 3x3 matrix
1534 function matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33) &
1535 result(b)
1536 real(kind=rp), intent(in) :: a11, a12, a13, a21, a22, a23, a31, a32, a33
1537 real(xp) :: a(3,3) !! Matrix
1538 real(rp) :: b(3,3) !! Inverse matrix
1539 a(1,1) = a11
1540 a(1,2) = a12
1541 a(1,3) = a13
1542 a(2,1) = a21
1543 a(2,2) = a22
1544 a(2,3) = a23
1545 a(3,1) = a31
1546 a(3,2) = a32
1547 a(3,3) = a33
1548 b = matinv3(a)
1549 end function matinv39
1550
1555 function matinv3(A) result(B)
1556 !! Performs a direct calculation of the inverse of a 3×3 matrix.
1557 real(kind=xp), intent(in) :: a(3,3) !! Matrix
1558 real(kind=xp) :: b(3,3) !! Inverse matrix
1559 real(kind=xp) :: detinv
1560
1561 ! Calculate the inverse determinant of the matrix
1562 ! first index x,y,z, second r, s, t
1563 detinv = 1.0_xp / real(a(1,1)*a(2,2)*a(3,3) - a(1,1)*a(2,3)*a(3,2) &
1564 - a(1,2)*a(2,1)*a(3,3) + a(1,2)*a(2,3)*a(3,1)&
1565 + a(1,3)*a(2,1)*a(3,2) - a(1,3)*a(2,2)*a(3,1), xp)
1566 ! Calculate the inverse of the matrix
1567 ! first index r, s, t, second x, y, z
1568 b(1,1) = +detinv * (a(2,2)*a(3,3) - a(2,3)*a(3,2))
1569 b(2,1) = -detinv * (a(2,1)*a(3,3) - a(2,3)*a(3,1))
1570 b(3,1) = +detinv * (a(2,1)*a(3,2) - a(2,2)*a(3,1))
1571 b(1,2) = -detinv * (a(1,2)*a(3,3) - a(1,3)*a(3,2))
1572 b(2,2) = +detinv * (a(1,1)*a(3,3) - a(1,3)*a(3,1))
1573 b(3,2) = -detinv * (a(1,1)*a(3,2) - a(1,2)*a(3,1))
1574 b(1,3) = +detinv * (a(1,2)*a(2,3) - a(1,3)*a(2,2))
1575 b(2,3) = -detinv * (a(1,1)*a(2,3) - a(1,3)*a(2,1))
1576 b(3,3) = +detinv * (a(1,1)*a(2,2) - a(1,2)*a(2,1))
1577 end function matinv3
1578
1581 function math_stepf(x) result(val)
1582 real(kind=rp), intent(in) :: x
1583 real(kind=rp) :: val
1584 real(kind=rp), parameter :: xdmin = 0.0001_rp
1585 real(kind=rp), parameter :: xdmax = 0.9999_rp
1586 real(kind=rp) :: g
1587
1588 if (x <= xdmin) then
1589 ! Below the lower bound, the function is 0
1590 val = 0.0_rp
1591 else if (x >= xdmax) then
1592 ! Above the upper bound, the function is 1
1593 val = 1.0_rp
1594 else
1595 ! g(x) = 1/(x-1) + 1/x
1596 g = (1.0_rp / (x - 1.0_rp)) + (1.0_rp / x)
1597
1598 ! The sigmoid: S(x) = 1 / (1 + exp(g))
1599 val = 1.0_rp / (1.0_rp + exp(g))
1600 end if
1601 end function math_stepf
1602
1604 function math_dstepf(x) result(val)
1605 real(kind=rp), intent(in) :: x
1606 real(kind=rp) :: val
1607 real(kind=rp), parameter :: xdmin = 0.0001_rp
1608 real(kind=rp), parameter :: xdmax = 0.9999_rp
1609 real(kind=rp) :: arg, g, dg, s_val
1610
1611 if (x <= xdmin .or. x >= xdmax) then
1612 val = 0.0_rp
1613 else
1614 ! The step function is S(x) = 1 / (1 + exp(g(x)))
1615 ! where g(x) = 1/(x-1) + 1/x
1616 ! S'(x) = -S(x) * (1 - S(x)) * g'(x)
1617
1618 g = (1.0_rp / (x - 1.0_rp)) + (1.0_rp / x)
1619
1620 ! Derivative of g(x)
1621 dg = -(1.0_rp / ((x - 1.0_rp)**2)) - (1.0_rp / (x**2))
1622
1623 ! Recompute S(x) locally
1624 s_val = 1.0_rp / (1.0_rp + exp(g))
1625
1626 val = -s_val * (1.0_rp - s_val) * dg
1627 end if
1628 end function math_dstepf
1629
1630end module math
__inline__ __device__ void nonlinear_index(const int idx, const int lx, int *index)
Definition bc_utils.h:44
double real
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:52
Object for handling masks in Neko.
Definition mask.f90:34
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:447
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:459
subroutine, public row_zero(a, m, n, e)
Sets row e to 0 in matrix a.
Definition math.f90:230
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:890
real(kind=rp) function, public vlsc2(u, v, n)
Compute multiplication sum .
Definition math.f90:762
pure logical function, public dabscmp(x, y, tol)
Return double precision absolute comparison .
Definition math.f90:135
real(kind=rp), parameter, public pi
Definition math.f90:76
pure logical function qabscmp(x, y, tol)
Return double precision absolute comparison .
Definition math.f90:150
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1117
subroutine, public ascol5(a, b, c, d, e, n)
Returns .
Definition math.f90:1052
subroutine, public addcol3s2(a, b, c, s, n)
Returns .
Definition math.f90:1037
subroutine, public masked_scatter_copy(a, b, mask, n, n_mask)
Scatter a contigous vector to masked positions in a target array .
Definition math.f90:417
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:689
subroutine, public cadd2(a, b, s, n)
Add a scalar to vector .
Definition math.f90:510
subroutine, public face_masked_gather_copy_0(a, b, mask, facet, lx, ly, lz, n_mask)
Gather values from a face-local SEM field to a reduced contiguous vector.
Definition math.f90:338
real(rp) function, dimension(3, 3), public matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33)
Definition math.f90:1536
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition math.f90:498
subroutine, public masked_copy(a, b, mask, n, n_mask)
Copy a masked vector .
Definition math.f90:292
subroutine reorddp(b, ind, n)
reorder double precision array - inverse of swap
Definition math.f90:1337
subroutine, public addsqr2s2(a, b, c1, n)
Returns .
Definition math.f90:876
subroutine, public cwrap(a, min_val, max_val, n)
Wrap value around a range [min, max)
Definition math.f90:535
real(kind=rp) function, public glsc4(a, b, c, d, n)
Definition math.f90:1136
subroutine, public cdiv2(a, b, c, n)
Division of constant c by elements of a .
Definition math.f90:485
real(kind=rp) function, public math_stepf(x)
Smooth step function S(x) Returns 0 for x <= 0, 1 for x >= 1, and smooth transition in between.
Definition math.f90:1582
subroutine swapdp(b, ind, n)
sort double precision array acording to ind vector
Definition math.f90:1297
subroutine flipvi4(b, ind, n)
Flip single integer vector b and ind.
Definition math.f90:1400
subroutine, public add2s1(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
Definition math.f90:847
subroutine, public cpwmin2(a, b, n)
Point-wise minimum of scalar and vector .
Definition math.f90:1508
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1098
subroutine, public masked_scatter_copy_0(a, b, mask, n, n_mask)
Scatter a contigous vector to masked positions in a target array .
Definition math.f90:394
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:931
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:241
subroutine flipvdp(b, ind, n)
Flip double precision vector b and ind.
Definition math.f90:1377
subroutine, public cpwmin3(a, b, c, n)
Point-wise minimum of scalar and vector .
Definition math.f90:1520
subroutine, public pwmax3(a, b, c, n)
Point-wise maximum of two vectors .
Definition math.f90:1447
subroutine, public masked_gather_copy(a, b, mask, n, n_mask)
Gather a masked vector to reduced contigous vector .
Definition math.f90:371
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:1083
subroutine, public add3(a, b, c, n)
Vector addition .
Definition math.f90:789
subroutine swapi4(b, ind, n)
sort single integer array acording to ind vector
Definition math.f90:1317
integer function, public glimin(a, n)
Min of an integer vector of length n.
Definition math.f90:611
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:549
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:831
subroutine, public addcol4(a, b, c, d, n)
Returns .
Definition math.f90:1022
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:776
subroutine, public cfill(a, c, n)
Set all elements to a constant c .
Definition math.f90:523
subroutine, public absval(a, n)
Take the absolute value of an array.
Definition math.f90:1422
subroutine, public invcol3(a, b, c, n)
Invert a vector .
Definition math.f90:676
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:945
real(kind=xp) function, dimension(3, 3), public matinv3(a)
Performs a direct calculation of the inverse of a 3×3 matrix. M33INV and M44INV by David G....
Definition math.f90:1556
subroutine, public pwmax2(a, b, n)
Point-wise maximum of two vectors .
Definition math.f90:1435
subroutine, public pwmin2(a, b, n)
Point-wise minimum of two vectors .
Definition math.f90:1484
subroutine, public masked_gather_copy_0(a, b, mask, n, n_mask)
Gather a masked vector to reduced contigous vector .
Definition math.f90:315
subroutine, public subcol4(a, b, c, d, n)
Returns .
Definition math.f90:993
subroutine sorti4(a, ind, n)
Heap Sort for single integer arrays.
Definition math.f90:1241
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:1008
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:664
subroutine, public cdiv(a, c, n)
Division of constant c by elements of a .
Definition math.f90:473
real(kind=rp), parameter, public neko_m_ln2
Definition math.f90:73
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:629
subroutine, public cpwmax3(a, b, c, n)
Point-wise maximum of scalar and vector .
Definition math.f90:1471
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:904
subroutine, public izero(a, n)
Zero an integer vector.
Definition math.f90:219
real(kind=rp) function, public glmax(a, n)
Max of a vector of length n.
Definition math.f90:566
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:252
subroutine, public add4s3(a, b, c, d, c1, c2, c3, n)
Returns .
Definition math.f90:960
subroutine, public add4(a, b, c, d, n)
Vector addition .
Definition math.f90:803
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
Definition math.f90:917
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns .
Definition math.f90:976
real(kind=rp) function, public math_dstepf(x)
Derivative of math_stepf with respect to x: d(stepf)/dx.
Definition math.f90:1605
pure logical function drelcmp(x, y, eps)
Return double precision relative comparison .
Definition math.f90:179
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:70
subroutine, public vdot3(dot, u1, u2, u3, v1, v2, v3, n)
Compute a dot product (3-d version) assuming vector components etc.
Definition math.f90:734
pure logical function, public sabscmp(x, y, tol)
Return single precision absolute comparison .
Definition math.f90:120
pure logical function qrelcmp(x, y, eps)
Return quad precision relative comparison .
Definition math.f90:194
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:208
real(kind=rp) function, public glsubnorm(a, b, n)
Returns the norm of the difference of two vectors .
Definition math.f90:1159
subroutine, public vdot2(dot, u1, u2, v1, v2, n)
Compute a dot product (2-d version) assuming vector components etc.
Definition math.f90:720
subroutine, public cpwmax2(a, b, n)
Point-wise maximum of scalar and vector .
Definition math.f90:1459
real(kind=rp) function, public vlmin(vec, n)
minimun value of a vector of length n
Definition math.f90:652
subroutine, public cfill_mask(a, c, n, mask, n_mask)
Fill a constant to a masked vector. .
Definition math.f90:433
real(kind=rp) function, public vlmax(vec, n)
maximum value of a vector of length n
Definition math.f90:641
integer function, public glimax(a, n)
Max of an integer vector of length n.
Definition math.f90:581
subroutine sortrp(a, ind, n)
Heap Sort for double precision arrays.
Definition math.f90:1183
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:818
subroutine, public pwmin3(a, b, c, n)
Point-wise minimum of two vectors .
Definition math.f90:1496
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:862
real(kind=rp) function, public glmin(a, n)
Min of a vector of length n.
Definition math.f90:596
subroutine, public masked_copy_0(a, b, mask, n, n_mask)
Copy a masked vector .
Definition math.f90:271
subroutine, public vcross(u1, u2, u3, v1, v2, v3, w1, w2, w3, n)
Compute a cross product assuming vector components etc.
Definition math.f90:703
pure logical function srelcmp(x, y, eps)
Return single precision relative comparison .
Definition math.f90:165
real(kind=rp) function, public vlsc3(u, v, w, n)
Compute multiplication sum .
Definition math.f90:748
subroutine reordi4(b, ind, n)
reorder single integer array - inverse of swap
Definition math.f90:1357
subroutine, public p_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:1068
integer, parameter, public qp
Definition num_types.f90:10
integer, parameter, public i4
Definition num_types.f90:6
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public sp
Definition num_types.f90:8
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35
#define max(a, b)
Definition tensor.cu:40