Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
profiler.F90
Go to the documentation of this file.
1! Copyright (c) 2022-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use nvtx
38 use roctx
39 use craypat
41 use, intrinsic :: iso_c_binding
42 !$ use omp_lib
43 implicit none
44 private
45
48
49contains
50
!> Start profiling.
!! When neko_bcknd_cuda equals 1 only the HAVE_NVTX-guarded section applies;
!! for every other backend CrayPat recording is started, provided the code
!! was compiled with CRAYPAT defined.
52 subroutine profiler_start
53 if ((neko_bcknd_cuda .eq. 1)) then
54#if defined(HAVE_NVTX)
! NOTE(review): listing line 55 is absent from this extract, so the guarded
! section looks empty here. Presumably it calls device_profiler_start
! (referenced in the symbol index of this page) -- confirm against the
! original file.
56#endif
57 else
58#ifdef CRAYPAT
59 call craypat_record_start
60#endif
61 end if
62 end subroutine profiler_start
63
!> Stop profiling.
!! When neko_bcknd_cuda equals 1 only the HAVE_NVTX-guarded section applies;
!! for every other backend CrayPat recording is stopped, provided the code
!! was compiled with CRAYPAT defined.
65 subroutine profiler_stop
66 if ((neko_bcknd_cuda .eq. 1)) then
67#if defined(HAVE_NVTX)
! NOTE(review): listing line 68 is absent from this extract, so the guarded
! section looks empty here. Presumably it calls device_profiler_stop
! (referenced in the symbol index of this page) -- confirm against the
! original file.
69#endif
70 else
71#ifdef CRAYPAT
72 call craypat_record_stop
73#endif
74 end if
75 end subroutine profiler_stop
76
!> Start a named (@a name) profiler region.
!! The region is handed to the tracing backend chosen at compile time
!! (NVTX, else ROCTX, else CrayPat), then to the Fujitsu profiler when both
!! __FUJITSU and NEKO_FJPROF are defined, and finally to neko_rt_stats.
!! @param name Label of the region.
!! @param region_id Optional numeric id of the region. The CrayPat and
!! Fujitsu calls are only issued when it is present.
subroutine profiler_start_region(name, region_id)
  character(kind=c_char,len=*) :: name
  integer, optional :: region_id

#ifdef HAVE_NVTX
  ! Pick the call form that matches whether an id was supplied.
  if (.not. present(region_id)) then
     call nvtxstartrange(name)
  else
     call nvtxstartrange(name, region_id)
  end if
#elif HAVE_ROCTX
  call roctxstartrange(name)
#elif CRAYPAT
  if (present(region_id)) call craypat_region_begin(name, region_id)
#endif

#if defined(__FUJITSU) && defined(NEKO_FJPROF)
  if (present(region_id)) call fapp_start(trim(name), region_id, 0)
#endif

  ! Runtime statistics are skipped inside OpenMP parallel regions:
  ! neko_rt_stats relies on one shared LIFO stack that is not thread-safe,
  ! and concurrent pushes from several threads would scramble its order and
  ! lead to region mismatches. The sentinel line below is only compiled
  ! when OpenMP is enabled.
  !$ if (omp_in_parallel()) return
  call neko_rt_stats%start_region(name, region_id)

end subroutine profiler_start_region
113
!> End the most recently started profiler region.
!! The compile-time selected tracing backend (NVTX, else ROCTX, else
!! CrayPat) is notified first, then the Fujitsu profiler when both
!! __FUJITSU and NEKO_FJPROF are defined, and finally neko_rt_stats.
!! @param name Optional label of the region; together with @a region_id it
!! is required for the Fujitsu profiler call to be issued.
!! @param region_id Optional numeric id of the region; the CrayPat call is
!! only issued when it is present.
subroutine profiler_end_region(name, region_id)
  character(kind=c_char, len=*), optional :: name
  integer, optional :: region_id

#ifdef HAVE_NVTX
  call nvtxrangepop
#elif HAVE_ROCTX
  call roctxrangepop
#elif CRAYPAT
  if (present(region_id)) call craypat_region_end(region_id)
#endif

#if defined(__FUJITSU) && defined(NEKO_FJPROF)
  ! Both optional arguments must be supplied before they may be referenced.
  if (present(name)) then
     if (present(region_id)) call fapp_stop(trim(name), region_id, 0)
  end if
#endif

  ! No runtime statistics inside OpenMP parallel regions; the sentinel line
  ! below is only compiled when OpenMP is enabled.
  !$ if (omp_in_parallel()) return
  call neko_rt_stats%end_region(name, region_id)

end subroutine profiler_end_region
143
144end module profiler
Interface to CrayPat F77 API.
Definition craypat.F90:34
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_profiler_stop()
Stop device profiling.
Definition device.F90:1441
subroutine, public device_profiler_start()
Start device profiling.
Definition device.F90:1432
Build configurations.
integer, parameter neko_bcknd_cuda
Interface to NVTX, based on https://github.com/maxcuda/NVTX_example.
Definition nvtx.F90:3
Profiling interface.
Definition profiler.F90:34
subroutine, public profiler_start
Start profiling.
Definition profiler.F90:53
subroutine, public profiler_start_region(name, region_id)
Start a named (name) profiler region.
Definition profiler.F90:79
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Definition profiler.F90:116
subroutine, public profiler_stop
Stop profiling.
Definition profiler.F90:66
Interface to ROCTX.
Definition roctx.F90:2
Runtime statistics.
type(runtime_stats_t), public neko_rt_stats