Neko  0.9.0
A portable framework for high-order spectral element flow simulations
runtime_statistics.f90
Go to the documentation of this file.
1 ! Copyright (c) 2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
35  use logger, only : neko_log, log_size
36  use stack, only : stack_r8_t, stack_i4r8t2_t
37  use tuple, only : tuple_i4r8_t
38  use num_types, only : dp
40  use json_module, only : json_file
41  use file, only : file_t
42  use matrix, only : matrix_t
43  use comm
44  use mpi_f08
45  implicit none
46  private
47 
48  integer :: rt_stats_max_regions = 50
49 
52  character(len=19), allocatable :: rt_stats_id(:)
54  type(stack_r8_t), allocatable :: elapsed_time_(:)
56  type(stack_i4r8t2_t) :: region_timestamp_
57  logical :: enabled_
58  logical :: output_profile_
59  contains
60  procedure, pass(this) :: init => runtime_stats_init
61  procedure, pass(this) :: free => runtime_stats_free
62  procedure, pass(this) :: start_region => runtime_stats_start_region
63  procedure, pass(this) :: end_region => runtime_stats_end_region
64  procedure, pass(this) :: report => runtime_stats_report
65  end type runtime_stats_t
66 
67  type(runtime_stats_t), public :: neko_rt_stats
68 
69 contains
70 
72  subroutine runtime_stats_init(this, params)
73  class(runtime_stats_t), intent(inout) :: this
74  type(json_file), intent(inout) :: params
75  integer :: i
76 
77  call this%free()
78 
79  call json_get_or_default(params, 'case.runtime_statistics.enabled', &
80  this%enabled_, .false.)
81  call json_get_or_default(params, &
82  'case.runtime_statistics.output_profile', &
83  this%output_profile_, .false.)
84 
85  if (this%enabled_) then
86 
87  allocate(this%rt_stats_id(rt_stats_max_regions))
88 
89  this%rt_stats_id = ''
90 
91  allocate(this%elapsed_time_(rt_stats_max_regions))
92  do i = 1, rt_stats_max_regions
93  call this%elapsed_time_(i)%init()
94  end do
95 
96  call this%region_timestamp_%init(100)
97 
98  end if
99 
100  end subroutine runtime_stats_init
101 
103  subroutine runtime_stats_free(this)
104  class(runtime_stats_t), intent(inout) :: this
105  integer :: i
106 
107  if (allocated(this%rt_stats_id)) then
108  deallocate(this%rt_stats_id)
109  end if
110 
111  if (allocated(this%elapsed_time_)) then
112  do i = 1, size(this%elapsed_time_)
113  call this%elapsed_time_(i)%free()
114  end do
115  deallocate(this%elapsed_time_)
116  end if
117 
118  call this%region_timestamp_%free()
119 
120  end subroutine runtime_stats_free
121 
124  subroutine runtime_stats_start_region(this, name, region_id)
125  class(runtime_stats_t), intent(inout) :: this
126  character(len=*) :: name
127  integer, intent(in) :: region_id
128  type(tuple_i4r8_t) :: region_data
129 
130  if (.not. this%enabled_) then
131  return
132  end if
133 
134  if (region_id .gt. 0 .and. region_id .le. rt_stats_max_regions) then
135  if (len_trim(this%rt_stats_id(region_id)) .eq. 0) then
136  this%rt_stats_id(region_id) = trim(name)
137  else
138  if (trim(this%rt_stats_id(region_id)) .ne. trim(name)) then
139  call neko_error('Profile region renamed')
140  end if
141  end if
142  region_data%x = region_id
143  region_data%y = mpi_wtime()
144  call this%region_timestamp_%push(region_data)
145  else
146  call neko_error('Invalid profiling region id')
147  end if
148 
149  end subroutine runtime_stats_start_region
150 
152  subroutine runtime_stats_end_region(this, name, region_id)
153  class(runtime_stats_t), intent(inout) :: this
154  character(len=*) :: name
155  integer, intent(in) :: region_id
156  real(kind=dp) :: end_time, elapsed_time
157  type(tuple_i4r8_t) :: region_data
158 
159  if (.not. this%enabled_) then
160  return
161  end if
162 
163  end_time = mpi_wtime()
164 
165  if (trim(this%rt_stats_id(region_id)) .ne. trim(name)) then
166  call neko_error('Invalid profiler region closed (' // name // ', &
167  &expected: ' // trim(this%rt_stats_id(region_id)) // ')')
168  end if
169  region_data = this%region_timestamp_%pop()
170 
171  if (region_data%x .gt. 0) then
172  elapsed_time = end_time - region_data%y
173  call this%elapsed_time_(region_data%x)%push(elapsed_time)
174  end if
175 
176  end subroutine runtime_stats_end_region
177 
179  subroutine runtime_stats_report(this)
180  class(runtime_stats_t), intent(inout) :: this
181  character(len=LOG_SIZE) :: log_buf
182  character(len=1250) :: hdr
183  real(kind=dp) :: avg, std, sem, total
184  integer :: i, nsamples, ncols, nrows, col_idx
185  type(matrix_t) :: profile_data
186 
187  if (.not. this%enabled_) then
188  return
189  end if
190 
191  call neko_log%section('Runtime statistics')
192  call neko_log%newline()
193  write(log_buf, '(A,A,1x,A,1x,A)') ' ',&
194  ' Total time ',' Avg. time ',' Range +/-'
195  call neko_log%message(log_buf)
196  write(log_buf, '(A)') &
197  '--------------------------------------------------------------------'
198  call neko_log%message(log_buf)
199 
200  ncols = 0
201  nrows = 0
202  hdr = ''
203  do i = 1, size(this%elapsed_time_)
204  if (len_trim(this%rt_stats_id(i)) .gt. 0) then
205  nsamples = this%elapsed_time_(i)%size()
206  ncols = ncols + 1
207  hdr = trim(hdr) // trim(this%rt_stats_id(i)) // ', '
208  nrows = max(nrows, nsamples)
209  if (nsamples .gt. 0) then
210  select type (region_sample => this%elapsed_time_(i)%data)
211  type is (double precision)
212  total = sum(region_sample(1:nsamples))
213  call mpi_allreduce(mpi_in_place, total, 1, &
214  mpi_double_precision, mpi_sum, neko_comm)
215  total = total / pe_size
216  avg = total / nsamples
217  std = (total - avg)**2 / nsamples
218  sem = std /sqrt(real(nsamples, dp))
219  end select
220  write(log_buf, '(A, E15.7,1x,1x,E15.7,1x,1x,E15.7)') &
221  this%rt_stats_id(i), total, avg, 2.5758_dp * sem
222  call neko_log%message(log_buf)
223  end if
224  end if
225  end do
226 
227  call neko_log%newline()
228 
229  if (this%output_profile_) then
230  col_idx = 0
231  call profile_data%init(nrows, ncols)
232  do i = 1, size(this%elapsed_time_)
233  if (len_trim(this%rt_stats_id(i)) .gt. 0) then
234  nsamples = this%elapsed_time_(i)%size()
235  col_idx = col_idx + 1
236  if (nsamples .gt. 0) then
237  select type (region_sample => this%elapsed_time_(i)%data)
238  type is (double precision)
239  profile_data%x(1:nsamples,col_idx) = &
240  region_sample(1:nsamples)
241  call mpi_allreduce(mpi_in_place, &
242  profile_data%x(1:nsamples,col_idx), nsamples, &
243  mpi_double_precision, mpi_sum, neko_comm)
244  profile_data%x(1:nsamples, col_idx) = &
245  profile_data%x(1:nsamples, col_idx) / pe_size
246  end select
247  end if
248  end if
249  end do
250 
251  if (pe_rank .eq. 0) then
252  block
253  type(file_t) :: profile_file
254  profile_file = file_t('profile.csv')
255  call profile_file%set_header(hdr)
256  call profile_file%write(profile_data)
257  end block
258  end if
259  end if
260  call neko_log%end_section()
261 
262  call profile_data%free()
263 
264  end subroutine runtime_stats_report
265 
266 end module runtime_stats
double real
Definition: device_config.h:12
Retrieves a parameter by name or assigns a provided default value. In the latter case also adds the m...
Definition: json_utils.f90:54
Retrieves a parameter by name or throws an error.
Definition: json_utils.f90:45
Definition: comm.F90:1
integer pe_rank
MPI rank.
Definition: comm.F90:28
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
integer pe_size
MPI size of communicator.
Definition: comm.F90:31
Module for file I/O operations.
Definition: file.f90:34
Utilities for retrieving parameters from the case files.
Definition: json_utils.f90:34
Logging routines.
Definition: log.f90:34
type(log_t), public neko_log
Global log stream.
Definition: log.f90:65
integer, parameter, public log_size
Definition: log.f90:42
Defines a matrix.
Definition: matrix.f90:34
integer, parameter, public dp
Definition: num_types.f90:9
Runtime statistics.
subroutine runtime_stats_init(this, params)
Initialise runtime statistics.
subroutine runtime_stats_start_region(this, name, region_id)
Start measuring time for the region named name with id region_id.
subroutine runtime_stats_report(this)
Report runtime statistics for all recorded regions.
subroutine runtime_stats_free(this)
Destroy runtime statistics.
type(runtime_stats_t), public neko_rt_stats
integer rt_stats_max_regions
subroutine runtime_stats_end_region(this, name, region_id)
Compute elapsed time for the current region.
Implements a dynamic stack ADT.
Definition: stack.f90:35
Implements a n-tuple.
Definition: tuple.f90:34
A wrapper around a polymorphic generic_file_t that handles its init. This is essentially a factory fo...
Definition: file.f90:54
Mixed integer-double precision 2-tuple based stack.
Definition: stack.f90:98
Double precision based stack.
Definition: stack.f90:77
Mixed integer ( ) double precision ( ) 2-tuple .
Definition: tuple.f90:87
#define max(a, b)
Definition: tensor.cu:40