1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2009
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 #include "libiberty.h"
25 #include "search_list.h"
38 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
40 static void scale_and_align_entries (void);
41 static void print_header (int);
42 static void print_line (Sym
*, double);
43 static int cmp_time (const PTR
, const PTR
);
45 /* Declarations of automatically generated functions to output blurbs. */
46 extern void flat_blurb (FILE * fp
);
48 static histogram
*find_histogram (bfd_vma lowpc
, bfd_vma highpc
);
49 static histogram
*find_histogram_for_pc (bfd_vma pc
);
51 histogram
* histograms
;
52 unsigned num_histograms
;
54 static char hist_dimension
[16] = "seconds";
55 static char hist_dimension_abbrev
= 's';
57 static double accum_time
; /* Accumulated time so far for print_line(). */
58 static double total_time
; /* Total time for all routines. */
60 /* Table of SI prefixes for powers of 10 (used to automatically
61 scale some of the values in the flat profile). */
69 { 'T', 1e-12 }, /* tera */
70 { 'G', 1e-09 }, /* giga */
71 { 'M', 1e-06 }, /* mega */
72 { 'K', 1e-03 }, /* kilo */
74 { 'm', 1e+03 }, /* milli */
75 { 'u', 1e+06 }, /* micro */
76 { 'n', 1e+09 }, /* nano */
77 { 'p', 1e+12 }, /* pico */
78 { 'f', 1e+15 }, /* femto */
79 { 'a', 1e+18 } /* ato */
82 /* Reads just the header part of histogram record into
83 *RECORD from IFP. FILENAME is the name of IFP and
84 is provided for formatting error messages only.
86 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
87 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
88 that the new histogram is compatible with already-set values
89 of those variables and emits an error if that's not so. */
91 read_histogram_header (histogram
*record
,
92 FILE *ifp
, const char *filename
,
95 unsigned int profrate
;
96 char n_hist_dimension
[15];
97 char n_hist_dimension_abbrev
;
100 if (gmon_io_read_vma (ifp
, &record
->lowpc
)
101 || gmon_io_read_vma (ifp
, &record
->highpc
)
102 || gmon_io_read_32 (ifp
, &record
->num_bins
)
103 || gmon_io_read_32 (ifp
, &profrate
)
104 || gmon_io_read (ifp
, n_hist_dimension
, 15)
105 || gmon_io_read (ifp
, &n_hist_dimension_abbrev
, 1))
107 fprintf (stderr
, _("%s: %s: unexpected end of file\n"),
113 n_hist_scale
= (double)((record
->highpc
- record
->lowpc
) / sizeof (UNIT
))
118 /* We don't try to veryfy profrate is the same for all histogram
119 records. If we have two histogram records for the same
120 address range and profiling samples is done as often
121 as possible as opposed on timer, then the actual profrate will
122 be slightly different. Most of the time the difference does not
123 matter and insisting that profiling rate is exactly the same
124 will only create inconvenient. */
126 memcpy (hist_dimension
, n_hist_dimension
, 15);
127 hist_dimension_abbrev
= n_hist_dimension_abbrev
;
128 hist_scale
= n_hist_scale
;
132 if (strncmp (n_hist_dimension
, hist_dimension
, 15) != 0)
135 _("%s: dimension unit changed between histogram records\n"
138 whoami
, whoami
, hist_dimension
, whoami
, n_hist_dimension
);
142 if (n_hist_dimension_abbrev
!= hist_dimension_abbrev
)
145 _("%s: dimension abbreviation changed between histogram records\n"
148 whoami
, whoami
, hist_dimension_abbrev
, whoami
, n_hist_dimension_abbrev
);
152 /* The only reason we require the same scale for histograms is that
153 there's code (notably printing code), that prints units,
154 and it would be very confusing to have one unit mean different
155 things for different functions. */
156 if (fabs (hist_scale
- n_hist_scale
) > 0.000001)
159 _("%s: different scales in histogram records"),
166 /* Read the histogram from file IFP. FILENAME is the name of IFP and
167 is provided for formatting error messages only. */
170 hist_read_rec (FILE * ifp
, const char *filename
)
172 bfd_vma lowpc
, highpc
;
174 histogram
*record
, *existing_record
;
177 /* 1. Read the header and see if there's existing record for the
178 same address range and that there are no overlapping records. */
179 read_histogram_header (&n_record
, ifp
, filename
, num_histograms
== 0);
181 existing_record
= find_histogram (n_record
.lowpc
, n_record
.highpc
);
184 record
= existing_record
;
188 /* If this record overlaps, but does not completely match an existing
189 record, it's an error. */
190 lowpc
= n_record
.lowpc
;
191 highpc
= n_record
.highpc
;
192 hist_clip_symbol_address (&lowpc
, &highpc
);
196 _("%s: overlapping histogram records\n"),
201 /* This is new record. Add it to global array and allocate space for
203 histograms
= (struct histogram
*)
204 xrealloc (histograms
, sizeof (histogram
) * (num_histograms
+ 1));
205 memcpy (histograms
+ num_histograms
,
206 &n_record
, sizeof (histogram
));
207 record
= &histograms
[num_histograms
];
210 record
->sample
= (int *) xmalloc (record
->num_bins
211 * sizeof (record
->sample
[0]));
212 memset (record
->sample
, 0, record
->num_bins
* sizeof (record
->sample
[0]));
215 /* 2. We have either a new record (with zeroed histogram data), or an existing
216 record with some data in the histogram already. Read new data into the
217 record, adding hit counts. */
220 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
221 (unsigned long) record
->lowpc
, (unsigned long) record
->highpc
,
224 for (i
= 0; i
< record
->num_bins
; ++i
)
227 if (fread (&count
[0], sizeof (count
), 1, ifp
) != 1)
230 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
231 whoami
, filename
, i
, record
->num_bins
);
234 record
->sample
[i
] += bfd_get_16 (core_bfd
, (bfd_byte
*) & count
[0]);
236 printf ("[hist_read_rec] 0x%lx: %u\n",
237 (unsigned long) (record
->lowpc
238 + i
* (record
->highpc
- record
->lowpc
)
245 /* Write all execution histograms file OFP. FILENAME is the name
246 of OFP and is provided for formatting error-messages only. */
249 hist_write_hist (FILE * ofp
, const char *filename
)
254 for (r
= 0; r
< num_histograms
; ++r
)
256 histogram
*record
= &histograms
[r
];
260 if (gmon_io_write_8 (ofp
, GMON_TAG_TIME_HIST
)
261 || gmon_io_write_vma (ofp
, record
->lowpc
)
262 || gmon_io_write_vma (ofp
, record
->highpc
)
263 || gmon_io_write_32 (ofp
, record
->num_bins
)
264 || gmon_io_write_32 (ofp
, hz
)
265 || gmon_io_write (ofp
, hist_dimension
, 15)
266 || gmon_io_write (ofp
, &hist_dimension_abbrev
, 1))
272 for (i
= 0; i
< record
->num_bins
; ++i
)
274 bfd_put_16 (core_bfd
, (bfd_vma
) record
->sample
[i
], (bfd_byte
*) &count
[0]);
276 if (fwrite (&count
[0], sizeof (count
), 1, ofp
) != 1)
285 /* Calculate scaled entry point addresses (to save time in
286 hist_assign_samples), and, on architectures that have procedure
287 entry masks at the start of a function, possibly push the scaled
288 entry points over the procedure entry mask, if it turns out that
289 the entry point is in one bin and the code for a routine is in the
293 scale_and_align_entries ()
296 bfd_vma bin_of_entry
;
299 for (sym
= symtab
.base
; sym
< symtab
.limit
; sym
++)
301 histogram
*r
= find_histogram_for_pc (sym
->addr
);
303 sym
->hist
.scaled_addr
= sym
->addr
/ sizeof (UNIT
);
307 bin_of_entry
= (sym
->hist
.scaled_addr
- r
->lowpc
) / hist_scale
;
308 bin_of_code
= ((sym
->hist
.scaled_addr
+ UNITS_TO_CODE
- r
->lowpc
)
310 if (bin_of_entry
< bin_of_code
)
313 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
314 (unsigned long) sym
->hist
.scaled_addr
,
315 (unsigned long) (sym
->hist
.scaled_addr
317 sym
->hist
.scaled_addr
+= UNITS_TO_CODE
;
324 /* Assign samples to the symbol to which they belong.
326 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
327 which may overlap one more symbol address ranges. If a symbol
328 overlaps with the bin's address range by O percent, then O percent
329 of the bin's count is credited to that symbol.
331 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
332 with respect to the symbol's address range [SYM_LOW_PC,
333 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
334 the distance (in UNITs) between the arrows, the fraction of the
335 sample that is to be credited to the symbol which starts at
338 sym_low_pc sym_high_pc
342 +-----------------------------------------------+
344 | ->| |<- ->| |<- ->| |<- |
346 +---------+ +---------+ +---------+
350 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
352 For the VAX we assert that samples will never fall in the first two
353 bytes of any routine, since that is the entry mask, thus we call
354 scale_and_align_entries() to adjust the entry points if the entry
355 mask falls in one bin but the code for the routine doesn't start
356 until the next bin. In conjunction with the alignment of routine
357 addresses, this should allow us to have only one sample for every
358 four bytes of text space and never have any overlap (the two end
362 hist_assign_samples_1 (histogram
*r
)
364 bfd_vma bin_low_pc
, bin_high_pc
;
365 bfd_vma sym_low_pc
, sym_high_pc
;
366 bfd_vma overlap
, addr
;
367 unsigned int bin_count
;
369 double count_time
, credit
;
371 bfd_vma lowpc
= r
->lowpc
/ sizeof (UNIT
);
373 /* Iterate over all sample bins. */
374 for (i
= 0, j
= 1; i
< r
->num_bins
; ++i
)
376 bin_count
= r
->sample
[i
];
380 bin_low_pc
= lowpc
+ (bfd_vma
) (hist_scale
* i
);
381 bin_high_pc
= lowpc
+ (bfd_vma
) (hist_scale
* (i
+ 1));
382 count_time
= bin_count
;
386 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
387 (unsigned long) (sizeof (UNIT
) * bin_low_pc
),
388 (unsigned long) (sizeof (UNIT
) * bin_high_pc
),
390 total_time
+= count_time
;
392 /* Credit all symbols that are covered by bin I. */
393 for (j
= j
- 1; j
< symtab
.len
; ++j
)
395 sym_low_pc
= symtab
.base
[j
].hist
.scaled_addr
;
396 sym_high_pc
= symtab
.base
[j
+ 1].hist
.scaled_addr
;
398 /* If high end of bin is below entry address,
400 if (bin_high_pc
< sym_low_pc
)
402 /* PR gprof/13325: Make sure that j does not go below 1. */
408 /* If low end of bin is above high end of symbol,
409 go for next symbol. */
410 if (bin_low_pc
>= sym_high_pc
)
414 MIN (bin_high_pc
, sym_high_pc
) - MAX (bin_low_pc
, sym_low_pc
);
419 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
420 (unsigned long) symtab
.base
[j
].addr
,
421 (unsigned long) (sizeof (UNIT
) * sym_high_pc
),
422 symtab
.base
[j
].name
, overlap
* count_time
/ hist_scale
,
425 addr
= symtab
.base
[j
].addr
;
426 credit
= overlap
* count_time
/ hist_scale
;
428 /* Credit symbol if it appears in INCL_FLAT or that
429 table is empty and it does not appear it in
431 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
432 || (syms
[INCL_FLAT
].len
== 0
433 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
435 symtab
.base
[j
].hist
.time
+= credit
;
439 total_time
-= credit
;
445 DBG (SAMPLEDEBUG
, printf ("[assign_samples] total_time %f\n",
449 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
451 hist_assign_samples ()
455 scale_and_align_entries ();
457 for (i
= 0; i
< num_histograms
; ++i
)
458 hist_assign_samples_1 (&histograms
[i
]);
462 /* Print header for flag histogram profile. */
465 print_header (int prefix
)
469 sprintf (unit
, _("%c%c/call"), prefix
, hist_dimension_abbrev
);
471 if (bsd_style_output
)
473 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
474 (long) hist_scale
* (long) sizeof (UNIT
));
475 if (total_time
> 0.0)
477 printf (_(" for %.2f%% of %.2f %s\n\n"),
478 100.0 / total_time
, total_time
/ hz
, hist_dimension
);
483 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz
, hist_dimension
);
486 if (total_time
<= 0.0)
488 printf (_(" no time accumulated\n\n"));
490 /* This doesn't hurt since all the numerators will be zero. */
494 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
495 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
497 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
498 _("time"), hist_dimension
, hist_dimension
, _("calls"), unit
, unit
,
504 print_line (Sym
*sym
, double scale
)
506 if (ignore_zeros
&& sym
->ncalls
== 0 && sym
->hist
.time
== 0)
509 accum_time
+= sym
->hist
.time
;
511 if (bsd_style_output
)
512 printf ("%5.1f %10.2f %8.2f",
513 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
514 accum_time
/ hz
, sym
->hist
.time
/ hz
);
516 printf ("%6.2f %9.2f %8.2f",
517 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
518 accum_time
/ hz
, sym
->hist
.time
/ hz
);
520 if (sym
->ncalls
!= 0)
521 printf (" %8lu %8.2f %8.2f ",
522 sym
->ncalls
, scale
* sym
->hist
.time
/ hz
/ sym
->ncalls
,
523 scale
* (sym
->hist
.time
+ sym
->cg
.child_time
) / hz
/ sym
->ncalls
);
525 printf (" %8.8s %8.8s %8.8s ", "", "", "");
527 if (bsd_style_output
)
530 print_name_only (sym
);
536 /* Compare LP and RP. The primary comparison key is execution time,
537 the secondary is number of invocation, and the tertiary is the
538 lexicographic order of the function names. */
541 cmp_time (const PTR lp
, const PTR rp
)
543 const Sym
*left
= *(const Sym
**) lp
;
544 const Sym
*right
= *(const Sym
**) rp
;
547 time_diff
= right
->hist
.time
- left
->hist
.time
;
555 if (right
->ncalls
> left
->ncalls
)
558 if (right
->ncalls
< left
->ncalls
)
561 return strcmp (left
->name
, right
->name
);
565 /* Print the flat histogram profile. */
570 Sym
**time_sorted_syms
, *top_dog
, *sym
;
571 unsigned int sym_index
;
577 first_output
= FALSE
;
583 if (bsd_style_output
)
585 if (print_descriptions
)
587 printf (_("\n\n\nflat profile:\n"));
593 printf (_("Flat profile:\n"));
596 /* Sort the symbol table by time (call-count and name as secondary
597 and tertiary keys). */
598 time_sorted_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
600 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
601 time_sorted_syms
[sym_index
] = &symtab
.base
[sym_index
];
603 qsort (time_sorted_syms
, symtab
.len
, sizeof (Sym
*), cmp_time
);
605 if (bsd_style_output
)
607 log_scale
= 5; /* Milli-seconds is BSD-default. */
611 /* Search for symbol with highest per-call
612 execution time and scale accordingly. */
617 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
619 sym
= time_sorted_syms
[sym_index
];
621 if (sym
->ncalls
!= 0)
625 call_time
= (sym
->hist
.time
+ sym
->cg
.child_time
) / sym
->ncalls
;
627 if (call_time
> top_time
)
630 top_time
= call_time
;
635 if (top_dog
&& top_dog
->ncalls
!= 0 && top_time
> 0.0)
639 for (log_scale
= 0; log_scale
< ARRAY_SIZE (SItab
); log_scale
++)
641 double scaled_value
= SItab
[log_scale
].scale
* top_time
;
643 if (scaled_value
>= 1.0 && scaled_value
< 1000.0)
649 /* For now, the dimension is always seconds. In the future, we
650 may also want to support other (pseudo-)dimensions (such as
651 I-cache misses etc.). */
652 print_header (SItab
[log_scale
].prefix
);
654 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
656 addr
= time_sorted_syms
[sym_index
]->addr
;
658 /* Print symbol if its in INCL_FLAT table or that table
659 is empty and the symbol is not in EXCL_FLAT. */
660 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
661 || (syms
[INCL_FLAT
].len
== 0
662 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
663 print_line (time_sorted_syms
[sym_index
], SItab
[log_scale
].scale
);
666 free (time_sorted_syms
);
668 if (print_descriptions
&& !bsd_style_output
)
673 hist_check_address (unsigned address
)
677 for (i
= 0; i
< num_histograms
; ++i
)
678 if (histograms
[i
].lowpc
<= address
&& address
< histograms
[i
].highpc
)
685 #define min(a,b) (((a)<(b)) ? (a) : (b))
688 #define max(a,b) (((a)>(b)) ? (a) : (b))
692 hist_clip_symbol_address (bfd_vma
*p_lowpc
, bfd_vma
*p_highpc
)
697 if (num_histograms
== 0)
699 *p_highpc
= *p_lowpc
;
703 for (i
= 0; i
< num_histograms
; ++i
)
705 bfd_vma common_low
, common_high
;
706 common_low
= max (histograms
[i
].lowpc
, *p_lowpc
);
707 common_high
= min (histograms
[i
].highpc
, *p_highpc
);
709 if (common_low
< common_high
)
714 _("%s: found a symbol that covers "
715 "several histogram records"),
721 *p_lowpc
= common_low
;
722 *p_highpc
= common_high
;
727 *p_highpc
= *p_lowpc
;
730 /* Find and return exising histogram record having the same lowpc and
731 highpc as passed via the parameters. Return NULL if nothing is found.
732 The return value is valid until any new histogram is read. */
734 find_histogram (bfd_vma lowpc
, bfd_vma highpc
)
737 for (i
= 0; i
< num_histograms
; ++i
)
739 if (histograms
[i
].lowpc
== lowpc
&& histograms
[i
].highpc
== highpc
)
740 return &histograms
[i
];
745 /* Given a PC, return histogram record which address range include this PC.
746 Return NULL if there's no such record. */
748 find_histogram_for_pc (bfd_vma pc
)
751 for (i
= 0; i
< num_histograms
; ++i
)
753 if (histograms
[i
].lowpc
<= pc
&& pc
< histograms
[i
].highpc
)
754 return &histograms
[i
];
This page took 0.045899 seconds and 5 git commands to generate.