2003-09-28 Andrew Cagney <cagney@redhat.com>
[deliverable/binutils-gdb.git] / gprof / hist.c
1 /* hist.c - Histogram related operations.
2
3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21 \f
22 #include "libiberty.h"
23 #include "gprof.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "corefile.h"
28 #include "gmon_io.h"
29 #include "gmon_out.h"
30 #include "hist.h"
31 #include "sym_ids.h"
32 #include "utils.h"
33
34 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
35
36 static void scale_and_align_entries PARAMS ((void));
37 static void print_header PARAMS ((int));
38 static void print_line PARAMS ((Sym *, double));
39 static int cmp_time PARAMS ((const PTR, const PTR));
40
41 /* Declarations of automatically generated functions to output blurbs. */
42 extern void flat_blurb PARAMS ((FILE * fp));
43
44 bfd_vma s_lowpc; /* Lowest address in .text. */
45 bfd_vma s_highpc = 0; /* Highest address in .text. */
46 bfd_vma lowpc, highpc; /* Same, but expressed in UNITs. */
47 int hist_num_bins = 0; /* Number of histogram samples. */
48 int *hist_sample = 0; /* Histogram samples (shorts in the file!). */
49 double hist_scale;
50 char hist_dimension[16] = "seconds";
51 char hist_dimension_abbrev = 's';
52
53 static double accum_time; /* Accumulated time so far for print_line(). */
54 static double total_time; /* Total time for all routines. */
55
56 /* Table of SI prefixes for powers of 10 (used to automatically
57 scale some of the values in the flat profile). */
58 const struct
59 {
60 char prefix;
61 double scale;
62 }
63 SItab[] =
64 {
65 { 'T', 1e-12 }, /* tera */
66 { 'G', 1e-09 }, /* giga */
67 { 'M', 1e-06 }, /* mega */
68 { 'K', 1e-03 }, /* kilo */
69 { ' ', 1e-00 },
70 { 'm', 1e+03 }, /* milli */
71 { 'u', 1e+06 }, /* micro */
72 { 'n', 1e+09 }, /* nano */
73 { 'p', 1e+12 }, /* pico */
74 { 'f', 1e+15 }, /* femto */
75 { 'a', 1e+18 } /* ato */
76 };
77
78
79 /* Read the histogram from file IFP. FILENAME is the name of IFP and
80 is provided for formatting error messages only. */
81
82 void
83 hist_read_rec (ifp, filename)
84 FILE * ifp;
85 const char *filename;
86 {
87 bfd_vma n_lowpc, n_highpc;
88 int i, ncnt, profrate;
89 UNIT count;
90
91 if (gmon_io_read_vma (ifp, &n_lowpc)
92 || gmon_io_read_vma (ifp, &n_highpc)
93 || gmon_io_read_32 (ifp, &ncnt)
94 || gmon_io_read_32 (ifp, &profrate)
95 || gmon_io_read (ifp, hist_dimension, 15)
96 || gmon_io_read (ifp, &hist_dimension_abbrev, 1))
97 {
98 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
99 whoami, filename);
100
101 done (1);
102 }
103
104 if (!s_highpc)
105 {
106 /* This is the first histogram record. */
107 s_lowpc = n_lowpc;
108 s_highpc = n_highpc;
109 lowpc = (bfd_vma) n_lowpc / sizeof (UNIT);
110 highpc = (bfd_vma) n_highpc / sizeof (UNIT);
111 hist_num_bins = ncnt;
112 hz = profrate;
113 }
114
115 DBG (SAMPLEDEBUG,
116 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %d\n",
117 (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt);
118 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %d\n",
119 (unsigned long) s_lowpc, (unsigned long) s_highpc,
120 hist_num_bins);
121 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n",
122 (unsigned long) lowpc, (unsigned long) highpc));
123
124 if (n_lowpc != s_lowpc || n_highpc != s_highpc
125 || ncnt != hist_num_bins || hz != profrate)
126 {
127 fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"),
128 whoami, filename);
129 done (1);
130 }
131
132 if (!hist_sample)
133 {
134 hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0]));
135 memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0]));
136 }
137
138 for (i = 0; i < hist_num_bins; ++i)
139 {
140 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
141 {
142 fprintf (stderr,
143 _("%s: %s: unexpected EOF after reading %d of %d samples\n"),
144 whoami, filename, i, hist_num_bins);
145 done (1);
146 }
147 hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
148 DBG (SAMPLEDEBUG,
149 printf ("[hist_read_rec] 0x%lx: %u\n",
150 (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt),
151 hist_sample[i]));
152 }
153 }
154
155
156 /* Write execution histogram to file OFP. FILENAME is the name
157 of OFP and is provided for formatting error-messages only. */
158
159 void
160 hist_write_hist (ofp, filename)
161 FILE * ofp;
162 const char *filename;
163 {
164 UNIT count;
165 int i;
166
167 /* Write header. */
168
169 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
170 || gmon_io_write_vma (ofp, s_lowpc)
171 || gmon_io_write_vma (ofp, s_highpc)
172 || gmon_io_write_32 (ofp, hist_num_bins)
173 || gmon_io_write_32 (ofp, hz)
174 || gmon_io_write (ofp, hist_dimension, 15)
175 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
176 {
177 perror (filename);
178 done (1);
179 }
180
181 for (i = 0; i < hist_num_bins; ++i)
182 {
183 bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]);
184
185 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
186 {
187 perror (filename);
188 done (1);
189 }
190 }
191 }
192
193
194 /* Calculate scaled entry point addresses (to save time in
195 hist_assign_samples), and, on architectures that have procedure
196 entry masks at the start of a function, possibly push the scaled
197 entry points over the procedure entry mask, if it turns out that
198 the entry point is in one bin and the code for a routine is in the
199 next bin. */
200
201 static void
202 scale_and_align_entries ()
203 {
204 Sym *sym;
205 bfd_vma bin_of_entry;
206 bfd_vma bin_of_code;
207
208 for (sym = symtab.base; sym < symtab.limit; sym++)
209 {
210 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
211 bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale;
212 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc)
213 / hist_scale);
214 if (bin_of_entry < bin_of_code)
215 {
216 DBG (SAMPLEDEBUG,
217 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
218 (unsigned long) sym->hist.scaled_addr,
219 (unsigned long) (sym->hist.scaled_addr
220 + UNITS_TO_CODE)));
221 sym->hist.scaled_addr += UNITS_TO_CODE;
222 }
223 }
224 }
225
226
227 /* Assign samples to the symbol to which they belong.
228
229 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
230 which may overlap one more symbol address ranges. If a symbol
231 overlaps with the bin's address range by O percent, then O percent
232 of the bin's count is credited to that symbol.
233
234 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
235 with respect to the symbol's address range [SYM_LOW_PC,
236 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
237 the distance (in UNITs) between the arrows, the fraction of the
238 sample that is to be credited to the symbol which starts at
239 SYM_LOW_PC.
240
241 sym_low_pc sym_high_pc
242 | |
243 v v
244
245 +-----------------------------------------------+
246 | |
247 | ->| |<- ->| |<- ->| |<- |
248 | | | | | |
249 +---------+ +---------+ +---------+
250
251 ^ ^ ^ ^ ^ ^
252 | | | | | |
253 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
254
255 For the VAX we assert that samples will never fall in the first two
256 bytes of any routine, since that is the entry mask, thus we call
257 scale_and_align_entries() to adjust the entry points if the entry
258 mask falls in one bin but the code for the routine doesn't start
259 until the next bin. In conjunction with the alignment of routine
260 addresses, this should allow us to have only one sample for every
261 four bytes of text space and never have any overlap (the two end
262 cases, above). */
263
264 void
265 hist_assign_samples ()
266 {
267 bfd_vma bin_low_pc, bin_high_pc;
268 bfd_vma sym_low_pc, sym_high_pc;
269 bfd_vma overlap, addr;
270 int bin_count, i;
271 unsigned int j;
272 double time, credit;
273
274 /* Read samples and assign to symbols. */
275 hist_scale = highpc - lowpc;
276 hist_scale /= hist_num_bins;
277 scale_and_align_entries ();
278
279 /* Iterate over all sample bins. */
280 for (i = 0, j = 1; i < hist_num_bins; ++i)
281 {
282 bin_count = hist_sample[i];
283 if (! bin_count)
284 continue;
285
286 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
287 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
288 time = bin_count;
289
290 DBG (SAMPLEDEBUG,
291 printf (
292 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%d\n",
293 (unsigned long) (sizeof (UNIT) * bin_low_pc),
294 (unsigned long) (sizeof (UNIT) * bin_high_pc),
295 bin_count));
296 total_time += time;
297
298 /* Credit all symbols that are covered by bin I. */
299 for (j = j - 1; j < symtab.len; ++j)
300 {
301 sym_low_pc = symtab.base[j].hist.scaled_addr;
302 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
303
304 /* If high end of bin is below entry address,
305 go for next bin. */
306 if (bin_high_pc < sym_low_pc)
307 break;
308
309 /* If low end of bin is above high end of symbol,
310 go for next symbol. */
311 if (bin_low_pc >= sym_high_pc)
312 continue;
313
314 overlap =
315 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
316 if (overlap > 0)
317 {
318 DBG (SAMPLEDEBUG,
319 printf (
320 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
321 (unsigned long) symtab.base[j].addr,
322 (unsigned long) (sizeof (UNIT) * sym_high_pc),
323 symtab.base[j].name, overlap * time / hist_scale,
324 (long) overlap));
325
326 addr = symtab.base[j].addr;
327 credit = overlap * time / hist_scale;
328
329 /* Credit symbol if it appears in INCL_FLAT or that
330 table is empty and it does not appear it in
331 EXCL_FLAT. */
332 if (sym_lookup (&syms[INCL_FLAT], addr)
333 || (syms[INCL_FLAT].len == 0
334 && !sym_lookup (&syms[EXCL_FLAT], addr)))
335 {
336 symtab.base[j].hist.time += credit;
337 }
338 else
339 {
340 total_time -= credit;
341 }
342 }
343 }
344 }
345
346 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
347 total_time));
348 }
349
350
351 /* Print header for flag histogram profile. */
352
353 static void
354 print_header (prefix)
355 int prefix;
356 {
357 char unit[64];
358
359 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
360
361 if (bsd_style_output)
362 {
363 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
364 (long) hist_scale * sizeof (UNIT));
365 if (total_time > 0.0)
366 {
367 printf (_(" for %.2f%% of %.2f %s\n\n"),
368 100.0 / total_time, total_time / hz, hist_dimension);
369 }
370 }
371 else
372 {
373 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
374 }
375
376 if (total_time <= 0.0)
377 {
378 printf (_(" no time accumulated\n\n"));
379
380 /* This doesn't hurt since all the numerators will be zero. */
381 total_time = 1.0;
382 }
383
384 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
385 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
386 "");
387 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
388 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
389 _("name"));
390 }
391
392
393 static void
394 print_line (sym, scale)
395 Sym *sym;
396 double scale;
397 {
398 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
399 return;
400
401 accum_time += sym->hist.time;
402
403 if (bsd_style_output)
404 printf ("%5.1f %10.2f %8.2f",
405 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
406 accum_time / hz, sym->hist.time / hz);
407 else
408 printf ("%6.2f %9.2f %8.2f",
409 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
410 accum_time / hz, sym->hist.time / hz);
411
412 if (sym->ncalls != 0)
413 printf (" %8lu %8.2f %8.2f ",
414 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
415 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
416 else
417 printf (" %8.8s %8.8s %8.8s ", "", "", "");
418
419 if (bsd_style_output)
420 print_name (sym);
421 else
422 print_name_only (sym);
423
424 printf ("\n");
425 }
426
427
428 /* Compare LP and RP. The primary comparison key is execution time,
429 the secondary is number of invocation, and the tertiary is the
430 lexicographic order of the function names. */
431
432 static int
433 cmp_time (lp, rp)
434 const PTR lp;
435 const PTR rp;
436 {
437 const Sym *left = *(const Sym **) lp;
438 const Sym *right = *(const Sym **) rp;
439 double time_diff;
440
441 time_diff = right->hist.time - left->hist.time;
442
443 if (time_diff > 0.0)
444 return 1;
445
446 if (time_diff < 0.0)
447 return -1;
448
449 if (right->ncalls > left->ncalls)
450 return 1;
451
452 if (right->ncalls < left->ncalls)
453 return -1;
454
455 return strcmp (left->name, right->name);
456 }
457
458
459 /* Print the flat histogram profile. */
460
461 void
462 hist_print ()
463 {
464 Sym **time_sorted_syms, *top_dog, *sym;
465 unsigned int index;
466 unsigned log_scale;
467 double top_time, time;
468 bfd_vma addr;
469
470 if (first_output)
471 first_output = FALSE;
472 else
473 printf ("\f\n");
474
475 accum_time = 0.0;
476
477 if (bsd_style_output)
478 {
479 if (print_descriptions)
480 {
481 printf (_("\n\n\nflat profile:\n"));
482 flat_blurb (stdout);
483 }
484 }
485 else
486 {
487 printf (_("Flat profile:\n"));
488 }
489
490 /* Sort the symbol table by time (call-count and name as secondary
491 and tertiary keys). */
492 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
493
494 for (index = 0; index < symtab.len; ++index)
495 time_sorted_syms[index] = &symtab.base[index];
496
497 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
498
499 if (bsd_style_output)
500 {
501 log_scale = 5; /* Milli-seconds is BSD-default. */
502 }
503 else
504 {
505 /* Search for symbol with highest per-call
506 execution time and scale accordingly. */
507 log_scale = 0;
508 top_dog = 0;
509 top_time = 0.0;
510
511 for (index = 0; index < symtab.len; ++index)
512 {
513 sym = time_sorted_syms[index];
514
515 if (sym->ncalls != 0)
516 {
517 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
518
519 if (time > top_time)
520 {
521 top_dog = sym;
522 top_time = time;
523 }
524 }
525 }
526
527 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
528 {
529 top_time /= hz;
530
531 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
532 {
533 double scaled_value = SItab[log_scale].scale * top_time;
534
535 if (scaled_value >= 1.0 && scaled_value < 1000.0)
536 break;
537 }
538 }
539 }
540
541 /* For now, the dimension is always seconds. In the future, we
542 may also want to support other (pseudo-)dimensions (such as
543 I-cache misses etc.). */
544 print_header (SItab[log_scale].prefix);
545
546 for (index = 0; index < symtab.len; ++index)
547 {
548 addr = time_sorted_syms[index]->addr;
549
550 /* Print symbol if its in INCL_FLAT table or that table
551 is empty and the symbol is not in EXCL_FLAT. */
552 if (sym_lookup (&syms[INCL_FLAT], addr)
553 || (syms[INCL_FLAT].len == 0
554 && !sym_lookup (&syms[EXCL_FLAT], addr)))
555 print_line (time_sorted_syms[index], SItab[log_scale].scale);
556 }
557
558 free (time_sorted_syms);
559
560 if (print_descriptions && !bsd_style_output)
561 flat_blurb (stdout);
562 }
This page took 0.041103 seconds and 4 git commands to generate.