Commit | Line | Data |
---|---|---|
252b5132 RH |
1 | /* |
2 | * Histogram related operations. | |
3 | */ | |
4 | #include <stdio.h> | |
5 | #include "libiberty.h" | |
6 | #include "gprof.h" | |
7 | #include "corefile.h" | |
8 | #include "gmon_io.h" | |
9 | #include "gmon_out.h" | |
10 | #include "hist.h" | |
11 | #include "symtab.h" | |
12 | #include "sym_ids.h" | |
13 | #include "utils.h" | |
14 | ||
15 | #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT)) | |
16 | ||
17 | static void scale_and_align_entries PARAMS ((void)); | |
18 | ||
19 | /* declarations of automatically generated functions to output blurbs: */ | |
20 | extern void flat_blurb PARAMS ((FILE * fp)); | |
21 | ||
22 | bfd_vma s_lowpc; /* lowest address in .text */ | |
23 | bfd_vma s_highpc = 0; /* highest address in .text */ | |
24 | bfd_vma lowpc, highpc; /* same, but expressed in UNITs */ | |
25 | int hist_num_bins = 0; /* number of histogram samples */ | |
26 | int *hist_sample = 0; /* histogram samples (shorts in the file!) */ | |
27 | double hist_scale; | |
28 | char hist_dimension[sizeof (((struct gmon_hist_hdr *) 0)->dimen) + 1] = | |
29 | "seconds"; | |
30 | char hist_dimension_abbrev = 's'; | |
31 | ||
32 | static double accum_time; /* accumulated time so far for print_line() */ | |
33 | static double total_time; /* total time for all routines */ | |
/*
 * Table of SI prefixes for powers of 10 (used to automatically
 * scale some of the values in the flat profile).
 *
 * Each entry pairs a prefix character with the factor by which a value
 * in the base unit is multiplied to express it in the prefixed unit.
 * Entries run from the largest unit (tera) to the smallest (atto).
 */
const struct
  {
    char prefix;		/* prefix character printed before the unit */
    double scale;		/* multiplier from base unit to prefixed unit */
  }
SItab[] =
{
  { 'T', 1e-12 },		/* tera */
  { 'G', 1e-09 },		/* giga */
  { 'M', 1e-06 },		/* mega */
  { 'K', 1e-03 },		/* kilo */
  { ' ', 1e-00 },		/* base unit, no prefix */
  { 'm', 1e+03 },		/* milli */
  { 'u', 1e+06 },		/* micro */
  { 'n', 1e+09 },		/* nano */
  { 'p', 1e+12 },		/* pico */
  { 'f', 1e+15 },		/* femto */
  { 'a', 1e+18 },		/* atto */
};
90 | ||
91 | /* | |
92 | * Read the histogram from file IFP. FILENAME is the name of IFP and | |
93 | * is provided for formatting error messages only. | |
94 | */ | |
95 | void | |
96 | DEFUN (hist_read_rec, (ifp, filename), FILE * ifp AND const char *filename) | |
97 | { | |
98 | struct gmon_hist_hdr hdr; | |
99 | bfd_vma n_lowpc, n_highpc; | |
100 | int i, ncnt, profrate; | |
101 | UNIT count; | |
102 | ||
103 | if (fread (&hdr, sizeof (hdr), 1, ifp) != 1) | |
104 | { | |
105 | fprintf (stderr, _("%s: %s: unexpected end of file\n"), | |
106 | whoami, filename); | |
107 | done (1); | |
108 | } | |
109 | ||
110 | n_lowpc = (bfd_vma) get_vma (core_bfd, (bfd_byte *) hdr.low_pc); | |
111 | n_highpc = (bfd_vma) get_vma (core_bfd, (bfd_byte *) hdr.high_pc); | |
112 | ncnt = bfd_get_32 (core_bfd, (bfd_byte *) hdr.hist_size); | |
113 | profrate = bfd_get_32 (core_bfd, (bfd_byte *) hdr.prof_rate); | |
114 | strncpy (hist_dimension, hdr.dimen, sizeof (hdr.dimen)); | |
115 | hist_dimension[sizeof (hdr.dimen)] = '\0'; | |
116 | hist_dimension_abbrev = hdr.dimen_abbrev; | |
117 | ||
118 | if (!s_highpc) | |
119 | { | |
120 | ||
121 | /* this is the first histogram record: */ | |
122 | ||
123 | s_lowpc = n_lowpc; | |
124 | s_highpc = n_highpc; | |
125 | lowpc = (bfd_vma) n_lowpc / sizeof (UNIT); | |
126 | highpc = (bfd_vma) n_highpc / sizeof (UNIT); | |
127 | hist_num_bins = ncnt; | |
128 | hz = profrate; | |
129 | } | |
130 | ||
131 | DBG (SAMPLEDEBUG, | |
132 | printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %d\n", | |
133 | n_lowpc, n_highpc, ncnt); | |
134 | printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %d\n", | |
135 | s_lowpc, s_highpc, hist_num_bins); | |
136 | printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n", | |
137 | lowpc, highpc)); | |
138 | ||
139 | if (n_lowpc != s_lowpc || n_highpc != s_highpc | |
140 | || ncnt != hist_num_bins || hz != profrate) | |
141 | { | |
142 | fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"), | |
143 | whoami, filename); | |
144 | done (1); | |
145 | } | |
146 | ||
147 | if (!hist_sample) | |
148 | { | |
149 | hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0])); | |
150 | memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0])); | |
151 | } | |
152 | ||
153 | for (i = 0; i < hist_num_bins; ++i) | |
154 | { | |
155 | if (fread (&count[0], sizeof (count), 1, ifp) != 1) | |
156 | { | |
157 | fprintf (stderr, | |
158 | _("%s: %s: unexpected EOF after reading %d of %d samples\n"), | |
159 | whoami, filename, i, hist_num_bins); | |
160 | done (1); | |
161 | } | |
162 | hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]); | |
163 | } | |
164 | } | |
165 | ||
166 | ||
167 | /* | |
168 | * Write execution histogram to file OFP. FILENAME is the name | |
169 | * of OFP and is provided for formatting error-messages only. | |
170 | */ | |
171 | void | |
172 | DEFUN (hist_write_hist, (ofp, filename), FILE * ofp AND const char *filename) | |
173 | { | |
174 | struct gmon_hist_hdr hdr; | |
175 | unsigned char tag; | |
176 | UNIT count; | |
177 | int i; | |
178 | ||
179 | /* write header: */ | |
180 | ||
181 | tag = GMON_TAG_TIME_HIST; | |
182 | put_vma (core_bfd, s_lowpc, (bfd_byte *) hdr.low_pc); | |
183 | put_vma (core_bfd, s_highpc, (bfd_byte *) hdr.high_pc); | |
184 | bfd_put_32 (core_bfd, hist_num_bins, (bfd_byte *) hdr.hist_size); | |
185 | bfd_put_32 (core_bfd, hz, (bfd_byte *) hdr.prof_rate); | |
186 | strncpy (hdr.dimen, hist_dimension, sizeof (hdr.dimen)); | |
187 | hdr.dimen_abbrev = hist_dimension_abbrev; | |
188 | ||
189 | if (fwrite (&tag, sizeof (tag), 1, ofp) != 1 | |
190 | || fwrite (&hdr, sizeof (hdr), 1, ofp) != 1) | |
191 | { | |
192 | perror (filename); | |
193 | done (1); | |
194 | } | |
195 | ||
196 | for (i = 0; i < hist_num_bins; ++i) | |
197 | { | |
198 | bfd_put_16 (core_bfd, hist_sample[i], (bfd_byte *) & count[0]); | |
199 | if (fwrite (&count[0], sizeof (count), 1, ofp) != 1) | |
200 | { | |
201 | perror (filename); | |
202 | done (1); | |
203 | } | |
204 | } | |
205 | } | |
206 | ||
207 | ||
208 | /* | |
209 | * Calculate scaled entry point addresses (to save time in | |
210 | * hist_assign_samples), and, on architectures that have procedure | |
211 | * entry masks at the start of a function, possibly push the scaled | |
212 | * entry points over the procedure entry mask, if it turns out that | |
213 | * the entry point is in one bin and the code for a routine is in the | |
214 | * next bin. | |
215 | */ | |
216 | static void | |
217 | scale_and_align_entries () | |
218 | { | |
219 | Sym *sym; | |
220 | bfd_vma bin_of_entry; | |
221 | bfd_vma bin_of_code; | |
222 | ||
223 | for (sym = symtab.base; sym < symtab.limit; sym++) | |
224 | { | |
225 | sym->hist.scaled_addr = sym->addr / sizeof (UNIT); | |
226 | bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale; | |
227 | bin_of_code = (sym->hist.scaled_addr + UNITS_TO_CODE - lowpc) / hist_scale; | |
228 | if (bin_of_entry < bin_of_code) | |
229 | { | |
230 | DBG (SAMPLEDEBUG, | |
231 | printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n", | |
232 | sym->hist.scaled_addr, | |
233 | sym->hist.scaled_addr + UNITS_TO_CODE)); | |
234 | sym->hist.scaled_addr += UNITS_TO_CODE; | |
235 | } | |
236 | } | |
237 | } | |
238 | ||
239 | ||
240 | /* | |
241 | * Assign samples to the symbol to which they belong. | |
242 | * | |
243 | * Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC) | |
244 | * which may overlap one more symbol address ranges. If a symbol | |
245 | * overlaps with the bin's address range by O percent, then O percent | |
246 | * of the bin's count is credited to that symbol. | |
247 | * | |
248 | * There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be | |
249 | * with respect to the symbol's address range [SYM_LOW_PC, | |
250 | * SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes | |
251 | * the distance (in UNITs) between the arrows, the fraction of the | |
252 | * sample that is to be credited to the symbol which starts at | |
253 | * SYM_LOW_PC. | |
254 | * | |
255 | * sym_low_pc sym_high_pc | |
256 | * | | | |
257 | * v v | |
258 | * | |
259 | * +-----------------------------------------------+ | |
260 | * | | | |
261 | * | ->| |<- ->| |<- ->| |<- | | |
262 | * | | | | | | | |
263 | * +---------+ +---------+ +---------+ | |
264 | * | |
265 | * ^ ^ ^ ^ ^ ^ | |
266 | * | | | | | | | |
267 | * bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc | |
268 | * | |
269 | * For the VAX we assert that samples will never fall in the first two | |
270 | * bytes of any routine, since that is the entry mask, thus we call | |
271 | * scale_and_align_entries() to adjust the entry points if the entry | |
272 | * mask falls in one bin but the code for the routine doesn't start | |
273 | * until the next bin. In conjunction with the alignment of routine | |
274 | * addresses, this should allow us to have only one sample for every | |
275 | * four bytes of text space and never have any overlap (the two end | |
276 | * cases, above). | |
277 | */ | |
278 | void | |
279 | DEFUN_VOID (hist_assign_samples) | |
280 | { | |
281 | bfd_vma bin_low_pc, bin_high_pc; | |
282 | bfd_vma sym_low_pc, sym_high_pc; | |
283 | bfd_vma overlap, addr; | |
284 | int bin_count, i; | |
285 | unsigned int j; | |
286 | double time, credit; | |
287 | ||
288 | /* read samples and assign to symbols: */ | |
289 | hist_scale = highpc - lowpc; | |
290 | hist_scale /= hist_num_bins; | |
291 | scale_and_align_entries (); | |
292 | ||
293 | /* iterate over all sample bins: */ | |
294 | ||
295 | for (i = 0, j = 1; i < hist_num_bins; ++i) | |
296 | { | |
297 | bin_count = hist_sample[i]; | |
298 | if (!bin_count) | |
299 | { | |
300 | continue; | |
301 | } | |
302 | bin_low_pc = lowpc + (bfd_vma) (hist_scale * i); | |
303 | bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1)); | |
304 | time = bin_count; | |
305 | DBG (SAMPLEDEBUG, | |
306 | printf ( | |
307 | "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%d\n", | |
308 | sizeof (UNIT) * bin_low_pc, sizeof (UNIT) * bin_high_pc, | |
309 | bin_count)); | |
310 | total_time += time; | |
311 | ||
312 | /* credit all symbols that are covered by bin I: */ | |
313 | ||
314 | for (j = j - 1; j < symtab.len; ++j) | |
315 | { | |
316 | sym_low_pc = symtab.base[j].hist.scaled_addr; | |
317 | sym_high_pc = symtab.base[j + 1].hist.scaled_addr; | |
318 | /* | |
319 | * If high end of bin is below entry address, go for next | |
320 | * bin: | |
321 | */ | |
322 | if (bin_high_pc < sym_low_pc) | |
323 | { | |
324 | break; | |
325 | } | |
326 | /* | |
327 | * If low end of bin is above high end of symbol, go for | |
328 | * next symbol. | |
329 | */ | |
330 | if (bin_low_pc >= sym_high_pc) | |
331 | { | |
332 | continue; | |
333 | } | |
334 | overlap = | |
335 | MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc); | |
336 | if (overlap > 0) | |
337 | { | |
338 | DBG (SAMPLEDEBUG, | |
339 | printf ( | |
340 | "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n", | |
341 | symtab.base[j].addr, sizeof (UNIT) * sym_high_pc, | |
342 | symtab.base[j].name, overlap * time / hist_scale, | |
343 | overlap)); | |
344 | addr = symtab.base[j].addr; | |
345 | credit = overlap * time / hist_scale; | |
346 | /* | |
347 | * Credit symbol if it appears in INCL_FLAT or that | |
348 | * table is empty and it does not appear it in | |
349 | * EXCL_FLAT. | |
350 | */ | |
351 | if (sym_lookup (&syms[INCL_FLAT], addr) | |
352 | || (syms[INCL_FLAT].len == 0 | |
353 | && !sym_lookup (&syms[EXCL_FLAT], addr))) | |
354 | { | |
355 | symtab.base[j].hist.time += credit; | |
356 | } | |
357 | else | |
358 | { | |
359 | total_time -= credit; | |
360 | } | |
361 | } | |
362 | } | |
363 | } | |
364 | DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n", | |
365 | total_time)); | |
366 | } | |
367 | ||
368 | ||
369 | /* | |
370 | * Print header for flag histogram profile: | |
371 | */ | |
372 | static void | |
373 | DEFUN (print_header, (prefix), const char prefix) | |
374 | { | |
375 | char unit[64]; | |
376 | ||
377 | sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev); | |
378 | ||
379 | if (bsd_style_output) | |
380 | { | |
381 | printf (_("\ngranularity: each sample hit covers %ld byte(s)"), | |
382 | (long) hist_scale * sizeof (UNIT)); | |
383 | if (total_time > 0.0) | |
384 | { | |
385 | printf (_(" for %.2f%% of %.2f %s\n\n"), | |
386 | 100.0 / total_time, total_time / hz, hist_dimension); | |
387 | } | |
388 | } | |
389 | else | |
390 | { | |
391 | printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension); | |
392 | } | |
393 | ||
394 | if (total_time <= 0.0) | |
395 | { | |
396 | printf (_(" no time accumulated\n\n")); | |
397 | /* this doesn't hurt since all the numerators will be zero: */ | |
398 | total_time = 1.0; | |
399 | } | |
400 | ||
401 | printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n", | |
402 | "% ", _("cumulative"), _("self "), "", _("self "), _("total "), ""); | |
403 | printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n", | |
404 | _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit, | |
405 | _("name")); | |
406 | } | |
407 | ||
408 | ||
409 | static void | |
410 | DEFUN (print_line, (sym, scale), Sym * sym AND double scale) | |
411 | { | |
412 | if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0) | |
413 | { | |
414 | return; | |
415 | } | |
416 | ||
417 | accum_time += sym->hist.time; | |
418 | if (bsd_style_output) | |
419 | { | |
420 | printf ("%5.1f %10.2f %8.2f", | |
421 | total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0, | |
422 | accum_time / hz, sym->hist.time / hz); | |
423 | } | |
424 | else | |
425 | { | |
426 | printf ("%6.2f %9.2f %8.2f", | |
427 | total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0, | |
428 | accum_time / hz, sym->hist.time / hz); | |
429 | } | |
430 | if (sym->ncalls != 0) | |
431 | { | |
432 | printf (" %8lu %8.2f %8.2f ", | |
433 | sym->ncalls, scale * sym->hist.time / hz / sym->ncalls, | |
434 | scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls); | |
435 | } | |
436 | else | |
437 | { | |
438 | printf (" %8.8s %8.8s %8.8s ", "", "", ""); | |
439 | } | |
440 | if (bsd_style_output) | |
441 | { | |
442 | print_name (sym); | |
443 | } | |
444 | else | |
445 | { | |
446 | print_name_only (sym); | |
447 | } | |
448 | printf ("\n"); | |
449 | } | |
450 | ||
451 | ||
452 | /* | |
453 | * Compare LP and RP. The primary comparison key is execution time, | |
454 | * the secondary is number of invocation, and the tertiary is the | |
455 | * lexicographic order of the function names. | |
456 | */ | |
457 | static int | |
458 | DEFUN (cmp_time, (lp, rp), const PTR lp AND const PTR rp) | |
459 | { | |
460 | const Sym *left = *(const Sym **) lp; | |
461 | const Sym *right = *(const Sym **) rp; | |
462 | double time_diff; | |
463 | ||
464 | time_diff = right->hist.time - left->hist.time; | |
465 | if (time_diff > 0.0) | |
466 | { | |
467 | return 1; | |
468 | } | |
469 | if (time_diff < 0.0) | |
470 | { | |
471 | return -1; | |
472 | } | |
473 | ||
474 | if (right->ncalls > left->ncalls) | |
475 | { | |
476 | return 1; | |
477 | } | |
478 | if (right->ncalls < left->ncalls) | |
479 | { | |
480 | return -1; | |
481 | } | |
482 | ||
483 | return strcmp (left->name, right->name); | |
484 | } | |
485 | ||
486 | ||
487 | /* | |
488 | * Print the flat histogram profile. | |
489 | */ | |
490 | void | |
491 | DEFUN_VOID (hist_print) | |
492 | { | |
493 | Sym **time_sorted_syms, *top_dog, *sym; | |
494 | unsigned int index; | |
495 | int log_scale; | |
496 | double top_time, time; | |
497 | bfd_vma addr; | |
498 | ||
499 | if (first_output) | |
500 | { | |
501 | first_output = FALSE; | |
502 | } | |
503 | else | |
504 | { | |
505 | printf ("\f\n"); | |
506 | } | |
507 | ||
508 | accum_time = 0.0; | |
509 | if (bsd_style_output) | |
510 | { | |
511 | if (print_descriptions) | |
512 | { | |
513 | printf (_("\n\n\nflat profile:\n")); | |
514 | flat_blurb (stdout); | |
515 | } | |
516 | } | |
517 | else | |
518 | { | |
519 | printf (_("Flat profile:\n")); | |
520 | } | |
521 | /* | |
522 | * Sort the symbol table by time (call-count and name as secondary | |
523 | * and tertiary keys): | |
524 | */ | |
525 | time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); | |
526 | for (index = 0; index < symtab.len; ++index) | |
527 | { | |
528 | time_sorted_syms[index] = &symtab.base[index]; | |
529 | } | |
530 | qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time); | |
531 | ||
532 | if (bsd_style_output) | |
533 | { | |
534 | log_scale = 5; /* milli-seconds is BSD-default */ | |
535 | } | |
536 | else | |
537 | { | |
538 | /* | |
539 | * Search for symbol with highest per-call execution time and | |
540 | * scale accordingly: | |
541 | */ | |
542 | log_scale = 0; | |
543 | top_dog = 0; | |
544 | top_time = 0.0; | |
545 | for (index = 0; index < symtab.len; ++index) | |
546 | { | |
547 | sym = time_sorted_syms[index]; | |
548 | if (sym->ncalls != 0) | |
549 | { | |
550 | time = (sym->hist.time + sym->cg.child_time) / sym->ncalls; | |
551 | if (time > top_time) | |
552 | { | |
553 | top_dog = sym; | |
554 | top_time = time; | |
555 | } | |
556 | } | |
557 | } | |
558 | if (top_dog && top_dog->ncalls != 0 && top_time > 0.0) | |
559 | { | |
560 | top_time /= hz; | |
561 | while (SItab[log_scale].scale * top_time < 1000.0 | |
562 | && ((size_t) log_scale | |
563 | < sizeof (SItab) / sizeof (SItab[0]) - 1)) | |
564 | { | |
565 | ++log_scale; | |
566 | } | |
567 | } | |
568 | } | |
569 | ||
570 | /* | |
571 | * For now, the dimension is always seconds. In the future, we | |
572 | * may also want to support other (pseudo-)dimensions (such as | |
573 | * I-cache misses etc.). | |
574 | */ | |
575 | print_header (SItab[log_scale].prefix); | |
576 | for (index = 0; index < symtab.len; ++index) | |
577 | { | |
578 | addr = time_sorted_syms[index]->addr; | |
579 | /* | |
580 | * Print symbol if its in INCL_FLAT table or that table | |
581 | * is empty and the symbol is not in EXCL_FLAT. | |
582 | */ | |
583 | if (sym_lookup (&syms[INCL_FLAT], addr) | |
584 | || (syms[INCL_FLAT].len == 0 | |
585 | && !sym_lookup (&syms[EXCL_FLAT], addr))) | |
586 | { | |
587 | print_line (time_sorted_syms[index], SItab[log_scale].scale); | |
588 | } | |
589 | } | |
590 | free (time_sorted_syms); | |
591 | ||
592 | if (print_descriptions && !bsd_style_output) | |
593 | { | |
594 | flat_blurb (stdout); | |
595 | } | |
596 | } |