gdb/
[deliverable/binutils-gdb.git] / gprof / cg_print.c
1 /* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright 2000, 2001, 2002, 2004, 2007, 2009, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Binutils.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
22 \f
23 #include "gprof.h"
24 #include "libiberty.h"
25 #include "filenames.h"
26 #include "search_list.h"
27 #include "source.h"
28 #include "symtab.h"
29 #include "cg_arcs.h"
30 #include "cg_print.h"
31 #include "hist.h"
32 #include "utils.h"
33 #include "corefile.h"
34
35 /* Return value of comparison functions used to sort tables. */
36 #define LESSTHAN -1
37 #define EQUALTO 0
38 #define GREATERTHAN 1
39
40 static void print_header (void);
41 static void print_cycle (Sym *);
42 static int cmp_member (Sym *, Sym *);
43 static void sort_members (Sym *);
44 static void print_members (Sym *);
45 static int cmp_arc (Arc *, Arc *);
46 static void sort_parents (Sym *);
47 static void print_parents (Sym *);
48 static void sort_children (Sym *);
49 static void print_children (Sym *);
50 static void print_line (Sym *);
51 static int cmp_name (const PTR, const PTR);
52 static int cmp_arc_count (const PTR, const PTR);
53 static int cmp_fun_nuses (const PTR, const PTR);
54 static void order_and_dump_functions_by_arcs
55 (Arc **, unsigned long, int, Arc **, unsigned long *);
56
57 /* Declarations of automatically generated functions to output blurbs. */
58 extern void bsd_callg_blurb (FILE * fp);
59 extern void fsf_callg_blurb (FILE * fp);
60
61 double print_time = 0.0;
62
63
64 static void
65 print_header ()
66 {
67 if (first_output)
68 first_output = FALSE;
69 else
70 printf ("\f\n");
71
72 if (!bsd_style_output)
73 {
74 if (print_descriptions)
75 printf (_("\t\t Call graph (explanation follows)\n\n"));
76 else
77 printf (_("\t\t\tCall graph\n\n"));
78 }
79
80 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
81 (long) hist_scale * (long) sizeof (UNIT));
82
83 if (print_time > 0.0)
84 printf (_(" for %.2f%% of %.2f seconds\n\n"),
85 100.0 / print_time, print_time / hz);
86 else
87 {
88 printf (_(" no time propagated\n\n"));
89
90 /* This doesn't hurt, since all the numerators will be 0.0. */
91 print_time = 1.0;
92 }
93
94 if (bsd_style_output)
95 {
96 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
97 "", "", "", "", _("called"), _("total"), _("parents"));
98 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
99 _("index"),
100 /* xgettext:no-c-format */
101 _("%time"),
102 _("self"), _("descendants"), _("called"), _("self"),
103 _("name"), _("index"));
104 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
105 "", "", "", "", _("called"), _("total"), _("children"));
106 printf ("\n");
107 }
108 else
109 {
110 printf (_("index %% time self children called name\n"));
111 }
112 }
113
114 /* Print a cycle header. */
115
116 static void
117 print_cycle (Sym *cyc)
118 {
119 char buf[BUFSIZ];
120
121 sprintf (buf, "[%d]", cyc->cg.index);
122 printf (bsd_style_output
123 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
124 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
125 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
126 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
127
128 if (cyc->cg.self_calls != 0)
129 printf ("+%-7lu", cyc->cg.self_calls);
130 else
131 printf (" %7.7s", "");
132
133 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
134 }
135
136 /* Compare LEFT and RIGHT membmer. Major comparison key is
137 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
138
139 static int
140 cmp_member (Sym *left, Sym *right)
141 {
142 double left_time = left->cg.prop.self + left->cg.prop.child;
143 double right_time = right->cg.prop.self + right->cg.prop.child;
144 unsigned long left_calls = left->ncalls + left->cg.self_calls;
145 unsigned long right_calls = right->ncalls + right->cg.self_calls;
146
147 if (left_time > right_time)
148 return GREATERTHAN;
149
150 if (left_time < right_time)
151 return LESSTHAN;
152
153 if (left_calls > right_calls)
154 return GREATERTHAN;
155
156 if (left_calls < right_calls)
157 return LESSTHAN;
158
159 return EQUALTO;
160 }
161
162 /* Sort members of a cycle. */
163
164 static void
165 sort_members (Sym *cyc)
166 {
167 Sym *todo, *doing, *prev;
168
169 /* Detach cycle members from cyclehead,
170 and insertion sort them back on. */
171 todo = cyc->cg.cyc.next;
172 cyc->cg.cyc.next = 0;
173
174 for (doing = todo; doing != NULL; doing = todo)
175 {
176 todo = doing->cg.cyc.next;
177
178 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
179 {
180 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
181 break;
182 }
183
184 doing->cg.cyc.next = prev->cg.cyc.next;
185 prev->cg.cyc.next = doing;
186 }
187 }
188
189 /* Print the members of a cycle. */
190
191 static void
192 print_members (Sym *cyc)
193 {
194 Sym *member;
195
196 sort_members (cyc);
197
198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
199 {
200 printf (bsd_style_output
201 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204 member->ncalls);
205
206 if (member->cg.self_calls != 0)
207 printf ("+%-7lu", member->cg.self_calls);
208 else
209 printf (" %7.7s", "");
210
211 printf (" ");
212 print_name (member);
213 printf ("\n");
214 }
215 }
216
217 /* Compare two arcs to/from the same child/parent.
218 - if one arc is a self arc, it's least.
219 - if one arc is within a cycle, it's less than.
220 - if both arcs are within a cycle, compare arc counts.
221 - if neither arc is within a cycle, compare with
222 time + child_time as major key
223 arc count as minor key. */
224
225 static int
226 cmp_arc (Arc *left, Arc *right)
227 {
228 Sym *left_parent = left->parent;
229 Sym *left_child = left->child;
230 Sym *right_parent = right->parent;
231 Sym *right_child = right->child;
232 double left_time, right_time;
233
234 DBG (TIMEDEBUG,
235 printf ("[cmp_arc] ");
236 print_name (left_parent);
237 printf (" calls ");
238 print_name (left_child);
239 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
240 left->count, left_child->ncalls);
241 printf ("[cmp_arc] ");
242 print_name (right_parent);
243 printf (" calls ");
244 print_name (right_child);
245 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
246 right->count, right_child->ncalls);
247 printf ("\n");
248 );
249
250 if (left_parent == left_child)
251 return LESSTHAN; /* Left is a self call. */
252
253 if (right_parent == right_child)
254 return GREATERTHAN; /* Right is a self call. */
255
256 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
257 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
258 {
259 /* Left is a call within a cycle. */
260 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
261 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
262 {
263 /* Right is a call within the cycle, too. */
264 if (left->count < right->count)
265 return LESSTHAN;
266
267 if (left->count > right->count)
268 return GREATERTHAN;
269
270 return EQUALTO;
271 }
272 else
273 {
274 /* Right isn't a call within the cycle. */
275 return LESSTHAN;
276 }
277 }
278 else
279 {
280 /* Left isn't a call within a cycle. */
281 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
282 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
283 {
284 /* Right is a call within a cycle. */
285 return GREATERTHAN;
286 }
287 else
288 {
289 /* Neither is a call within a cycle. */
290 left_time = left->time + left->child_time;
291 right_time = right->time + right->child_time;
292
293 if (left_time < right_time)
294 return LESSTHAN;
295
296 if (left_time > right_time)
297 return GREATERTHAN;
298
299 if (left->count < right->count)
300 return LESSTHAN;
301
302 if (left->count > right->count)
303 return GREATERTHAN;
304
305 return EQUALTO;
306 }
307 }
308 }
309
310
311 static void
312 sort_parents (Sym * child)
313 {
314 Arc *arc, *detached, sorted, *prev;
315
316 /* Unlink parents from child, then insertion sort back on to
317 sorted's parents.
318 *arc the arc you have detached and are inserting.
319 *detached the rest of the arcs to be sorted.
320 sorted arc list onto which you insertion sort.
321 *prev arc before the arc you are comparing. */
322 sorted.next_parent = 0;
323
324 for (arc = child->cg.parents; arc; arc = detached)
325 {
326 detached = arc->next_parent;
327
328 /* Consider *arc as disconnected; insert it into sorted. */
329 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
330 {
331 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
332 break;
333 }
334
335 arc->next_parent = prev->next_parent;
336 prev->next_parent = arc;
337 }
338
339 /* Reattach sorted arcs to child. */
340 child->cg.parents = sorted.next_parent;
341 }
342
343
344 static void
345 print_parents (Sym *child)
346 {
347 Sym *parent;
348 Arc *arc;
349 Sym *cycle_head;
350
351 if (child->cg.cyc.head != 0)
352 cycle_head = child->cg.cyc.head;
353 else
354 cycle_head = child;
355
356 if (!child->cg.parents)
357 {
358 printf (bsd_style_output
359 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
360 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
361 "", "", "", "", "", "");
362 return;
363 }
364
365 sort_parents (child);
366
367 for (arc = child->cg.parents; arc; arc = arc->next_parent)
368 {
369 parent = arc->parent;
370 if (child == parent || (child->cg.cyc.num != 0
371 && parent->cg.cyc.num == child->cg.cyc.num))
372 {
373 /* Selfcall or call among siblings. */
374 printf (bsd_style_output
375 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
376 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
377 "", "", "", "",
378 arc->count, "");
379 print_name (parent);
380 printf ("\n");
381 }
382 else
383 {
384 /* Regular parent of child. */
385 printf (bsd_style_output
386 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
387 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
388 "", "",
389 arc->time / hz, arc->child_time / hz,
390 arc->count, cycle_head->ncalls);
391 print_name (parent);
392 printf ("\n");
393 }
394 }
395 }
396
397
398 static void
399 sort_children (Sym *parent)
400 {
401 Arc *arc, *detached, sorted, *prev;
402
403 /* Unlink children from parent, then insertion sort back on to
404 sorted's children.
405 *arc the arc you have detached and are inserting.
406 *detached the rest of the arcs to be sorted.
407 sorted arc list onto which you insertion sort.
408 *prev arc before the arc you are comparing. */
409 sorted.next_child = 0;
410
411 for (arc = parent->cg.children; arc; arc = detached)
412 {
413 detached = arc->next_child;
414
415 /* Consider *arc as disconnected; insert it into sorted. */
416 for (prev = &sorted; prev->next_child; prev = prev->next_child)
417 {
418 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
419 break;
420 }
421
422 arc->next_child = prev->next_child;
423 prev->next_child = arc;
424 }
425
426 /* Reattach sorted children to parent. */
427 parent->cg.children = sorted.next_child;
428 }
429
430
431 static void
432 print_children (Sym *parent)
433 {
434 Sym *child;
435 Arc *arc;
436
437 sort_children (parent);
438 arc = parent->cg.children;
439
440 for (arc = parent->cg.children; arc; arc = arc->next_child)
441 {
442 child = arc->child;
443 if (child == parent || (child->cg.cyc.num != 0
444 && child->cg.cyc.num == parent->cg.cyc.num))
445 {
446 /* Self call or call to sibling. */
447 printf (bsd_style_output
448 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
449 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
450 "", "", "", "", arc->count, "");
451 print_name (child);
452 printf ("\n");
453 }
454 else
455 {
456 /* Regular child of parent. */
457 printf (bsd_style_output
458 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
459 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
460 "", "",
461 arc->time / hz, arc->child_time / hz,
462 arc->count, child->cg.cyc.head->ncalls);
463 print_name (child);
464 printf ("\n");
465 }
466 }
467 }
468
469
470 static void
471 print_line (Sym *np)
472 {
473 char buf[BUFSIZ];
474
475 sprintf (buf, "[%d]", np->cg.index);
476 printf (bsd_style_output
477 ? "%-6.6s %5.1f %7.2f %11.2f"
478 : "%-6.6s %5.1f %7.2f %7.2f", buf,
479 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
480 np->cg.prop.self / hz, np->cg.prop.child / hz);
481
482 if ((np->ncalls + np->cg.self_calls) != 0)
483 {
484 printf (" %7lu", np->ncalls);
485
486 if (np->cg.self_calls != 0)
487 printf ("+%-7lu ", np->cg.self_calls);
488 else
489 printf (" %7.7s ", "");
490 }
491 else
492 {
493 printf (" %7.7s %7.7s ", "", "");
494 }
495
496 print_name (np);
497 printf ("\n");
498 }
499
500
501 /* Print dynamic call graph. */
502
503 void
504 cg_print (Sym ** timesortsym)
505 {
506 unsigned int sym_index;
507 Sym *parent;
508
509 if (print_descriptions && bsd_style_output)
510 bsd_callg_blurb (stdout);
511
512 print_header ();
513
514 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
515 {
516 parent = timesortsym[sym_index];
517
518 if ((ignore_zeros && parent->ncalls == 0
519 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
520 && parent->cg.prop.child == 0)
521 || !parent->cg.print_flag
522 || (line_granularity && ! parent->is_func))
523 continue;
524
525 if (!parent->name && parent->cg.cyc.num != 0)
526 {
527 /* Cycle header. */
528 print_cycle (parent);
529 print_members (parent);
530 }
531 else
532 {
533 print_parents (parent);
534 print_line (parent);
535 print_children (parent);
536 }
537
538 if (bsd_style_output)
539 printf ("\n");
540
541 printf ("-----------------------------------------------\n");
542
543 if (bsd_style_output)
544 printf ("\n");
545 }
546
547 free (timesortsym);
548
549 if (print_descriptions && !bsd_style_output)
550 fsf_callg_blurb (stdout);
551 }
552
553
554 static int
555 cmp_name (const PTR left, const PTR right)
556 {
557 const Sym **npp1 = (const Sym **) left;
558 const Sym **npp2 = (const Sym **) right;
559
560 return strcmp ((*npp1)->name, (*npp2)->name);
561 }
562
563
564 void
565 cg_print_index ()
566 {
567 unsigned int sym_index;
568 unsigned int nnames, todo, i, j;
569 int col, starting_col;
570 Sym **name_sorted_syms, *sym;
571 const char *filename;
572 char buf[20];
573 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
574
575 /* Now, sort regular function name
576 alphabetically to create an index. */
577 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
578
579 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
580 {
581 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
582 && symtab.base[sym_index].hist.time == 0)
583 continue;
584
585 name_sorted_syms[nnames++] = &symtab.base[sym_index];
586 }
587
588 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
589
590 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
591 name_sorted_syms[todo++] = &cycle_header[sym_index];
592
593 printf ("\f\n");
594 printf (_("Index by function name\n\n"));
595 sym_index = (todo + 2) / 3;
596
597 for (i = 0; i < sym_index; i++)
598 {
599 col = 0;
600 starting_col = 0;
601
602 for (j = i; j < todo; j += sym_index)
603 {
604 sym = name_sorted_syms[j];
605
606 if (sym->cg.print_flag)
607 sprintf (buf, "[%d]", sym->cg.index);
608 else
609 sprintf (buf, "(%d)", sym->cg.index);
610
611 if (j < nnames)
612 {
613 if (bsd_style_output)
614 {
615 printf ("%6.6s %-19.19s", buf, sym->name);
616 }
617 else
618 {
619 col += strlen (buf);
620
621 for (; col < starting_col + 5; ++col)
622 putchar (' ');
623
624 printf (" %s ", buf);
625 col += print_name_only (sym);
626
627 if (!line_granularity && sym->is_static && sym->file)
628 {
629 filename = sym->file->name;
630
631 if (!print_path)
632 {
633 filename = strrchr (filename, '/');
634
635 if (filename)
636 ++filename;
637 else
638 filename = sym->file->name;
639 }
640
641 printf (" (%s)", filename);
642 col += strlen (filename) + 3;
643 }
644 }
645 }
646 else
647 {
648 if (bsd_style_output)
649 {
650 printf ("%6.6s ", buf);
651 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
652 printf ("%-19.19s", buf);
653 }
654 else
655 {
656 col += strlen (buf);
657 for (; col < starting_col + 5; ++col)
658 putchar (' ');
659 printf (" %s ", buf);
660 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
661 printf ("%s", buf);
662 col += strlen (buf);
663 }
664 }
665
666 starting_col += column_width;
667 }
668
669 printf ("\n");
670 }
671
672 free (name_sorted_syms);
673 }
674
675 /* Compare two arcs based on their usage counts.
676 We want to sort in descending order. */
677
678 static int
679 cmp_arc_count (const PTR left, const PTR right)
680 {
681 const Arc **npp1 = (const Arc **) left;
682 const Arc **npp2 = (const Arc **) right;
683
684 if ((*npp1)->count > (*npp2)->count)
685 return -1;
686 else if ((*npp1)->count < (*npp2)->count)
687 return 1;
688 else
689 return 0;
690 }
691
692 /* Compare two funtions based on their usage counts.
693 We want to sort in descending order. */
694
695 static int
696 cmp_fun_nuses (const PTR left, const PTR right)
697 {
698 const Sym **npp1 = (const Sym **) left;
699 const Sym **npp2 = (const Sym **) right;
700
701 if ((*npp1)->nuses > (*npp2)->nuses)
702 return -1;
703 else if ((*npp1)->nuses < (*npp2)->nuses)
704 return 1;
705 else
706 return 0;
707 }
708
709 /* Print a suggested function ordering based on the profiling data.
710
711 We perform 4 major steps when ordering functions:
712
713 * Group unused functions together and place them at the
714 end of the function order.
715
716 * Search the highest use arcs (those which account for 90% of
717 the total arc count) for functions which have several parents.
718
719 Group those with the most call sites together (currently the
720 top 1.25% which have at least five different call sites).
721
722 These are emitted at the start of the function order.
723
724 * Use a greedy placement algorithm to place functions which
725 occur in the top 99% of the arcs in the profile. Some provisions
726 are made to handle high usage arcs where the parent and/or
727 child has already been placed.
728
729 * Run the same greedy placement algorithm on the remaining
730 arcs to place the leftover functions.
731
732
733 The various "magic numbers" should (one day) be tuneable by command
734 line options. They were arrived at by benchmarking a few applications
735 with various values to see which values produced better overall function
736 orderings.
737
738 Of course, profiling errors, machine limitations (PA long calls), and
739 poor cutoff values for the placement algorithm may limit the usefullness
740 of the resulting function order. Improvements would be greatly appreciated.
741
742 Suggestions:
743
744 * Place the functions with many callers near the middle of the
745 list to reduce long calls.
746
747 * Propagate arc usage changes as functions are placed. Ie if
748 func1 and func2 are placed together, arcs to/from those arcs
749 to the same parent/child should be combined, then resort the
750 arcs to choose the next one.
751
752 * Implement some global positioning algorithm to place the
753 chains made by the greedy local positioning algorithm. Probably
754 by examining arcs which haven't been placed yet to tie two
755 chains together.
756
757 * Take a function's size and time into account in the algorithm;
758 size in particular is important on the PA (long calls). Placing
759 many small functions onto their own page may be wise.
760
761 * Use better profiling information; many published algorithms
762 are based on call sequences through time, rather than just
763 arc counts.
764
765 * Prodecure cloning could improve performance when a small number
766 of arcs account for most of the calls to a particular function.
767
768 * Use relocation information to avoid moving unused functions
769 completely out of the code stream; this would avoid severe lossage
770 when the profile data bears little resemblance to actual runs.
771
772 * Propagation of arc usages should also improve .o link line
773 ordering which shares the same arc placement algorithm with
774 the function ordering code (in fact it is a degenerate case
775 of function ordering). */
776
777 void
778 cg_print_function_ordering (void)
779 {
780 unsigned long sym_index;
781 unsigned long arc_index;
782 unsigned long used, unused, scratch_index;
783 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
784 #ifdef __GNUC__
785 unsigned long long total_arcs, tmp_arcs_count;
786 #else
787 unsigned long total_arcs, tmp_arcs_count;
788 #endif
789 Sym **unused_syms, **used_syms, **scratch_syms;
790 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
791
792 sym_index = 0;
793 used = 0;
794 unused = 0;
795 scratch_index = 0;
796 unplaced_arc_count = 0;
797 high_arc_count = 0;
798 scratch_arc_count = 0;
799
800 /* First group all the unused functions together. */
801 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
804 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
807
808 /* Walk through all the functions; mark those which are never
809 called as placed (we'll emit them as a group later). */
810 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
811 {
812 if (symtab.base[sym_index].ncalls == 0)
813 {
814 unused_syms[unused++] = &symtab.base[sym_index];
815 symtab.base[sym_index].has_been_placed = 1;
816 }
817 else
818 {
819 used_syms[used++] = &symtab.base[sym_index];
820 symtab.base[sym_index].has_been_placed = 0;
821 symtab.base[sym_index].next = 0;
822 symtab.base[sym_index].prev = 0;
823 symtab.base[sym_index].nuses = 0;
824 }
825 }
826
827 /* Sort the arcs from most used to least used. */
828 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
829
830 /* Compute the total arc count. Also mark arcs as unplaced.
831
832 Note we don't compensate for overflow if that happens!
833 Overflow is much less likely when this file is compiled
834 with GCC as it can double-wide integers via long long. */
835 total_arcs = 0;
836 for (arc_index = 0; arc_index < numarcs; arc_index++)
837 {
838 total_arcs += arcs[arc_index]->count;
839 arcs[arc_index]->has_been_placed = 0;
840 }
841
842 /* We want to pull out those functions which are referenced
843 by many highly used arcs and emit them as a group. This
844 could probably use some tuning. */
845 tmp_arcs_count = 0;
846 for (arc_index = 0; arc_index < numarcs; arc_index++)
847 {
848 tmp_arcs_count += arcs[arc_index]->count;
849
850 /* Count how many times each parent and child are used up
851 to our threshhold of arcs (90%). */
852 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
853 break;
854
855 arcs[arc_index]->child->nuses++;
856 }
857
858 /* Now sort a temporary symbol table based on the number of
859 times each function was used in the highest used arcs. */
860 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
861 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
862
863 /* Now pick out those symbols we're going to emit as
864 a group. We take up to 1.25% of the used symbols. */
865 for (sym_index = 0; sym_index < used / 80; sym_index++)
866 {
867 Sym *sym = scratch_syms[sym_index];
868 Arc *arc;
869
870 /* If we hit symbols that aren't used from many call sites,
871 then we can quit. We choose five as the low limit for
872 no particular reason. */
873 if (sym->nuses == 5)
874 break;
875
876 /* We're going to need the arcs between these functions.
877 Unfortunately, we don't know all these functions
878 until we're done. So we keep track of all the arcs
879 to the functions we care about, then prune out those
880 which are uninteresting.
881
882 An interesting variation would be to quit when we found
883 multi-call site functions which account for some percentage
884 of the arcs. */
885 arc = sym->cg.children;
886
887 while (arc)
888 {
889 if (arc->parent != arc->child)
890 scratch_arcs[scratch_arc_count++] = arc;
891 arc->has_been_placed = 1;
892 arc = arc->next_child;
893 }
894
895 arc = sym->cg.parents;
896
897 while (arc)
898 {
899 if (arc->parent != arc->child)
900 scratch_arcs[scratch_arc_count++] = arc;
901 arc->has_been_placed = 1;
902 arc = arc->next_parent;
903 }
904
905 /* Keep track of how many symbols we're going to place. */
906 scratch_index = sym_index;
907
908 /* A lie, but it makes identifying
909 these functions easier later. */
910 sym->has_been_placed = 1;
911 }
912
913 /* Now walk through the temporary arcs and copy
914 those we care about into the high arcs array. */
915 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
916 {
917 Arc *arc = scratch_arcs[arc_index];
918
919 /* If this arc refers to highly used functions, then
920 then we want to keep it. */
921 if (arc->child->has_been_placed
922 && arc->parent->has_been_placed)
923 {
924 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
925
926 /* We need to turn of has_been_placed since we're going to
927 use the main arc placement algorithm on these arcs. */
928 arc->child->has_been_placed = 0;
929 arc->parent->has_been_placed = 0;
930 }
931 }
932
933 /* Dump the multi-site high usage functions which are not
934 going to be ordered by the main ordering algorithm. */
935 for (sym_index = 0; sym_index < scratch_index; sym_index++)
936 {
937 if (scratch_syms[sym_index]->has_been_placed)
938 printf ("%s\n", scratch_syms[sym_index]->name);
939 }
940
941 /* Now we can order the multi-site high use
942 functions based on the arcs between them. */
943 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
944 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
945 unplaced_arcs, &unplaced_arc_count);
946
947 /* Order and dump the high use functions left,
948 these typically have only a few call sites. */
949 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
950 unplaced_arcs, &unplaced_arc_count);
951
952 /* Now place the rarely used functions. */
953 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
954 scratch_arcs, &scratch_arc_count);
955
956 /* Output any functions not emitted by the order_and_dump calls. */
957 for (sym_index = 0; sym_index < used; sym_index++)
958 if (used_syms[sym_index]->has_been_placed == 0)
959 printf("%s\n", used_syms[sym_index]->name);
960
961 /* Output the unused functions. */
962 for (sym_index = 0; sym_index < unused; sym_index++)
963 printf("%s\n", unused_syms[sym_index]->name);
964
965 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
968 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
971
972 free (unused_syms);
973 free (used_syms);
974 free (scratch_syms);
975 free (high_arcs);
976 free (scratch_arcs);
977 free (unplaced_arcs);
978 }
979
980 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
981 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
982
983 If ALL is nonzero, then place all functions referenced by THE_ARCS,
984 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
985
986 #define MOST 0.99
987 static void
988 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
989 unplaced_arcs, unplaced_arc_count)
990 Arc **the_arcs;
991 unsigned long arc_count;
992 int all;
993 Arc **unplaced_arcs;
994 unsigned long *unplaced_arc_count;
995 {
996 #ifdef __GNUC__
997 unsigned long long tmp_arcs, total_arcs;
998 #else
999 unsigned long tmp_arcs, total_arcs;
1000 #endif
1001 unsigned int arc_index;
1002
1003 /* If needed, compute the total arc count.
1004
1005 Note we don't compensate for overflow if that happens! */
1006 if (! all)
1007 {
1008 total_arcs = 0;
1009 for (arc_index = 0; arc_index < arc_count; arc_index++)
1010 total_arcs += the_arcs[arc_index]->count;
1011 }
1012 else
1013 total_arcs = 0;
1014
1015 tmp_arcs = 0;
1016
1017 for (arc_index = 0; arc_index < arc_count; arc_index++)
1018 {
1019 Sym *sym1, *sym2;
1020 Sym *child, *parent;
1021
1022 tmp_arcs += the_arcs[arc_index]->count;
1023
1024 /* Ignore this arc if it's already been placed. */
1025 if (the_arcs[arc_index]->has_been_placed)
1026 continue;
1027
1028 child = the_arcs[arc_index]->child;
1029 parent = the_arcs[arc_index]->parent;
1030
1031 /* If we're not using all arcs, and this is a rarely used
1032 arc, then put it on the unplaced_arc list. Similarly
1033 if both the parent and child of this arc have been placed. */
1034 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1035 || child->has_been_placed || parent->has_been_placed)
1036 {
1037 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1038 continue;
1039 }
1040
1041 /* If all slots in the parent and child are full, then there isn't
1042 anything we can do right now. We'll place this arc on the
1043 unplaced arc list in the hope that a global positioning
1044 algorithm can use it to place function chains. */
1045 if (parent->next && parent->prev && child->next && child->prev)
1046 {
1047 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1048 continue;
1049 }
1050
1051 /* If the parent is unattached, then find the closest
1052 place to attach it onto child's chain. Similarly
1053 for the opposite case. */
1054 if (!parent->next && !parent->prev)
1055 {
1056 int next_count = 0;
1057 int prev_count = 0;
1058 Sym *prev = child;
1059 Sym *next = child;
1060
1061 /* Walk to the beginning and end of the child's chain. */
1062 while (next->next)
1063 {
1064 next = next->next;
1065 next_count++;
1066 }
1067
1068 while (prev->prev)
1069 {
1070 prev = prev->prev;
1071 prev_count++;
1072 }
1073
1074 /* Choose the closest. */
1075 child = next_count < prev_count ? next : prev;
1076 }
1077 else if (! child->next && !child->prev)
1078 {
1079 int next_count = 0;
1080 int prev_count = 0;
1081 Sym *prev = parent;
1082 Sym *next = parent;
1083
1084 while (next->next)
1085 {
1086 next = next->next;
1087 next_count++;
1088 }
1089
1090 while (prev->prev)
1091 {
1092 prev = prev->prev;
1093 prev_count++;
1094 }
1095
1096 parent = prev_count < next_count ? prev : next;
1097 }
1098 else
1099 {
1100 /* Couldn't find anywhere to attach the functions,
1101 put the arc on the unplaced arc list. */
1102 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1103 continue;
1104 }
1105
1106 /* Make sure we don't tie two ends together. */
1107 sym1 = parent;
1108 if (sym1->next)
1109 while (sym1->next)
1110 sym1 = sym1->next;
1111 else
1112 while (sym1->prev)
1113 sym1 = sym1->prev;
1114
1115 sym2 = child;
1116 if (sym2->next)
1117 while (sym2->next)
1118 sym2 = sym2->next;
1119 else
1120 while (sym2->prev)
1121 sym2 = sym2->prev;
1122
1123 if (sym1 == child
1124 && sym2 == parent)
1125 {
1126 /* This would tie two ends together. */
1127 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1128 continue;
1129 }
1130
1131 if (parent->next)
1132 {
1133 /* Must attach to the parent's prev field. */
1134 if (! child->next)
1135 {
1136 /* parent-prev and child-next */
1137 parent->prev = child;
1138 child->next = parent;
1139 the_arcs[arc_index]->has_been_placed = 1;
1140 }
1141 }
1142 else if (parent->prev)
1143 {
1144 /* Must attach to the parent's next field. */
1145 if (! child->prev)
1146 {
1147 /* parent-next and child-prev */
1148 parent->next = child;
1149 child->prev = parent;
1150 the_arcs[arc_index]->has_been_placed = 1;
1151 }
1152 }
1153 else
1154 {
1155 /* Can attach to either field in the parent, depends
1156 on where we've got space in the child. */
1157 if (child->prev)
1158 {
1159 /* parent-prev and child-next. */
1160 parent->prev = child;
1161 child->next = parent;
1162 the_arcs[arc_index]->has_been_placed = 1;
1163 }
1164 else
1165 {
1166 /* parent-next and child-prev. */
1167 parent->next = child;
1168 child->prev = parent;
1169 the_arcs[arc_index]->has_been_placed = 1;
1170 }
1171 }
1172 }
1173
1174 /* Dump the chains of functions we've made. */
1175 for (arc_index = 0; arc_index < arc_count; arc_index++)
1176 {
1177 Sym *sym;
1178 if (the_arcs[arc_index]->parent->has_been_placed
1179 || the_arcs[arc_index]->child->has_been_placed)
1180 continue;
1181
1182 sym = the_arcs[arc_index]->parent;
1183
1184 /* If this symbol isn't attached to any other
1185 symbols, then we've got a rarely used arc.
1186
1187 Skip it for now, we'll deal with them later. */
1188 if (sym->next == NULL
1189 && sym->prev == NULL)
1190 continue;
1191
1192 /* Get to the start of this chain. */
1193 while (sym->prev)
1194 sym = sym->prev;
1195
1196 while (sym)
1197 {
1198 /* Mark it as placed. */
1199 sym->has_been_placed = 1;
1200 printf ("%s\n", sym->name);
1201 sym = sym->next;
1202 }
1203 }
1204
1205 /* If we want to place all the arcs, then output
1206 those which weren't placed by the main algorithm. */
1207 if (all)
1208 for (arc_index = 0; arc_index < arc_count; arc_index++)
1209 {
1210 Sym *sym;
1211 if (the_arcs[arc_index]->parent->has_been_placed
1212 || the_arcs[arc_index]->child->has_been_placed)
1213 continue;
1214
1215 sym = the_arcs[arc_index]->parent;
1216
1217 sym->has_been_placed = 1;
1218 printf ("%s\n", sym->name);
1219 }
1220 }
1221
1222 /* Compare two function_map structs based on file name.
1223 We want to sort in ascending order. */
1224
1225 static int
1226 cmp_symbol_map (const void * l, const void * r)
1227 {
1228 return filename_cmp (((struct function_map *) l)->file_name,
1229 ((struct function_map *) r)->file_name);
1230 }
1231
1232 /* Print a suggested .o ordering for files on a link line based
1233 on profiling information. This uses the function placement
1234 code for the bulk of its work. */
1235
1236 void
1237 cg_print_file_ordering (void)
1238 {
1239 unsigned long scratch_arc_count;
1240 unsigned long arc_index;
1241 unsigned long sym_index;
1242 Arc **scratch_arcs;
1243 char *last;
1244
1245 scratch_arc_count = 0;
1246
1247 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1248 for (arc_index = 0; arc_index < numarcs; arc_index++)
1249 {
1250 if (! arcs[arc_index]->parent->mapped
1251 || ! arcs[arc_index]->child->mapped)
1252 arcs[arc_index]->has_been_placed = 1;
1253 }
1254
1255 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1256 scratch_arcs, &scratch_arc_count);
1257
1258 /* Output .o's not handled by the main placement algorithm. */
1259 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1260 {
1261 if (symtab.base[sym_index].mapped
1262 && ! symtab.base[sym_index].has_been_placed)
1263 printf ("%s\n", symtab.base[sym_index].name);
1264 }
1265
1266 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1267
1268 /* Now output any .o's that didn't have any text symbols. */
1269 last = NULL;
1270 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1271 {
1272 unsigned int index2;
1273
1274 /* Don't bother searching if this symbol
1275 is the same as the previous one. */
1276 if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1277 continue;
1278
1279 for (index2 = 0; index2 < symtab.len; index2++)
1280 {
1281 if (! symtab.base[index2].mapped)
1282 continue;
1283
1284 if (!filename_cmp (symtab.base[index2].name,
1285 symbol_map[sym_index].file_name))
1286 break;
1287 }
1288
1289 /* If we didn't find it in the symbol table, then it must
1290 be a .o with no text symbols. Output it last. */
1291 if (index2 == symtab.len)
1292 printf ("%s\n", symbol_map[sym_index].file_name);
1293 last = symbol_map[sym_index].file_name;
1294 }
1295 }
This page took 0.066682 seconds and 4 git commands to generate.