1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright 2000, 2001, 2002, 2004, 2007, 2009
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 #include "libiberty.h"
25 #include "search_list.h"
34 /* Return value of comparison functions used to sort tables. */
39 static void print_header (void);
40 static void print_cycle (Sym
*);
41 static int cmp_member (Sym
*, Sym
*);
42 static void sort_members (Sym
*);
43 static void print_members (Sym
*);
44 static int cmp_arc (Arc
*, Arc
*);
45 static void sort_parents (Sym
*);
46 static void print_parents (Sym
*);
47 static void sort_children (Sym
*);
48 static void print_children (Sym
*);
49 static void print_line (Sym
*);
50 static int cmp_name (const PTR
, const PTR
);
51 static int cmp_arc_count (const PTR
, const PTR
);
52 static int cmp_fun_nuses (const PTR
, const PTR
);
53 static void order_and_dump_functions_by_arcs
54 (Arc
**, unsigned long, int, Arc
**, unsigned long *);
56 /* Declarations of automatically generated functions to output blurbs. */
57 extern void bsd_callg_blurb (FILE * fp
);
58 extern void fsf_callg_blurb (FILE * fp
);
60 double print_time
= 0.0;
71 if (!bsd_style_output
)
73 if (print_descriptions
)
74 printf (_("\t\t Call graph (explanation follows)\n\n"));
76 printf (_("\t\t\tCall graph\n\n"));
79 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80 (long) hist_scale
* (long) sizeof (UNIT
));
83 printf (_(" for %.2f%% of %.2f seconds\n\n"),
84 100.0 / print_time
, print_time
/ hz
);
87 printf (_(" no time propagated\n\n"));
89 /* This doesn't hurt, since all the numerators will be 0.0. */
95 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
96 "", "", "", "", _("called"), _("total"), _("parents"));
97 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
98 _("index"), _("%time"), _("self"), _("descendants"),
99 _("called"), _("self"), _("name"), _("index"));
100 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
101 "", "", "", "", _("called"), _("total"), _("children"));
106 printf (_("index %% time self children called name\n"));
110 /* Print a cycle header. */
113 print_cycle (Sym
*cyc
)
117 sprintf (buf
, "[%d]", cyc
->cg
.index
);
118 printf (bsd_style_output
119 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
120 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf
,
121 100 * (cyc
->cg
.prop
.self
+ cyc
->cg
.prop
.child
) / print_time
,
122 cyc
->cg
.prop
.self
/ hz
, cyc
->cg
.prop
.child
/ hz
, cyc
->ncalls
);
124 if (cyc
->cg
.self_calls
!= 0)
125 printf ("+%-7lu", cyc
->cg
.self_calls
);
127 printf (" %7.7s", "");
129 printf (_(" <cycle %d as a whole> [%d]\n"), cyc
->cg
.cyc
.num
, cyc
->cg
.index
);
132 /* Compare LEFT and RIGHT membmer. Major comparison key is
133 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
136 cmp_member (Sym
*left
, Sym
*right
)
138 double left_time
= left
->cg
.prop
.self
+ left
->cg
.prop
.child
;
139 double right_time
= right
->cg
.prop
.self
+ right
->cg
.prop
.child
;
140 unsigned long left_calls
= left
->ncalls
+ left
->cg
.self_calls
;
141 unsigned long right_calls
= right
->ncalls
+ right
->cg
.self_calls
;
143 if (left_time
> right_time
)
146 if (left_time
< right_time
)
149 if (left_calls
> right_calls
)
152 if (left_calls
< right_calls
)
158 /* Sort members of a cycle. */
161 sort_members (Sym
*cyc
)
163 Sym
*todo
, *doing
, *prev
;
165 /* Detach cycle members from cyclehead,
166 and insertion sort them back on. */
167 todo
= cyc
->cg
.cyc
.next
;
168 cyc
->cg
.cyc
.next
= 0;
170 for (doing
= todo
; doing
!= NULL
; doing
= todo
)
172 todo
= doing
->cg
.cyc
.next
;
174 for (prev
= cyc
; prev
->cg
.cyc
.next
; prev
= prev
->cg
.cyc
.next
)
176 if (cmp_member (doing
, prev
->cg
.cyc
.next
) == GREATERTHAN
)
180 doing
->cg
.cyc
.next
= prev
->cg
.cyc
.next
;
181 prev
->cg
.cyc
.next
= doing
;
185 /* Print the members of a cycle. */
188 print_members (Sym
*cyc
)
194 for (member
= cyc
->cg
.cyc
.next
; member
; member
= member
->cg
.cyc
.next
)
196 printf (bsd_style_output
197 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
198 : "%6.6s %5.5s %7.2f %7.2f %7lu",
199 "", "", member
->cg
.prop
.self
/ hz
, member
->cg
.prop
.child
/ hz
,
202 if (member
->cg
.self_calls
!= 0)
203 printf ("+%-7lu", member
->cg
.self_calls
);
205 printf (" %7.7s", "");
213 /* Compare two arcs to/from the same child/parent.
214 - if one arc is a self arc, it's least.
215 - if one arc is within a cycle, it's less than.
216 - if both arcs are within a cycle, compare arc counts.
217 - if neither arc is within a cycle, compare with
218 time + child_time as major key
219 arc count as minor key. */
222 cmp_arc (Arc
*left
, Arc
*right
)
224 Sym
*left_parent
= left
->parent
;
225 Sym
*left_child
= left
->child
;
226 Sym
*right_parent
= right
->parent
;
227 Sym
*right_child
= right
->child
;
228 double left_time
, right_time
;
231 printf ("[cmp_arc] ");
232 print_name (left_parent
);
234 print_name (left_child
);
235 printf (" %f + %f %lu/%lu\n", left
->time
, left
->child_time
,
236 left
->count
, left_child
->ncalls
);
237 printf ("[cmp_arc] ");
238 print_name (right_parent
);
240 print_name (right_child
);
241 printf (" %f + %f %lu/%lu\n", right
->time
, right
->child_time
,
242 right
->count
, right_child
->ncalls
);
246 if (left_parent
== left_child
)
247 return LESSTHAN
; /* Left is a self call. */
249 if (right_parent
== right_child
)
250 return GREATERTHAN
; /* Right is a self call. */
252 if (left_parent
->cg
.cyc
.num
!= 0 && left_child
->cg
.cyc
.num
!= 0
253 && left_parent
->cg
.cyc
.num
== left_child
->cg
.cyc
.num
)
255 /* Left is a call within a cycle. */
256 if (right_parent
->cg
.cyc
.num
!= 0 && right_child
->cg
.cyc
.num
!= 0
257 && right_parent
->cg
.cyc
.num
== right_child
->cg
.cyc
.num
)
259 /* Right is a call within the cycle, too. */
260 if (left
->count
< right
->count
)
263 if (left
->count
> right
->count
)
270 /* Right isn't a call within the cycle. */
276 /* Left isn't a call within a cycle. */
277 if (right_parent
->cg
.cyc
.num
!= 0 && right_child
->cg
.cyc
.num
!= 0
278 && right_parent
->cg
.cyc
.num
== right_child
->cg
.cyc
.num
)
280 /* Right is a call within a cycle. */
285 /* Neither is a call within a cycle. */
286 left_time
= left
->time
+ left
->child_time
;
287 right_time
= right
->time
+ right
->child_time
;
289 if (left_time
< right_time
)
292 if (left_time
> right_time
)
295 if (left
->count
< right
->count
)
298 if (left
->count
> right
->count
)
308 sort_parents (Sym
* child
)
310 Arc
*arc
, *detached
, sorted
, *prev
;
312 /* Unlink parents from child, then insertion sort back on to
314 *arc the arc you have detached and are inserting.
315 *detached the rest of the arcs to be sorted.
316 sorted arc list onto which you insertion sort.
317 *prev arc before the arc you are comparing. */
318 sorted
.next_parent
= 0;
320 for (arc
= child
->cg
.parents
; arc
; arc
= detached
)
322 detached
= arc
->next_parent
;
324 /* Consider *arc as disconnected; insert it into sorted. */
325 for (prev
= &sorted
; prev
->next_parent
; prev
= prev
->next_parent
)
327 if (cmp_arc (arc
, prev
->next_parent
) != GREATERTHAN
)
331 arc
->next_parent
= prev
->next_parent
;
332 prev
->next_parent
= arc
;
335 /* Reattach sorted arcs to child. */
336 child
->cg
.parents
= sorted
.next_parent
;
341 print_parents (Sym
*child
)
347 if (child
->cg
.cyc
.head
!= 0)
348 cycle_head
= child
->cg
.cyc
.head
;
352 if (!child
->cg
.parents
)
354 printf (bsd_style_output
355 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
356 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
357 "", "", "", "", "", "");
361 sort_parents (child
);
363 for (arc
= child
->cg
.parents
; arc
; arc
= arc
->next_parent
)
365 parent
= arc
->parent
;
366 if (child
== parent
|| (child
->cg
.cyc
.num
!= 0
367 && parent
->cg
.cyc
.num
== child
->cg
.cyc
.num
))
369 /* Selfcall or call among siblings. */
370 printf (bsd_style_output
371 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
372 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
380 /* Regular parent of child. */
381 printf (bsd_style_output
382 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
383 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
385 arc
->time
/ hz
, arc
->child_time
/ hz
,
386 arc
->count
, cycle_head
->ncalls
);
395 sort_children (Sym
*parent
)
397 Arc
*arc
, *detached
, sorted
, *prev
;
399 /* Unlink children from parent, then insertion sort back on to
401 *arc the arc you have detached and are inserting.
402 *detached the rest of the arcs to be sorted.
403 sorted arc list onto which you insertion sort.
404 *prev arc before the arc you are comparing. */
405 sorted
.next_child
= 0;
407 for (arc
= parent
->cg
.children
; arc
; arc
= detached
)
409 detached
= arc
->next_child
;
411 /* Consider *arc as disconnected; insert it into sorted. */
412 for (prev
= &sorted
; prev
->next_child
; prev
= prev
->next_child
)
414 if (cmp_arc (arc
, prev
->next_child
) != LESSTHAN
)
418 arc
->next_child
= prev
->next_child
;
419 prev
->next_child
= arc
;
422 /* Reattach sorted children to parent. */
423 parent
->cg
.children
= sorted
.next_child
;
428 print_children (Sym
*parent
)
433 sort_children (parent
);
434 arc
= parent
->cg
.children
;
436 for (arc
= parent
->cg
.children
; arc
; arc
= arc
->next_child
)
439 if (child
== parent
|| (child
->cg
.cyc
.num
!= 0
440 && child
->cg
.cyc
.num
== parent
->cg
.cyc
.num
))
442 /* Self call or call to sibling. */
443 printf (bsd_style_output
444 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
445 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
446 "", "", "", "", arc
->count
, "");
452 /* Regular child of parent. */
453 printf (bsd_style_output
454 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
455 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
457 arc
->time
/ hz
, arc
->child_time
/ hz
,
458 arc
->count
, child
->cg
.cyc
.head
->ncalls
);
471 sprintf (buf
, "[%d]", np
->cg
.index
);
472 printf (bsd_style_output
473 ? "%-6.6s %5.1f %7.2f %11.2f"
474 : "%-6.6s %5.1f %7.2f %7.2f", buf
,
475 100 * (np
->cg
.prop
.self
+ np
->cg
.prop
.child
) / print_time
,
476 np
->cg
.prop
.self
/ hz
, np
->cg
.prop
.child
/ hz
);
478 if ((np
->ncalls
+ np
->cg
.self_calls
) != 0)
480 printf (" %7lu", np
->ncalls
);
482 if (np
->cg
.self_calls
!= 0)
483 printf ("+%-7lu ", np
->cg
.self_calls
);
485 printf (" %7.7s ", "");
489 printf (" %7.7s %7.7s ", "", "");
497 /* Print dynamic call graph. */
500 cg_print (Sym
** timesortsym
)
505 if (print_descriptions
&& bsd_style_output
)
506 bsd_callg_blurb (stdout
);
510 for (index
= 0; index
< symtab
.len
+ num_cycles
; ++index
)
512 parent
= timesortsym
[index
];
514 if ((ignore_zeros
&& parent
->ncalls
== 0
515 && parent
->cg
.self_calls
== 0 && parent
->cg
.prop
.self
== 0
516 && parent
->cg
.prop
.child
== 0)
517 || !parent
->cg
.print_flag
518 || (line_granularity
&& ! parent
->is_func
))
521 if (!parent
->name
&& parent
->cg
.cyc
.num
!= 0)
524 print_cycle (parent
);
525 print_members (parent
);
529 print_parents (parent
);
531 print_children (parent
);
534 if (bsd_style_output
)
537 printf ("-----------------------------------------------\n");
539 if (bsd_style_output
)
545 if (print_descriptions
&& !bsd_style_output
)
546 fsf_callg_blurb (stdout
);
551 cmp_name (const PTR left
, const PTR right
)
553 const Sym
**npp1
= (const Sym
**) left
;
554 const Sym
**npp2
= (const Sym
**) right
;
556 return strcmp ((*npp1
)->name
, (*npp2
)->name
);
564 unsigned int nnames
, todo
, i
, j
;
565 int col
, starting_col
;
566 Sym
**name_sorted_syms
, *sym
;
567 const char *filename
;
569 int column_width
= (output_width
- 1) / 3; /* Don't write in last col! */
571 /* Now, sort regular function name
572 alphabetically to create an index. */
573 name_sorted_syms
= (Sym
**) xmalloc ((symtab
.len
+ num_cycles
) * sizeof (Sym
*));
575 for (index
= 0, nnames
= 0; index
< symtab
.len
; index
++)
577 if (ignore_zeros
&& symtab
.base
[index
].ncalls
== 0
578 && symtab
.base
[index
].hist
.time
== 0)
581 name_sorted_syms
[nnames
++] = &symtab
.base
[index
];
584 qsort (name_sorted_syms
, nnames
, sizeof (Sym
*), cmp_name
);
586 for (index
= 1, todo
= nnames
; index
<= num_cycles
; index
++)
587 name_sorted_syms
[todo
++] = &cycle_header
[index
];
590 printf (_("Index by function name\n\n"));
591 index
= (todo
+ 2) / 3;
593 for (i
= 0; i
< index
; i
++)
598 for (j
= i
; j
< todo
; j
+= index
)
600 sym
= name_sorted_syms
[j
];
602 if (sym
->cg
.print_flag
)
603 sprintf (buf
, "[%d]", sym
->cg
.index
);
605 sprintf (buf
, "(%d)", sym
->cg
.index
);
609 if (bsd_style_output
)
611 printf ("%6.6s %-19.19s", buf
, sym
->name
);
617 for (; col
< starting_col
+ 5; ++col
)
620 printf (" %s ", buf
);
621 col
+= print_name_only (sym
);
623 if (!line_granularity
&& sym
->is_static
&& sym
->file
)
625 filename
= sym
->file
->name
;
629 filename
= strrchr (filename
, '/');
634 filename
= sym
->file
->name
;
637 printf (" (%s)", filename
);
638 col
+= strlen (filename
) + 3;
644 if (bsd_style_output
)
646 printf ("%6.6s ", buf
);
647 sprintf (buf
, _("<cycle %d>"), sym
->cg
.cyc
.num
);
648 printf ("%-19.19s", buf
);
653 for (; col
< starting_col
+ 5; ++col
)
655 printf (" %s ", buf
);
656 sprintf (buf
, _("<cycle %d>"), sym
->cg
.cyc
.num
);
662 starting_col
+= column_width
;
668 free (name_sorted_syms
);
671 /* Compare two arcs based on their usage counts.
672 We want to sort in descending order. */
675 cmp_arc_count (const PTR left
, const PTR right
)
677 const Arc
**npp1
= (const Arc
**) left
;
678 const Arc
**npp2
= (const Arc
**) right
;
680 if ((*npp1
)->count
> (*npp2
)->count
)
682 else if ((*npp1
)->count
< (*npp2
)->count
)
688 /* Compare two funtions based on their usage counts.
689 We want to sort in descending order. */
692 cmp_fun_nuses (const PTR left
, const PTR right
)
694 const Sym
**npp1
= (const Sym
**) left
;
695 const Sym
**npp2
= (const Sym
**) right
;
697 if ((*npp1
)->nuses
> (*npp2
)->nuses
)
699 else if ((*npp1
)->nuses
< (*npp2
)->nuses
)
705 /* Print a suggested function ordering based on the profiling data.
707 We perform 4 major steps when ordering functions:
709 * Group unused functions together and place them at the
710 end of the function order.
712 * Search the highest use arcs (those which account for 90% of
713 the total arc count) for functions which have several parents.
715 Group those with the most call sites together (currently the
716 top 1.25% which have at least five different call sites).
718 These are emitted at the start of the function order.
720 * Use a greedy placement algorithm to place functions which
721 occur in the top 99% of the arcs in the profile. Some provisions
722 are made to handle high usage arcs where the parent and/or
723 child has already been placed.
725 * Run the same greedy placement algorithm on the remaining
726 arcs to place the leftover functions.
729 The various "magic numbers" should (one day) be tuneable by command
730 line options. They were arrived at by benchmarking a few applications
731 with various values to see which values produced better overall function
734 Of course, profiling errors, machine limitations (PA long calls), and
735 poor cutoff values for the placement algorithm may limit the usefullness
736 of the resulting function order. Improvements would be greatly appreciated.
740 * Place the functions with many callers near the middle of the
741 list to reduce long calls.
743 * Propagate arc usage changes as functions are placed. Ie if
744 func1 and func2 are placed together, arcs to/from those arcs
745 to the same parent/child should be combined, then resort the
746 arcs to choose the next one.
748 * Implement some global positioning algorithm to place the
749 chains made by the greedy local positioning algorithm. Probably
750 by examining arcs which haven't been placed yet to tie two
753 * Take a function's size and time into account in the algorithm;
754 size in particular is important on the PA (long calls). Placing
755 many small functions onto their own page may be wise.
757 * Use better profiling information; many published algorithms
758 are based on call sequences through time, rather than just
761 * Prodecure cloning could improve performance when a small number
762 of arcs account for most of the calls to a particular function.
764 * Use relocation information to avoid moving unused functions
765 completely out of the code stream; this would avoid severe lossage
766 when the profile data bears little resemblance to actual runs.
768 * Propagation of arc usages should also improve .o link line
769 ordering which shares the same arc placement algorithm with
770 the function ordering code (in fact it is a degenerate case
771 of function ordering). */
774 cg_print_function_ordering ()
776 unsigned long index
, used
, unused
, scratch_index
;
777 unsigned long unplaced_arc_count
, high_arc_count
, scratch_arc_count
;
779 unsigned long long total_arcs
, tmp_arcs_count
;
781 unsigned long total_arcs
, tmp_arcs_count
;
783 Sym
**unused_syms
, **used_syms
, **scratch_syms
;
784 Arc
**unplaced_arcs
, **high_arcs
, **scratch_arcs
;
790 unplaced_arc_count
= 0;
792 scratch_arc_count
= 0;
794 /* First group all the unused functions together. */
795 unused_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
796 used_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
797 scratch_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
798 high_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
799 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
800 unplaced_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
802 /* Walk through all the functions; mark those which are never
803 called as placed (we'll emit them as a group later). */
804 for (index
= 0, used
= 0, unused
= 0; index
< symtab
.len
; index
++)
806 if (symtab
.base
[index
].ncalls
== 0)
808 unused_syms
[unused
++] = &symtab
.base
[index
];
809 symtab
.base
[index
].has_been_placed
= 1;
813 used_syms
[used
++] = &symtab
.base
[index
];
814 symtab
.base
[index
].has_been_placed
= 0;
815 symtab
.base
[index
].next
= 0;
816 symtab
.base
[index
].prev
= 0;
817 symtab
.base
[index
].nuses
= 0;
821 /* Sort the arcs from most used to least used. */
822 qsort (arcs
, numarcs
, sizeof (Arc
*), cmp_arc_count
);
824 /* Compute the total arc count. Also mark arcs as unplaced.
826 Note we don't compensate for overflow if that happens!
827 Overflow is much less likely when this file is compiled
828 with GCC as it can double-wide integers via long long. */
830 for (index
= 0; index
< numarcs
; index
++)
832 total_arcs
+= arcs
[index
]->count
;
833 arcs
[index
]->has_been_placed
= 0;
836 /* We want to pull out those functions which are referenced
837 by many highly used arcs and emit them as a group. This
838 could probably use some tuning. */
840 for (index
= 0; index
< numarcs
; index
++)
842 tmp_arcs_count
+= arcs
[index
]->count
;
844 /* Count how many times each parent and child are used up
845 to our threshhold of arcs (90%). */
846 if ((double)tmp_arcs_count
/ (double)total_arcs
> 0.90)
849 arcs
[index
]->child
->nuses
++;
852 /* Now sort a temporary symbol table based on the number of
853 times each function was used in the highest used arcs. */
854 memcpy (scratch_syms
, used_syms
, used
* sizeof (Sym
*));
855 qsort (scratch_syms
, used
, sizeof (Sym
*), cmp_fun_nuses
);
857 /* Now pick out those symbols we're going to emit as
858 a group. We take up to 1.25% of the used symbols. */
859 for (index
= 0; index
< used
/ 80; index
++)
861 Sym
*sym
= scratch_syms
[index
];
864 /* If we hit symbols that aren't used from many call sites,
865 then we can quit. We choose five as the low limit for
866 no particular reason. */
870 /* We're going to need the arcs between these functions.
871 Unfortunately, we don't know all these functions
872 until we're done. So we keep track of all the arcs
873 to the functions we care about, then prune out those
874 which are uninteresting.
876 An interesting variation would be to quit when we found
877 multi-call site functions which account for some percentage
879 arc
= sym
->cg
.children
;
883 if (arc
->parent
!= arc
->child
)
884 scratch_arcs
[scratch_arc_count
++] = arc
;
885 arc
->has_been_placed
= 1;
886 arc
= arc
->next_child
;
889 arc
= sym
->cg
.parents
;
893 if (arc
->parent
!= arc
->child
)
894 scratch_arcs
[scratch_arc_count
++] = arc
;
895 arc
->has_been_placed
= 1;
896 arc
= arc
->next_parent
;
899 /* Keep track of how many symbols we're going to place. */
900 scratch_index
= index
;
902 /* A lie, but it makes identifying
903 these functions easier later. */
904 sym
->has_been_placed
= 1;
907 /* Now walk through the temporary arcs and copy
908 those we care about into the high arcs array. */
909 for (index
= 0; index
< scratch_arc_count
; index
++)
911 Arc
*arc
= scratch_arcs
[index
];
913 /* If this arc refers to highly used functions, then
914 then we want to keep it. */
915 if (arc
->child
->has_been_placed
916 && arc
->parent
->has_been_placed
)
918 high_arcs
[high_arc_count
++] = scratch_arcs
[index
];
920 /* We need to turn of has_been_placed since we're going to
921 use the main arc placement algorithm on these arcs. */
922 arc
->child
->has_been_placed
= 0;
923 arc
->parent
->has_been_placed
= 0;
927 /* Dump the multi-site high usage functions which are not
928 going to be ordered by the main ordering algorithm. */
929 for (index
= 0; index
< scratch_index
; index
++)
931 if (scratch_syms
[index
]->has_been_placed
)
932 printf ("%s\n", scratch_syms
[index
]->name
);
935 /* Now we can order the multi-site high use
936 functions based on the arcs between them. */
937 qsort (high_arcs
, high_arc_count
, sizeof (Arc
*), cmp_arc_count
);
938 order_and_dump_functions_by_arcs (high_arcs
, high_arc_count
, 1,
939 unplaced_arcs
, &unplaced_arc_count
);
941 /* Order and dump the high use functions left,
942 these typically have only a few call sites. */
943 order_and_dump_functions_by_arcs (arcs
, numarcs
, 0,
944 unplaced_arcs
, &unplaced_arc_count
);
946 /* Now place the rarely used functions. */
947 order_and_dump_functions_by_arcs (unplaced_arcs
, unplaced_arc_count
, 1,
948 scratch_arcs
, &scratch_arc_count
);
950 /* Output any functions not emitted by the order_and_dump calls. */
951 for (index
= 0; index
< used
; index
++)
952 if (used_syms
[index
]->has_been_placed
== 0)
953 printf("%s\n", used_syms
[index
]->name
);
955 /* Output the unused functions. */
956 for (index
= 0; index
< unused
; index
++)
957 printf("%s\n", unused_syms
[index
]->name
);
959 unused_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
960 used_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
961 scratch_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
962 high_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
963 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
964 unplaced_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
971 free (unplaced_arcs
);
974 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
975 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
977 If ALL is nonzero, then place all functions referenced by THE_ARCS,
978 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
982 order_and_dump_functions_by_arcs (the_arcs
, arc_count
, all
,
983 unplaced_arcs
, unplaced_arc_count
)
985 unsigned long arc_count
;
988 unsigned long *unplaced_arc_count
;
991 unsigned long long tmp_arcs
, total_arcs
;
993 unsigned long tmp_arcs
, total_arcs
;
997 /* If needed, compute the total arc count.
999 Note we don't compensate for overflow if that happens! */
1003 for (index
= 0; index
< arc_count
; index
++)
1004 total_arcs
+= the_arcs
[index
]->count
;
1011 for (index
= 0; index
< arc_count
; index
++)
1014 Sym
*child
, *parent
;
1016 tmp_arcs
+= the_arcs
[index
]->count
;
1018 /* Ignore this arc if it's already been placed. */
1019 if (the_arcs
[index
]->has_been_placed
)
1022 child
= the_arcs
[index
]->child
;
1023 parent
= the_arcs
[index
]->parent
;
1025 /* If we're not using all arcs, and this is a rarely used
1026 arc, then put it on the unplaced_arc list. Similarly
1027 if both the parent and child of this arc have been placed. */
1028 if ((! all
&& (double)tmp_arcs
/ (double)total_arcs
> MOST
)
1029 || child
->has_been_placed
|| parent
->has_been_placed
)
1031 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[index
];
1035 /* If all slots in the parent and child are full, then there isn't
1036 anything we can do right now. We'll place this arc on the
1037 unplaced arc list in the hope that a global positioning
1038 algorithm can use it to place function chains. */
1039 if (parent
->next
&& parent
->prev
&& child
->next
&& child
->prev
)
1041 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[index
];
1045 /* If the parent is unattached, then find the closest
1046 place to attach it onto child's chain. Similarly
1047 for the opposite case. */
1048 if (!parent
->next
&& !parent
->prev
)
1055 /* Walk to the beginning and end of the child's chain. */
1068 /* Choose the closest. */
1069 child
= next_count
< prev_count
? next
: prev
;
1071 else if (! child
->next
&& !child
->prev
)
1090 parent
= prev_count
< next_count
? prev
: next
;
1094 /* Couldn't find anywhere to attach the functions,
1095 put the arc on the unplaced arc list. */
1096 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[index
];
1100 /* Make sure we don't tie two ends together. */
1120 /* This would tie two ends together. */
1121 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[index
];
1127 /* Must attach to the parent's prev field. */
1130 /* parent-prev and child-next */
1131 parent
->prev
= child
;
1132 child
->next
= parent
;
1133 the_arcs
[index
]->has_been_placed
= 1;
1136 else if (parent
->prev
)
1138 /* Must attach to the parent's next field. */
1141 /* parent-next and child-prev */
1142 parent
->next
= child
;
1143 child
->prev
= parent
;
1144 the_arcs
[index
]->has_been_placed
= 1;
1149 /* Can attach to either field in the parent, depends
1150 on where we've got space in the child. */
1153 /* parent-prev and child-next. */
1154 parent
->prev
= child
;
1155 child
->next
= parent
;
1156 the_arcs
[index
]->has_been_placed
= 1;
1160 /* parent-next and child-prev. */
1161 parent
->next
= child
;
1162 child
->prev
= parent
;
1163 the_arcs
[index
]->has_been_placed
= 1;
1168 /* Dump the chains of functions we've made. */
1169 for (index
= 0; index
< arc_count
; index
++)
1172 if (the_arcs
[index
]->parent
->has_been_placed
1173 || the_arcs
[index
]->child
->has_been_placed
)
1176 sym
= the_arcs
[index
]->parent
;
1178 /* If this symbol isn't attached to any other
1179 symbols, then we've got a rarely used arc.
1181 Skip it for now, we'll deal with them later. */
1182 if (sym
->next
== NULL
1183 && sym
->prev
== NULL
)
1186 /* Get to the start of this chain. */
1192 /* Mark it as placed. */
1193 sym
->has_been_placed
= 1;
1194 printf ("%s\n", sym
->name
);
1199 /* If we want to place all the arcs, then output
1200 those which weren't placed by the main algorithm. */
1202 for (index
= 0; index
< arc_count
; index
++)
1205 if (the_arcs
[index
]->parent
->has_been_placed
1206 || the_arcs
[index
]->child
->has_been_placed
)
1209 sym
= the_arcs
[index
]->parent
;
1211 sym
->has_been_placed
= 1;
1212 printf ("%s\n", sym
->name
);
1216 /* Compare two function_map structs based on file name.
1217 We want to sort in ascending order. */
1220 cmp_symbol_map (const void * l
, const void * r
)
1222 return strcmp (((struct function_map
*) l
)->file_name
,
1223 ((struct function_map
*) r
)->file_name
);
1226 /* Print a suggested .o ordering for files on a link line based
1227 on profiling information. This uses the function placement
1228 code for the bulk of its work. */
1231 cg_print_file_ordering (void)
1233 unsigned long scratch_arc_count
, index
;
1237 scratch_arc_count
= 0;
1239 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
1240 for (index
= 0; index
< numarcs
; index
++)
1242 if (! arcs
[index
]->parent
->mapped
1243 || ! arcs
[index
]->child
->mapped
)
1244 arcs
[index
]->has_been_placed
= 1;
1247 order_and_dump_functions_by_arcs (arcs
, numarcs
, 0,
1248 scratch_arcs
, &scratch_arc_count
);
1250 /* Output .o's not handled by the main placement algorithm. */
1251 for (index
= 0; index
< symtab
.len
; index
++)
1253 if (symtab
.base
[index
].mapped
1254 && ! symtab
.base
[index
].has_been_placed
)
1255 printf ("%s\n", symtab
.base
[index
].name
);
1258 qsort (symbol_map
, symbol_map_count
, sizeof (struct function_map
), cmp_symbol_map
);
1260 /* Now output any .o's that didn't have any text symbols. */
1262 for (index
= 0; index
< symbol_map_count
; index
++)
1264 unsigned int index2
;
1266 /* Don't bother searching if this symbol
1267 is the same as the previous one. */
1268 if (last
&& !strcmp (last
, symbol_map
[index
].file_name
))
1271 for (index2
= 0; index2
< symtab
.len
; index2
++)
1273 if (! symtab
.base
[index2
].mapped
)
1276 if (!strcmp (symtab
.base
[index2
].name
, symbol_map
[index
].file_name
))
1280 /* If we didn't find it in the symbol table, then it must
1281 be a .o with no text symbols. Output it last. */
1282 if (index2
== symtab
.len
)
1283 printf ("%s\n", symbol_map
[index
].file_name
);
1284 last
= symbol_map
[index
].file_name
;