1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #include "libiberty.h"
24 #include "search_list.h"
33 /* Return value of comparison functions used to sort tables. */
38 static void print_header
PARAMS ((void));
39 static void print_cycle
PARAMS ((Sym
*));
40 static int cmp_member
PARAMS ((Sym
*, Sym
*));
41 static void sort_members
PARAMS ((Sym
*));
42 static void print_members
PARAMS ((Sym
*));
43 static int cmp_arc
PARAMS ((Arc
*, Arc
*));
44 static void sort_parents
PARAMS ((Sym
*));
45 static void print_parents
PARAMS ((Sym
*));
46 static void sort_children
PARAMS ((Sym
*));
47 static void print_children
PARAMS ((Sym
*));
48 static void print_line
PARAMS ((Sym
*));
49 static int cmp_name
PARAMS ((const PTR
, const PTR
));
50 static int cmp_arc_count
PARAMS ((const PTR
, const PTR
));
51 static int cmp_fun_nuses
PARAMS ((const PTR
, const PTR
));
52 static void order_and_dump_functions_by_arcs
53 PARAMS ((Arc
**, unsigned long, int, Arc
**, unsigned long *));
55 /* Declarations of automatically generated functions to output blurbs. */
56 extern void bsd_callg_blurb
PARAMS ((FILE * fp
));
57 extern void fsf_callg_blurb
PARAMS ((FILE * fp
));
59 double print_time
= 0.0;
70 if (!bsd_style_output
)
72 if (print_descriptions
)
73 printf (_("\t\t Call graph (explanation follows)\n\n"));
75 printf (_("\t\t\tCall graph\n\n"));
78 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
79 (long) hist_scale
* sizeof (UNIT
));
82 printf (_(" for %.2f%% of %.2f seconds\n\n"),
83 100.0 / print_time
, print_time
/ hz
);
86 printf (_(" no time propagated\n\n"));
88 /* This doesn't hurt, since all the numerators will be 0.0. */
94 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
95 "", "", "", "", _("called"), _("total"), _("parents"));
96 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
97 _("index"), _("%time"), _("self"), _("descendants"),
98 _("called"), _("self"), _("name"), _("index"));
99 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
100 "", "", "", "", _("called"), _("total"), _("children"));
105 printf (_("index %% time self children called name\n"));
109 /* Print a cycle header. */
117 sprintf (buf
, "[%d]", cyc
->cg
.index
);
118 printf (bsd_style_output
119 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
120 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf
,
121 100 * (cyc
->cg
.prop
.self
+ cyc
->cg
.prop
.child
) / print_time
,
122 cyc
->cg
.prop
.self
/ hz
, cyc
->cg
.prop
.child
/ hz
, cyc
->ncalls
);
124 if (cyc
->cg
.self_calls
!= 0)
125 printf ("+%-7lu", cyc
->cg
.self_calls
);
127 printf (" %7.7s", "");
129 printf (_(" <cycle %d as a whole> [%d]\n"), cyc
->cg
.cyc
.num
, cyc
->cg
.index
);
132 /* Compare LEFT and RIGHT membmer. Major comparison key is
133 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
136 cmp_member (left
, right
)
140 double left_time
= left
->cg
.prop
.self
+ left
->cg
.prop
.child
;
141 double right_time
= right
->cg
.prop
.self
+ right
->cg
.prop
.child
;
142 unsigned long left_calls
= left
->ncalls
+ left
->cg
.self_calls
;
143 unsigned long right_calls
= right
->ncalls
+ right
->cg
.self_calls
;
145 if (left_time
> right_time
)
148 if (left_time
< right_time
)
151 if (left_calls
> right_calls
)
154 if (left_calls
< right_calls
)
160 /* Sort members of a cycle. */
166 Sym
*todo
, *doing
, *prev
;
168 /* Detach cycle members from cyclehead,
169 and insertion sort them back on. */
170 todo
= cyc
->cg
.cyc
.next
;
171 cyc
->cg
.cyc
.next
= 0;
173 for (doing
= todo
; doing
&& doing
->cg
.cyc
.next
; doing
= todo
)
175 todo
= doing
->cg
.cyc
.next
;
177 for (prev
= cyc
; prev
->cg
.cyc
.next
; prev
= prev
->cg
.cyc
.next
)
179 if (cmp_member (doing
, prev
->cg
.cyc
.next
) == GREATERTHAN
)
183 doing
->cg
.cyc
.next
= prev
->cg
.cyc
.next
;
184 prev
->cg
.cyc
.next
= doing
;
188 /* Print the members of a cycle. */
198 for (member
= cyc
->cg
.cyc
.next
; member
; member
= member
->cg
.cyc
.next
)
200 printf (bsd_style_output
201 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 "", "", member
->cg
.prop
.self
/ hz
, member
->cg
.prop
.child
/ hz
,
206 if (member
->cg
.self_calls
!= 0)
207 printf ("+%-7lu", member
->cg
.self_calls
);
209 printf (" %7.7s", "");
217 /* Compare two arcs to/from the same child/parent.
218 - if one arc is a self arc, it's least.
219 - if one arc is within a cycle, it's less than.
220 - if both arcs are within a cycle, compare arc counts.
221 - if neither arc is within a cycle, compare with
222 time + child_time as major key
223 arc count as minor key. */
226 cmp_arc (left
, right
)
230 Sym
*left_parent
= left
->parent
;
231 Sym
*left_child
= left
->child
;
232 Sym
*right_parent
= right
->parent
;
233 Sym
*right_child
= right
->child
;
234 double left_time
, right_time
;
237 printf ("[cmp_arc] ");
238 print_name (left_parent
);
240 print_name (left_child
);
241 printf (" %f + %f %lu/%lu\n", left
->time
, left
->child_time
,
242 left
->count
, left_child
->ncalls
);
243 printf ("[cmp_arc] ");
244 print_name (right_parent
);
246 print_name (right_child
);
247 printf (" %f + %f %lu/%lu\n", right
->time
, right
->child_time
,
248 right
->count
, right_child
->ncalls
);
252 if (left_parent
== left_child
)
253 return LESSTHAN
; /* Left is a self call. */
255 if (right_parent
== right_child
)
256 return GREATERTHAN
; /* Right is a self call. */
258 if (left_parent
->cg
.cyc
.num
!= 0 && left_child
->cg
.cyc
.num
!= 0
259 && left_parent
->cg
.cyc
.num
== left_child
->cg
.cyc
.num
)
261 /* Left is a call within a cycle. */
262 if (right_parent
->cg
.cyc
.num
!= 0 && right_child
->cg
.cyc
.num
!= 0
263 && right_parent
->cg
.cyc
.num
== right_child
->cg
.cyc
.num
)
265 /* Right is a call within the cycle, too. */
266 if (left
->count
< right
->count
)
269 if (left
->count
> right
->count
)
276 /* Right isn't a call within the cycle. */
282 /* Left isn't a call within a cycle. */
283 if (right_parent
->cg
.cyc
.num
!= 0 && right_child
->cg
.cyc
.num
!= 0
284 && right_parent
->cg
.cyc
.num
== right_child
->cg
.cyc
.num
)
286 /* Right is a call within a cycle. */
291 /* Neither is a call within a cycle. */
292 left_time
= left
->time
+ left
->child_time
;
293 right_time
= right
->time
+ right
->child_time
;
295 if (left_time
< right_time
)
298 if (left_time
> right_time
)
301 if (left
->count
< right
->count
)
304 if (left
->count
> right
->count
)
317 Arc
*arc
, *detached
, sorted
, *prev
;
319 /* Unlink parents from child, then insertion sort back on to
321 *arc the arc you have detached and are inserting.
322 *detached the rest of the arcs to be sorted.
323 sorted arc list onto which you insertion sort.
324 *prev arc before the arc you are comparing. */
325 sorted
.next_parent
= 0;
327 for (arc
= child
->cg
.parents
; arc
; arc
= detached
)
329 detached
= arc
->next_parent
;
331 /* Consider *arc as disconnected; insert it into sorted. */
332 for (prev
= &sorted
; prev
->next_parent
; prev
= prev
->next_parent
)
334 if (cmp_arc (arc
, prev
->next_parent
) != GREATERTHAN
)
338 arc
->next_parent
= prev
->next_parent
;
339 prev
->next_parent
= arc
;
342 /* Reattach sorted arcs to child. */
343 child
->cg
.parents
= sorted
.next_parent
;
348 print_parents (child
)
355 if (child
->cg
.cyc
.head
!= 0)
356 cycle_head
= child
->cg
.cyc
.head
;
360 if (!child
->cg
.parents
)
362 printf (bsd_style_output
363 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
364 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
365 "", "", "", "", "", "");
369 sort_parents (child
);
371 for (arc
= child
->cg
.parents
; arc
; arc
= arc
->next_parent
)
373 parent
= arc
->parent
;
374 if (child
== parent
|| (child
->cg
.cyc
.num
!= 0
375 && parent
->cg
.cyc
.num
== child
->cg
.cyc
.num
))
377 /* Selfcall or call among siblings. */
378 printf (bsd_style_output
379 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
380 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
388 /* Regular parent of child. */
389 printf (bsd_style_output
390 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
391 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
393 arc
->time
/ hz
, arc
->child_time
/ hz
,
394 arc
->count
, cycle_head
->ncalls
);
403 sort_children (parent
)
406 Arc
*arc
, *detached
, sorted
, *prev
;
408 /* Unlink children from parent, then insertion sort back on to
410 *arc the arc you have detached and are inserting.
411 *detached the rest of the arcs to be sorted.
412 sorted arc list onto which you insertion sort.
413 *prev arc before the arc you are comparing. */
414 sorted
.next_child
= 0;
416 for (arc
= parent
->cg
.children
; arc
; arc
= detached
)
418 detached
= arc
->next_child
;
420 /* Consider *arc as disconnected; insert it into sorted. */
421 for (prev
= &sorted
; prev
->next_child
; prev
= prev
->next_child
)
423 if (cmp_arc (arc
, prev
->next_child
) != LESSTHAN
)
427 arc
->next_child
= prev
->next_child
;
428 prev
->next_child
= arc
;
431 /* Reattach sorted children to parent. */
432 parent
->cg
.children
= sorted
.next_child
;
437 print_children (parent
)
443 sort_children (parent
);
444 arc
= parent
->cg
.children
;
446 for (arc
= parent
->cg
.children
; arc
; arc
= arc
->next_child
)
449 if (child
== parent
|| (child
->cg
.cyc
.num
!= 0
450 && child
->cg
.cyc
.num
== parent
->cg
.cyc
.num
))
452 /* Self call or call to sibling. */
453 printf (bsd_style_output
454 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
455 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
456 "", "", "", "", arc
->count
, "");
462 /* Regular child of parent. */
463 printf (bsd_style_output
464 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
465 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
467 arc
->time
/ hz
, arc
->child_time
/ hz
,
468 arc
->count
, child
->cg
.cyc
.head
->ncalls
);
482 sprintf (buf
, "[%d]", np
->cg
.index
);
483 printf (bsd_style_output
484 ? "%-6.6s %5.1f %7.2f %11.2f"
485 : "%-6.6s %5.1f %7.2f %7.2f", buf
,
486 100 * (np
->cg
.prop
.self
+ np
->cg
.prop
.child
) / print_time
,
487 np
->cg
.prop
.self
/ hz
, np
->cg
.prop
.child
/ hz
);
489 if ((np
->ncalls
+ np
->cg
.self_calls
) != 0)
491 printf (" %7lu", np
->ncalls
);
493 if (np
->cg
.self_calls
!= 0)
494 printf ("+%-7lu ", np
->cg
.self_calls
);
496 printf (" %7.7s ", "");
500 printf (" %7.7s %7.7s ", "", "");
508 /* Print dynamic call graph. */
511 cg_print (timesortsym
)
517 if (print_descriptions
&& bsd_style_output
)
518 bsd_callg_blurb (stdout
);
522 for (index
= 0; index
< symtab
.len
+ num_cycles
; ++index
)
524 parent
= timesortsym
[index
];
526 if ((ignore_zeros
&& parent
->ncalls
== 0
527 && parent
->cg
.self_calls
== 0 && parent
->cg
.prop
.self
== 0
528 && parent
->cg
.prop
.child
== 0)
529 || !parent
->cg
.print_flag
530 || (line_granularity
&& ! parent
->is_func
))
533 if (!parent
->name
&& parent
->cg
.cyc
.num
!= 0)
536 print_cycle (parent
);
537 print_members (parent
);
541 print_parents (parent
);
543 print_children (parent
);
546 if (bsd_style_output
)
549 printf ("-----------------------------------------------\n");
551 if (bsd_style_output
)
557 if (print_descriptions
&& !bsd_style_output
)
558 fsf_callg_blurb (stdout
);
563 cmp_name (left
, right
)
567 const Sym
**npp1
= (const Sym
**) left
;
568 const Sym
**npp2
= (const Sym
**) right
;
570 return strcmp ((*npp1
)->name
, (*npp2
)->name
);
578 unsigned int nnames
, todo
, i
, j
;
579 int col
, starting_col
;
580 Sym
**name_sorted_syms
, *sym
;
581 const char *filename
;
583 int column_width
= (output_width
- 1) / 3; /* Don't write in last col! */
585 /* Now, sort regular function name
586 alphabetically to create an index. */
587 name_sorted_syms
= (Sym
**) xmalloc ((symtab
.len
+ num_cycles
) * sizeof (Sym
*));
589 for (index
= 0, nnames
= 0; index
< symtab
.len
; index
++)
591 if (ignore_zeros
&& symtab
.base
[index
].ncalls
== 0
592 && symtab
.base
[index
].hist
.time
== 0)
595 name_sorted_syms
[nnames
++] = &symtab
.base
[index
];
598 qsort (name_sorted_syms
, nnames
, sizeof (Sym
*), cmp_name
);
600 for (index
= 1, todo
= nnames
; index
<= num_cycles
; index
++)
601 name_sorted_syms
[todo
++] = &cycle_header
[index
];
604 printf (_("Index by function name\n\n"));
605 index
= (todo
+ 2) / 3;
607 for (i
= 0; i
< index
; i
++)
612 for (j
= i
; j
< todo
; j
+= index
)
614 sym
= name_sorted_syms
[j
];
616 if (sym
->cg
.print_flag
)
617 sprintf (buf
, "[%d]", sym
->cg
.index
);
619 sprintf (buf
, "(%d)", sym
->cg
.index
);
623 if (bsd_style_output
)
625 printf ("%6.6s %-19.19s", buf
, sym
->name
);
631 for (; col
< starting_col
+ 5; ++col
)
634 printf (" %s ", buf
);
635 col
+= print_name_only (sym
);
637 if (!line_granularity
&& sym
->is_static
&& sym
->file
)
639 filename
= sym
->file
->name
;
643 filename
= strrchr (filename
, '/');
648 filename
= sym
->file
->name
;
651 printf (" (%s)", filename
);
652 col
+= strlen (filename
) + 3;
658 if (bsd_style_output
)
660 printf ("%6.6s ", buf
);
661 sprintf (buf
, _("<cycle %d>"), sym
->cg
.cyc
.num
);
662 printf ("%-19.19s", buf
);
667 for (; col
< starting_col
+ 5; ++col
)
669 printf (" %s ", buf
);
670 sprintf (buf
, _("<cycle %d>"), sym
->cg
.cyc
.num
);
676 starting_col
+= column_width
;
682 free (name_sorted_syms
);
685 /* Compare two arcs based on their usage counts.
686 We want to sort in descending order. */
689 cmp_arc_count (left
, right
)
693 const Arc
**npp1
= (const Arc
**) left
;
694 const Arc
**npp2
= (const Arc
**) right
;
696 if ((*npp1
)->count
> (*npp2
)->count
)
698 else if ((*npp1
)->count
< (*npp2
)->count
)
704 /* Compare two funtions based on their usage counts.
705 We want to sort in descending order. */
708 cmp_fun_nuses (left
, right
)
712 const Sym
**npp1
= (const Sym
**) left
;
713 const Sym
**npp2
= (const Sym
**) right
;
715 if ((*npp1
)->nuses
> (*npp2
)->nuses
)
717 else if ((*npp1
)->nuses
< (*npp2
)->nuses
)
723 /* Print a suggested function ordering based on the profiling data.
725 We perform 4 major steps when ordering functions:
727 * Group unused functions together and place them at the
728 end of the function order.
730 * Search the highest use arcs (those which account for 90% of
731 the total arc count) for functions which have several parents.
733 Group those with the most call sites together (currently the
734 top 1.25% which have at least five different call sites).
736 These are emitted at the start of the function order.
738 * Use a greedy placement algorithm to place functions which
739 occur in the top 99% of the arcs in the profile. Some provisions
740 are made to handle high usage arcs where the parent and/or
741 child has already been placed.
743 * Run the same greedy placement algorithm on the remaining
744 arcs to place the leftover functions.
747 The various "magic numbers" should (one day) be tuneable by command
748 line options. They were arrived at by benchmarking a few applications
749 with various values to see which values produced better overall function
752 Of course, profiling errors, machine limitations (PA long calls), and
753 poor cutoff values for the placement algorithm may limit the usefullness
754 of the resulting function order. Improvements would be greatly appreciated.
758 * Place the functions with many callers near the middle of the
759 list to reduce long calls.
761 * Propagate arc usage changes as functions are placed. Ie if
762 func1 and func2 are placed together, arcs to/from those arcs
763 to the same parent/child should be combined, then resort the
764 arcs to choose the next one.
766 * Implement some global positioning algorithm to place the
767 chains made by the greedy local positioning algorithm. Probably
768 by examining arcs which haven't been placed yet to tie two
771 * Take a function's size and time into account in the algorithm;
772 size in particular is important on the PA (long calls). Placing
773 many small functions onto their own page may be wise.
775 * Use better profiling information; many published algorithms
776 are based on call sequences through time, rather than just
779 * Prodecure cloning could improve performance when a small number
780 of arcs account for most of the calls to a particular function.
782 * Use relocation information to avoid moving unused functions
783 completely out of the code stream; this would avoid severe lossage
784 when the profile data bears little resemblance to actual runs.
786 * Propagation of arc usages should also improve .o link line
787 ordering which shares the same arc placement algorithm with
788 the function ordering code (in fact it is a degenerate case
789 of function ordering). */
792 cg_print_function_ordering ()
794 unsigned long index
, used
, unused
, scratch_index
;
795 unsigned long unplaced_arc_count
, high_arc_count
, scratch_arc_count
;
797 unsigned long long total_arcs
, tmp_arcs_count
;
799 unsigned long total_arcs
, tmp_arcs_count
;
801 Sym
**unused_syms
, **used_syms
, **scratch_syms
;
802 Arc
**unplaced_arcs
, **high_arcs
, **scratch_arcs
;
808 unplaced_arc_count
= 0;
810 scratch_arc_count
= 0;
812 /* First group all the unused functions together. */
813 unused_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
814 used_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
815 scratch_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
816 high_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
817 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
818 unplaced_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
820 /* Walk through all the functions; mark those which are never
821 called as placed (we'll emit them as a group later). */
822 for (index
= 0, used
= 0, unused
= 0; index
< symtab
.len
; index
++)
824 if (symtab
.base
[index
].ncalls
== 0)
826 /* Filter out gprof generated names. */
827 if (strcmp (symtab
.base
[index
].name
, "<locore>")
828 && strcmp (symtab
.base
[index
].name
, "<hicore>"))
830 unused_syms
[unused
++] = &symtab
.base
[index
];
831 symtab
.base
[index
].has_been_placed
= 1;
836 used_syms
[used
++] = &symtab
.base
[index
];
837 symtab
.base
[index
].has_been_placed
= 0;
838 symtab
.base
[index
].next
= 0;
839 symtab
.base
[index
].prev
= 0;
840 symtab
.base
[index
].nuses
= 0;
844 /* Sort the arcs from most used to least used. */
845 qsort (arcs
, numarcs
, sizeof (Arc
*), cmp_arc_count
);
847 /* Compute the total arc count. Also mark arcs as unplaced.
849 Note we don't compensate for overflow if that happens!
850 Overflow is much less likely when this file is compiled
851 with GCC as it can double-wide integers via long long. */
853 for (index
= 0; index
< numarcs
; index
++)
855 total_arcs
+= arcs
[index
]->count
;
856 arcs
[index
]->has_been_placed
= 0;
859 /* We want to pull out those functions which are referenced
860 by many highly used arcs and emit them as a group. This
861 could probably use some tuning. */
863 for (index
= 0; index
< numarcs
; index
++)
865 tmp_arcs_count
+= arcs
[index
]->count
;
867 /* Count how many times each parent and child are used up
868 to our threshhold of arcs (90%). */
869 if ((double)tmp_arcs_count
/ (double)total_arcs
> 0.90)
872 arcs
[index
]->child
->nuses
++;
875 /* Now sort a temporary symbol table based on the number of
876 times each function was used in the highest used arcs. */
877 memcpy (scratch_syms
, used_syms
, used
* sizeof (Sym
*));
878 qsort (scratch_syms
, used
, sizeof (Sym
*), cmp_fun_nuses
);
880 /* Now pick out those symbols we're going to emit as
881 a group. We take up to 1.25% of the used symbols. */
882 for (index
= 0; index
< used
/ 80; index
++)
884 Sym
*sym
= scratch_syms
[index
];
887 /* If we hit symbols that aren't used from many call sites,
888 then we can quit. We choose five as the low limit for
889 no particular reason. */
893 /* We're going to need the arcs between these functions.
894 Unfortunately, we don't know all these functions
895 until we're done. So we keep track of all the arcs
896 to the functions we care about, then prune out those
897 which are uninteresting.
899 An interesting variation would be to quit when we found
900 multi-call site functions which account for some percentage
902 arc
= sym
->cg
.children
;
906 if (arc
->parent
!= arc
->child
)
907 scratch_arcs
[scratch_arc_count
++] = arc
;
908 arc
->has_been_placed
= 1;
909 arc
= arc
->next_child
;
912 arc
= sym
->cg
.parents
;
916 if (arc
->parent
!= arc
->child
)
917 scratch_arcs
[scratch_arc_count
++] = arc
;
918 arc
->has_been_placed
= 1;
919 arc
= arc
->next_parent
;
922 /* Keep track of how many symbols we're going to place. */
923 scratch_index
= index
;
925 /* A lie, but it makes identifying
926 these functions easier later. */
927 sym
->has_been_placed
= 1;
930 /* Now walk through the temporary arcs and copy
931 those we care about into the high arcs array. */
932 for (index
= 0; index
< scratch_arc_count
; index
++)
934 Arc
*arc
= scratch_arcs
[index
];
936 /* If this arc refers to highly used functions, then
937 then we want to keep it. */
938 if (arc
->child
->has_been_placed
939 && arc
->parent
->has_been_placed
)
941 high_arcs
[high_arc_count
++] = scratch_arcs
[index
];
943 /* We need to turn of has_been_placed since we're going to
944 use the main arc placement algorithm on these arcs. */
945 arc
->child
->has_been_placed
= 0;
946 arc
->parent
->has_been_placed
= 0;
950 /* Dump the multi-site high usage functions which are not
951 going to be ordered by the main ordering algorithm. */
952 for (index
= 0; index
< scratch_index
; index
++)
954 if (scratch_syms
[index
]->has_been_placed
)
955 printf ("%s\n", scratch_syms
[index
]->name
);
958 /* Now we can order the multi-site high use
959 functions based on the arcs between them. */
960 qsort (high_arcs
, high_arc_count
, sizeof (Arc
*), cmp_arc_count
);
961 order_and_dump_functions_by_arcs (high_arcs
, high_arc_count
, 1,
962 unplaced_arcs
, &unplaced_arc_count
);
964 /* Order and dump the high use functions left,
965 these typically have only a few call sites. */
966 order_and_dump_functions_by_arcs (arcs
, numarcs
, 0,
967 unplaced_arcs
, &unplaced_arc_count
);
969 /* Now place the rarely used functions. */
970 order_and_dump_functions_by_arcs (unplaced_arcs
, unplaced_arc_count
, 1,
971 scratch_arcs
, &scratch_arc_count
);
973 /* Output any functions not emitted by the order_and_dump calls. */
974 for (index
= 0; index
< used
; index
++)
975 if (used_syms
[index
]->has_been_placed
== 0)
976 printf("%s\n", used_syms
[index
]->name
);
978 /* Output the unused functions. */
979 for (index
= 0; index
< unused
; index
++)
980 printf("%s\n", unused_syms
[index
]->name
);
982 unused_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
983 used_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
984 scratch_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
985 high_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
986 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
987 unplaced_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
994 free (unplaced_arcs
);
997 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
998 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
1000 If ALL is nonzero, then place all functions referenced by THE_ARCS,
1001 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
1005 order_and_dump_functions_by_arcs (the_arcs
, arc_count
, all
,
1006 unplaced_arcs
, unplaced_arc_count
)
1008 unsigned long arc_count
;
1010 Arc
**unplaced_arcs
;
1011 unsigned long *unplaced_arc_count
;
1014 unsigned long long tmp_arcs
, total_arcs
;
1016 unsigned long tmp_arcs
, total_arcs
;
1020 /* If needed, compute the total arc count.
1022 Note we don't compensate for overflow if that happens! */
1026 for (index
= 0; index
< arc_count
; index
++)
1027 total_arcs
+= the_arcs
[index
]->count
;
1034 for (index
= 0; index
< arc_count
; index
++)
1037 Sym
*child
, *parent
;
1039 tmp_arcs
+= the_arcs
[index
]->count
;
1041 /* Ignore this arc if it's already been placed. */
1042 if (the_arcs
[index
]->has_been_placed
)
1045 child
= the_arcs
[index
]->child
;
1046 parent
= the_arcs
[index
]->parent
;
1048 /* If we're not using all arcs, and this is a rarely used
1049 arc, then put it on the unplaced_arc list. Similarly
1050 if both the parent and child of this arc have been placed. */
1051 if ((! all
&& (double)tmp_arcs
/ (double)total_arcs
> MOST
)
1052 || child
->has_been_placed
|| parent
->has_been_placed
)
1054 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[index
];
1058 /* If all slots in the parent and child are full, then there isn't
1059 anything we can do right now. We'll place this arc on the
1060 unplaced arc list in the hope that a global positioning
1061 algorithm can use it to place function chains. */
1062 if (parent
->next
&& parent
->prev
&& child
->next
&& child
->prev
)
1064 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[index
];
1068 /* If the parent is unattached, then find the closest
1069 place to attach it onto child's chain. Similarly
1070 for the opposite case. */
1071 if (!parent
->next
&& !parent
->prev
)
1078 /* Walk to the beginning and end of the child's chain. */
1091 /* Choose the closest. */
1092 child
= next_count
< prev_count
? next
: prev
;
1094 else if (! child
->next
&& !child
->prev
)
1113 parent
= prev_count
< next_count
? prev
: next
;
1117 /* Couldn't find anywhere to attach the functions,
1118 put the arc on the unplaced arc list. */
1119 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[index
];
1123 /* Make sure we don't tie two ends together. */
1143 /* This would tie two ends together. */
1144 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[index
];
1150 /* Must attach to the parent's prev field. */
1153 /* parent-prev and child-next */
1154 parent
->prev
= child
;
1155 child
->next
= parent
;
1156 the_arcs
[index
]->has_been_placed
= 1;
1159 else if (parent
->prev
)
1161 /* Must attach to the parent's next field. */
1164 /* parent-next and child-prev */
1165 parent
->next
= child
;
1166 child
->prev
= parent
;
1167 the_arcs
[index
]->has_been_placed
= 1;
1172 /* Can attach to either field in the parent, depends
1173 on where we've got space in the child. */
1176 /* parent-prev and child-next. */
1177 parent
->prev
= child
;
1178 child
->next
= parent
;
1179 the_arcs
[index
]->has_been_placed
= 1;
1183 /* parent-next and child-prev. */
1184 parent
->next
= child
;
1185 child
->prev
= parent
;
1186 the_arcs
[index
]->has_been_placed
= 1;
1191 /* Dump the chains of functions we've made. */
1192 for (index
= 0; index
< arc_count
; index
++)
1195 if (the_arcs
[index
]->parent
->has_been_placed
1196 || the_arcs
[index
]->child
->has_been_placed
)
1199 sym
= the_arcs
[index
]->parent
;
1201 /* If this symbol isn't attached to any other
1202 symbols, then we've got a rarely used arc.
1204 Skip it for now, we'll deal with them later. */
1205 if (sym
->next
== NULL
1206 && sym
->prev
== NULL
)
1209 /* Get to the start of this chain. */
1215 /* Mark it as placed. */
1216 sym
->has_been_placed
= 1;
1217 printf ("%s\n", sym
->name
);
1222 /* If we want to place all the arcs, then output
1223 those which weren't placed by the main algorithm. */
1225 for (index
= 0; index
< arc_count
; index
++)
1228 if (the_arcs
[index
]->parent
->has_been_placed
1229 || the_arcs
[index
]->child
->has_been_placed
)
1232 sym
= the_arcs
[index
]->parent
;
1234 sym
->has_been_placed
= 1;
1235 printf ("%s\n", sym
->name
);
1239 /* Print a suggested .o ordering for files on a link line based
1240 on profiling information. This uses the function placement
1241 code for the bulk of its work. */
1244 cg_print_file_ordering ()
1246 unsigned long scratch_arc_count
, index
;
1250 scratch_arc_count
= 0;
1252 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
1253 for (index
= 0; index
< numarcs
; index
++)
1255 if (! arcs
[index
]->parent
->mapped
1256 || ! arcs
[index
]->child
->mapped
)
1257 arcs
[index
]->has_been_placed
= 1;
1260 order_and_dump_functions_by_arcs (arcs
, numarcs
, 0,
1261 scratch_arcs
, &scratch_arc_count
);
1263 /* Output .o's not handled by the main placement algorithm. */
1264 for (index
= 0; index
< symtab
.len
; index
++)
1266 if (symtab
.base
[index
].mapped
1267 && ! symtab
.base
[index
].has_been_placed
)
1268 printf ("%s\n", symtab
.base
[index
].name
);
1271 /* Now output any .o's that didn't have any text symbols. */
1273 for (index
= 0; index
< symbol_map_count
; index
++)
1275 unsigned int index2
;
1277 /* Don't bother searching if this symbol
1278 is the same as the previous one. */
1279 if (last
&& !strcmp (last
, symbol_map
[index
].file_name
))
1282 for (index2
= 0; index2
< symtab
.len
; index2
++)
1284 if (! symtab
.base
[index2
].mapped
)
1287 if (!strcmp (symtab
.base
[index2
].name
, symbol_map
[index
].file_name
))
1291 /* If we didn't find it in the symbol table, then it must
1292 be a .o with no text symbols. Output it last. */
1293 if (index2
== symtab
.len
)
1294 printf ("%s\n", symbol_map
[index
].file_name
);
1295 last
= symbol_map
[index
].file_name
;