8 * Return value of comparison functions used to sort tables:
14 static void order_and_dump_functions_by_arcs
PARAMS ((Arc
**, unsigned long,
17 /* declarations of automatically generated functions to output blurbs: */
18 extern void bsd_callg_blurb
PARAMS ((FILE * fp
));
19 extern void fsf_callg_blurb
PARAMS ((FILE * fp
));
21 double print_time
= 0.0;
25 DEFUN_VOID (print_header
)
35 if (!bsd_style_output
)
37 if (print_descriptions
)
39 printf ("\t\t Call graph (explanation follows)\n\n");
43 printf ("\t\t\tCall graph\n\n");
46 printf ("\ngranularity: each sample hit covers %ld byte(s)",
47 (long) hist_scale
* sizeof (UNIT
));
50 printf (" for %.2f%% of %.2f seconds\n\n",
51 100.0 / print_time
, print_time
/ hz
);
55 printf (" no time propagated\n\n");
57 * This doesn't hurt, since all the numerators will be 0.0:
63 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
64 "", "", "", "", "called", "total", "parents");
65 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
66 "index", "%time", "self", "descendents",
67 "called", "self", "name", "index");
68 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
69 "", "", "", "", "called", "total", "children");
74 printf ("index %% time self children called name\n");
80 * Print a cycle header.
83 DEFUN (print_cycle
, (cyc
), Sym
* cyc
)
87 sprintf (buf
, "[%d]", cyc
->cg
.index
);
88 printf (bsd_style_output
89 ? "%-6.6s %5.1f %7.2f %11.2f %7d"
90 : "%-6.6s %5.1f %7.2f %7.2f %7d", buf
,
91 100 * (cyc
->cg
.prop
.self
+ cyc
->cg
.prop
.child
) / print_time
,
92 cyc
->cg
.prop
.self
/ hz
, cyc
->cg
.prop
.child
/ hz
, cyc
->ncalls
);
93 if (cyc
->cg
.self_calls
!= 0)
95 printf ("+%-7d", cyc
->cg
.self_calls
);
99 printf (" %7.7s", "");
101 printf (" <cycle %d as a whole> [%d]\n", cyc
->cg
.cyc
.num
, cyc
->cg
.index
);
106 * Compare LEFT and RIGHT membmer. Major comparison key is
107 * CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.
110 DEFUN (cmp_member
, (left
, right
), Sym
* left AND Sym
* right
)
112 double left_time
= left
->cg
.prop
.self
+ left
->cg
.prop
.child
;
113 double right_time
= right
->cg
.prop
.self
+ right
->cg
.prop
.child
;
114 long left_calls
= left
->ncalls
+ left
->cg
.self_calls
;
115 long right_calls
= right
->ncalls
+ right
->cg
.self_calls
;
117 if (left_time
> right_time
)
121 if (left_time
< right_time
)
126 if (left_calls
> right_calls
)
130 if (left_calls
< right_calls
)
139 * Sort members of a cycle.
142 DEFUN (sort_members
, (cyc
), Sym
* cyc
)
144 Sym
*todo
, *doing
, *prev
;
146 * Detach cycle members from cyclehead, and insertion sort them
149 todo
= cyc
->cg
.cyc
.next
;
150 cyc
->cg
.cyc
.next
= 0;
151 for (doing
= todo
; doing
&& doing
->cg
.cyc
.next
; doing
= todo
)
153 todo
= doing
->cg
.cyc
.next
;
154 for (prev
= cyc
; prev
->cg
.cyc
.next
; prev
= prev
->cg
.cyc
.next
)
156 if (cmp_member (doing
, prev
->cg
.cyc
.next
) == GREATERTHAN
)
161 doing
->cg
.cyc
.next
= prev
->cg
.cyc
.next
;
162 prev
->cg
.cyc
.next
= doing
;
168 * Print the members of a cycle.
171 DEFUN (print_members
, (cyc
), Sym
* cyc
)
176 for (member
= cyc
->cg
.cyc
.next
; member
; member
= member
->cg
.cyc
.next
)
178 printf (bsd_style_output
179 ? "%6.6s %5.5s %7.2f %11.2f %7d"
180 : "%6.6s %5.5s %7.2f %7.2f %7d",
181 "", "", member
->cg
.prop
.self
/ hz
, member
->cg
.prop
.child
/ hz
,
183 if (member
->cg
.self_calls
!= 0)
185 printf ("+%-7d", member
->cg
.self_calls
);
189 printf (" %7.7s", "");
199 * Compare two arcs to/from the same child/parent.
200 * - if one arc is a self arc, it's least.
201 * - if one arc is within a cycle, it's less than.
202 * - if both arcs are within a cycle, compare arc counts.
203 * - if neither arc is within a cycle, compare with
204 * time + child_time as major key
205 * arc count as minor key
208 DEFUN (cmp_arc
, (left
, right
), Arc
* left AND Arc
* right
)
210 Sym
*left_parent
= left
->parent
;
211 Sym
*left_child
= left
->child
;
212 Sym
*right_parent
= right
->parent
;
213 Sym
*right_child
= right
->child
;
214 double left_time
, right_time
;
217 printf ("[cmp_arc] ");
218 print_name (left_parent
);
220 print_name (left_child
);
221 printf (" %f + %f %d/%d\n", left
->time
, left
->child_time
,
222 left
->count
, left_child
->ncalls
);
223 printf ("[cmp_arc] ");
224 print_name (right_parent
);
226 print_name (right_child
);
227 printf (" %f + %f %d/%d\n", right
->time
, right
->child_time
,
228 right
->count
, right_child
->ncalls
);
231 if (left_parent
== left_child
)
233 return LESSTHAN
; /* left is a self call */
235 if (right_parent
== right_child
)
237 return GREATERTHAN
; /* right is a self call */
240 if (left_parent
->cg
.cyc
.num
!= 0 && left_child
->cg
.cyc
.num
!= 0
241 && left_parent
->cg
.cyc
.num
== left_child
->cg
.cyc
.num
)
243 /* left is a call within a cycle */
244 if (right_parent
->cg
.cyc
.num
!= 0 && right_child
->cg
.cyc
.num
!= 0
245 && right_parent
->cg
.cyc
.num
== right_child
->cg
.cyc
.num
)
247 /* right is a call within the cycle, too */
248 if (left
->count
< right
->count
)
252 if (left
->count
> right
->count
)
260 /* right isn't a call within the cycle */
266 /* left isn't a call within a cycle */
267 if (right_parent
->cg
.cyc
.num
!= 0 && right_child
->cg
.cyc
.num
!= 0
268 && right_parent
->cg
.cyc
.num
== right_child
->cg
.cyc
.num
)
270 /* right is a call within a cycle */
275 /* neither is a call within a cycle */
276 left_time
= left
->time
+ left
->child_time
;
277 right_time
= right
->time
+ right
->child_time
;
278 if (left_time
< right_time
)
282 if (left_time
> right_time
)
286 if (left
->count
< right
->count
)
290 if (left
->count
> right
->count
)
301 DEFUN (sort_parents
, (child
), Sym
* child
)
303 Arc
*arc
, *detached
, sorted
, *prev
;
306 * Unlink parents from child, then insertion sort back on to
308 * *arc the arc you have detached and are inserting.
309 * *detached the rest of the arcs to be sorted.
310 * sorted arc list onto which you insertion sort.
311 * *prev arc before the arc you are comparing.
313 sorted
.next_parent
= 0;
314 for (arc
= child
->cg
.parents
; arc
; arc
= detached
)
316 detached
= arc
->next_parent
;
318 /* consider *arc as disconnected; insert it into sorted: */
319 for (prev
= &sorted
; prev
->next_parent
; prev
= prev
->next_parent
)
321 if (cmp_arc (arc
, prev
->next_parent
) != GREATERTHAN
)
326 arc
->next_parent
= prev
->next_parent
;
327 prev
->next_parent
= arc
;
330 /* reattach sorted arcs to child: */
331 child
->cg
.parents
= sorted
.next_parent
;
336 DEFUN (print_parents
, (child
), Sym
* child
)
342 if (child
->cg
.cyc
.head
!= 0)
344 cycle_head
= child
->cg
.cyc
.head
;
350 if (!child
->cg
.parents
)
352 printf (bsd_style_output
353 ? "%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n"
354 : "%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n",
355 "", "", "", "", "", "");
358 sort_parents (child
);
359 for (arc
= child
->cg
.parents
; arc
; arc
= arc
->next_parent
)
361 parent
= arc
->parent
;
362 if (child
== parent
|| (child
->cg
.cyc
.num
!= 0
363 && parent
->cg
.cyc
.num
== child
->cg
.cyc
.num
))
365 /* selfcall or call among siblings: */
366 printf (bsd_style_output
367 ? "%6.6s %5.5s %7.7s %11.11s %7d %7.7s "
368 : "%6.6s %5.5s %7.7s %7.7s %7d %7.7s ",
376 /* regular parent of child: */
377 printf (bsd_style_output
378 ? "%6.6s %5.5s %7.2f %11.2f %7d/%-7d "
379 : "%6.6s %5.5s %7.2f %7.2f %7d/%-7d ",
381 arc
->time
/ hz
, arc
->child_time
/ hz
,
382 arc
->count
, cycle_head
->ncalls
);
391 DEFUN (sort_children
, (parent
), Sym
* parent
)
393 Arc
*arc
, *detached
, sorted
, *prev
;
395 * Unlink children from parent, then insertion sort back on to
397 * *arc the arc you have detached and are inserting.
398 * *detached the rest of the arcs to be sorted.
399 * sorted arc list onto which you insertion sort.
400 * *prev arc before the arc you are comparing.
402 sorted
.next_child
= 0;
403 for (arc
= parent
->cg
.children
; arc
; arc
= detached
)
405 detached
= arc
->next_child
;
407 /* consider *arc as disconnected; insert it into sorted: */
408 for (prev
= &sorted
; prev
->next_child
; prev
= prev
->next_child
)
410 if (cmp_arc (arc
, prev
->next_child
) != LESSTHAN
)
415 arc
->next_child
= prev
->next_child
;
416 prev
->next_child
= arc
;
419 /* reattach sorted children to parent: */
420 parent
->cg
.children
= sorted
.next_child
;
425 DEFUN (print_children
, (parent
), Sym
* parent
)
430 sort_children (parent
);
431 arc
= parent
->cg
.children
;
432 for (arc
= parent
->cg
.children
; arc
; arc
= arc
->next_child
)
435 if (child
== parent
|| (child
->cg
.cyc
.num
!= 0
436 && child
->cg
.cyc
.num
== parent
->cg
.cyc
.num
))
438 /* self call or call to sibling: */
439 printf (bsd_style_output
440 ? "%6.6s %5.5s %7.7s %11.11s %7d %7.7s "
441 : "%6.6s %5.5s %7.7s %7.7s %7d %7.7s ",
442 "", "", "", "", arc
->count
, "");
448 /* regular child of parent: */
449 printf (bsd_style_output
450 ? "%6.6s %5.5s %7.2f %11.2f %7d/%-7d "
451 : "%6.6s %5.5s %7.2f %7.2f %7d/%-7d ",
453 arc
->time
/ hz
, arc
->child_time
/ hz
,
454 arc
->count
, child
->cg
.cyc
.head
->ncalls
);
463 DEFUN (print_line
, (np
), Sym
* np
)
467 sprintf (buf
, "[%d]", np
->cg
.index
);
468 printf (bsd_style_output
469 ? "%-6.6s %5.1f %7.2f %11.2f"
470 : "%-6.6s %5.1f %7.2f %7.2f", buf
,
471 100 * (np
->cg
.prop
.self
+ np
->cg
.prop
.child
) / print_time
,
472 np
->cg
.prop
.self
/ hz
, np
->cg
.prop
.child
/ hz
);
473 if ((np
->ncalls
+ np
->cg
.self_calls
) != 0)
475 printf (" %7d", np
->ncalls
);
476 if (np
->cg
.self_calls
!= 0)
478 printf ("+%-7d ", np
->cg
.self_calls
);
482 printf (" %7.7s ", "");
487 printf (" %7.7s %7.7s ", "", "");
495 * Print dynamic call graph.
498 DEFUN (cg_print
, (timesortsym
), Sym
** timesortsym
)
503 if (print_descriptions
&& bsd_style_output
)
505 bsd_callg_blurb (stdout
);
510 for (index
= 0; index
< symtab
.len
+ num_cycles
; ++index
)
512 parent
= timesortsym
[index
];
513 if ((ignore_zeros
&& parent
->ncalls
== 0
514 && parent
->cg
.self_calls
== 0 && parent
->cg
.prop
.self
== 0
515 && parent
->cg
.prop
.child
== 0)
516 || !parent
->cg
.print_flag
)
520 if (!parent
->name
&& parent
->cg
.cyc
.num
!= 0)
523 print_cycle (parent
);
524 print_members (parent
);
528 print_parents (parent
);
530 print_children (parent
);
532 if (bsd_style_output
)
534 printf ("-----------------------------------------------\n");
535 if (bsd_style_output
)
539 if (print_descriptions
&& !bsd_style_output
)
541 fsf_callg_blurb (stdout
);
547 DEFUN (cmp_name
, (left
, right
), const PTR left AND
const PTR right
)
549 const Sym
**npp1
= (const Sym
**) left
;
550 const Sym
**npp2
= (const Sym
**) right
;
552 return strcmp ((*npp1
)->name
, (*npp2
)->name
);
557 DEFUN_VOID (cg_print_index
)
559 int index
, nnames
, todo
, i
, j
, col
, starting_col
;
560 Sym
**name_sorted_syms
, *sym
;
561 const char *filename
;
563 int column_width
= (output_width
- 1) / 3; /* don't write in last col! */
565 * Now, sort regular function name alphabetically to create an
568 name_sorted_syms
= (Sym
**) xmalloc ((symtab
.len
+ num_cycles
) * sizeof (Sym
*));
569 for (index
= 0, nnames
= 0; index
< symtab
.len
; index
++)
571 if (ignore_zeros
&& symtab
.base
[index
].ncalls
== 0
572 && symtab
.base
[index
].hist
.time
== 0)
576 name_sorted_syms
[nnames
++] = &symtab
.base
[index
];
578 qsort (name_sorted_syms
, nnames
, sizeof (Sym
*), cmp_name
);
579 for (index
= 1, todo
= nnames
; index
<= num_cycles
; index
++)
581 name_sorted_syms
[todo
++] = &cycle_header
[index
];
583 printf ("\f\nIndex by function name\n\n");
584 index
= (todo
+ 2) / 3;
585 for (i
= 0; i
< index
; i
++)
589 for (j
= i
; j
< todo
; j
+= index
)
591 sym
= name_sorted_syms
[j
];
592 if (sym
->cg
.print_flag
)
594 sprintf (buf
, "[%d]", sym
->cg
.index
);
598 sprintf (buf
, "(%d)", sym
->cg
.index
);
602 if (bsd_style_output
)
604 printf ("%6.6s %-19.19s", buf
, sym
->name
);
609 for (; col
< starting_col
+ 5; ++col
)
613 printf (" %s ", buf
);
614 col
+= print_name_only (sym
);
615 if (!line_granularity
&& sym
->is_static
&& sym
->file
)
617 filename
= sym
->file
->name
;
620 filename
= strrchr (filename
, '/');
627 filename
= sym
->file
->name
;
630 printf (" (%s)", filename
);
631 col
+= strlen (filename
) + 3;
637 if (bsd_style_output
)
639 printf ("%6.6s ", buf
);
640 sprintf (buf
, "<cycle %d>", sym
->cg
.cyc
.num
);
641 printf ("%-19.19s", buf
);
646 for (; col
< starting_col
+ 5; ++col
)
648 printf (" %s ", buf
);
649 sprintf (buf
, "<cycle %d>", sym
->cg
.cyc
.num
);
654 starting_col
+= column_width
;
658 free (name_sorted_syms
);
661 /* Compare two arcs based on their usage counts. We want to sort
662 in descending order. */
664 DEFUN (cmp_arc_count
, (left
, right
), const PTR left AND
const PTR right
)
666 const Arc
**npp1
= (const Arc
**) left
;
667 const Arc
**npp2
= (const Arc
**) right
;
669 if ((*npp1
)->count
> (*npp2
)->count
)
671 else if ((*npp1
)->count
< (*npp2
)->count
)
677 /* Compare two funtions based on their usage counts. We want to sort
678 in descending order. */
680 DEFUN (cmp_fun_nuses
, (left
, right
), const PTR left AND
const PTR right
)
682 const Sym
**npp1
= (const Sym
**) left
;
683 const Sym
**npp2
= (const Sym
**) right
;
685 if ((*npp1
)->nuses
> (*npp2
)->nuses
)
687 else if ((*npp1
)->nuses
< (*npp2
)->nuses
)
693 /* Print a suggested function ordering based on the profiling data.
695 We perform 4 major steps when ordering functions:
697 * Group unused functions together and place them at the
698 end of the function order.
700 * Search the highest use arcs (those which account for 90% of
701 the total arc count) for functions which have several parents.
703 Group those with the most call sites together (currently the
704 top 1.25% which have at least five different call sites).
706 These are emitted at the start of the function order.
708 * Use a greedy placement algorithm to place functions which
709 occur in the top 99% of the arcs in the profile. Some provisions
710 are made to handle high usage arcs where the parent and/or
711 child has already been placed.
713 * Run the same greedy placement algorithm on the remaining
714 arcs to place the leftover functions.
717 The various "magic numbers" should (one day) be tuneable by command
718 line options. They were arrived at by benchmarking a few applications
719 with various values to see which values produced better overall function
722 Of course, profiling errors, machine limitations (PA long calls), and
723 poor cutoff values for the placement algorithm may limit the usefullness
724 of the resulting function order. Improvements would be greatly appreciated.
728 * Place the functions with many callers near the middle of the
729 list to reduce long calls.
731 * Propagate arc usage changes as functions are placed. Ie if
732 func1 and func2 are placed together, arcs to/from those arcs
733 to the same parent/child should be combined, then resort the
734 arcs to choose the next one.
736 * Implement some global positioning algorithm to place the
737 chains made by the greedy local positioning algorithm. Probably
738 by examining arcs which haven't been placed yet to tie two
741 * Take a function's size and time into account in the algorithm;
742 size in particular is important on the PA (long calls). Placing
743 many small functions onto their own page may be wise.
745 * Use better profiling information; many published algorithms
746 are based on call sequences through time, rather than just
749 * Prodecure cloning could improve performance when a small number
750 of arcs account for most of the calls to a particular function.
752 * Use relocation information to avoid moving unused functions
753 completely out of the code stream; this would avoid severe lossage
754 when the profile data bears little resemblance to actual runs.
756 * Propagation of arc usages should also improve .o link line
757 ordering which shares the same arc placement algorithm with
758 the function ordering code (in fact it is a degenerate case
759 of function ordering). */
762 DEFUN_VOID (cg_print_function_ordering
)
764 unsigned long index
, used
, unused
, scratch_index
;
765 unsigned long unplaced_arc_count
, high_arc_count
, scratch_arc_count
;
767 unsigned long long total_arcs
, tmp_arcs_count
;
769 unsigned long total_arcs
, tmp_arcs_count
;
771 Sym
**unused_syms
, **used_syms
, **scratch_syms
;
772 Arc
**unplaced_arcs
, **high_arcs
, **scratch_arcs
;
778 unplaced_arc_count
= 0;
780 scratch_arc_count
= 0;
782 /* First group all the unused functions together. */
783 unused_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
784 used_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
785 scratch_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
786 high_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
787 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
788 unplaced_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
790 /* Walk through all the functions; mark those which are never
791 called as placed (we'll emit them as a group later). */
792 for (index
= 0, used
= 0, unused
= 0; index
< symtab
.len
; index
++)
794 if (symtab
.base
[index
].ncalls
== 0)
796 /* Filter out gprof generated names. */
797 if (strcmp (symtab
.base
[index
].name
, "<locore>")
798 && strcmp (symtab
.base
[index
].name
, "<hicore>"))
800 unused_syms
[unused
++] = &symtab
.base
[index
];
801 symtab
.base
[index
].has_been_placed
= 1;
806 used_syms
[used
++] = &symtab
.base
[index
];
807 symtab
.base
[index
].has_been_placed
= 0;
808 symtab
.base
[index
].next
= 0;
809 symtab
.base
[index
].prev
= 0;
810 symtab
.base
[index
].nuses
= 0;
814 /* Sort the arcs from most used to least used. */
815 qsort (arcs
, numarcs
, sizeof (Arc
*), cmp_arc_count
);
817 /* Compute the total arc count. Also mark arcs as unplaced.
819 Note we don't compensate for overflow if that happens!
820 Overflow is much less likely when this file is compiled
821 with GCC as it can double-wide integers via long long. */
823 for (index
= 0; index
< numarcs
; index
++)
825 total_arcs
+= arcs
[index
]->count
;
826 arcs
[index
]->has_been_placed
= 0;
829 /* We want to pull out those functions which are referenced
830 by many highly used arcs and emit them as a group. This
831 could probably use some tuning. */
833 for (index
= 0; index
< numarcs
; index
++)
835 tmp_arcs_count
+= arcs
[index
]->count
;
837 /* Count how many times each parent and child are used up
838 to our threshhold of arcs (90%). */
839 if ((double)tmp_arcs_count
/ (double)total_arcs
> 0.90)
842 arcs
[index
]->child
->nuses
++;
845 /* Now sort a temporary symbol table based on the number of
846 times each function was used in the highest used arcs. */
847 memcpy (scratch_syms
, used_syms
, used
* sizeof (Sym
*));
848 qsort (scratch_syms
, used
, sizeof (Sym
*), cmp_fun_nuses
);
850 /* Now pick out those symbols we're going to emit as
851 a group. We take up to 1.25% of the used symbols. */
852 for (index
= 0; index
< used
/ 80; index
++)
854 Sym
*sym
= scratch_syms
[index
];
857 /* If we hit symbols that aren't used from many call sites,
858 then we can quit. We choose five as the low limit for
859 no particular reason. */
863 /* We're going to need the arcs between these functions.
864 Unfortunately, we don't know all these functions
865 until we're done. So we keep track of all the arcs
866 to the functions we care about, then prune out those
867 which are uninteresting.
869 An interesting variation would be to quit when we found
870 multi-call site functions which account for some percentage
873 arc
= sym
->cg
.children
;
876 if (arc
->parent
!= arc
->child
)
877 scratch_arcs
[scratch_arc_count
++] = arc
;
878 arc
->has_been_placed
= 1;
879 arc
= arc
->next_child
;
882 arc
= sym
->cg
.parents
;
885 if (arc
->parent
!= arc
->child
)
886 scratch_arcs
[scratch_arc_count
++] = arc
;
887 arc
->has_been_placed
= 1;
888 arc
= arc
->next_parent
;
891 /* Keep track of how many symbols we're going to place. */
892 scratch_index
= index
;
894 /* A lie, but it makes identifying these functions easier
896 sym
->has_been_placed
= 1;
899 /* Now walk through the temporary arcs and copy those we care about
900 into the high arcs array. */
901 for (index
= 0; index
< scratch_arc_count
; index
++)
903 Arc
*arc
= scratch_arcs
[index
];
905 /* If this arc refers to highly used functions, then
906 then we want to keep it. */
907 if (arc
->child
->has_been_placed
908 && arc
->parent
->has_been_placed
)
910 high_arcs
[high_arc_count
++] = scratch_arcs
[index
];
912 /* We need to turn of has_been_placed since we're going to
913 use the main arc placement algorithm on these arcs. */
914 arc
->child
->has_been_placed
= 0;
915 arc
->parent
->has_been_placed
= 0;
919 /* Dump the multi-site high usage functions which are not going
920 to be ordered by the main ordering algorithm. */
921 for (index
= 0; index
< scratch_index
; index
++)
923 if (scratch_syms
[index
]->has_been_placed
)
924 printf ("%s\n", scratch_syms
[index
]->name
);
927 /* Now we can order the multi-site high use functions based on the
928 arcs between them. */
929 qsort (high_arcs
, high_arc_count
, sizeof (Arc
*), cmp_arc_count
);
930 order_and_dump_functions_by_arcs (high_arcs
, high_arc_count
, 1,
931 unplaced_arcs
, &unplaced_arc_count
);
933 /* Order and dump the high use functions left, these typically
934 have only a few call sites. */
935 order_and_dump_functions_by_arcs (arcs
, numarcs
, 0,
936 unplaced_arcs
, &unplaced_arc_count
);
938 /* Now place the rarely used functions. */
939 order_and_dump_functions_by_arcs (unplaced_arcs
, unplaced_arc_count
, 1,
940 scratch_arcs
, &scratch_arc_count
);
942 /* Output any functions not emitted by the order_and_dump calls. */
943 for (index
= 0; index
< used
; index
++)
944 if (used_syms
[index
]->has_been_placed
== 0)
945 printf("%s\n", used_syms
[index
]->name
);
947 /* Output the unused functions. */
948 for (index
= 0; index
< unused
; index
++)
949 printf("%s\n", unused_syms
[index
]->name
);
951 unused_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
952 used_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
953 scratch_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
954 high_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
955 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
956 unplaced_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
963 free (unplaced_arcs
);
966 /* Place functions based on the arcs in ARCS with NUMARCS entries;
967 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
969 If ALL is nonzero, then place all functions referenced by ARCS,
970 else only place those referenced in the top 99% of the arcs in ARCS. */
974 order_and_dump_functions_by_arcs (arcs
, numarcs
, all
,
975 unplaced_arcs
, unplaced_arc_count
)
977 unsigned long numarcs
;
980 unsigned long *unplaced_arc_count
;
983 unsigned long long tmp_arcs
, total_arcs
;
985 unsigned long tmp_arcs
, total_arcs
;
989 /* If needed, compute the total arc count.
991 Note we don't compensate for overflow if that happens! */
995 for (index
= 0; index
< numarcs
; index
++)
996 total_arcs
+= arcs
[index
]->count
;
1002 for (index
= 0; index
< numarcs
; index
++)
1005 Sym
*child
, *parent
;
1007 tmp_arcs
+= arcs
[index
]->count
;
1009 /* Ignore this arc if it's already been placed. */
1010 if (arcs
[index
]->has_been_placed
)
1013 child
= arcs
[index
]->child
;
1014 parent
= arcs
[index
]->parent
;
1016 /* If we're not using all arcs, and this is a rarely used
1017 arc, then put it on the unplaced_arc list. Similarly
1018 if both the parent and child of this arc have been placed. */
1019 if ((! all
&& (double)tmp_arcs
/ (double)total_arcs
> MOST
)
1020 || child
->has_been_placed
|| parent
->has_been_placed
)
1022 unplaced_arcs
[(*unplaced_arc_count
)++] = arcs
[index
];
1026 /* If all slots in the parent and child are full, then there isn't
1027 anything we can do right now. We'll place this arc on the
1028 unplaced arc list in the hope that a global positioning
1029 algorithm can use it to place function chains. */
1030 if (parent
->next
&& parent
->prev
&& child
->next
&& child
->prev
)
1032 unplaced_arcs
[(*unplaced_arc_count
)++] = arcs
[index
];
1036 /* If the parent is unattached, then find the closest
1037 place to attach it onto child's chain. Similarly
1038 for the opposite case. */
1039 if (!parent
->next
&& !parent
->prev
)
1046 /* Walk to the beginning and end of the child's chain. */
1059 /* Choose the closest. */
1060 child
= next_count
< prev_count
? next
: prev
;
1062 else if (! child
->next
&& !child
->prev
)
1081 parent
= prev_count
< next_count
? prev
: next
;
1085 /* Couldn't find anywhere to attach the functions,
1086 put the arc on the unplaced arc list. */
1087 unplaced_arcs
[(*unplaced_arc_count
)++] = arcs
[index
];
1091 /* Make sure we don't tie two ends together. */
1111 /* This would tie two ends together. */
1112 unplaced_arcs
[(*unplaced_arc_count
)++] = arcs
[index
];
1118 /* Must attach to the parent's prev field. */
1121 /* parent-prev and child-next */
1122 parent
->prev
= child
;
1123 child
->next
= parent
;
1124 arcs
[index
]->has_been_placed
= 1;
1127 else if (parent
->prev
)
1129 /* Must attach to the parent's next field. */
1132 /* parent-next and child-prev */
1133 parent
->next
= child
;
1134 child
->prev
= parent
;
1135 arcs
[index
]->has_been_placed
= 1;
1140 /* Can attach to either field in the parent, depends
1141 on where we've got space in the child. */
1144 /* parent-prev and child-next */
1145 parent
->prev
= child
;
1146 child
->next
= parent
;
1147 arcs
[index
]->has_been_placed
= 1;
1151 /* parent-next and child-prev */
1152 parent
->next
= child
;
1153 child
->prev
= parent
;
1154 arcs
[index
]->has_been_placed
= 1;
1159 /* Dump the chains of functions we've made. */
1160 for (index
= 0; index
< numarcs
; index
++)
1163 if (arcs
[index
]->parent
->has_been_placed
1164 || arcs
[index
]->child
->has_been_placed
)
1167 sym
= arcs
[index
]->parent
;
1169 /* If this symbol isn't attached to any other
1170 symbols, then we've got a rarely used arc.
1172 Skip it for now, we'll deal with them later. */
1173 if (sym
->next
== NULL
1174 && sym
->prev
== NULL
)
1177 /* Get to the start of this chain. */
1183 /* Mark it as placed. */
1184 sym
->has_been_placed
= 1;
1185 printf ("%s\n", sym
->name
);
1190 /* If we want to place all the arcs, then output those which weren't
1191 placed by the main algorithm. */
1193 for (index
= 0; index
< numarcs
; index
++)
1196 if (arcs
[index
]->parent
->has_been_placed
1197 || arcs
[index
]->child
->has_been_placed
)
1200 sym
= arcs
[index
]->parent
;
1202 sym
->has_been_placed
= 1;
1203 printf ("%s\n", sym
->name
);
1207 /* Print a suggested .o ordering for files on a link line based
1208 on profiling information. This uses the function placement
1209 code for the bulk of its work. */
1211 struct function_map
{
1212 char *function_name
;
1217 DEFUN_VOID (cg_print_file_ordering
)
1219 unsigned long scratch_arc_count
, index
;
1221 extern struct function_map
*symbol_map
;
1222 extern int symbol_map_count
;
1225 scratch_arc_count
= 0;
1227 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
1228 for (index
= 0; index
< numarcs
; index
++)
1230 if (! arcs
[index
]->parent
->mapped
1231 || ! arcs
[index
]->child
->mapped
)
1232 arcs
[index
]->has_been_placed
= 1;
1235 order_and_dump_functions_by_arcs (arcs
, numarcs
, 0,
1236 scratch_arcs
, &scratch_arc_count
);
1238 /* Output .o's not handled by the main placement algorithm. */
1239 for (index
= 0; index
< symtab
.len
; index
++)
1241 if (symtab
.base
[index
].mapped
1242 && ! symtab
.base
[index
].has_been_placed
)
1243 printf ("%s\n", symtab
.base
[index
].name
);
1246 /* Now output any .o's that didn't have any text symbols. */
1248 for (index
= 0; index
< symbol_map_count
; index
++)
1252 /* Don't bother searching if this symbol is the
1253 same as the previous one. */
1254 if (last
&& !strcmp (last
, symbol_map
[index
].file_name
))
1257 for (index2
= 0; index2
< symtab
.len
; index2
++)
1259 if (! symtab
.base
[index2
].mapped
)
1262 if (!strcmp (symtab
.base
[index2
].name
, symbol_map
[index
].file_name
))
1266 /* If we didn't find it in the symbol table, then it must be a .o
1267 with no text symbols. Output it last. */
1268 if (index2
== symtab
.len
)
1269 printf ("%s\n", symbol_map
[index
].file_name
);
1270 last
= symbol_map
[index
].file_name
;