2003-11-07 Andrew Cagney <cagney@redhat.com>
[deliverable/binutils-gdb.git] / gprof / cg_print.c
1 /* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21 \f
22 #include "libiberty.h"
23 #include "gprof.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "cg_arcs.h"
28 #include "cg_print.h"
29 #include "hist.h"
30 #include "utils.h"
31 #include "corefile.h"
32
33 /* Return value of comparison functions used to sort tables. */
34 #define LESSTHAN -1
35 #define EQUALTO 0
36 #define GREATERTHAN 1
37
38 static void print_header PARAMS ((void));
39 static void print_cycle PARAMS ((Sym *));
40 static int cmp_member PARAMS ((Sym *, Sym *));
41 static void sort_members PARAMS ((Sym *));
42 static void print_members PARAMS ((Sym *));
43 static int cmp_arc PARAMS ((Arc *, Arc *));
44 static void sort_parents PARAMS ((Sym *));
45 static void print_parents PARAMS ((Sym *));
46 static void sort_children PARAMS ((Sym *));
47 static void print_children PARAMS ((Sym *));
48 static void print_line PARAMS ((Sym *));
49 static int cmp_name PARAMS ((const PTR, const PTR));
50 static int cmp_arc_count PARAMS ((const PTR, const PTR));
51 static int cmp_fun_nuses PARAMS ((const PTR, const PTR));
52 static void order_and_dump_functions_by_arcs
53 PARAMS ((Arc **, unsigned long, int, Arc **, unsigned long *));
54
55 /* Declarations of automatically generated functions to output blurbs. */
56 extern void bsd_callg_blurb PARAMS ((FILE * fp));
57 extern void fsf_callg_blurb PARAMS ((FILE * fp));
58
59 double print_time = 0.0;
60
61
62 static void
63 print_header ()
64 {
65 if (first_output)
66 first_output = FALSE;
67 else
68 printf ("\f\n");
69
70 if (!bsd_style_output)
71 {
72 if (print_descriptions)
73 printf (_("\t\t Call graph (explanation follows)\n\n"));
74 else
75 printf (_("\t\t\tCall graph\n\n"));
76 }
77
78 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
79 (long) hist_scale * sizeof (UNIT));
80
81 if (print_time > 0.0)
82 printf (_(" for %.2f%% of %.2f seconds\n\n"),
83 100.0 / print_time, print_time / hz);
84 else
85 {
86 printf (_(" no time propagated\n\n"));
87
88 /* This doesn't hurt, since all the numerators will be 0.0. */
89 print_time = 1.0;
90 }
91
92 if (bsd_style_output)
93 {
94 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
95 "", "", "", "", _("called"), _("total"), _("parents"));
96 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
97 _("index"), _("%time"), _("self"), _("descendants"),
98 _("called"), _("self"), _("name"), _("index"));
99 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
100 "", "", "", "", _("called"), _("total"), _("children"));
101 printf ("\n");
102 }
103 else
104 {
105 printf (_("index %% time self children called name\n"));
106 }
107 }
108
109 /* Print a cycle header. */
110
111 static void
112 print_cycle (cyc)
113 Sym *cyc;
114 {
115 char buf[BUFSIZ];
116
117 sprintf (buf, "[%d]", cyc->cg.index);
118 printf (bsd_style_output
119 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
120 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
121 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
122 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
123
124 if (cyc->cg.self_calls != 0)
125 printf ("+%-7lu", cyc->cg.self_calls);
126 else
127 printf (" %7.7s", "");
128
129 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
130 }
131
132 /* Compare LEFT and RIGHT membmer. Major comparison key is
133 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
134
135 static int
136 cmp_member (left, right)
137 Sym *left;
138 Sym *right;
139 {
140 double left_time = left->cg.prop.self + left->cg.prop.child;
141 double right_time = right->cg.prop.self + right->cg.prop.child;
142 unsigned long left_calls = left->ncalls + left->cg.self_calls;
143 unsigned long right_calls = right->ncalls + right->cg.self_calls;
144
145 if (left_time > right_time)
146 return GREATERTHAN;
147
148 if (left_time < right_time)
149 return LESSTHAN;
150
151 if (left_calls > right_calls)
152 return GREATERTHAN;
153
154 if (left_calls < right_calls)
155 return LESSTHAN;
156
157 return EQUALTO;
158 }
159
160 /* Sort members of a cycle. */
161
162 static void
163 sort_members (cyc)
164 Sym *cyc;
165 {
166 Sym *todo, *doing, *prev;
167
168 /* Detach cycle members from cyclehead,
169 and insertion sort them back on. */
170 todo = cyc->cg.cyc.next;
171 cyc->cg.cyc.next = 0;
172
173 for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
174 {
175 todo = doing->cg.cyc.next;
176
177 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
178 {
179 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
180 break;
181 }
182
183 doing->cg.cyc.next = prev->cg.cyc.next;
184 prev->cg.cyc.next = doing;
185 }
186 }
187
188 /* Print the members of a cycle. */
189
190 static void
191 print_members (cyc)
192 Sym *cyc;
193 {
194 Sym *member;
195
196 sort_members (cyc);
197
198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
199 {
200 printf (bsd_style_output
201 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204 member->ncalls);
205
206 if (member->cg.self_calls != 0)
207 printf ("+%-7lu", member->cg.self_calls);
208 else
209 printf (" %7.7s", "");
210
211 printf (" ");
212 print_name (member);
213 printf ("\n");
214 }
215 }
216
217 /* Compare two arcs to/from the same child/parent.
218 - if one arc is a self arc, it's least.
219 - if one arc is within a cycle, it's less than.
220 - if both arcs are within a cycle, compare arc counts.
221 - if neither arc is within a cycle, compare with
222 time + child_time as major key
223 arc count as minor key. */
224
225 static int
226 cmp_arc (left, right)
227 Arc *left;
228 Arc *right;
229 {
230 Sym *left_parent = left->parent;
231 Sym *left_child = left->child;
232 Sym *right_parent = right->parent;
233 Sym *right_child = right->child;
234 double left_time, right_time;
235
236 DBG (TIMEDEBUG,
237 printf ("[cmp_arc] ");
238 print_name (left_parent);
239 printf (" calls ");
240 print_name (left_child);
241 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
242 left->count, left_child->ncalls);
243 printf ("[cmp_arc] ");
244 print_name (right_parent);
245 printf (" calls ");
246 print_name (right_child);
247 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
248 right->count, right_child->ncalls);
249 printf ("\n");
250 );
251
252 if (left_parent == left_child)
253 return LESSTHAN; /* Left is a self call. */
254
255 if (right_parent == right_child)
256 return GREATERTHAN; /* Right is a self call. */
257
258 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
259 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
260 {
261 /* Left is a call within a cycle. */
262 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
263 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
264 {
265 /* Right is a call within the cycle, too. */
266 if (left->count < right->count)
267 return LESSTHAN;
268
269 if (left->count > right->count)
270 return GREATERTHAN;
271
272 return EQUALTO;
273 }
274 else
275 {
276 /* Right isn't a call within the cycle. */
277 return LESSTHAN;
278 }
279 }
280 else
281 {
282 /* Left isn't a call within a cycle. */
283 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
284 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
285 {
286 /* Right is a call within a cycle. */
287 return GREATERTHAN;
288 }
289 else
290 {
291 /* Neither is a call within a cycle. */
292 left_time = left->time + left->child_time;
293 right_time = right->time + right->child_time;
294
295 if (left_time < right_time)
296 return LESSTHAN;
297
298 if (left_time > right_time)
299 return GREATERTHAN;
300
301 if (left->count < right->count)
302 return LESSTHAN;
303
304 if (left->count > right->count)
305 return GREATERTHAN;
306
307 return EQUALTO;
308 }
309 }
310 }
311
312
313 static void
314 sort_parents (child)
315 Sym * child;
316 {
317 Arc *arc, *detached, sorted, *prev;
318
319 /* Unlink parents from child, then insertion sort back on to
320 sorted's parents.
321 *arc the arc you have detached and are inserting.
322 *detached the rest of the arcs to be sorted.
323 sorted arc list onto which you insertion sort.
324 *prev arc before the arc you are comparing. */
325 sorted.next_parent = 0;
326
327 for (arc = child->cg.parents; arc; arc = detached)
328 {
329 detached = arc->next_parent;
330
331 /* Consider *arc as disconnected; insert it into sorted. */
332 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
333 {
334 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
335 break;
336 }
337
338 arc->next_parent = prev->next_parent;
339 prev->next_parent = arc;
340 }
341
342 /* Reattach sorted arcs to child. */
343 child->cg.parents = sorted.next_parent;
344 }
345
346
347 static void
348 print_parents (child)
349 Sym *child;
350 {
351 Sym *parent;
352 Arc *arc;
353 Sym *cycle_head;
354
355 if (child->cg.cyc.head != 0)
356 cycle_head = child->cg.cyc.head;
357 else
358 cycle_head = child;
359
360 if (!child->cg.parents)
361 {
362 printf (bsd_style_output
363 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
364 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
365 "", "", "", "", "", "");
366 return;
367 }
368
369 sort_parents (child);
370
371 for (arc = child->cg.parents; arc; arc = arc->next_parent)
372 {
373 parent = arc->parent;
374 if (child == parent || (child->cg.cyc.num != 0
375 && parent->cg.cyc.num == child->cg.cyc.num))
376 {
377 /* Selfcall or call among siblings. */
378 printf (bsd_style_output
379 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
380 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
381 "", "", "", "",
382 arc->count, "");
383 print_name (parent);
384 printf ("\n");
385 }
386 else
387 {
388 /* Regular parent of child. */
389 printf (bsd_style_output
390 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
391 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
392 "", "",
393 arc->time / hz, arc->child_time / hz,
394 arc->count, cycle_head->ncalls);
395 print_name (parent);
396 printf ("\n");
397 }
398 }
399 }
400
401
402 static void
403 sort_children (parent)
404 Sym *parent;
405 {
406 Arc *arc, *detached, sorted, *prev;
407
408 /* Unlink children from parent, then insertion sort back on to
409 sorted's children.
410 *arc the arc you have detached and are inserting.
411 *detached the rest of the arcs to be sorted.
412 sorted arc list onto which you insertion sort.
413 *prev arc before the arc you are comparing. */
414 sorted.next_child = 0;
415
416 for (arc = parent->cg.children; arc; arc = detached)
417 {
418 detached = arc->next_child;
419
420 /* Consider *arc as disconnected; insert it into sorted. */
421 for (prev = &sorted; prev->next_child; prev = prev->next_child)
422 {
423 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
424 break;
425 }
426
427 arc->next_child = prev->next_child;
428 prev->next_child = arc;
429 }
430
431 /* Reattach sorted children to parent. */
432 parent->cg.children = sorted.next_child;
433 }
434
435
436 static void
437 print_children (parent)
438 Sym *parent;
439 {
440 Sym *child;
441 Arc *arc;
442
443 sort_children (parent);
444 arc = parent->cg.children;
445
446 for (arc = parent->cg.children; arc; arc = arc->next_child)
447 {
448 child = arc->child;
449 if (child == parent || (child->cg.cyc.num != 0
450 && child->cg.cyc.num == parent->cg.cyc.num))
451 {
452 /* Self call or call to sibling. */
453 printf (bsd_style_output
454 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
455 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
456 "", "", "", "", arc->count, "");
457 print_name (child);
458 printf ("\n");
459 }
460 else
461 {
462 /* Regular child of parent. */
463 printf (bsd_style_output
464 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
465 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
466 "", "",
467 arc->time / hz, arc->child_time / hz,
468 arc->count, child->cg.cyc.head->ncalls);
469 print_name (child);
470 printf ("\n");
471 }
472 }
473 }
474
475
476 static void
477 print_line (np)
478 Sym *np;
479 {
480 char buf[BUFSIZ];
481
482 sprintf (buf, "[%d]", np->cg.index);
483 printf (bsd_style_output
484 ? "%-6.6s %5.1f %7.2f %11.2f"
485 : "%-6.6s %5.1f %7.2f %7.2f", buf,
486 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
487 np->cg.prop.self / hz, np->cg.prop.child / hz);
488
489 if ((np->ncalls + np->cg.self_calls) != 0)
490 {
491 printf (" %7lu", np->ncalls);
492
493 if (np->cg.self_calls != 0)
494 printf ("+%-7lu ", np->cg.self_calls);
495 else
496 printf (" %7.7s ", "");
497 }
498 else
499 {
500 printf (" %7.7s %7.7s ", "", "");
501 }
502
503 print_name (np);
504 printf ("\n");
505 }
506
507
508 /* Print dynamic call graph. */
509
510 void
511 cg_print (timesortsym)
512 Sym ** timesortsym;
513 {
514 unsigned int index;
515 Sym *parent;
516
517 if (print_descriptions && bsd_style_output)
518 bsd_callg_blurb (stdout);
519
520 print_header ();
521
522 for (index = 0; index < symtab.len + num_cycles; ++index)
523 {
524 parent = timesortsym[index];
525
526 if ((ignore_zeros && parent->ncalls == 0
527 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
528 && parent->cg.prop.child == 0)
529 || !parent->cg.print_flag
530 || (line_granularity && ! parent->is_func))
531 continue;
532
533 if (!parent->name && parent->cg.cyc.num != 0)
534 {
535 /* Cycle header. */
536 print_cycle (parent);
537 print_members (parent);
538 }
539 else
540 {
541 print_parents (parent);
542 print_line (parent);
543 print_children (parent);
544 }
545
546 if (bsd_style_output)
547 printf ("\n");
548
549 printf ("-----------------------------------------------\n");
550
551 if (bsd_style_output)
552 printf ("\n");
553 }
554
555 free (timesortsym);
556
557 if (print_descriptions && !bsd_style_output)
558 fsf_callg_blurb (stdout);
559 }
560
561
562 static int
563 cmp_name (left, right)
564 const PTR left;
565 const PTR right;
566 {
567 const Sym **npp1 = (const Sym **) left;
568 const Sym **npp2 = (const Sym **) right;
569
570 return strcmp ((*npp1)->name, (*npp2)->name);
571 }
572
573
574 void
575 cg_print_index ()
576 {
577 unsigned int index;
578 unsigned int nnames, todo, i, j;
579 int col, starting_col;
580 Sym **name_sorted_syms, *sym;
581 const char *filename;
582 char buf[20];
583 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
584
585 /* Now, sort regular function name
586 alphabetically to create an index. */
587 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
588
589 for (index = 0, nnames = 0; index < symtab.len; index++)
590 {
591 if (ignore_zeros && symtab.base[index].ncalls == 0
592 && symtab.base[index].hist.time == 0)
593 continue;
594
595 name_sorted_syms[nnames++] = &symtab.base[index];
596 }
597
598 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
599
600 for (index = 1, todo = nnames; index <= num_cycles; index++)
601 name_sorted_syms[todo++] = &cycle_header[index];
602
603 printf ("\f\n");
604 printf (_("Index by function name\n\n"));
605 index = (todo + 2) / 3;
606
607 for (i = 0; i < index; i++)
608 {
609 col = 0;
610 starting_col = 0;
611
612 for (j = i; j < todo; j += index)
613 {
614 sym = name_sorted_syms[j];
615
616 if (sym->cg.print_flag)
617 sprintf (buf, "[%d]", sym->cg.index);
618 else
619 sprintf (buf, "(%d)", sym->cg.index);
620
621 if (j < nnames)
622 {
623 if (bsd_style_output)
624 {
625 printf ("%6.6s %-19.19s", buf, sym->name);
626 }
627 else
628 {
629 col += strlen (buf);
630
631 for (; col < starting_col + 5; ++col)
632 putchar (' ');
633
634 printf (" %s ", buf);
635 col += print_name_only (sym);
636
637 if (!line_granularity && sym->is_static && sym->file)
638 {
639 filename = sym->file->name;
640
641 if (!print_path)
642 {
643 filename = strrchr (filename, '/');
644
645 if (filename)
646 ++filename;
647 else
648 filename = sym->file->name;
649 }
650
651 printf (" (%s)", filename);
652 col += strlen (filename) + 3;
653 }
654 }
655 }
656 else
657 {
658 if (bsd_style_output)
659 {
660 printf ("%6.6s ", buf);
661 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
662 printf ("%-19.19s", buf);
663 }
664 else
665 {
666 col += strlen (buf);
667 for (; col < starting_col + 5; ++col)
668 putchar (' ');
669 printf (" %s ", buf);
670 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
671 printf ("%s", buf);
672 col += strlen (buf);
673 }
674 }
675
676 starting_col += column_width;
677 }
678
679 printf ("\n");
680 }
681
682 free (name_sorted_syms);
683 }
684
685 /* Compare two arcs based on their usage counts.
686 We want to sort in descending order. */
687
688 static int
689 cmp_arc_count (left, right)
690 const PTR left;
691 const PTR right;
692 {
693 const Arc **npp1 = (const Arc **) left;
694 const Arc **npp2 = (const Arc **) right;
695
696 if ((*npp1)->count > (*npp2)->count)
697 return -1;
698 else if ((*npp1)->count < (*npp2)->count)
699 return 1;
700 else
701 return 0;
702 }
703
704 /* Compare two funtions based on their usage counts.
705 We want to sort in descending order. */
706
707 static int
708 cmp_fun_nuses (left, right)
709 const PTR left;
710 const PTR right;
711 {
712 const Sym **npp1 = (const Sym **) left;
713 const Sym **npp2 = (const Sym **) right;
714
715 if ((*npp1)->nuses > (*npp2)->nuses)
716 return -1;
717 else if ((*npp1)->nuses < (*npp2)->nuses)
718 return 1;
719 else
720 return 0;
721 }
722
723 /* Print a suggested function ordering based on the profiling data.
724
725 We perform 4 major steps when ordering functions:
726
727 * Group unused functions together and place them at the
728 end of the function order.
729
730 * Search the highest use arcs (those which account for 90% of
731 the total arc count) for functions which have several parents.
732
733 Group those with the most call sites together (currently the
734 top 1.25% which have at least five different call sites).
735
736 These are emitted at the start of the function order.
737
738 * Use a greedy placement algorithm to place functions which
739 occur in the top 99% of the arcs in the profile. Some provisions
740 are made to handle high usage arcs where the parent and/or
741 child has already been placed.
742
743 * Run the same greedy placement algorithm on the remaining
744 arcs to place the leftover functions.
745
746
747 The various "magic numbers" should (one day) be tuneable by command
748 line options. They were arrived at by benchmarking a few applications
749 with various values to see which values produced better overall function
750 orderings.
751
752 Of course, profiling errors, machine limitations (PA long calls), and
753 poor cutoff values for the placement algorithm may limit the usefullness
754 of the resulting function order. Improvements would be greatly appreciated.
755
756 Suggestions:
757
758 * Place the functions with many callers near the middle of the
759 list to reduce long calls.
760
761 * Propagate arc usage changes as functions are placed. Ie if
762 func1 and func2 are placed together, arcs to/from those arcs
763 to the same parent/child should be combined, then resort the
764 arcs to choose the next one.
765
766 * Implement some global positioning algorithm to place the
767 chains made by the greedy local positioning algorithm. Probably
768 by examining arcs which haven't been placed yet to tie two
769 chains together.
770
771 * Take a function's size and time into account in the algorithm;
772 size in particular is important on the PA (long calls). Placing
773 many small functions onto their own page may be wise.
774
775 * Use better profiling information; many published algorithms
776 are based on call sequences through time, rather than just
777 arc counts.
778
779 * Prodecure cloning could improve performance when a small number
780 of arcs account for most of the calls to a particular function.
781
782 * Use relocation information to avoid moving unused functions
783 completely out of the code stream; this would avoid severe lossage
784 when the profile data bears little resemblance to actual runs.
785
786 * Propagation of arc usages should also improve .o link line
787 ordering which shares the same arc placement algorithm with
788 the function ordering code (in fact it is a degenerate case
789 of function ordering). */
790
791 void
792 cg_print_function_ordering ()
793 {
794 unsigned long index, used, unused, scratch_index;
795 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
796 #ifdef __GNUC__
797 unsigned long long total_arcs, tmp_arcs_count;
798 #else
799 unsigned long total_arcs, tmp_arcs_count;
800 #endif
801 Sym **unused_syms, **used_syms, **scratch_syms;
802 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
803
804 index = 0;
805 used = 0;
806 unused = 0;
807 scratch_index = 0;
808 unplaced_arc_count = 0;
809 high_arc_count = 0;
810 scratch_arc_count = 0;
811
812 /* First group all the unused functions together. */
813 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
814 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
815 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
816 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
817 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
818 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
819
820 /* Walk through all the functions; mark those which are never
821 called as placed (we'll emit them as a group later). */
822 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
823 {
824 if (symtab.base[index].ncalls == 0)
825 {
826 /* Filter out gprof generated names. */
827 if (strcmp (symtab.base[index].name, "<locore>")
828 && strcmp (symtab.base[index].name, "<hicore>"))
829 {
830 unused_syms[unused++] = &symtab.base[index];
831 symtab.base[index].has_been_placed = 1;
832 }
833 }
834 else
835 {
836 used_syms[used++] = &symtab.base[index];
837 symtab.base[index].has_been_placed = 0;
838 symtab.base[index].next = 0;
839 symtab.base[index].prev = 0;
840 symtab.base[index].nuses = 0;
841 }
842 }
843
844 /* Sort the arcs from most used to least used. */
845 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
846
847 /* Compute the total arc count. Also mark arcs as unplaced.
848
849 Note we don't compensate for overflow if that happens!
850 Overflow is much less likely when this file is compiled
851 with GCC as it can double-wide integers via long long. */
852 total_arcs = 0;
853 for (index = 0; index < numarcs; index++)
854 {
855 total_arcs += arcs[index]->count;
856 arcs[index]->has_been_placed = 0;
857 }
858
859 /* We want to pull out those functions which are referenced
860 by many highly used arcs and emit them as a group. This
861 could probably use some tuning. */
862 tmp_arcs_count = 0;
863 for (index = 0; index < numarcs; index++)
864 {
865 tmp_arcs_count += arcs[index]->count;
866
867 /* Count how many times each parent and child are used up
868 to our threshhold of arcs (90%). */
869 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
870 break;
871
872 arcs[index]->child->nuses++;
873 }
874
875 /* Now sort a temporary symbol table based on the number of
876 times each function was used in the highest used arcs. */
877 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
878 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
879
880 /* Now pick out those symbols we're going to emit as
881 a group. We take up to 1.25% of the used symbols. */
882 for (index = 0; index < used / 80; index++)
883 {
884 Sym *sym = scratch_syms[index];
885 Arc *arc;
886
887 /* If we hit symbols that aren't used from many call sites,
888 then we can quit. We choose five as the low limit for
889 no particular reason. */
890 if (sym->nuses == 5)
891 break;
892
893 /* We're going to need the arcs between these functions.
894 Unfortunately, we don't know all these functions
895 until we're done. So we keep track of all the arcs
896 to the functions we care about, then prune out those
897 which are uninteresting.
898
899 An interesting variation would be to quit when we found
900 multi-call site functions which account for some percentage
901 of the arcs. */
902 arc = sym->cg.children;
903
904 while (arc)
905 {
906 if (arc->parent != arc->child)
907 scratch_arcs[scratch_arc_count++] = arc;
908 arc->has_been_placed = 1;
909 arc = arc->next_child;
910 }
911
912 arc = sym->cg.parents;
913
914 while (arc)
915 {
916 if (arc->parent != arc->child)
917 scratch_arcs[scratch_arc_count++] = arc;
918 arc->has_been_placed = 1;
919 arc = arc->next_parent;
920 }
921
922 /* Keep track of how many symbols we're going to place. */
923 scratch_index = index;
924
925 /* A lie, but it makes identifying
926 these functions easier later. */
927 sym->has_been_placed = 1;
928 }
929
930 /* Now walk through the temporary arcs and copy
931 those we care about into the high arcs array. */
932 for (index = 0; index < scratch_arc_count; index++)
933 {
934 Arc *arc = scratch_arcs[index];
935
936 /* If this arc refers to highly used functions, then
937 then we want to keep it. */
938 if (arc->child->has_been_placed
939 && arc->parent->has_been_placed)
940 {
941 high_arcs[high_arc_count++] = scratch_arcs[index];
942
943 /* We need to turn of has_been_placed since we're going to
944 use the main arc placement algorithm on these arcs. */
945 arc->child->has_been_placed = 0;
946 arc->parent->has_been_placed = 0;
947 }
948 }
949
950 /* Dump the multi-site high usage functions which are not
951 going to be ordered by the main ordering algorithm. */
952 for (index = 0; index < scratch_index; index++)
953 {
954 if (scratch_syms[index]->has_been_placed)
955 printf ("%s\n", scratch_syms[index]->name);
956 }
957
958 /* Now we can order the multi-site high use
959 functions based on the arcs between them. */
960 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
961 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
962 unplaced_arcs, &unplaced_arc_count);
963
964 /* Order and dump the high use functions left,
965 these typically have only a few call sites. */
966 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
967 unplaced_arcs, &unplaced_arc_count);
968
969 /* Now place the rarely used functions. */
970 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
971 scratch_arcs, &scratch_arc_count);
972
973 /* Output any functions not emitted by the order_and_dump calls. */
974 for (index = 0; index < used; index++)
975 if (used_syms[index]->has_been_placed == 0)
976 printf("%s\n", used_syms[index]->name);
977
978 /* Output the unused functions. */
979 for (index = 0; index < unused; index++)
980 printf("%s\n", unused_syms[index]->name);
981
982 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
983 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
984 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
985 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
986 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
987 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
988
989 free (unused_syms);
990 free (used_syms);
991 free (scratch_syms);
992 free (high_arcs);
993 free (scratch_arcs);
994 free (unplaced_arcs);
995 }
996
997 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
998 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
999
1000 If ALL is nonzero, then place all functions referenced by THE_ARCS,
1001 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
1002
1003 #define MOST 0.99
1004 static void
1005 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
1006 unplaced_arcs, unplaced_arc_count)
1007 Arc **the_arcs;
1008 unsigned long arc_count;
1009 int all;
1010 Arc **unplaced_arcs;
1011 unsigned long *unplaced_arc_count;
1012 {
1013 #ifdef __GNUC__
1014 unsigned long long tmp_arcs, total_arcs;
1015 #else
1016 unsigned long tmp_arcs, total_arcs;
1017 #endif
1018 unsigned int index;
1019
1020 /* If needed, compute the total arc count.
1021
1022 Note we don't compensate for overflow if that happens! */
1023 if (! all)
1024 {
1025 total_arcs = 0;
1026 for (index = 0; index < arc_count; index++)
1027 total_arcs += the_arcs[index]->count;
1028 }
1029 else
1030 total_arcs = 0;
1031
1032 tmp_arcs = 0;
1033
1034 for (index = 0; index < arc_count; index++)
1035 {
1036 Sym *sym1, *sym2;
1037 Sym *child, *parent;
1038
1039 tmp_arcs += the_arcs[index]->count;
1040
1041 /* Ignore this arc if it's already been placed. */
1042 if (the_arcs[index]->has_been_placed)
1043 continue;
1044
1045 child = the_arcs[index]->child;
1046 parent = the_arcs[index]->parent;
1047
1048 /* If we're not using all arcs, and this is a rarely used
1049 arc, then put it on the unplaced_arc list. Similarly
1050 if both the parent and child of this arc have been placed. */
1051 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1052 || child->has_been_placed || parent->has_been_placed)
1053 {
1054 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1055 continue;
1056 }
1057
1058 /* If all slots in the parent and child are full, then there isn't
1059 anything we can do right now. We'll place this arc on the
1060 unplaced arc list in the hope that a global positioning
1061 algorithm can use it to place function chains. */
1062 if (parent->next && parent->prev && child->next && child->prev)
1063 {
1064 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1065 continue;
1066 }
1067
1068 /* If the parent is unattached, then find the closest
1069 place to attach it onto child's chain. Similarly
1070 for the opposite case. */
1071 if (!parent->next && !parent->prev)
1072 {
1073 int next_count = 0;
1074 int prev_count = 0;
1075 Sym *prev = child;
1076 Sym *next = child;
1077
1078 /* Walk to the beginning and end of the child's chain. */
1079 while (next->next)
1080 {
1081 next = next->next;
1082 next_count++;
1083 }
1084
1085 while (prev->prev)
1086 {
1087 prev = prev->prev;
1088 prev_count++;
1089 }
1090
1091 /* Choose the closest. */
1092 child = next_count < prev_count ? next : prev;
1093 }
1094 else if (! child->next && !child->prev)
1095 {
1096 int next_count = 0;
1097 int prev_count = 0;
1098 Sym *prev = parent;
1099 Sym *next = parent;
1100
1101 while (next->next)
1102 {
1103 next = next->next;
1104 next_count++;
1105 }
1106
1107 while (prev->prev)
1108 {
1109 prev = prev->prev;
1110 prev_count++;
1111 }
1112
1113 parent = prev_count < next_count ? prev : next;
1114 }
1115 else
1116 {
1117 /* Couldn't find anywhere to attach the functions,
1118 put the arc on the unplaced arc list. */
1119 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1120 continue;
1121 }
1122
1123 /* Make sure we don't tie two ends together. */
1124 sym1 = parent;
1125 if (sym1->next)
1126 while (sym1->next)
1127 sym1 = sym1->next;
1128 else
1129 while (sym1->prev)
1130 sym1 = sym1->prev;
1131
1132 sym2 = child;
1133 if (sym2->next)
1134 while (sym2->next)
1135 sym2 = sym2->next;
1136 else
1137 while (sym2->prev)
1138 sym2 = sym2->prev;
1139
1140 if (sym1 == child
1141 && sym2 == parent)
1142 {
1143 /* This would tie two ends together. */
1144 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1145 continue;
1146 }
1147
1148 if (parent->next)
1149 {
1150 /* Must attach to the parent's prev field. */
1151 if (! child->next)
1152 {
1153 /* parent-prev and child-next */
1154 parent->prev = child;
1155 child->next = parent;
1156 the_arcs[index]->has_been_placed = 1;
1157 }
1158 }
1159 else if (parent->prev)
1160 {
1161 /* Must attach to the parent's next field. */
1162 if (! child->prev)
1163 {
1164 /* parent-next and child-prev */
1165 parent->next = child;
1166 child->prev = parent;
1167 the_arcs[index]->has_been_placed = 1;
1168 }
1169 }
1170 else
1171 {
1172 /* Can attach to either field in the parent, depends
1173 on where we've got space in the child. */
1174 if (child->prev)
1175 {
1176 /* parent-prev and child-next. */
1177 parent->prev = child;
1178 child->next = parent;
1179 the_arcs[index]->has_been_placed = 1;
1180 }
1181 else
1182 {
1183 /* parent-next and child-prev. */
1184 parent->next = child;
1185 child->prev = parent;
1186 the_arcs[index]->has_been_placed = 1;
1187 }
1188 }
1189 }
1190
1191 /* Dump the chains of functions we've made. */
1192 for (index = 0; index < arc_count; index++)
1193 {
1194 Sym *sym;
1195 if (the_arcs[index]->parent->has_been_placed
1196 || the_arcs[index]->child->has_been_placed)
1197 continue;
1198
1199 sym = the_arcs[index]->parent;
1200
1201 /* If this symbol isn't attached to any other
1202 symbols, then we've got a rarely used arc.
1203
1204 Skip it for now, we'll deal with them later. */
1205 if (sym->next == NULL
1206 && sym->prev == NULL)
1207 continue;
1208
1209 /* Get to the start of this chain. */
1210 while (sym->prev)
1211 sym = sym->prev;
1212
1213 while (sym)
1214 {
1215 /* Mark it as placed. */
1216 sym->has_been_placed = 1;
1217 printf ("%s\n", sym->name);
1218 sym = sym->next;
1219 }
1220 }
1221
1222 /* If we want to place all the arcs, then output
1223 those which weren't placed by the main algorithm. */
1224 if (all)
1225 for (index = 0; index < arc_count; index++)
1226 {
1227 Sym *sym;
1228 if (the_arcs[index]->parent->has_been_placed
1229 || the_arcs[index]->child->has_been_placed)
1230 continue;
1231
1232 sym = the_arcs[index]->parent;
1233
1234 sym->has_been_placed = 1;
1235 printf ("%s\n", sym->name);
1236 }
1237 }
1238
1239 /* Print a suggested .o ordering for files on a link line based
1240 on profiling information. This uses the function placement
1241 code for the bulk of its work. */
1242
1243 void
1244 cg_print_file_ordering ()
1245 {
1246 unsigned long scratch_arc_count, index;
1247 Arc **scratch_arcs;
1248 char *last;
1249
1250 scratch_arc_count = 0;
1251
1252 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1253 for (index = 0; index < numarcs; index++)
1254 {
1255 if (! arcs[index]->parent->mapped
1256 || ! arcs[index]->child->mapped)
1257 arcs[index]->has_been_placed = 1;
1258 }
1259
1260 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1261 scratch_arcs, &scratch_arc_count);
1262
1263 /* Output .o's not handled by the main placement algorithm. */
1264 for (index = 0; index < symtab.len; index++)
1265 {
1266 if (symtab.base[index].mapped
1267 && ! symtab.base[index].has_been_placed)
1268 printf ("%s\n", symtab.base[index].name);
1269 }
1270
1271 /* Now output any .o's that didn't have any text symbols. */
1272 last = NULL;
1273 for (index = 0; index < symbol_map_count; index++)
1274 {
1275 unsigned int index2;
1276
1277 /* Don't bother searching if this symbol
1278 is the same as the previous one. */
1279 if (last && !strcmp (last, symbol_map[index].file_name))
1280 continue;
1281
1282 for (index2 = 0; index2 < symtab.len; index2++)
1283 {
1284 if (! symtab.base[index2].mapped)
1285 continue;
1286
1287 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1288 break;
1289 }
1290
1291 /* If we didn't find it in the symbol table, then it must
1292 be a .o with no text symbols. Output it last. */
1293 if (index2 == symtab.len)
1294 printf ("%s\n", symbol_map[index].file_name);
1295 last = symbol_map[index].file_name;
1296 }
1297 }
This page took 0.058128 seconds and 4 git commands to generate.