Tue Feb 3 14:25:25 1998 Brent Baccala <baccala@freesoft.org>
[deliverable/binutils-gdb.git] / gprof / cg_print.c
1 #include "libiberty.h"
2 #include "cg_arcs.h"
3 #include "cg_print.h"
4 #include "hist.h"
5 #include "utils.h"
6
7 /*
8 * Return value of comparison functions used to sort tables:
9 */
10 #define LESSTHAN -1
11 #define EQUALTO 0
12 #define GREATERTHAN 1
13
14 static void order_and_dump_functions_by_arcs PARAMS ((Arc **, unsigned long,
15 int, Arc **,
16 unsigned long *));
17 /* declarations of automatically generated functions to output blurbs: */
18 extern void bsd_callg_blurb PARAMS ((FILE * fp));
19 extern void fsf_callg_blurb PARAMS ((FILE * fp));
20
21 double print_time = 0.0;
22
23
24 static void
25 DEFUN_VOID (print_header)
26 {
27 if (first_output)
28 {
29 first_output = FALSE;
30 }
31 else
32 {
33 printf ("\f\n");
34 }
35 if (!bsd_style_output)
36 {
37 if (print_descriptions)
38 {
39 printf ("\t\t Call graph (explanation follows)\n\n");
40 }
41 else
42 {
43 printf ("\t\t\tCall graph\n\n");
44 }
45 }
46 printf ("\ngranularity: each sample hit covers %ld byte(s)",
47 (long) hist_scale * sizeof (UNIT));
48 if (print_time > 0.0)
49 {
50 printf (" for %.2f%% of %.2f seconds\n\n",
51 100.0 / print_time, print_time / hz);
52 }
53 else
54 {
55 printf (" no time propagated\n\n");
56 /*
57 * This doesn't hurt, since all the numerators will be 0.0:
58 */
59 print_time = 1.0;
60 }
61 if (bsd_style_output)
62 {
63 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
64 "", "", "", "", "called", "total", "parents");
65 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
66 "index", "%time", "self", "descendents",
67 "called", "self", "name", "index");
68 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
69 "", "", "", "", "called", "total", "children");
70 printf ("\n");
71 }
72 else
73 {
74 printf ("index %% time self children called name\n");
75 }
76 }
77
78
79 /*
80 * Print a cycle header.
81 */
82 static void
83 DEFUN (print_cycle, (cyc), Sym * cyc)
84 {
85 char buf[BUFSIZ];
86
87 sprintf (buf, "[%d]", cyc->cg.index);
88 printf (bsd_style_output
89 ? "%-6.6s %5.1f %7.2f %11.2f %7d"
90 : "%-6.6s %5.1f %7.2f %7.2f %7d", buf,
91 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
92 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
93 if (cyc->cg.self_calls != 0)
94 {
95 printf ("+%-7d", cyc->cg.self_calls);
96 }
97 else
98 {
99 printf (" %7.7s", "");
100 }
101 printf (" <cycle %d as a whole> [%d]\n", cyc->cg.cyc.num, cyc->cg.index);
102 }
103
104
105 /*
106 * Compare LEFT and RIGHT membmer. Major comparison key is
107 * CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.
108 */
109 static int
110 DEFUN (cmp_member, (left, right), Sym * left AND Sym * right)
111 {
112 double left_time = left->cg.prop.self + left->cg.prop.child;
113 double right_time = right->cg.prop.self + right->cg.prop.child;
114 long left_calls = left->ncalls + left->cg.self_calls;
115 long right_calls = right->ncalls + right->cg.self_calls;
116
117 if (left_time > right_time)
118 {
119 return GREATERTHAN;
120 }
121 if (left_time < right_time)
122 {
123 return LESSTHAN;
124 }
125
126 if (left_calls > right_calls)
127 {
128 return GREATERTHAN;
129 }
130 if (left_calls < right_calls)
131 {
132 return LESSTHAN;
133 }
134 return EQUALTO;
135 }
136
137
138 /*
139 * Sort members of a cycle.
140 */
141 static void
142 DEFUN (sort_members, (cyc), Sym * cyc)
143 {
144 Sym *todo, *doing, *prev;
145 /*
146 * Detach cycle members from cyclehead, and insertion sort them
147 * back on.
148 */
149 todo = cyc->cg.cyc.next;
150 cyc->cg.cyc.next = 0;
151 for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
152 {
153 todo = doing->cg.cyc.next;
154 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
155 {
156 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
157 {
158 break;
159 }
160 }
161 doing->cg.cyc.next = prev->cg.cyc.next;
162 prev->cg.cyc.next = doing;
163 }
164 }
165
166
167 /*
168 * Print the members of a cycle.
169 */
170 static void
171 DEFUN (print_members, (cyc), Sym * cyc)
172 {
173 Sym *member;
174
175 sort_members (cyc);
176 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
177 {
178 printf (bsd_style_output
179 ? "%6.6s %5.5s %7.2f %11.2f %7d"
180 : "%6.6s %5.5s %7.2f %7.2f %7d",
181 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
182 member->ncalls);
183 if (member->cg.self_calls != 0)
184 {
185 printf ("+%-7d", member->cg.self_calls);
186 }
187 else
188 {
189 printf (" %7.7s", "");
190 }
191 printf (" ");
192 print_name (member);
193 printf ("\n");
194 }
195 }
196
197
198 /*
199 * Compare two arcs to/from the same child/parent.
200 * - if one arc is a self arc, it's least.
201 * - if one arc is within a cycle, it's less than.
202 * - if both arcs are within a cycle, compare arc counts.
203 * - if neither arc is within a cycle, compare with
204 * time + child_time as major key
205 * arc count as minor key
206 */
207 static int
208 DEFUN (cmp_arc, (left, right), Arc * left AND Arc * right)
209 {
210 Sym *left_parent = left->parent;
211 Sym *left_child = left->child;
212 Sym *right_parent = right->parent;
213 Sym *right_child = right->child;
214 double left_time, right_time;
215
216 DBG (TIMEDEBUG,
217 printf ("[cmp_arc] ");
218 print_name (left_parent);
219 printf (" calls ");
220 print_name (left_child);
221 printf (" %f + %f %d/%d\n", left->time, left->child_time,
222 left->count, left_child->ncalls);
223 printf ("[cmp_arc] ");
224 print_name (right_parent);
225 printf (" calls ");
226 print_name (right_child);
227 printf (" %f + %f %d/%d\n", right->time, right->child_time,
228 right->count, right_child->ncalls);
229 printf ("\n");
230 );
231 if (left_parent == left_child)
232 {
233 return LESSTHAN; /* left is a self call */
234 }
235 if (right_parent == right_child)
236 {
237 return GREATERTHAN; /* right is a self call */
238 }
239
240 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
241 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
242 {
243 /* left is a call within a cycle */
244 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
245 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
246 {
247 /* right is a call within the cycle, too */
248 if (left->count < right->count)
249 {
250 return LESSTHAN;
251 }
252 if (left->count > right->count)
253 {
254 return GREATERTHAN;
255 }
256 return EQUALTO;
257 }
258 else
259 {
260 /* right isn't a call within the cycle */
261 return LESSTHAN;
262 }
263 }
264 else
265 {
266 /* left isn't a call within a cycle */
267 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
268 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
269 {
270 /* right is a call within a cycle */
271 return GREATERTHAN;
272 }
273 else
274 {
275 /* neither is a call within a cycle */
276 left_time = left->time + left->child_time;
277 right_time = right->time + right->child_time;
278 if (left_time < right_time)
279 {
280 return LESSTHAN;
281 }
282 if (left_time > right_time)
283 {
284 return GREATERTHAN;
285 }
286 if (left->count < right->count)
287 {
288 return LESSTHAN;
289 }
290 if (left->count > right->count)
291 {
292 return GREATERTHAN;
293 }
294 return EQUALTO;
295 }
296 }
297 }
298
299
300 static void
301 DEFUN (sort_parents, (child), Sym * child)
302 {
303 Arc *arc, *detached, sorted, *prev;
304
305 /*
306 * Unlink parents from child, then insertion sort back on to
307 * sorted's parents.
308 * *arc the arc you have detached and are inserting.
309 * *detached the rest of the arcs to be sorted.
310 * sorted arc list onto which you insertion sort.
311 * *prev arc before the arc you are comparing.
312 */
313 sorted.next_parent = 0;
314 for (arc = child->cg.parents; arc; arc = detached)
315 {
316 detached = arc->next_parent;
317
318 /* consider *arc as disconnected; insert it into sorted: */
319 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
320 {
321 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
322 {
323 break;
324 }
325 }
326 arc->next_parent = prev->next_parent;
327 prev->next_parent = arc;
328 }
329
330 /* reattach sorted arcs to child: */
331 child->cg.parents = sorted.next_parent;
332 }
333
334
335 static void
336 DEFUN (print_parents, (child), Sym * child)
337 {
338 Sym *parent;
339 Arc *arc;
340 Sym *cycle_head;
341
342 if (child->cg.cyc.head != 0)
343 {
344 cycle_head = child->cg.cyc.head;
345 }
346 else
347 {
348 cycle_head = child;
349 }
350 if (!child->cg.parents)
351 {
352 printf (bsd_style_output
353 ? "%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n"
354 : "%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n",
355 "", "", "", "", "", "");
356 return;
357 }
358 sort_parents (child);
359 for (arc = child->cg.parents; arc; arc = arc->next_parent)
360 {
361 parent = arc->parent;
362 if (child == parent || (child->cg.cyc.num != 0
363 && parent->cg.cyc.num == child->cg.cyc.num))
364 {
365 /* selfcall or call among siblings: */
366 printf (bsd_style_output
367 ? "%6.6s %5.5s %7.7s %11.11s %7d %7.7s "
368 : "%6.6s %5.5s %7.7s %7.7s %7d %7.7s ",
369 "", "", "", "",
370 arc->count, "");
371 print_name (parent);
372 printf ("\n");
373 }
374 else
375 {
376 /* regular parent of child: */
377 printf (bsd_style_output
378 ? "%6.6s %5.5s %7.2f %11.2f %7d/%-7d "
379 : "%6.6s %5.5s %7.2f %7.2f %7d/%-7d ",
380 "", "",
381 arc->time / hz, arc->child_time / hz,
382 arc->count, cycle_head->ncalls);
383 print_name (parent);
384 printf ("\n");
385 }
386 }
387 }
388
389
390 static void
391 DEFUN (sort_children, (parent), Sym * parent)
392 {
393 Arc *arc, *detached, sorted, *prev;
394 /*
395 * Unlink children from parent, then insertion sort back on to
396 * sorted's children.
397 * *arc the arc you have detached and are inserting.
398 * *detached the rest of the arcs to be sorted.
399 * sorted arc list onto which you insertion sort.
400 * *prev arc before the arc you are comparing.
401 */
402 sorted.next_child = 0;
403 for (arc = parent->cg.children; arc; arc = detached)
404 {
405 detached = arc->next_child;
406
407 /* consider *arc as disconnected; insert it into sorted: */
408 for (prev = &sorted; prev->next_child; prev = prev->next_child)
409 {
410 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
411 {
412 break;
413 }
414 }
415 arc->next_child = prev->next_child;
416 prev->next_child = arc;
417 }
418
419 /* reattach sorted children to parent: */
420 parent->cg.children = sorted.next_child;
421 }
422
423
424 static void
425 DEFUN (print_children, (parent), Sym * parent)
426 {
427 Sym *child;
428 Arc *arc;
429
430 sort_children (parent);
431 arc = parent->cg.children;
432 for (arc = parent->cg.children; arc; arc = arc->next_child)
433 {
434 child = arc->child;
435 if (child == parent || (child->cg.cyc.num != 0
436 && child->cg.cyc.num == parent->cg.cyc.num))
437 {
438 /* self call or call to sibling: */
439 printf (bsd_style_output
440 ? "%6.6s %5.5s %7.7s %11.11s %7d %7.7s "
441 : "%6.6s %5.5s %7.7s %7.7s %7d %7.7s ",
442 "", "", "", "", arc->count, "");
443 print_name (child);
444 printf ("\n");
445 }
446 else
447 {
448 /* regular child of parent: */
449 printf (bsd_style_output
450 ? "%6.6s %5.5s %7.2f %11.2f %7d/%-7d "
451 : "%6.6s %5.5s %7.2f %7.2f %7d/%-7d ",
452 "", "",
453 arc->time / hz, arc->child_time / hz,
454 arc->count, child->cg.cyc.head->ncalls);
455 print_name (child);
456 printf ("\n");
457 }
458 }
459 }
460
461
462 static void
463 DEFUN (print_line, (np), Sym * np)
464 {
465 char buf[BUFSIZ];
466
467 sprintf (buf, "[%d]", np->cg.index);
468 printf (bsd_style_output
469 ? "%-6.6s %5.1f %7.2f %11.2f"
470 : "%-6.6s %5.1f %7.2f %7.2f", buf,
471 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
472 np->cg.prop.self / hz, np->cg.prop.child / hz);
473 if ((np->ncalls + np->cg.self_calls) != 0)
474 {
475 printf (" %7d", np->ncalls);
476 if (np->cg.self_calls != 0)
477 {
478 printf ("+%-7d ", np->cg.self_calls);
479 }
480 else
481 {
482 printf (" %7.7s ", "");
483 }
484 }
485 else
486 {
487 printf (" %7.7s %7.7s ", "", "");
488 }
489 print_name (np);
490 printf ("\n");
491 }
492
493
494 /*
495 * Print dynamic call graph.
496 */
497 void
498 DEFUN (cg_print, (timesortsym), Sym ** timesortsym)
499 {
500 int index;
501 Sym *parent;
502
503 if (print_descriptions && bsd_style_output)
504 {
505 bsd_callg_blurb (stdout);
506 }
507
508 print_header ();
509
510 for (index = 0; index < symtab.len + num_cycles; ++index)
511 {
512 parent = timesortsym[index];
513 if ((ignore_zeros && parent->ncalls == 0
514 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
515 && parent->cg.prop.child == 0)
516 || !parent->cg.print_flag
517 || (line_granularity && ! parent->is_func))
518 {
519 continue;
520 }
521 if (!parent->name && parent->cg.cyc.num != 0)
522 {
523 /* cycle header: */
524 print_cycle (parent);
525 print_members (parent);
526 }
527 else
528 {
529 print_parents (parent);
530 print_line (parent);
531 print_children (parent);
532 }
533 if (bsd_style_output)
534 printf ("\n");
535 printf ("-----------------------------------------------\n");
536 if (bsd_style_output)
537 printf ("\n");
538 }
539 free (timesortsym);
540 if (print_descriptions && !bsd_style_output)
541 {
542 fsf_callg_blurb (stdout);
543 }
544 }
545
546
547 static int
548 DEFUN (cmp_name, (left, right), const PTR left AND const PTR right)
549 {
550 const Sym **npp1 = (const Sym **) left;
551 const Sym **npp2 = (const Sym **) right;
552
553 return strcmp ((*npp1)->name, (*npp2)->name);
554 }
555
556
557 void
558 DEFUN_VOID (cg_print_index)
559 {
560 int index, nnames, todo, i, j, col, starting_col;
561 Sym **name_sorted_syms, *sym;
562 const char *filename;
563 char buf[20];
564 int column_width = (output_width - 1) / 3; /* don't write in last col! */
565 /*
566 * Now, sort regular function name alphabetically to create an
567 * index:
568 */
569 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
570 for (index = 0, nnames = 0; index < symtab.len; index++)
571 {
572 if (ignore_zeros && symtab.base[index].ncalls == 0
573 && symtab.base[index].hist.time == 0)
574 {
575 continue;
576 }
577 name_sorted_syms[nnames++] = &symtab.base[index];
578 }
579 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
580 for (index = 1, todo = nnames; index <= num_cycles; index++)
581 {
582 name_sorted_syms[todo++] = &cycle_header[index];
583 }
584 printf ("\f\nIndex by function name\n\n");
585 index = (todo + 2) / 3;
586 for (i = 0; i < index; i++)
587 {
588 col = 0;
589 starting_col = 0;
590 for (j = i; j < todo; j += index)
591 {
592 sym = name_sorted_syms[j];
593 if (sym->cg.print_flag)
594 {
595 sprintf (buf, "[%d]", sym->cg.index);
596 }
597 else
598 {
599 sprintf (buf, "(%d)", sym->cg.index);
600 }
601 if (j < nnames)
602 {
603 if (bsd_style_output)
604 {
605 printf ("%6.6s %-19.19s", buf, sym->name);
606 }
607 else
608 {
609 col += strlen (buf);
610 for (; col < starting_col + 5; ++col)
611 {
612 putchar (' ');
613 }
614 printf (" %s ", buf);
615 col += print_name_only (sym);
616 if (!line_granularity && sym->is_static && sym->file)
617 {
618 filename = sym->file->name;
619 if (!print_path)
620 {
621 filename = strrchr (filename, '/');
622 if (filename)
623 {
624 ++filename;
625 }
626 else
627 {
628 filename = sym->file->name;
629 }
630 }
631 printf (" (%s)", filename);
632 col += strlen (filename) + 3;
633 }
634 }
635 }
636 else
637 {
638 if (bsd_style_output)
639 {
640 printf ("%6.6s ", buf);
641 sprintf (buf, "<cycle %d>", sym->cg.cyc.num);
642 printf ("%-19.19s", buf);
643 }
644 else
645 {
646 col += strlen (buf);
647 for (; col < starting_col + 5; ++col)
648 putchar (' ');
649 printf (" %s ", buf);
650 sprintf (buf, "<cycle %d>", sym->cg.cyc.num);
651 printf ("%s", buf);
652 col += strlen (buf);
653 }
654 }
655 starting_col += column_width;
656 }
657 printf ("\n");
658 }
659 free (name_sorted_syms);
660 }
661
662 /* Compare two arcs based on their usage counts. We want to sort
663 in descending order. */
664 static int
665 DEFUN (cmp_arc_count, (left, right), const PTR left AND const PTR right)
666 {
667 const Arc **npp1 = (const Arc **) left;
668 const Arc **npp2 = (const Arc **) right;
669
670 if ((*npp1)->count > (*npp2)->count)
671 return -1;
672 else if ((*npp1)->count < (*npp2)->count)
673 return 1;
674 else
675 return 0;
676 }
677
678 /* Compare two funtions based on their usage counts. We want to sort
679 in descending order. */
680 static int
681 DEFUN (cmp_fun_nuses, (left, right), const PTR left AND const PTR right)
682 {
683 const Sym **npp1 = (const Sym **) left;
684 const Sym **npp2 = (const Sym **) right;
685
686 if ((*npp1)->nuses > (*npp2)->nuses)
687 return -1;
688 else if ((*npp1)->nuses < (*npp2)->nuses)
689 return 1;
690 else
691 return 0;
692 }
693
694 /* Print a suggested function ordering based on the profiling data.
695
696 We perform 4 major steps when ordering functions:
697
698 * Group unused functions together and place them at the
699 end of the function order.
700
701 * Search the highest use arcs (those which account for 90% of
702 the total arc count) for functions which have several parents.
703
704 Group those with the most call sites together (currently the
705 top 1.25% which have at least five different call sites).
706
707 These are emitted at the start of the function order.
708
709 * Use a greedy placement algorithm to place functions which
710 occur in the top 99% of the arcs in the profile. Some provisions
711 are made to handle high usage arcs where the parent and/or
712 child has already been placed.
713
714 * Run the same greedy placement algorithm on the remaining
715 arcs to place the leftover functions.
716
717
718 The various "magic numbers" should (one day) be tuneable by command
719 line options. They were arrived at by benchmarking a few applications
720 with various values to see which values produced better overall function
721 orderings.
722
723 Of course, profiling errors, machine limitations (PA long calls), and
724 poor cutoff values for the placement algorithm may limit the usefullness
725 of the resulting function order. Improvements would be greatly appreciated.
726
727 Suggestions:
728
729 * Place the functions with many callers near the middle of the
730 list to reduce long calls.
731
732 * Propagate arc usage changes as functions are placed. Ie if
733 func1 and func2 are placed together, arcs to/from those arcs
734 to the same parent/child should be combined, then resort the
735 arcs to choose the next one.
736
737 * Implement some global positioning algorithm to place the
738 chains made by the greedy local positioning algorithm. Probably
739 by examining arcs which haven't been placed yet to tie two
740 chains together.
741
742 * Take a function's size and time into account in the algorithm;
743 size in particular is important on the PA (long calls). Placing
744 many small functions onto their own page may be wise.
745
746 * Use better profiling information; many published algorithms
747 are based on call sequences through time, rather than just
748 arc counts.
749
750 * Prodecure cloning could improve performance when a small number
751 of arcs account for most of the calls to a particular function.
752
753 * Use relocation information to avoid moving unused functions
754 completely out of the code stream; this would avoid severe lossage
755 when the profile data bears little resemblance to actual runs.
756
757 * Propagation of arc usages should also improve .o link line
758 ordering which shares the same arc placement algorithm with
759 the function ordering code (in fact it is a degenerate case
760 of function ordering). */
761
762 void
763 DEFUN_VOID (cg_print_function_ordering)
764 {
765 unsigned long index, used, unused, scratch_index;
766 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
767 #ifdef __GNUC__
768 unsigned long long total_arcs, tmp_arcs_count;
769 #else
770 unsigned long total_arcs, tmp_arcs_count;
771 #endif
772 Sym **unused_syms, **used_syms, **scratch_syms;
773 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
774
775 index = 0;
776 used = 0;
777 unused = 0;
778 scratch_index = 0;
779 unplaced_arc_count = 0;
780 high_arc_count = 0;
781 scratch_arc_count = 0;
782
783 /* First group all the unused functions together. */
784 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
785 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
786 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
787 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
788 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
789 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
790
791 /* Walk through all the functions; mark those which are never
792 called as placed (we'll emit them as a group later). */
793 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
794 {
795 if (symtab.base[index].ncalls == 0)
796 {
797 /* Filter out gprof generated names. */
798 if (strcmp (symtab.base[index].name, "<locore>")
799 && strcmp (symtab.base[index].name, "<hicore>"))
800 {
801 unused_syms[unused++] = &symtab.base[index];
802 symtab.base[index].has_been_placed = 1;
803 }
804 }
805 else
806 {
807 used_syms[used++] = &symtab.base[index];
808 symtab.base[index].has_been_placed = 0;
809 symtab.base[index].next = 0;
810 symtab.base[index].prev = 0;
811 symtab.base[index].nuses = 0;
812 }
813 }
814
815 /* Sort the arcs from most used to least used. */
816 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
817
818 /* Compute the total arc count. Also mark arcs as unplaced.
819
820 Note we don't compensate for overflow if that happens!
821 Overflow is much less likely when this file is compiled
822 with GCC as it can double-wide integers via long long. */
823 total_arcs = 0;
824 for (index = 0; index < numarcs; index++)
825 {
826 total_arcs += arcs[index]->count;
827 arcs[index]->has_been_placed = 0;
828 }
829
830 /* We want to pull out those functions which are referenced
831 by many highly used arcs and emit them as a group. This
832 could probably use some tuning. */
833 tmp_arcs_count = 0;
834 for (index = 0; index < numarcs; index++)
835 {
836 tmp_arcs_count += arcs[index]->count;
837
838 /* Count how many times each parent and child are used up
839 to our threshhold of arcs (90%). */
840 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
841 break;
842
843 arcs[index]->child->nuses++;
844 }
845
846 /* Now sort a temporary symbol table based on the number of
847 times each function was used in the highest used arcs. */
848 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
849 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
850
851 /* Now pick out those symbols we're going to emit as
852 a group. We take up to 1.25% of the used symbols. */
853 for (index = 0; index < used / 80; index++)
854 {
855 Sym *sym = scratch_syms[index];
856 Arc *arc;
857
858 /* If we hit symbols that aren't used from many call sites,
859 then we can quit. We choose five as the low limit for
860 no particular reason. */
861 if (sym->nuses == 5)
862 break;
863
864 /* We're going to need the arcs between these functions.
865 Unfortunately, we don't know all these functions
866 until we're done. So we keep track of all the arcs
867 to the functions we care about, then prune out those
868 which are uninteresting.
869
870 An interesting variation would be to quit when we found
871 multi-call site functions which account for some percentage
872 of the arcs. */
873
874 arc = sym->cg.children;
875 while (arc)
876 {
877 if (arc->parent != arc->child)
878 scratch_arcs[scratch_arc_count++] = arc;
879 arc->has_been_placed = 1;
880 arc = arc->next_child;
881 }
882
883 arc = sym->cg.parents;
884 while (arc)
885 {
886 if (arc->parent != arc->child)
887 scratch_arcs[scratch_arc_count++] = arc;
888 arc->has_been_placed = 1;
889 arc = arc->next_parent;
890 }
891
892 /* Keep track of how many symbols we're going to place. */
893 scratch_index = index;
894
895 /* A lie, but it makes identifying these functions easier
896 later. */
897 sym->has_been_placed = 1;
898 }
899
900 /* Now walk through the temporary arcs and copy those we care about
901 into the high arcs array. */
902 for (index = 0; index < scratch_arc_count; index++)
903 {
904 Arc *arc = scratch_arcs[index];
905
906 /* If this arc refers to highly used functions, then
907 then we want to keep it. */
908 if (arc->child->has_been_placed
909 && arc->parent->has_been_placed)
910 {
911 high_arcs[high_arc_count++] = scratch_arcs[index];
912
913 /* We need to turn of has_been_placed since we're going to
914 use the main arc placement algorithm on these arcs. */
915 arc->child->has_been_placed = 0;
916 arc->parent->has_been_placed = 0;
917 }
918 }
919
920 /* Dump the multi-site high usage functions which are not going
921 to be ordered by the main ordering algorithm. */
922 for (index = 0; index < scratch_index; index++)
923 {
924 if (scratch_syms[index]->has_been_placed)
925 printf ("%s\n", scratch_syms[index]->name);
926 }
927
928 /* Now we can order the multi-site high use functions based on the
929 arcs between them. */
930 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
931 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
932 unplaced_arcs, &unplaced_arc_count);
933
934 /* Order and dump the high use functions left, these typically
935 have only a few call sites. */
936 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
937 unplaced_arcs, &unplaced_arc_count);
938
939 /* Now place the rarely used functions. */
940 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
941 scratch_arcs, &scratch_arc_count);
942
943 /* Output any functions not emitted by the order_and_dump calls. */
944 for (index = 0; index < used; index++)
945 if (used_syms[index]->has_been_placed == 0)
946 printf("%s\n", used_syms[index]->name);
947
948 /* Output the unused functions. */
949 for (index = 0; index < unused; index++)
950 printf("%s\n", unused_syms[index]->name);
951
952 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
953 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
954 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
955 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
956 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
957 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
958
959 free (unused_syms);
960 free (used_syms);
961 free (scratch_syms);
962 free (high_arcs);
963 free (scratch_arcs);
964 free (unplaced_arcs);
965 }
966
967 /* Place functions based on the arcs in ARCS with NUMARCS entries;
968 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
969
970 If ALL is nonzero, then place all functions referenced by ARCS,
971 else only place those referenced in the top 99% of the arcs in ARCS. */
972
973 #define MOST 0.99
974 static void
975 order_and_dump_functions_by_arcs (arcs, numarcs, all,
976 unplaced_arcs, unplaced_arc_count)
977 Arc **arcs;
978 unsigned long numarcs;
979 int all;
980 Arc **unplaced_arcs;
981 unsigned long *unplaced_arc_count;
982 {
983 #ifdef __GNUC__
984 unsigned long long tmp_arcs, total_arcs;
985 #else
986 unsigned long tmp_arcs, total_arcs;
987 #endif
988 unsigned int index;
989
990 /* If needed, compute the total arc count.
991
992 Note we don't compensate for overflow if that happens! */
993 if (! all)
994 {
995 total_arcs = 0;
996 for (index = 0; index < numarcs; index++)
997 total_arcs += arcs[index]->count;
998 }
999 else
1000 total_arcs = 0;
1001
1002 tmp_arcs = 0;
1003 for (index = 0; index < numarcs; index++)
1004 {
1005 Sym *sym1, *sym2;
1006 Sym *child, *parent;
1007
1008 tmp_arcs += arcs[index]->count;
1009
1010 /* Ignore this arc if it's already been placed. */
1011 if (arcs[index]->has_been_placed)
1012 continue;
1013
1014 child = arcs[index]->child;
1015 parent = arcs[index]->parent;
1016
1017 /* If we're not using all arcs, and this is a rarely used
1018 arc, then put it on the unplaced_arc list. Similarly
1019 if both the parent and child of this arc have been placed. */
1020 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1021 || child->has_been_placed || parent->has_been_placed)
1022 {
1023 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1024 continue;
1025 }
1026
1027 /* If all slots in the parent and child are full, then there isn't
1028 anything we can do right now. We'll place this arc on the
1029 unplaced arc list in the hope that a global positioning
1030 algorithm can use it to place function chains. */
1031 if (parent->next && parent->prev && child->next && child->prev)
1032 {
1033 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1034 continue;
1035 }
1036
1037 /* If the parent is unattached, then find the closest
1038 place to attach it onto child's chain. Similarly
1039 for the opposite case. */
1040 if (!parent->next && !parent->prev)
1041 {
1042 int next_count = 0;
1043 int prev_count = 0;
1044 Sym *prev = child;
1045 Sym *next = child;
1046
1047 /* Walk to the beginning and end of the child's chain. */
1048 while (next->next)
1049 {
1050 next = next->next;
1051 next_count++;
1052 }
1053
1054 while (prev->prev)
1055 {
1056 prev = prev->prev;
1057 prev_count++;
1058 }
1059
1060 /* Choose the closest. */
1061 child = next_count < prev_count ? next : prev;
1062 }
1063 else if (! child->next && !child->prev)
1064 {
1065 int next_count = 0;
1066 int prev_count = 0;
1067 Sym *prev = parent;
1068 Sym *next = parent;
1069
1070 while (next->next)
1071 {
1072 next = next->next;
1073 next_count++;
1074 }
1075
1076 while (prev->prev)
1077 {
1078 prev = prev->prev;
1079 prev_count++;
1080 }
1081
1082 parent = prev_count < next_count ? prev : next;
1083 }
1084 else
1085 {
1086 /* Couldn't find anywhere to attach the functions,
1087 put the arc on the unplaced arc list. */
1088 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1089 continue;
1090 }
1091
1092 /* Make sure we don't tie two ends together. */
1093 sym1 = parent;
1094 if (sym1->next)
1095 while (sym1->next)
1096 sym1 = sym1->next;
1097 else
1098 while (sym1->prev)
1099 sym1 = sym1->prev;
1100
1101 sym2 = child;
1102 if (sym2->next)
1103 while (sym2->next)
1104 sym2 = sym2->next;
1105 else
1106 while (sym2->prev)
1107 sym2 = sym2->prev;
1108
1109 if (sym1 == child
1110 && sym2 == parent)
1111 {
1112 /* This would tie two ends together. */
1113 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1114 continue;
1115 }
1116
1117 if (parent->next)
1118 {
1119 /* Must attach to the parent's prev field. */
1120 if (! child->next)
1121 {
1122 /* parent-prev and child-next */
1123 parent->prev = child;
1124 child->next = parent;
1125 arcs[index]->has_been_placed = 1;
1126 }
1127 }
1128 else if (parent->prev)
1129 {
1130 /* Must attach to the parent's next field. */
1131 if (! child->prev)
1132 {
1133 /* parent-next and child-prev */
1134 parent->next = child;
1135 child->prev = parent;
1136 arcs[index]->has_been_placed = 1;
1137 }
1138 }
1139 else
1140 {
1141 /* Can attach to either field in the parent, depends
1142 on where we've got space in the child. */
1143 if (child->prev)
1144 {
1145 /* parent-prev and child-next */
1146 parent->prev = child;
1147 child->next = parent;
1148 arcs[index]->has_been_placed = 1;
1149 }
1150 else
1151 {
1152 /* parent-next and child-prev */
1153 parent->next = child;
1154 child->prev = parent;
1155 arcs[index]->has_been_placed = 1;
1156 }
1157 }
1158 }
1159
1160 /* Dump the chains of functions we've made. */
1161 for (index = 0; index < numarcs; index++)
1162 {
1163 Sym *sym;
1164 if (arcs[index]->parent->has_been_placed
1165 || arcs[index]->child->has_been_placed)
1166 continue;
1167
1168 sym = arcs[index]->parent;
1169
1170 /* If this symbol isn't attached to any other
1171 symbols, then we've got a rarely used arc.
1172
1173 Skip it for now, we'll deal with them later. */
1174 if (sym->next == NULL
1175 && sym->prev == NULL)
1176 continue;
1177
1178 /* Get to the start of this chain. */
1179 while (sym->prev)
1180 sym = sym->prev;
1181
1182 while (sym)
1183 {
1184 /* Mark it as placed. */
1185 sym->has_been_placed = 1;
1186 printf ("%s\n", sym->name);
1187 sym = sym->next;
1188 }
1189 }
1190
1191 /* If we want to place all the arcs, then output those which weren't
1192 placed by the main algorithm. */
1193 if (all)
1194 for (index = 0; index < numarcs; index++)
1195 {
1196 Sym *sym;
1197 if (arcs[index]->parent->has_been_placed
1198 || arcs[index]->child->has_been_placed)
1199 continue;
1200
1201 sym = arcs[index]->parent;
1202
1203 sym->has_been_placed = 1;
1204 printf ("%s\n", sym->name);
1205 }
1206 }
1207
1208 /* Print a suggested .o ordering for files on a link line based
1209 on profiling information. This uses the function placement
1210 code for the bulk of its work. */
1211
1212 struct function_map {
1213 char *function_name;
1214 char *file_name;
1215 };
1216
1217 void
1218 DEFUN_VOID (cg_print_file_ordering)
1219 {
1220 unsigned long scratch_arc_count, index;
1221 Arc **scratch_arcs;
1222 extern struct function_map *symbol_map;
1223 extern int symbol_map_count;
1224 char *last;
1225
1226 scratch_arc_count = 0;
1227
1228 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1229 for (index = 0; index < numarcs; index++)
1230 {
1231 if (! arcs[index]->parent->mapped
1232 || ! arcs[index]->child->mapped)
1233 arcs[index]->has_been_placed = 1;
1234 }
1235
1236 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1237 scratch_arcs, &scratch_arc_count);
1238
1239 /* Output .o's not handled by the main placement algorithm. */
1240 for (index = 0; index < symtab.len; index++)
1241 {
1242 if (symtab.base[index].mapped
1243 && ! symtab.base[index].has_been_placed)
1244 printf ("%s\n", symtab.base[index].name);
1245 }
1246
1247 /* Now output any .o's that didn't have any text symbols. */
1248 last = NULL;
1249 for (index = 0; index < symbol_map_count; index++)
1250 {
1251 int index2;
1252
1253 /* Don't bother searching if this symbol is the
1254 same as the previous one. */
1255 if (last && !strcmp (last, symbol_map[index].file_name))
1256 continue;
1257
1258 for (index2 = 0; index2 < symtab.len; index2++)
1259 {
1260 if (! symtab.base[index2].mapped)
1261 continue;
1262
1263 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1264 break;
1265 }
1266
1267 /* If we didn't find it in the symbol table, then it must be a .o
1268 with no text symbols. Output it last. */
1269 if (index2 == symtab.len)
1270 printf ("%s\n", symbol_map[index].file_name);
1271 last = symbol_map[index].file_name;
1272 }
1273 }
This page took 0.078915 seconds and 5 git commands to generate.