* gprof.c (long_options): Add "--function-ordering" and
[deliverable/binutils-gdb.git] / gprof / cg_print.c
CommitLineData
5489fcc3
KR
1#include "libiberty.h"
2#include "cg_arcs.h"
3#include "cg_print.h"
4#include "hist.h"
5#include "utils.h"
6
7/*
8 * Return value of comparison functions used to sort tables:
9 */
10#define LESSTHAN -1
11#define EQUALTO 0
12#define GREATERTHAN 1
13
64c50fc5
JL
14static void order_and_dump_functions_by_arcs PARAMS ((Arc **, unsigned long,
15 int, Arc **,
16 unsigned long *));
5489fcc3 17/* declarations of automatically generated functions to output blurbs: */
12516a37
KR
18extern void bsd_callg_blurb PARAMS ((FILE * fp));
19extern void fsf_callg_blurb PARAMS ((FILE * fp));
5489fcc3
KR
20
21double print_time = 0.0;
22
23
24static void
12516a37 25DEFUN_VOID (print_header)
5489fcc3 26{
12516a37
KR
27 if (first_output)
28 {
29 first_output = FALSE;
30 }
31 else
32 {
33 printf ("\f\n");
03c35bcb 34 }
12516a37
KR
35 if (!bsd_style_output)
36 {
37 if (print_descriptions)
38 {
39 printf ("\t\t Call graph (explanation follows)\n\n");
40 }
41 else
42 {
43 printf ("\t\t\tCall graph\n\n");
03c35bcb
KR
44 }
45 }
12516a37
KR
46 printf ("\ngranularity: each sample hit covers %ld byte(s)",
47 (long) hist_scale * sizeof (UNIT));
48 if (print_time > 0.0)
49 {
50 printf (" for %.2f%% of %.2f seconds\n\n",
51 100.0 / print_time, print_time / hz);
52 }
53 else
54 {
55 printf (" no time propagated\n\n");
56 /*
57 * This doesn't hurt, since all the numerators will be 0.0:
58 */
59 print_time = 1.0;
03c35bcb 60 }
12516a37
KR
61 if (bsd_style_output)
62 {
63 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
64 "", "", "", "", "called", "total", "parents");
65 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
66 "index", "%time", "self", "descendents",
67 "called", "self", "name", "index");
68 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
69 "", "", "", "", "called", "total", "children");
70 printf ("\n");
71 }
72 else
73 {
74 printf ("index %% time self children called name\n");
03c35bcb
KR
75 }
76}
5489fcc3
KR
77
78
79/*
80 * Print a cycle header.
81 */
82static void
12516a37 83DEFUN (print_cycle, (cyc), Sym * cyc)
5489fcc3 84{
12516a37 85 char buf[BUFSIZ];
5489fcc3 86
12516a37 87 sprintf (buf, "[%d]", cyc->cg.index);
869b94c5
KR
88 printf (bsd_style_output
89 ? "%-6.6s %5.1f %7.2f %11.2f %7d"
90 : "%-6.6s %5.1f %7.2f %7.2f %7d", buf,
12516a37
KR
91 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
92 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
93 if (cyc->cg.self_calls != 0)
94 {
95 printf ("+%-7d", cyc->cg.self_calls);
96 }
97 else
98 {
99 printf (" %7.7s", "");
03c35bcb 100 }
869b94c5 101 printf (" <cycle %d as a whole> [%d]\n", cyc->cg.cyc.num, cyc->cg.index);
03c35bcb 102}
5489fcc3
KR
103
104
105/*
106 * Compare LEFT and RIGHT membmer. Major comparison key is
107 * CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.
108 */
109static int
12516a37 110DEFUN (cmp_member, (left, right), Sym * left AND Sym * right)
5489fcc3 111{
12516a37
KR
112 double left_time = left->cg.prop.self + left->cg.prop.child;
113 double right_time = right->cg.prop.self + right->cg.prop.child;
114 long left_calls = left->ncalls + left->cg.self_calls;
115 long right_calls = right->ncalls + right->cg.self_calls;
116
117 if (left_time > right_time)
118 {
119 return GREATERTHAN;
03c35bcb 120 }
12516a37
KR
121 if (left_time < right_time)
122 {
123 return LESSTHAN;
03c35bcb 124 }
12516a37
KR
125
126 if (left_calls > right_calls)
127 {
128 return GREATERTHAN;
03c35bcb 129 }
12516a37
KR
130 if (left_calls < right_calls)
131 {
132 return LESSTHAN;
03c35bcb 133 }
12516a37 134 return EQUALTO;
03c35bcb 135}
5489fcc3
KR
136
137
138/*
139 * Sort members of a cycle.
140 */
141static void
12516a37 142DEFUN (sort_members, (cyc), Sym * cyc)
5489fcc3 143{
12516a37
KR
144 Sym *todo, *doing, *prev;
145 /*
146 * Detach cycle members from cyclehead, and insertion sort them
147 * back on.
148 */
149 todo = cyc->cg.cyc.next;
150 cyc->cg.cyc.next = 0;
151 for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
152 {
153 todo = doing->cg.cyc.next;
154 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
155 {
156 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
157 {
158 break;
03c35bcb
KR
159 }
160 }
12516a37
KR
161 doing->cg.cyc.next = prev->cg.cyc.next;
162 prev->cg.cyc.next = doing;
03c35bcb
KR
163 }
164}
5489fcc3
KR
165
166
167/*
168 * Print the members of a cycle.
169 */
170static void
12516a37 171DEFUN (print_members, (cyc), Sym * cyc)
5489fcc3 172{
12516a37
KR
173 Sym *member;
174
175 sort_members (cyc);
176 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
177 {
869b94c5
KR
178 printf (bsd_style_output
179 ? "%6.6s %5.5s %7.2f %11.2f %7d"
180 : "%6.6s %5.5s %7.2f %7.2f %7d",
12516a37
KR
181 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
182 member->ncalls);
183 if (member->cg.self_calls != 0)
184 {
185 printf ("+%-7d", member->cg.self_calls);
186 }
187 else
188 {
189 printf (" %7.7s", "");
03c35bcb 190 }
12516a37
KR
191 printf (" ");
192 print_name (member);
193 printf ("\n");
03c35bcb
KR
194 }
195}
5489fcc3
KR
196
197
198/*
199 * Compare two arcs to/from the same child/parent.
12516a37
KR
200 * - if one arc is a self arc, it's least.
201 * - if one arc is within a cycle, it's less than.
202 * - if both arcs are within a cycle, compare arc counts.
203 * - if neither arc is within a cycle, compare with
204 * time + child_time as major key
205 * arc count as minor key
5489fcc3
KR
206 */
207static int
12516a37 208DEFUN (cmp_arc, (left, right), Arc * left AND Arc * right)
5489fcc3 209{
12516a37
KR
210 Sym *left_parent = left->parent;
211 Sym *left_child = left->child;
212 Sym *right_parent = right->parent;
213 Sym *right_child = right->child;
214 double left_time, right_time;
215
216 DBG (TIMEDEBUG,
217 printf ("[cmp_arc] ");
218 print_name (left_parent);
219 printf (" calls ");
220 print_name (left_child);
221 printf (" %f + %f %d/%d\n", left->time, left->child_time,
5489fcc3 222 left->count, left_child->ncalls);
12516a37
KR
223 printf ("[cmp_arc] ");
224 print_name (right_parent);
225 printf (" calls ");
226 print_name (right_child);
227 printf (" %f + %f %d/%d\n", right->time, right->child_time,
5489fcc3 228 right->count, right_child->ncalls);
12516a37
KR
229 printf ("\n");
230 );
231 if (left_parent == left_child)
232 {
233 return LESSTHAN; /* left is a self call */
03c35bcb 234 }
12516a37
KR
235 if (right_parent == right_child)
236 {
237 return GREATERTHAN; /* right is a self call */
03c35bcb 238 }
12516a37
KR
239
240 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
241 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
242 {
243 /* left is a call within a cycle */
244 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
245 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
246 {
247 /* right is a call within the cycle, too */
248 if (left->count < right->count)
249 {
250 return LESSTHAN;
03c35bcb 251 }
12516a37
KR
252 if (left->count > right->count)
253 {
254 return GREATERTHAN;
03c35bcb 255 }
12516a37
KR
256 return EQUALTO;
257 }
258 else
5489fcc3 259 {
12516a37
KR
260 /* right isn't a call within the cycle */
261 return LESSTHAN;
03c35bcb 262 }
12516a37
KR
263 }
264 else
265 {
266 /* left isn't a call within a cycle */
267 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
268 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
5489fcc3 269 {
12516a37
KR
270 /* right is a call within a cycle */
271 return GREATERTHAN;
272 }
273 else
274 {
275 /* neither is a call within a cycle */
276 left_time = left->time + left->child_time;
277 right_time = right->time + right->child_time;
278 if (left_time < right_time)
279 {
280 return LESSTHAN;
03c35bcb 281 }
12516a37
KR
282 if (left_time > right_time)
283 {
284 return GREATERTHAN;
03c35bcb 285 }
12516a37
KR
286 if (left->count < right->count)
287 {
288 return LESSTHAN;
03c35bcb 289 }
12516a37
KR
290 if (left->count > right->count)
291 {
292 return GREATERTHAN;
03c35bcb 293 }
12516a37 294 return EQUALTO;
03c35bcb
KR
295 }
296 }
297}
5489fcc3
KR
298
299
300static void
12516a37 301DEFUN (sort_parents, (child), Sym * child)
5489fcc3 302{
12516a37
KR
303 Arc *arc, *detached, sorted, *prev;
304
305 /*
306 * Unlink parents from child, then insertion sort back on to
307 * sorted's parents.
308 * *arc the arc you have detached and are inserting.
309 * *detached the rest of the arcs to be sorted.
310 * sorted arc list onto which you insertion sort.
311 * *prev arc before the arc you are comparing.
312 */
313 sorted.next_parent = 0;
314 for (arc = child->cg.parents; arc; arc = detached)
315 {
316 detached = arc->next_parent;
317
318 /* consider *arc as disconnected; insert it into sorted: */
319 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
320 {
321 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
322 {
323 break;
03c35bcb
KR
324 }
325 }
12516a37
KR
326 arc->next_parent = prev->next_parent;
327 prev->next_parent = arc;
03c35bcb 328 }
12516a37
KR
329
330 /* reattach sorted arcs to child: */
331 child->cg.parents = sorted.next_parent;
03c35bcb 332}
5489fcc3
KR
333
334
335static void
12516a37 336DEFUN (print_parents, (child), Sym * child)
5489fcc3 337{
12516a37
KR
338 Sym *parent;
339 Arc *arc;
340 Sym *cycle_head;
341
342 if (child->cg.cyc.head != 0)
343 {
344 cycle_head = child->cg.cyc.head;
345 }
346 else
347 {
348 cycle_head = child;
03c35bcb 349 }
12516a37
KR
350 if (!child->cg.parents)
351 {
352 printf (bsd_style_output
353 ? "%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n"
354 : "%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n",
355 "", "", "", "", "", "");
356 return;
03c35bcb 357 }
12516a37
KR
358 sort_parents (child);
359 for (arc = child->cg.parents; arc; arc = arc->next_parent)
360 {
361 parent = arc->parent;
362 if (child == parent || (child->cg.cyc.num != 0
363 && parent->cg.cyc.num == child->cg.cyc.num))
5489fcc3 364 {
12516a37
KR
365 /* selfcall or call among siblings: */
366 printf (bsd_style_output
367 ? "%6.6s %5.5s %7.7s %11.11s %7d %7.7s "
368 : "%6.6s %5.5s %7.7s %7.7s %7d %7.7s ",
369 "", "", "", "",
370 arc->count, "");
371 print_name (parent);
372 printf ("\n");
373 }
374 else
375 {
376 /* regular parent of child: */
377 printf (bsd_style_output
378 ? "%6.6s %5.5s %7.2f %11.2f %7d/%-7d "
379 : "%6.6s %5.5s %7.2f %7.2f %7d/%-7d ",
380 "", "",
381 arc->time / hz, arc->child_time / hz,
382 arc->count, cycle_head->ncalls);
383 print_name (parent);
384 printf ("\n");
03c35bcb
KR
385 }
386 }
387}
5489fcc3
KR
388
389
390static void
12516a37 391DEFUN (sort_children, (parent), Sym * parent)
5489fcc3 392{
12516a37
KR
393 Arc *arc, *detached, sorted, *prev;
394 /*
395 * Unlink children from parent, then insertion sort back on to
396 * sorted's children.
397 * *arc the arc you have detached and are inserting.
398 * *detached the rest of the arcs to be sorted.
399 * sorted arc list onto which you insertion sort.
400 * *prev arc before the arc you are comparing.
401 */
402 sorted.next_child = 0;
403 for (arc = parent->cg.children; arc; arc = detached)
404 {
405 detached = arc->next_child;
406
407 /* consider *arc as disconnected; insert it into sorted: */
408 for (prev = &sorted; prev->next_child; prev = prev->next_child)
409 {
410 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
411 {
412 break;
03c35bcb
KR
413 }
414 }
12516a37
KR
415 arc->next_child = prev->next_child;
416 prev->next_child = arc;
03c35bcb 417 }
12516a37
KR
418
419 /* reattach sorted children to parent: */
420 parent->cg.children = sorted.next_child;
03c35bcb 421}
5489fcc3
KR
422
423
424static void
12516a37 425DEFUN (print_children, (parent), Sym * parent)
5489fcc3 426{
12516a37
KR
427 Sym *child;
428 Arc *arc;
429
430 sort_children (parent);
431 arc = parent->cg.children;
432 for (arc = parent->cg.children; arc; arc = arc->next_child)
433 {
434 child = arc->child;
435 if (child == parent || (child->cg.cyc.num != 0
436 && child->cg.cyc.num == parent->cg.cyc.num))
437 {
438 /* self call or call to sibling: */
439 printf (bsd_style_output
440 ? "%6.6s %5.5s %7.7s %11.11s %7d %7.7s "
441 : "%6.6s %5.5s %7.7s %7.7s %7d %7.7s ",
442 "", "", "", "", arc->count, "");
443 print_name (child);
444 printf ("\n");
445 }
446 else
5489fcc3 447 {
12516a37
KR
448 /* regular child of parent: */
449 printf (bsd_style_output
450 ? "%6.6s %5.5s %7.2f %11.2f %7d/%-7d "
451 : "%6.6s %5.5s %7.2f %7.2f %7d/%-7d ",
452 "", "",
453 arc->time / hz, arc->child_time / hz,
454 arc->count, child->cg.cyc.head->ncalls);
455 print_name (child);
456 printf ("\n");
03c35bcb
KR
457 }
458 }
459}
5489fcc3
KR
460
461
462static void
12516a37 463DEFUN (print_line, (np), Sym * np)
5489fcc3 464{
12516a37
KR
465 char buf[BUFSIZ];
466
467 sprintf (buf, "[%d]", np->cg.index);
468 printf (bsd_style_output
469 ? "%-6.6s %5.1f %7.2f %11.2f"
470 : "%-6.6s %5.1f %7.2f %7.2f", buf,
471 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
472 np->cg.prop.self / hz, np->cg.prop.child / hz);
473 if ((np->ncalls + np->cg.self_calls) != 0)
474 {
475 printf (" %7d", np->ncalls);
476 if (np->cg.self_calls != 0)
477 {
478 printf ("+%-7d ", np->cg.self_calls);
479 }
480 else
481 {
482 printf (" %7.7s ", "");
03c35bcb 483 }
12516a37
KR
484 }
485 else
486 {
487 printf (" %7.7s %7.7s ", "", "");
03c35bcb 488 }
12516a37
KR
489 print_name (np);
490 printf ("\n");
03c35bcb 491}
5489fcc3
KR
492
493
494/*
495 * Print dynamic call graph.
496 */
497void
12516a37 498DEFUN (cg_print, (timesortsym), Sym ** timesortsym)
5489fcc3 499{
12516a37
KR
500 int index;
501 Sym *parent;
5489fcc3 502
12516a37
KR
503 if (print_descriptions && bsd_style_output)
504 {
505 bsd_callg_blurb (stdout);
03c35bcb 506 }
5489fcc3 507
12516a37 508 print_header ();
5489fcc3 509
12516a37
KR
510 for (index = 0; index < symtab.len + num_cycles; ++index)
511 {
512 parent = timesortsym[index];
513 if ((ignore_zeros && parent->ncalls == 0
514 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
515 && parent->cg.prop.child == 0)
516 || !parent->cg.print_flag)
517 {
518 continue;
03c35bcb 519 }
12516a37 520 if (!parent->name && parent->cg.cyc.num != 0)
5489fcc3 521 {
12516a37
KR
522 /* cycle header: */
523 print_cycle (parent);
524 print_members (parent);
525 }
526 else
527 {
528 print_parents (parent);
529 print_line (parent);
530 print_children (parent);
03c35bcb 531 }
12516a37
KR
532 if (bsd_style_output)
533 printf ("\n");
534 printf ("-----------------------------------------------\n");
535 if (bsd_style_output)
536 printf ("\n");
5489fcc3 537 }
12516a37
KR
538 free (timesortsym);
539 if (print_descriptions && !bsd_style_output)
540 {
541 fsf_callg_blurb (stdout);
5489fcc3 542 }
03c35bcb 543}
5489fcc3
KR
544
545
546static int
12516a37 547DEFUN (cmp_name, (left, right), const PTR left AND const PTR right)
5489fcc3 548{
12516a37
KR
549 const Sym **npp1 = (const Sym **) left;
550 const Sym **npp2 = (const Sym **) right;
5489fcc3 551
12516a37 552 return strcmp ((*npp1)->name, (*npp2)->name);
03c35bcb 553}
5489fcc3
KR
554
555
556void
12516a37 557DEFUN_VOID (cg_print_index)
5489fcc3 558{
12516a37
KR
559 int index, nnames, todo, i, j, col, starting_col;
560 Sym **name_sorted_syms, *sym;
561 const char *filename;
562 char buf[20];
563 int column_width = (output_width - 1) / 3; /* don't write in last col! */
564 /*
565 * Now, sort regular function name alphabetically to create an
566 * index:
567 */
568 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
569 for (index = 0, nnames = 0; index < symtab.len; index++)
570 {
571 if (ignore_zeros && symtab.base[index].ncalls == 0
572 && symtab.base[index].hist.time == 0)
573 {
574 continue;
03c35bcb 575 }
12516a37 576 name_sorted_syms[nnames++] = &symtab.base[index];
03c35bcb 577 }
12516a37
KR
578 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
579 for (index = 1, todo = nnames; index <= num_cycles; index++)
580 {
581 name_sorted_syms[todo++] = &cycle_header[index];
03c35bcb 582 }
12516a37
KR
583 printf ("\f\nIndex by function name\n\n");
584 index = (todo + 2) / 3;
585 for (i = 0; i < index; i++)
586 {
587 col = 0;
588 starting_col = 0;
589 for (j = i; j < todo; j += index)
5489fcc3 590 {
12516a37
KR
591 sym = name_sorted_syms[j];
592 if (sym->cg.print_flag)
593 {
594 sprintf (buf, "[%d]", sym->cg.index);
595 }
596 else
597 {
598 sprintf (buf, "(%d)", sym->cg.index);
03c35bcb 599 }
12516a37
KR
600 if (j < nnames)
601 {
602 if (bsd_style_output)
603 {
604 printf ("%6.6s %-19.19s", buf, sym->name);
605 }
606 else
607 {
608 col += strlen (buf);
609 for (; col < starting_col + 5; ++col)
610 {
611 putchar (' ');
03c35bcb 612 }
12516a37
KR
613 printf (" %s ", buf);
614 col += print_name_only (sym);
615 if (!line_granularity && sym->is_static && sym->file)
616 {
617 filename = sym->file->name;
618 if (!print_path)
619 {
620 filename = strrchr (filename, '/');
621 if (filename)
622 {
623 ++filename;
624 }
625 else
626 {
627 filename = sym->file->name;
03c35bcb
KR
628 }
629 }
12516a37
KR
630 printf (" (%s)", filename);
631 col += strlen (filename) + 3;
03c35bcb
KR
632 }
633 }
12516a37
KR
634 }
635 else
636 {
869b94c5
KR
637 if (bsd_style_output)
638 {
639 printf ("%6.6s ", buf);
640 sprintf (buf, "<cycle %d>", sym->cg.cyc.num);
641 printf ("%-19.19s", buf);
642 }
643 else
644 {
645 col += strlen (buf);
646 for (; col < starting_col + 5; ++col)
647 putchar (' ');
648 printf (" %s ", buf);
649 sprintf (buf, "<cycle %d>", sym->cg.cyc.num);
650 printf ("%s", buf);
651 col += strlen (buf);
652 }
03c35bcb 653 }
12516a37 654 starting_col += column_width;
03c35bcb 655 }
12516a37 656 printf ("\n");
03c35bcb 657 }
12516a37 658 free (name_sorted_syms);
03c35bcb 659}
64c50fc5
JL
660
661/* Compare two arcs based on their usage counts. We want to sort
662 in descending order. */
663static int
664DEFUN (cmp_arc_count, (left, right), const PTR left AND const PTR right)
665{
666 const Arc **npp1 = (const Arc **) left;
667 const Arc **npp2 = (const Arc **) right;
668
669 if ((*npp1)->count > (*npp2)->count)
670 return -1;
671 else if ((*npp1)->count < (*npp2)->count)
672 return 1;
673 else
674 return 0;
675}
676
677/* Compare two funtions based on their usage counts. We want to sort
678 in descending order. */
679static int
680DEFUN (cmp_fun_nuses, (left, right), const PTR left AND const PTR right)
681{
682 const Sym **npp1 = (const Sym **) left;
683 const Sym **npp2 = (const Sym **) right;
684
685 if ((*npp1)->nuses > (*npp2)->nuses)
686 return -1;
687 else if ((*npp1)->nuses < (*npp2)->nuses)
688 return 1;
689 else
690 return 0;
691}
692
693/* Print a suggested function ordering based on the profiling data.
694
695 We perform 4 major steps when ordering functions:
696
697 * Group unused functions together and place them at the
698 end of the function order.
699
700 * Search the highest use arcs (those which account for 90% of
701 the total arc count) for functions which have several parents.
702
703 Group those with the most call sites together (currently the
704 top 1.25% which have at least five different call sites).
705
706 These are emitted at the start of the function order.
707
708 * Use a greedy placement algorithm to place functions which
709 occur in the top 99% of the arcs in the profile. Some provisions
710 are made to handle high usage arcs where the parent and/or
711 child has already been placed.
712
713 * Run the same greedy placement algorithm on the remaining
714 arcs to place the leftover functions.
715
716
717 The various "magic numbers" should (one day) be tuneable by command
718 line options. They were arrived at by benchmarking a few applications
719 with various values to see which values produced better overall function
720 orderings.
721
722 Of course, profiling errors, machine limitations (PA long calls), and
723 poor cutoff values for the placement algorithm may limit the usefullness
724 of the resulting function order. Improvements would be greatly appreciated.
725
726 Suggestions:
727
728 * Place the functions with many callers near the middle of the
729 list to reduce long calls.
730
731 * Propagate arc usage changes as functions are placed. Ie if
732 func1 and func2 are placed together, arcs to/from those arcs
733 to the same parent/child should be combined, then resort the
734 arcs to choose the next one.
735
736 * Implement some global positioning algorithm to place the
737 chains made by the greedy local positioning algorithm. Probably
738 by examining arcs which haven't been placed yet to tie two
739 chains together.
740
741 * Take a function's size and time into account in the algorithm;
742 size in particular is important on the PA (long calls). Placing
743 many small functions onto their own page may be wise.
744
745 * Use better profiling information; many published algorithms
746 are based on call sequences through time, rather than just
747 arc counts.
748
749 * Prodecure cloning could improve performance when a small number
750 of arcs account for most of the calls to a particular function.
751
752 * Use relocation information to avoid moving unused functions
753 completely out of the code stream; this would avoid severe lossage
754 when the profile data bears little resemblance to actual runs.
755
756 * Propagation of arc usages should also improve .o link line
757 ordering which shares the same arc placement algorithm with
758 the function ordering code (in fact it is a degenerate case
759 of function ordering). */
760
761void
762DEFUN_VOID (cg_print_function_ordering)
763{
764 unsigned long index, used, unused, scratch_index;
765 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
766#ifdef __GNU_C__
767 unsigned long long total_arcs, tmp_arcs_count;
768#else
769 unsigned long total_arcs, tmp_arcs_count;
770#endif
771 Sym **unused_syms, **used_syms, **scratch_syms;
772 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
773
774 index = 0;
775 used = 0;
776 unused = 0;
777 scratch_index = 0;
778 unplaced_arc_count = 0;
779 high_arc_count = 0;
780 scratch_arc_count = 0;
781
782 /* First group all the unused functions together. */
783 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
784 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
785 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
786 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
787 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
788 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
789
790 /* Walk through all the functions; mark those which are never
791 called as placed (we'll emit them as a group later). */
792 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
793 {
794 if (symtab.base[index].ncalls == 0)
795 {
796 /* Filter out gprof generated names. */
797 if (strcmp (symtab.base[index].name, "<locore>")
798 && strcmp (symtab.base[index].name, "<hicore>"))
799 {
800 unused_syms[unused++] = &symtab.base[index];
801 symtab.base[index].has_been_placed = 1;
802 }
803 }
804 else
805 {
806 used_syms[used++] = &symtab.base[index];
807 symtab.base[index].has_been_placed = 0;
808 symtab.base[index].next = 0;
809 symtab.base[index].prev = 0;
810 symtab.base[index].nuses = 0;
811 }
812 }
813
814 /* Sort the arcs from most used to least used. */
815 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
816
817 /* Compute the total arc count. Also mark arcs as unplaced.
818
819 Note we don't compensate for overflow if that happens!
820 Overflow is much less likely when this file is compiled
821 with GCC as it can double-wide integers via long long. */
822 total_arcs = 0;
823 for (index = 0; index < numarcs; index++)
824 {
825 total_arcs += arcs[index]->count;
826 arcs[index]->has_been_placed = 0;
827 }
828
829 /* We want to pull out those functions which are referenced
830 by many highly used arcs and emit them as a group. This
831 could probably use some tuning. */
832 tmp_arcs_count = 0;
833 for (index = 0; index < numarcs; index++)
834 {
835 tmp_arcs_count += arcs[index]->count;
836
837 /* Count how many times each parent and child are used up
838 to our threshhold of arcs (90%). */
839 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
840 break;
841
842 arcs[index]->child->nuses++;
843 }
844
845 /* Now sort a temporary symbol table based on the number of
846 times each function was used in the highest used arcs. */
847 bcopy (used_syms, scratch_syms, used * sizeof (Sym *));
848 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
849
850 /* Now pick out those symbols we're going to emit as
851 a group. We take up to 1.25% of the used symbols. */
852 for (index = 0; index < used / 80; index++)
853 {
854 Sym *sym = scratch_syms[index];
855 Arc *arc;
856
857 /* If we hit symbols that aren't used from many call sites,
858 then we can quit. We choose five as the low limit for
859 no particular reason. */
860 if (sym->nuses == 5)
861 break;
862
863 /* We're going to need the arcs between these functions.
864 Unfortunately, we don't know all these functions
865 until we're done. So we keep track of all the arcs
866 to the functions we care about, then prune out those
867 which are uninteresting.
868
869 An interesting variation would be to quit when we found
870 multi-call site functions which account for some percentage
871 of the arcs. */
872
873 arc = sym->cg.children;
874 while (arc)
875 {
876 if (arc->parent != arc->child)
877 scratch_arcs[scratch_arc_count++] = arc;
878 arc->has_been_placed = 1;
879 arc = arc->next_child;
880 }
881
882 arc = sym->cg.parents;
883 while (arc)
884 {
885 if (arc->parent != arc->child)
886 scratch_arcs[scratch_arc_count++] = arc;
887 arc->has_been_placed = 1;
888 arc = arc->next_parent;
889 }
890
891 /* Keep track of how many symbols we're going to place. */
892 scratch_index = index;
893
894 /* A lie, but it makes identifying these functions easier
895 later. */
896 sym->has_been_placed = 1;
897 }
898
899 /* Now walk through the temporary arcs and copy those we care about
900 into the high arcs array. */
901 for (index = 0; index < scratch_arc_count; index++)
902 {
903 Arc *arc = scratch_arcs[index];
904
905 /* If this arc refers to highly used functions, then
906 then we want to keep it. */
907 if (arc->child->has_been_placed
908 && arc->parent->has_been_placed)
909 {
910 high_arcs[high_arc_count++] = scratch_arcs[index];
911
912 /* We need to turn of has_been_placed since we're going to
913 use the main arc placement algorithm on these arcs. */
914 arc->child->has_been_placed = 0;
915 arc->parent->has_been_placed = 0;
916 }
917 }
918
919 /* Dump the multi-site high usage functions which are not going
920 to be ordered by the main ordering algorithm. */
921 for (index = 0; index < scratch_index; index++)
922 {
923 if (scratch_syms[index]->has_been_placed)
924 printf ("%s\n", scratch_syms[index]->name);
925 }
926
927 /* Now we can order the multi-site high use functions based on the
928 arcs between them. */
929 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
930 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
931 unplaced_arcs, &unplaced_arc_count);
932
933 /* Order and dump the high use functions left, these typically
934 have only a few call sites. */
935 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
936 unplaced_arcs, &unplaced_arc_count);
937
938 /* Now place the rarely used functions. */
939 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
940 scratch_arcs, &scratch_arc_count);
941
942 /* Output any functions not emitted by the order_and_dump calls. */
943 for (index = 0; index < used; index++)
944 if (used_syms[index]->has_been_placed == 0)
945 printf("%s\n", used_syms[index]->name);
946
947 /* Output the unused functions. */
948 for (index = 0; index < unused; index++)
949 printf("%s\n", unused_syms[index]->name);
950
951 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
952 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
953 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
954 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
955 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
956 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
957
958 free (unused_syms);
959 free (used_syms);
960 free (scratch_syms);
961 free (high_arcs);
962 free (scratch_arcs);
963 free (unplaced_arcs);
964}
965
966/* Place functions based on the arcs in ARCS with NUMARCS entries;
967 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
968
969 If ALL is nonzero, then place all functions referenced by ARCS,
970 else only place those referenced in the top 99% of the arcs in ARCS. */
971
972#define MOST 0.99
973static void
974order_and_dump_functions_by_arcs (arcs, numarcs, all,
975 unplaced_arcs, unplaced_arc_count)
976 Arc **arcs;
977 unsigned long numarcs;
978 int all;
979 Arc **unplaced_arcs;
980 unsigned long *unplaced_arc_count;
981{
982#ifdef __GNU_C__
983 unsigned long long tmp_arcs, total_arcs;
984#else
985 unsigned long tmp_arcs, total_arcs;
986#endif
987 unsigned int index;
988
989 /* If needed, compute the total arc count.
990
991 Note we don't compensate for overflow if that happens! */
992 if (! all)
993 {
994 total_arcs = 0;
995 for (index = 0; index < numarcs; index++)
996 total_arcs += arcs[index]->count;
997 }
998 else
999 total_arcs = 0;
1000
1001 tmp_arcs = 0;
1002 for (index = 0; index < numarcs; index++)
1003 {
1004 Sym *sym1, *sym2;
1005 Sym *child, *parent;
1006
1007 tmp_arcs += arcs[index]->count;
1008
1009 /* Ignore this arc if it's already been placed. */
1010 if (arcs[index]->has_been_placed)
1011 continue;
1012
1013 child = arcs[index]->child;
1014 parent = arcs[index]->parent;
1015
1016 /* If we're not using all arcs, and this is a rarely used
1017 arc, then put it on the unplaced_arc list. Similarly
1018 if both the parent and child of this arc have been placed. */
1019 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1020 || child->has_been_placed || parent->has_been_placed)
1021 {
1022 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1023 continue;
1024 }
1025
1026 /* If all slots in the parent and child are full, then there isn't
1027 anything we can do right now. We'll place this arc on the
1028 unplaced arc list in the hope that a global positioning
1029 algorithm can use it to place function chains. */
1030 if (parent->next && parent->prev && child->next && child->prev)
1031 {
1032 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1033 continue;
1034 }
1035
1036 /* If the parent is unattached, then find the closest
1037 place to attach it onto child's chain. Similarly
1038 for the opposite case. */
1039 if (!parent->next && !parent->prev)
1040 {
1041 int next_count = 0;
1042 int prev_count = 0;
1043 Sym *prev = child;
1044 Sym *next = child;
1045
1046 /* Walk to the beginning and end of the child's chain. */
1047 while (next->next)
1048 {
1049 next = next->next;
1050 next_count++;
1051 }
1052
1053 while (prev->prev)
1054 {
1055 prev = prev->prev;
1056 prev_count++;
1057 }
1058
1059 /* Choose the closest. */
1060 child = next_count < prev_count ? next : prev;
1061 }
1062 else if (! child->next && !child->prev)
1063 {
1064 int next_count = 0;
1065 int prev_count = 0;
1066 Sym *prev = parent;
1067 Sym *next = parent;
1068
1069 while (next->next)
1070 {
1071 next = next->next;
1072 next_count++;
1073 }
1074
1075 while (prev->prev)
1076 {
1077 prev = prev->prev;
1078 prev_count++;
1079 }
1080
1081 parent = prev_count < next_count ? prev : next;
1082 }
1083 else
1084 {
1085 /* Couldn't find anywhere to attach the functions,
1086 put the arc on the unplaced arc list. */
1087 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1088 continue;
1089 }
1090
1091 /* Make sure we don't tie two ends together. */
1092 sym1 = parent;
1093 if (sym1->next)
1094 while (sym1->next)
1095 sym1 = sym1->next;
1096 else
1097 while (sym1->prev)
1098 sym1 = sym1->prev;
1099
1100 sym2 = child;
1101 if (sym2->next)
1102 while (sym2->next)
1103 sym2 = sym2->next;
1104 else
1105 while (sym2->prev)
1106 sym2 = sym2->prev;
1107
1108 if (sym1 == child
1109 && sym2 == parent)
1110 {
1111 /* This would tie two ends together. */
1112 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1113 continue;
1114 }
1115
1116 if (parent->next)
1117 {
1118 /* Must attach to the parent's prev field. */
1119 if (! child->next)
1120 {
1121 /* parent-prev and child-next */
1122 parent->prev = child;
1123 child->next = parent;
1124 arcs[index]->has_been_placed = 1;
1125 }
1126 }
1127 else if (parent->prev)
1128 {
1129 /* Must attach to the parent's next field. */
1130 if (! child->prev)
1131 {
1132 /* parent-next and child-prev */
1133 parent->next = child;
1134 child->prev = parent;
1135 arcs[index]->has_been_placed = 1;
1136 }
1137 }
1138 else
1139 {
1140 /* Can attach to either field in the parent, depends
1141 on where we've got space in the child. */
1142 if (child->prev)
1143 {
1144 /* parent-prev and child-next */
1145 parent->prev = child;
1146 child->next = parent;
1147 arcs[index]->has_been_placed = 1;
1148 }
1149 else
1150 {
1151 /* parent-next and child-prev */
1152 parent->next = child;
1153 child->prev = parent;
1154 arcs[index]->has_been_placed = 1;
1155 }
1156 }
1157 }
1158
1159 /* Dump the chains of functions we've made. */
1160 for (index = 0; index < numarcs; index++)
1161 {
1162 Sym *sym;
1163 if (arcs[index]->parent->has_been_placed
1164 || arcs[index]->child->has_been_placed)
1165 continue;
1166
1167 sym = arcs[index]->parent;
1168
1169 /* If this symbol isn't attached to any other
1170 symbols, then we've got a rarely used arc.
1171
1172 Skip it for now, we'll deal with them later. */
1173 if (sym->next == NULL
1174 && sym->prev == NULL)
1175 continue;
1176
1177 /* Get to the start of this chain. */
1178 while (sym->prev)
1179 sym = sym->prev;
1180
1181 while (sym)
1182 {
1183 /* Mark it as placed. */
1184 sym->has_been_placed = 1;
1185 printf ("%s\n", sym->name);
1186 sym = sym->next;
1187 }
1188 }
1189
1190 /* If we want to place all the arcs, then output those which weren't
1191 placed by the main algorithm. */
1192 if (all)
1193 for (index = 0; index < numarcs; index++)
1194 {
1195 Sym *sym;
1196 if (arcs[index]->parent->has_been_placed
1197 || arcs[index]->child->has_been_placed)
1198 continue;
1199
1200 sym = arcs[index]->parent;
1201
1202 sym->has_been_placed = 1;
1203 printf ("%s\n", sym->name);
1204 }
1205}
1206
1207/* Print a suggested .o ordering for files on a link line based
1208 on profiling information. This uses the function placement
1209 code for the bulk of its work. */
1210
1211struct function_map {
1212 char *function_name;
1213 char *file_name;
1214};
1215
1216void
1217DEFUN_VOID (cg_print_file_ordering)
1218{
1219 unsigned long scratch_arc_count, index;
1220 Arc **scratch_arcs;
1221 extern struct function_map *symbol_map;
1222 extern int symbol_map_count;
1223 char *last;
1224
1225 scratch_arc_count = 0;
1226
1227 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1228 for (index = 0; index < numarcs; index++)
1229 {
1230 if (! arcs[index]->parent->mapped
1231 || ! arcs[index]->child->mapped)
1232 arcs[index]->has_been_placed = 1;
1233 }
1234
1235 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1236 scratch_arcs, &scratch_arc_count);
1237
1238 /* Output .o's not handled by the main placement algorithm. */
1239 for (index = 0; index < symtab.len; index++)
1240 {
1241 if (symtab.base[index].mapped
1242 && ! symtab.base[index].has_been_placed)
1243 printf ("%s\n", symtab.base[index].name);
1244 }
1245
1246 /* Now output any .o's that didn't have any text symbols. */
1247 last = NULL;
1248 for (index = 0; index < symbol_map_count; index++)
1249 {
1250 int index2;
1251
1252 /* Don't bother searching if this symbol is the
1253 same as the previous one. */
1254 if (last && !strcmp (last, symbol_map[index].file_name))
1255 continue;
1256
1257 for (index2 = 0; index2 < symtab.len; index2++)
1258 {
1259 if (! symtab.base[index2].mapped)
1260 continue;
1261
1262 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1263 break;
1264 }
1265
1266 /* If we didn't find it in the symbol table, then it must be a .o
1267 with no text symbols. Output it last. */
1268 if (index2 == symtab.len)
1269 printf ("%s\n", symbol_map[index].file_name);
1270 last = symbol_map[index].file_name;
1271 }
1272}
This page took 0.101156 seconds and 4 git commands to generate.