gas: xtensa: implement trampoline coalescing
authorMax Filippov <jcmvbkbc@gmail.com>
Tue, 14 Nov 2017 23:16:08 +0000 (15:16 -0800)
committerMax Filippov <jcmvbkbc@gmail.com>
Mon, 27 Nov 2017 23:15:46 +0000 (15:15 -0800)
There is a recurring pattern in assembly files generated by a compiler
where a lot of jumps in a function are going to the same place. When
these jumps are relaxed with trampolines the assembler generates a
separate jump thread from each source.
Create an index of trampoline jump targets for each segment and see if a
jump being relaxed goes to a location from that index, in which case
replace its target with a location of existing trampoline jump that
results in the shortest path to the original target.

gas/
2017-11-27  Max Filippov  <jcmvbkbc@gmail.com>

* config/tc-xtensa.c (trampoline_chain_entry, trampoline_chain)
(trampoline_chain_index): New structures.
(trampoline_index): Add chain_index field.
(xg_order_trampoline_chain_entry, xg_sort_trampoline_chain)
(xg_find_chain_entry, xg_get_best_chain_entry)
(xg_order_trampoline_chain, xg_get_trampoline_chain)
(xg_find_best_eq_target, xg_add_location_to_chain)
(xg_create_trampoline_chain, xg_get_single_symbol_slot): New
functions.
(xg_relax_fixups): Call xg_find_best_eq_target to adjust jump
target to point to an existing jump. Call
xg_create_trampoline_chain to create new jump target. Call
xg_add_location_to_chain to add newly created trampoline jump
to the corresponding chain.
(add_jump_to_trampoline): Extract loop searching for a single
slot with a symbol into a separate function, replace that code
with a call to that function.
(relax_frag_immed): Call xg_find_best_eq_target to adjust jump
target to point to an existing jump.
* testsuite/gas/xtensa/all.exp: Add trampoline-2 test.
* testsuite/gas/xtensa/trampoline.d: Adjust absolute addresses
as many duplicate trampoline chains are now coalesced.
* testsuite/gas/xtensa/trampoline.s: Add _nop so that objdump
stays in sync with instruction stream.
* testsuite/gas/xtensa/trampoline-2.l: New test result file.
* testsuite/gas/xtensa/trampoline-2.s: New test source file.

gas/ChangeLog
gas/config/tc-xtensa.c
gas/testsuite/gas/xtensa/all.exp
gas/testsuite/gas/xtensa/trampoline-2.l [new file with mode: 0644]
gas/testsuite/gas/xtensa/trampoline-2.s [new file with mode: 0644]
gas/testsuite/gas/xtensa/trampoline.d
gas/testsuite/gas/xtensa/trampoline.s

index c1299417bb849b7520a57bea32c7aaa2cd45ab26..92d0c36255051cb925c8760ec6d6137f238c6328 100644 (file)
@@ -1,3 +1,32 @@
+2017-11-27  Max Filippov  <jcmvbkbc@gmail.com>
+
+       * config/tc-xtensa.c (trampoline_chain_entry, trampoline_chain)
+       (trampoline_chain_index): New structures.
+       (trampoline_index): Add chain_index field.
+       (xg_order_trampoline_chain_entry, xg_sort_trampoline_chain)
+       (xg_find_chain_entry, xg_get_best_chain_entry)
+       (xg_order_trampoline_chain, xg_get_trampoline_chain)
+       (xg_find_best_eq_target, xg_add_location_to_chain)
+       (xg_create_trampoline_chain, xg_get_single_symbol_slot): New
+       functions.
+       (xg_relax_fixups): Call xg_find_best_eq_target to adjust jump
+       target to point to an existing jump. Call
+       xg_create_trampoline_chain to create new jump target. Call
+       xg_add_location_to_chain to add newly created trampoline jump
+       to the corresponding chain.
+       (add_jump_to_trampoline): Extract loop searching for a single
+       slot with a symbol into a separate function, replace that code
+       with a call to that function.
+       (relax_frag_immed): Call xg_find_best_eq_target to adjust jump
+       target to point to an existing jump.
+       * testsuite/gas/xtensa/all.exp: Add trampoline-2 test.
+       * testsuite/gas/xtensa/trampoline.d: Adjust absolute addresses
+       as many duplicate trampoline chains are now coalesced.
+       * testsuite/gas/xtensa/trampoline.s: Add _nop so that objdump
+       stays in sync with instruction stream.
+       * testsuite/gas/xtensa/trampoline-2.l: New test result file.
+       * testsuite/gas/xtensa/trampoline-2.s: New test source file.
+
 2017-11-27  Max Filippov  <jcmvbkbc@gmail.com>
 
        * config/tc-xtensa.c (search_trampolines, get_best_trampoline):
index b507d4425a368049089f50b1924df4498a6907d0..9defd739b3b7b908cb94fc26f648483f59346969 100644 (file)
@@ -7348,6 +7348,33 @@ xtensa_end (void)
   xtensa_check_frag_count ();
 }
 
+struct trampoline_chain_entry
+{
+  symbolS *sym;
+  addressT offset;
+};
+
+/* Trampoline chain for a given (sym, offset) pair is a sorted array
+   of locations of trampoline jumps leading there.  Jumps are represented
+   as pairs (sym, offset): trampoline frag symbol and offset of the jump
+   inside the frag.  */
+struct trampoline_chain
+{
+  struct trampoline_chain_entry target;
+  struct trampoline_chain_entry *entry;
+  size_t n_entries;
+  size_t n_max;
+  bfd_boolean needs_sorting;
+};
+
+struct trampoline_chain_index
+{
+  struct trampoline_chain *entry;
+  size_t n_entries;
+  size_t n_max;
+  bfd_boolean needs_sorting;
+};
+
 struct trampoline_index
 {
   fragS **entry;
@@ -7359,7 +7386,10 @@ struct trampoline_seg
 {
   struct trampoline_seg *next;
   asection *seg;
+  /* Trampolines ordered by their frag fr_address */
   struct trampoline_index index;
+  /* Known trampoline chains ordered by (sym, offset) pair */
+  struct trampoline_chain_index chain_index;
 };
 
 static struct trampoline_seg trampoline_seg_list;
@@ -7520,6 +7550,187 @@ static bfd_boolean xg_is_trampoline_frag_full (const fragS *fragP)
   return fragP->fr_var < 3;
 }
 
+static int xg_order_trampoline_chain_entry (const void *a, const void *b)
+{
+  const struct trampoline_chain_entry *pa = a;
+  const struct trampoline_chain_entry *pb = b;
+
+  if (pa->sym == pb->sym ||
+      S_GET_VALUE (pa->sym) == S_GET_VALUE (pb->sym))
+    if (pa->offset == pb->offset)
+      return 0;
+    else
+      return pa->offset < pb->offset ? -1 : 1;
+  else
+    return S_GET_VALUE (pa->sym) < S_GET_VALUE (pb->sym) ? -1 : 1;
+}
+
+static void xg_sort_trampoline_chain (struct trampoline_chain *tc)
+{
+  qsort (tc->entry, tc->n_entries, sizeof (*tc->entry),
+        xg_order_trampoline_chain_entry);
+  tc->needs_sorting = FALSE;
+}
+
+/* Find entry index in the given chain with maximal address <= source.  */
+static size_t xg_find_chain_entry (struct trampoline_chain *tc,
+                                  addressT source)
+{
+  size_t a = 0;
+  size_t b = tc->n_entries;
+
+  if (tc->needs_sorting)
+    xg_sort_trampoline_chain (tc);
+
+  while (b - a > 1)
+    {
+      size_t c = (a + b) / 2;
+      struct trampoline_chain_entry *e = tc->entry + c;
+
+      if (S_GET_VALUE(e->sym) + e->offset <= source)
+       a = c;
+      else
+       b = c;
+    }
+  return a;
+}
+
+/* Find the best jump target for the source in the given trampoline chain.
+   The best jump target is the one that results in the shortest path to the
+   final target, it's the location of the jump closest to the final target,
+   but within the J_RANGE - J_MARGIN from the source.  */
+static struct trampoline_chain_entry *
+xg_get_best_chain_entry (struct trampoline_chain *tc, addressT source)
+{
+  addressT target = S_GET_VALUE(tc->target.sym) + tc->target.offset;
+  size_t i = xg_find_chain_entry (tc, source);
+  struct trampoline_chain_entry *e = tc->entry + i;
+  int step = target < source ? -1 : 1;
+  addressT chained_target;
+  offsetT off;
+
+  if (target > source &&
+      S_GET_VALUE(e->sym) + e->offset <= source &&
+      i + 1 < tc->n_entries)
+    ++i;
+
+  while (i + step < tc->n_entries)
+    {
+      struct trampoline_chain_entry *next = tc->entry + i + step;
+
+      chained_target = S_GET_VALUE(next->sym) + next->offset;
+      off = source - chained_target;
+
+      if (labs (off) >= J_RANGE - J_MARGIN)
+       break;
+
+      i += step;
+    }
+
+  e = tc->entry + i;
+  chained_target = S_GET_VALUE(e->sym) + e->offset;
+  off = source - chained_target;
+
+  if (labs (off) < J_MARGIN ||
+      labs (off) >= J_RANGE - J_MARGIN)
+    return &tc->target;
+  return tc->entry + i;
+}
+
+static int xg_order_trampoline_chain (const void *a, const void *b)
+{
+  const struct trampoline_chain *pa = a;
+  const struct trampoline_chain *pb = b;
+
+  return xg_order_trampoline_chain_entry (&pa->target, &pb->target);
+}
+
+static struct trampoline_chain *
+xg_get_trampoline_chain (struct trampoline_seg *ts,
+                        symbolS *sym,
+                        addressT offset)
+{
+  struct trampoline_chain_index *idx = &ts->chain_index;
+  struct trampoline_chain c;
+
+  if (idx->needs_sorting)
+    {
+      qsort (idx->entry, idx->n_entries, sizeof (*idx->entry),
+            xg_order_trampoline_chain);
+      idx->needs_sorting = FALSE;
+    }
+  c.target.sym = sym;
+  c.target.offset = offset;
+  return bsearch (&c, idx->entry, idx->n_entries,
+                 sizeof (struct trampoline_chain),
+                 xg_order_trampoline_chain);
+}
+
+/* Find trampoline chain in the given trampoline segment that is going
+   to the *sym + *offset.  If found, replace *sym and *offset with the
+   best jump target in that chain.  */
+static struct trampoline_chain *
+xg_find_best_eq_target (struct trampoline_seg *ts,
+                       addressT source, symbolS **sym,
+                       addressT *offset)
+{
+  struct trampoline_chain *tc = xg_get_trampoline_chain (ts, *sym, *offset);
+
+  if (tc)
+    {
+      struct trampoline_chain_entry *e = xg_get_best_chain_entry (tc, source);
+
+      *sym = e->sym;
+      *offset = e->offset;
+    }
+  return tc;
+}
+
+static void xg_add_location_to_chain (struct trampoline_chain *tc,
+                                     symbolS *sym, addressT offset)
+{
+  struct trampoline_chain_entry *e;
+
+  if (tc->n_entries == tc->n_max)
+    {
+      tc->n_max = (tc->n_max + 1) * 2;
+      tc->entry = xrealloc (tc->entry, sizeof (*tc->entry) * tc->n_max);
+    }
+  e = tc->entry + tc->n_entries;
+  e->sym = sym;
+  e->offset = offset;
+  ++tc->n_entries;
+  tc->needs_sorting = TRUE;
+}
+
+static struct trampoline_chain *
+xg_create_trampoline_chain (struct trampoline_seg *ts,
+                           symbolS *sym, addressT offset)
+{
+  struct trampoline_chain_index *idx = &ts->chain_index;
+  struct trampoline_chain *tc;
+
+  if (idx->n_entries == idx->n_max)
+    {
+      idx->n_max = (idx->n_max + 1) * 2;
+      idx->entry = xrealloc (idx->entry,
+                            sizeof (*idx->entry) * idx->n_max);
+    }
+
+  tc = idx->entry + idx->n_entries;
+  tc->target.sym = sym;
+  tc->target.offset = offset;
+  tc->entry = NULL;
+  tc->n_entries = 0;
+  tc->n_max = 0;
+  xg_add_location_to_chain (tc, sym, offset);
+
+  ++idx->n_entries;
+  idx->needs_sorting = TRUE;
+
+  return tc;
+}
+
 void dump_trampolines (void);
 
 void
@@ -9200,9 +9411,24 @@ static void xg_relax_fixups (struct trampoline_seg *ts)
   for (fx = seginfo->fix_root; fx; fx = fx->fx_next)
     {
       fixS *fixP = fx;
+      struct trampoline_chain *tc = NULL;
+
+      if (xg_is_relaxable_fixup (fixP))
+       {
+         tc = xg_find_best_eq_target (ts, fixP->fx_frag->fr_address,
+                                      &fixP->fx_addsy, &fixP->fx_offset);
+         if (!tc)
+           tc = xg_create_trampoline_chain (ts, fixP->fx_addsy,
+                                            fixP->fx_offset);
+         gas_assert (tc);
+       }
 
       while (xg_is_relaxable_fixup (fixP))
-       fixP = xg_relax_fixup (idx, fixP);
+       {
+         fixP = xg_relax_fixup (idx, fixP);
+         xg_add_location_to_chain (tc, fixP->fx_frag->fr_symbol,
+                                   fixP->fx_where);
+       }
     }
 }
 
@@ -9889,14 +10115,14 @@ init_trampoline_frag (fragS *fp)
   return growth;
 }
 
-
 static int
-add_jump_to_trampoline (fragS *tramp, fragS *origfrag)
+xg_get_single_symbol_slot (fragS *fragP)
 {
-  int i, slot = -1;
+  int i;
+  int slot = -1;
 
   for (i = 0; i < MAX_SLOTS; ++i)
-    if (origfrag->tc_frag_data.slot_symbols[i])
+    if (fragP->tc_frag_data.slot_symbols[i])
       {
        gas_assert (slot == -1);
        slot = i;
@@ -9904,10 +10130,19 @@ add_jump_to_trampoline (fragS *tramp, fragS *origfrag)
 
   gas_assert (slot >= 0 && slot < MAX_SLOTS);
 
+  return slot;
+}
+
+static fixS *
+add_jump_to_trampoline (fragS *tramp, fragS *origfrag)
+{
+  int slot = xg_get_single_symbol_slot (origfrag);
+  fixS *fixP;
+
   /* Assemble a jump to the target label in the trampoline frag.  */
-  xg_append_jump (tramp,
-                 origfrag->tc_frag_data.slot_symbols[slot],
-                 origfrag->tc_frag_data.slot_offsets[slot]);
+  fixP = xg_append_jump (tramp,
+                        origfrag->tc_frag_data.slot_symbols[slot],
+                        origfrag->tc_frag_data.slot_offsets[slot]);
 
   /* Modify the original j to point here.  */
   origfrag->tc_frag_data.slot_symbols[slot] = tramp->fr_symbol;
@@ -9923,7 +10158,7 @@ add_jump_to_trampoline (fragS *tramp, fragS *origfrag)
       xg_remove_trampoline_from_index (&ts->index, tr);
     }
 
-  return 3;
+  return fixP;
 }
 
 
@@ -10072,15 +10307,42 @@ relax_frag_immed (segT segP,
       istack.insn[istack.ninsn - 2].opcode == xtensa_j_opcode)
     {
       TInsn *jinsn = &istack.insn[istack.ninsn - 2];
+      struct trampoline_seg *ts = find_trampoline_seg (segP);
+      struct trampoline_chain *tc = NULL;
 
-      if (!xg_symbolic_immeds_fit (jinsn, segP, fragP, fragP->fr_offset, total_text_diff))
+      if (ts &&
+         !xg_symbolic_immeds_fit (jinsn, segP, fragP, fragP->fr_offset,
+                                  total_text_diff))
+       {
+         int s = xg_get_single_symbol_slot (fragP);
+         addressT offset = fragP->tc_frag_data.slot_offsets[s];
+
+         tc = xg_find_best_eq_target (ts, fragP->fr_address,
+                                      &fragP->tc_frag_data.slot_symbols[s],
+                                      &offset);
+
+         if (!tc)
+           tc = xg_create_trampoline_chain (ts,
+                                            fragP->tc_frag_data.slot_symbols[s],
+                                            offset);
+         fragP->tc_frag_data.slot_offsets[s] = offset;
+         tinsn_immed_from_frag (jinsn, fragP, s);
+       }
+
+      if (!xg_symbolic_immeds_fit (jinsn, segP, fragP, fragP->fr_offset,
+                                  total_text_diff))
        {
          fragS *tf = xg_find_best_trampoline_for_tinsn (jinsn, fragP);
 
          if (tf)
            {
-             this_text_diff += init_trampoline_frag (tf);
-             this_text_diff += add_jump_to_trampoline (tf, fragP);
+             fixS *fixP;
+
+             this_text_diff += init_trampoline_frag (tf) + 3;
+             fixP = add_jump_to_trampoline (tf, fragP);
+             xg_add_location_to_chain (tc, fixP->fx_frag->fr_symbol,
+                                       fixP->fx_where);
+             fragP->tc_frag_data.relax_seen = FALSE;
            }
          else
            {
index 1ab382700a3f597c2559c3f65c25c35b05ccf7ee..9fdd2e0dc8245b5bc7a1f879e781b034462eb47d 100644 (file)
@@ -99,6 +99,7 @@ if [istarget xtensa*-*-*] then {
     run_dump_test "weak-call"
     run_dump_test "jlong"
     run_dump_test "trampoline"
+    run_list_test "trampoline-2"
     run_dump_test "first_frag_align"
     run_dump_test "auto-litpools"
     run_dump_test "auto-litpools-first1"
diff --git a/gas/testsuite/gas/xtensa/trampoline-2.l b/gas/testsuite/gas/xtensa/trampoline-2.l
new file mode 100644 (file)
index 0000000..36a0971
--- /dev/null
@@ -0,0 +1 @@
+# No warnings or errors expected!
diff --git a/gas/testsuite/gas/xtensa/trampoline-2.s b/gas/testsuite/gas/xtensa/trampoline-2.s
new file mode 100644 (file)
index 0000000..70ec496
--- /dev/null
@@ -0,0 +1,20 @@
+       .text
+
+       .rep 4000
+       bnez    a2, .L1
+       .endr
+
+       .rep 100000
+       _nop
+       .endr
+
+.L1:
+       j       .L1
+
+       .rep 100000
+       _nop
+       .endr
+
+       .rep 4000
+       bnez    a2, .L1
+       .endr
index f9aa5d97edb223fa341242ad1c9c034de47c06a9..287075b337f050cd70a417d3001e7b090213fa45 100644 (file)
@@ -5,29 +5,28 @@
 .*: +file format .*xtensa.*
 #...
 .*0:.*j.0x1194c
-.*3:.*j.0x1194f
-.*6:.*j.0x11952
-.*9:.*j.0x11955
+.*3:.*j.0x1194c
+.*6:.*j.0x1194f
+.*9:.*j.0x11952
 #...
-.*11949:.*j.0x11958
-.*1194c:.*j.0x24a0b
+.*11949:.*j.0x11955
+.*1194c:.*j.0x24a08
 .*1194f:.*j.0x24a0b
-.*11952:.*j.0x24a0e
-.*11955:.*j.0x2d687
+.*11952:.*j.0x2d684
 #...
+.*24a08:.*j.0x24a08
 .*24a0b:.*j.0x24a0b
-.*24a0e:.*j.0x24a0e
 #...
-.*2d684:.*ret
-.*2d687:.*j.0x49404
+.*2d681:.*ret
+.*2d684:.*j.0x49401
 #...
-.*49404:.*j.0x49404
-.*49407:.*beqz.n.a2,.0x4940c
-.*49409:.*j.0x61aa2
+.*49401:.*j.0x49401
+.*49404:.*beqz.n.a2,.0x49409
+.*49406:.*j.0x61a9f
 #...
-.*61aa2:.*j.0x7a13b
+.*61a9f:.*j.0x7a138
 #...
-.*7a13b:.*j.0x927f5
+.*7a138:.*j.0x927f8
 #...
-.*927f5:.*j.0x927f5
+.*927f8:.*j.0x927f8
 #...
index 997a7f9218762f43cff1429a9ccc77773603a9e1..15113435262b5409d64beba37955346e3bfbc755 100644 (file)
@@ -25,6 +25,7 @@
        _ret
        .endr
        _nop
+       _nop
 4:
        j       4b
 
This page took 0.046219 seconds and 4 git commands to generate.