ctf: index: accommodate lttng-crash timestamp quirk
authorFrancis Deslauriers <francis.deslauriers@efficios.com>
Fri, 14 Jun 2019 19:56:50 +0000 (15:56 -0400)
committerPhilippe Proulx <eeppeliteloop@gmail.com>
Fri, 16 Aug 2019 15:35:50 +0000 (11:35 -0400)
This commit is to accommodate the indexing of non-compliant CTF traces
produced by LTTng tracers and extracted by the lttng-crash utility.

What makes those traces non-compliant ?
=======================================
The lttng-crash is used to extract a trace from the tracing buffers from
non-volatile memory following a system crash. Because a system crash can
happen at any time, it's possible that some tracing packets might not
have been closed properly. It's possible to observe this by looking at
the `timestamp_end` field of the packets. They might be left to their
initial value of 0 when their `timestamp_begin` are often larger than 0.

This has the higher level effect that affected packets have a
`timestamp_end` smaller than their `timestamp_begin`, as if their end
time was before the beginning time.

How this commit fixes the index for the affected traces
=====================================================
For each index entry, if the entry's `timestamp_end` is 0 and the
`timestamp_begin` is not 0:
 - If it's the stream file's last packet: set the packet index entry's end
   time to the packet's last event's time, if any, or to the packet's
   beginning time otherwise.
 - If it's not the stream file's last packet: set the packet index
   entry's end time to the next packet's beginning time.

Affected versions
=================
All current and future lttng-ust and lttng-modules versions are affect
because this really cannot be fixed by the tracers because lttng-crash
only tries to make the most of a critical situation (i.e. a system
crash).

Signed-off-by: Francis Deslauriers <francis.deslauriers@efficios.com>
Change-Id: Ib6ff9683b676983d8691f514f4d6c3e5c8c7e071
Reviewed-on: https://review.lttng.org/c/babeltrace/+/1436
Tested-by: jenkins <jenkins@lttng.org>
Reviewed-by: Philippe Proulx <eeppeliteloop@gmail.com>
src/plugins/ctf/fs-src/fs.c

index 901bda0a8354d3e0753c56377150e519822fbdec..6c362a32faf398801fea801544721065ebfae7c9 100644 (file)
@@ -1949,6 +1949,96 @@ end:
        return ret;
 }
 
+/*
+ * When using the lttng-crash feature it's likely that the last packets of each
+ * stream have their timestamp_end set to zero. This is caused by the fact that
+ * the tracer crashed and was not able to properly close the packets.
+ *
+ * To fix up this erroneous data we do the following:
+ * For each index entry, if the entry's timestamp_end is 0 and the
+ * timestamp_begin is not 0:
+ *  - If it's the stream file's last packet: set the packet index entry's end
+ *    time to the packet's last event's time, if any, or to the packet's
+ *    beginning time otherwise.
+ *  - If it's not the stream file's last packet: set the packet index
+ *    entry's end time to the next packet's beginning time.
+ *
+ * Affected versions:
+ * - All current and future lttng-ust and lttng-modules versions.
+ */
+static
+int fix_index_lttng_crash_quirk(struct ctf_fs_trace *trace)
+{
+       int ret = 0;
+       guint ds_file_group_idx;
+       GPtrArray *ds_file_groups = trace->ds_file_groups;
+       bt_logging_level log_level = trace->log_level;
+
+       for (ds_file_group_idx = 0; ds_file_group_idx < ds_file_groups->len;
+                       ds_file_group_idx++) {
+               guint entry_idx;
+               struct ctf_clock_class *default_cc;
+               struct ctf_fs_ds_index_entry *last_entry;
+               struct ctf_fs_ds_index *index;
+
+               struct ctf_fs_ds_file_group *ds_file_group =
+                       g_ptr_array_index(ds_file_groups, ds_file_group_idx);
+
+               BT_ASSERT(ds_file_group);
+               index = ds_file_group->index;
+
+               BT_ASSERT(ds_file_group->sc->default_clock_class);
+               default_cc = ds_file_group->sc->default_clock_class;
+
+               BT_ASSERT(index);
+               BT_ASSERT(index->entries);
+               BT_ASSERT(index->entries->len > 0);
+
+               last_entry = g_ptr_array_index(index->entries,
+                       index->entries->len - 1);
+               BT_ASSERT(last_entry);
+
+
+               /* 1. Fix the last entry first. */
+               if (last_entry->timestamp_end == 0 &&
+                               last_entry->timestamp_begin != 0) {
+                       /*
+                        * Decode packet to read the timestamp of the
+                        * last event of the stream file.
+                        */
+                       ret = decode_packet_last_event_timestamp(trace,
+                               default_cc, last_entry,
+                               &last_entry->timestamp_end,
+                               &last_entry->timestamp_end_ns);
+                       if (ret) {
+                               BT_LOGE_STR("Failed to decode last event's clock snapshot");
+                               goto end;
+                       }
+               }
+
+               /* Iterate over all entries but the last one. */
+               for (entry_idx = 0; entry_idx < index->entries->len - 1;
+                               entry_idx++) {
+                       struct ctf_fs_ds_index_entry *curr_entry, *next_entry;
+                       curr_entry = g_ptr_array_index(index->entries, entry_idx);
+                       next_entry = g_ptr_array_index(index->entries, entry_idx + 1);
+
+                       if (curr_entry->timestamp_end == 0 &&
+                                       curr_entry->timestamp_begin != 0) {
+                               /*
+                                * 2. Set the current index entry `end` timestamp to
+                                * the next index entry `begin` timestamp.
+                                */
+                               curr_entry->timestamp_end = next_entry->timestamp_begin;
+                               curr_entry->timestamp_end_ns = next_entry->timestamp_begin_ns;
+                       }
+               }
+       }
+
+end:
+       return ret;
+}
+
 /*
  * Extract the tracer information necessary to compare versions.
  * Returns 0 on success, and -1 if the extraction is not successful because the
@@ -2086,6 +2176,22 @@ bool is_tracer_affected_by_barectf_event_before_packet_bug(
        return is_affected;
 }
 
+static
+bool is_tracer_affected_by_lttng_crash_quirk(
+               struct tracer_info *curr_tracer_info)
+{
+       bool is_affected = false;
+
+       /* All LTTng tracer may be affected by this lttng crash quirk. */
+       if (strcmp(curr_tracer_info->name, "lttng-ust") == 0) {
+               is_affected = true;
+       } else if (strcmp(curr_tracer_info->name, "lttng-modules") == 0) {
+               is_affected = true;
+       }
+
+       return is_affected;
+}
+
 /*
  * Looks for trace produced by known buggy tracers and fix up the index
  * produced earlier.
@@ -2141,6 +2247,15 @@ int fix_packet_index_tracer_bugs(struct ctf_fs_component *ctf_fs)
                                goto end;
                        }
                }
+
+               if (is_tracer_affected_by_lttng_crash_quirk(
+                               &current_tracer_info)) {
+                       ret = fix_index_lttng_crash_quirk(trace);
+                       if (ret) {
+                               BT_LOGE_STR("Failed to fix lttng-crash timestamp quirks.");
+                               goto end;
+                       }
+               }
        }
 end:
        return ret;
This page took 0.027446 seconds and 4 git commands to generate.