ctf: de-duplicate index entries
author Simon Marchi <simon.marchi@efficios.com>
Thu, 31 Oct 2019 22:28:54 +0000 (18:28 -0400)
committer Simon Marchi <simon.marchi@efficios.com>
Fri, 15 Nov 2019 18:08:11 +0000 (13:08 -0500)
When trace snapshots are taken in quick succession, it's possible for
them to overlap.  Some identical packets will then be present in
multiple snapshots.

For example, the first snapshot could contain packets A, B and C while
the second snapshot could contain packets B, C, and D.

When reading those snapshots together with babeltrace, we will want to
present a coherent view of the logical trace that is spread across
multiple snapshots.  That is, we will want to avoid reading the same
packet twice.

Currently, we're not considering that case when building the in-memory
index.  Processing the two streams above will lead to these entries in
the merged index:

 - Packet A (snapshot 1)
 - Packet B (snapshot 1)
 - Packet B (snapshot 2)
 - Packet C (snapshot 1)
 - Packet C (snapshot 2)
 - Packet D (snapshot 2)

This patch makes it so we only keep a single reference to each packet.
If a packet is duplicated, it doesn't matter to which snapshot the
reference points, since all copies of the packet are identical.  So a
possible outcome of the situation above, with this patch applied, could
be:

 - Packet A (snapshot 1)
 - Packet B (snapshot 1)
 - Packet C (snapshot 1)
 - Packet D (snapshot 2)

So far the index is not used for much, so I don't think that this patch
will have any visible behavior change.  However, an upcoming patch will
build on this to make it so we only read each packet once.

Change-Id: I00962d593b5078253043029902f853e5c3fa0dc5
Signed-off-by: Simon Marchi <simon.marchi@efficios.com>
src/plugins/ctf/fs-src/fs.c

index 79dd49e791934bf3441f33fe95ba215596d0ddd0..64fff672705f68ee4e2304c5739ab392a8721745 100644 (file)
@@ -700,24 +700,69 @@ void ds_file_group_insert_ds_file_info_sorted(
        array_insert(ds_file_group->ds_file_infos, ds_file_info, i);
 }
 
+static
+bool ds_index_entries_equal(
+       const struct ctf_fs_ds_index_entry *left,
+       const struct ctf_fs_ds_index_entry *right)
+{
+       if (left->packet_size != right->packet_size) {
+               return false;
+       }
+
+       if (left->timestamp_begin != right->timestamp_begin) {
+               return false;
+       }
+
+       if (left->timestamp_end != right->timestamp_end) {
+               return false;
+       }
+
+       if (left->packet_seq_num != right->packet_seq_num) {
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * Insert `entry` into `index`, without duplication.
+ *
+ * The entry is inserted only if there isn't an identical entry already.
+ *
+ * In any case, the ownership of `entry` is transferred to this function.  So if
+ * the entry is not inserted, it is freed.
+ */
+
 static
 void ds_index_insert_ds_index_entry_sorted(
        struct ctf_fs_ds_index *index,
        struct ctf_fs_ds_index_entry *entry)
 {
        guint i;
+       struct ctf_fs_ds_index_entry *other_entry;
 
        /* Find the spot where to insert this index entry. */
        for (i = 0; i < index->entries->len; i++) {
-               struct ctf_fs_ds_index_entry *other_entry =
-                       g_ptr_array_index(index->entries, i);
+               other_entry = g_ptr_array_index(index->entries, i);
 
-               if (entry->timestamp_begin_ns < other_entry->timestamp_begin_ns) {
+               if (entry->timestamp_begin_ns <= other_entry->timestamp_begin_ns) {
                        break;
                }
        }
 
-       array_insert(index->entries, entry, i);
+       /*
+        * Insert the entry only if a duplicate doesn't already exist.
+        *
+        * There can be duplicate packets if reading multiple overlapping
+        * snapshots of the same trace.  We then want the index to contain
+        * a reference to only one copy of that packet.
+        */
+       if (i == index->entries->len ||
+                       !ds_index_entries_equal(entry, other_entry)) {
+               array_insert(index->entries, entry, i);
+       } else {
+               g_free(entry);
+       }
 }
 
 static
@@ -731,10 +776,9 @@ void merge_ctf_fs_ds_indexes(struct ctf_fs_ds_index *dest, struct ctf_fs_ds_inde
 
                /*
                * Ownership of the ctf_fs_ds_index_entry is transferred to
-               * dest.
+               * ds_index_insert_ds_index_entry_sorted.
                */
                g_ptr_array_index(src->entries, i) = NULL;
-
                ds_index_insert_ds_index_entry_sorted(dest, entry);
        }
 }
This page took 0.027358 seconds and 4 git commands to generate.