datastore: Add API for single queries
authorGeneviève Bastien <gbastien+lttng@versatic.net>
Wed, 15 Feb 2017 17:23:20 +0000 (12:23 -0500)
committerGenevieve Bastien <gbastien+lttng@versatic.net>
Thu, 2 Mar 2017 20:02:12 +0000 (15:02 -0500)
When what we want is only one interval, there is no need to get them
all, we can just fast return when the one interval is found.

This change alones reduces the performance impact of the datastore by
~2x for single queries in the case of smallish traces.

This patch also adds a binary search to find the first interval to
look at.

Change-Id: I77d0ac9818f272f7d160f92666445ce2d8b3200a
Signed-off-by: Geneviève Bastien <gbastien+lttng@versatic.net>
Reviewed-on: https://git.eclipse.org/r/91753
Reviewed-by: Hudson CI
Reviewed-by: Loic Prieur-Drevon <loic.prieur.drevon@ericsson.com>
statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/AbstractHistoryTree.java
statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/HTNode.java
statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHTNode.java
statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHistoryTree.java

index 19e1228798f3acfa4ae3998255d40961f49c9f9e..598aa0ed7cdab3902790268cb0b7f79b7a93ebcb 100644 (file)
@@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.function.Predicate;
 
 import org.eclipse.jdt.annotation.NonNull;
+import org.eclipse.jdt.annotation.Nullable;
 import org.eclipse.tracecompass.common.core.NonNullUtils;
 import org.eclipse.tracecompass.internal.datastore.core.historytree.HtIo;
 import org.eclipse.tracecompass.internal.provisional.datastore.core.condition.RangeCondition;
@@ -908,6 +909,36 @@ public abstract class AbstractHistoryTree<E extends IHTInterval, N extends HTNod
         return Iterables.concat(intervalsOfNodes);
     }
 
+    @Override
+    public @Nullable E getMatchingInterval(RangeCondition<Long> timeCondition,
+            Predicate<E> extraPredicate) {
+
+        /* Queue a stack of nodes containing nodes intersecting t */
+        Deque<Integer> queue = new LinkedList<>();
+        /* We start by reading the information in the root node */
+        queue.add(getRootNode().getSequenceNumber());
+
+        /* Then we follow the down in the relevant children until we find the interval */
+        try {
+            while (!queue.isEmpty()) {
+                int sequenceNumber = queue.pop();
+                HTNode<E> currentNode = readNode(sequenceNumber);
+
+                @Nullable E interval = currentNode.getMatchingInterval(timeCondition, extraPredicate);
+                if (interval != null) {
+                    return interval;
+                }
+
+                if (currentNode.getNodeType() == HTNode.NodeType.CORE) {
+                    /* Here we add the relevant children nodes for BFS */
+                    queue.addAll(currentNode.selectNextChildren(timeCondition));
+                }
+            }
+        } catch (ClosedChannelException e) {
+        }
+        return null;
+    }
+
     @Override
     public String toString() {
         return "Information on the current tree:\n\n" + "Blocksize: " //$NON-NLS-1$ //$NON-NLS-2$
index d26a33c139bf6fa5fb02b72bb75dae7dda2aa654..2d6054f9d2cb9710fff58d0117c071fe6e834241 100644 (file)
@@ -37,7 +37,6 @@ import org.eclipse.tracecompass.internal.provisional.datastore.core.serializatio
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
 
 /**
  * The base class for all the types of nodes that go in the History Tree.
@@ -579,6 +578,9 @@ public class HTNode<E extends IHTInterval> implements IHTNode<E> {
      * Sub-classes may override this to change or specify the interval
      * comparator.
      *
+     * NOTE: sub-classes who override this may also need to override the
+     * {@link #getStartIndexFor(RangeCondition, Predicate)}.
+     *
      * @return The way intervals are to be sorted in this node
      */
     protected Comparator<E> getIntervalComparator() {
@@ -681,32 +683,114 @@ public class HTNode<E extends IHTInterval> implements IHTNode<E> {
     public Iterable<E> getMatchingIntervals(RangeCondition<Long> timeCondition,
             Predicate<E> extraPredicate) {
 
-        // TODO Use getIntervalComparator() to restrict the dataset further
         // TODO Benchmark using/returning streams instead of iterables
 
         if (isOnDisk()) {
-            @NonNull Iterable<E> ret = fIntervals;
-            ret = Iterables.filter(ret, interval -> timeCondition.intersects(interval.getStart(), interval.getEnd()));
-            ret = Iterables.filter(ret, interval -> extraPredicate.test(interval));
-            return ret;
+            return doGetMatchingIntervals(timeCondition, extraPredicate);
         }
 
         takeReadLock();
         try {
-        return fIntervals.stream()
-                .filter(interval -> timeCondition.intersects(interval.getStart(), interval.getEnd()))
-                .filter(extraPredicate)
-                    /*
-                     * Because this class works with read locks, we can't
-                     * return a lazy stream unfortunately. Room for improvement?
-                     */
-                .collect(Collectors.toList());
+            return doGetMatchingIntervals(timeCondition, extraPredicate);
 
         } finally {
             releaseReadLock();
         }
     }
 
+    @Override
+    public @Nullable E getMatchingInterval(RangeCondition<Long> timeCondition, Predicate<E> extraPredicate) {
+        if (isOnDisk()) {
+            return doGetMatchingInterval(timeCondition, extraPredicate);
+        }
+
+        takeReadLock();
+        try {
+            return doGetMatchingInterval(timeCondition, extraPredicate);
+
+        } finally {
+            releaseReadLock();
+        }
+    }
+
+    private Iterable<E> doGetMatchingIntervals(RangeCondition<Long> timeCondition,
+            Predicate<E> extraPredicate) {
+        List<E> list = new ArrayList<>();
+        for (int i = getStartIndexFor(timeCondition, extraPredicate); i < fIntervals.size(); i++) {
+            E curInterval = fIntervals.get(i);
+            if (timeCondition.intersects(curInterval.getStart(), curInterval.getEnd()) &&
+                    extraPredicate.test(curInterval)) {
+                list.add(curInterval);
+            }
+        }
+        return list;
+    }
+
+    private @Nullable E doGetMatchingInterval(RangeCondition<Long> timeCondition,
+            Predicate<E> extraPredicate) {
+        for (int i = getStartIndexFor(timeCondition, extraPredicate); i < fIntervals.size(); i++) {
+            E curInterval = fIntervals.get(i);
+            if (timeCondition.intersects(curInterval.getStart(), curInterval.getEnd()) &&
+                    extraPredicate.test(curInterval)) {
+                return curInterval;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Get the start index. It will skip all the intervals that end before the
+     * beginning of the time range.
+     *
+     * NOTE: This method goes with the comparator. If a tree overrides the
+     * default comparator (that sorts first by end time), then it will need to
+     * override this method. Here, the binary search is re-implemented because
+     * the interval type is generic but a tree for a concrete type could use a
+     * binary search instead.
+     *
+     * @param timeCondition
+     *            The time-based RangeCondition
+     * @param extraPredicate
+     *            Extra predicate to run on the elements.
+     * @return The index of the first interval greater than or equal to the
+     *         conditions in parameter
+     */
+    protected int getStartIndexFor(RangeCondition<Long> timeCondition, Predicate<E> extraPredicate) {
+        if (fIntervals.isEmpty()) {
+            return 0;
+        }
+        /*
+         * Implement our own binary search since the intervals are generic, we
+         * cannot make a dummy interval and find its position
+         */
+        int low = 0;
+        int high = fIntervals.size() - 1;
+        long target = timeCondition.min();
+
+        while (low <= high) {
+            // Divide the sum by 2
+            int mid = (low + high) >>> 1;
+            E midVal = fIntervals.get(mid);
+            long end = midVal.getEnd();
+
+            if (end < target) {
+                low = mid + 1;
+            } else if (end > target) {
+                high = mid - 1;
+            } else {
+                // key found, rewind to see where it starts
+                while (end == target && mid > 0) {
+                    mid = mid - 1;
+                    midVal = fIntervals.get(mid);
+                    end = midVal.getEnd();
+                }
+                // if end == target, then mid is 0
+                return (end == target ? mid : mid + 1); // key found
+            }
+        }
+        return low; // key not found
+    }
+
     @Override
     public void closeThisNode(long endtime) {
         fRwl.writeLock().lock();
@@ -985,4 +1069,5 @@ public class HTNode<E extends IHTInterval> implements IHTNode<E> {
     public void debugPrintIntervals(PrintStream writer) {
         getIntervals().forEach(writer::println);
     }
+
 }
index 2106382ebab2c17964987a5e8a031ef0719e8c52..c15a7f120db163f9de3c282f6669f632bf2b09e4 100644 (file)
@@ -15,6 +15,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.function.Predicate;
 
+import org.eclipse.jdt.annotation.Nullable;
 import org.eclipse.tracecompass.internal.provisional.datastore.core.condition.RangeCondition;
 import org.eclipse.tracecompass.internal.provisional.datastore.core.exceptions.RangeException;
 import org.eclipse.tracecompass.internal.provisional.datastore.core.interval.IHTInterval;
@@ -168,6 +169,20 @@ public interface IHTNode<E extends IHTInterval> {
     Iterable<E> getMatchingIntervals(RangeCondition<Long> timeCondition,
             Predicate<E> extraPredicate);
 
+    /**
+     * Retrieve an interval inside this node that matches the given conditions.
+     *
+     * @param timeCondition
+     *            The time-based RangeCondition
+     * @param extraPredicate
+     *            Extra predicate to run on the elements. Only intervals also
+     *            matching this predicate will be returned.
+     * @return An interval matching the conditions or <code>null</code> if no
+     *         interval was found
+     */
+    @Nullable E getMatchingInterval(RangeCondition<Long> timeCondition,
+            Predicate<E> extraPredicate);
+
     /**
      * Return the total header size of this node (will depend on the node type).
      *
index 76614371d33e890b272e24d81dc0ffdb08ca4403..3e2d8e6d69900dade81b663ce9ee627e61f9a3e7 100644 (file)
@@ -15,6 +15,7 @@ import java.io.IOException;
 import java.nio.channels.ClosedChannelException;
 import java.util.function.Predicate;
 
+import org.eclipse.jdt.annotation.Nullable;
 import org.eclipse.tracecompass.internal.provisional.datastore.core.condition.RangeCondition;
 import org.eclipse.tracecompass.internal.provisional.datastore.core.exceptions.RangeException;
 import org.eclipse.tracecompass.internal.provisional.datastore.core.interval.IHTInterval;
@@ -150,6 +151,24 @@ public interface IHistoryTree<E extends IHTInterval> {
     Iterable<E> getMatchingIntervals(RangeCondition<Long> timeCondition,
             Predicate<E> extraPredicate);
 
+    /**
+     * Query the tree to retrieve an interval matching the given conditions.
+     *
+     * @param timeCondition
+     *            Time-based RangeCondition, can represent a single timestamp, a
+     *            series of punctual timestamps, or a time range.
+     * @param extraPredicate
+     *            Extra check to run on the elements to determine if they should
+     *            be returned or not. This will be checked at the node level, so
+     *            if it's known in advance it might be advantageous to pass it
+     *            here rather than checking it yourself on the returned
+     *            Iterable.
+     * @return An interval matching the given conditions, or <code>null</code>
+     *         if no interval was found.
+     */
+    @Nullable E getMatchingInterval(RangeCondition<Long> timeCondition,
+            Predicate<E> extraPredicate);
+
     // ------------------------------------------------------------------------
     // Attribute-tree reading/writing operations
     //
This page took 0.032185 seconds and 5 git commands to generate.