timing.core: add merge function to segment store
authorMatthew Khouzam <matthew.khouzam@ericsson.com>
Thu, 25 Aug 2016 19:08:11 +0000 (15:08 -0400)
committerMatthew Khouzam <matthew.khouzam@ericsson.com>
Wed, 7 Sep 2016 20:18:21 +0000 (16:18 -0400)
This allows merging statistics node. This will make statistics
work in a more streamlined way.

Note: merging introduces a slight error to standard deviation,
This is due to the pooled variance algorithm used.

Potential use-cases for this:
* Map-reduce statistics on a segment store.
* Merging trees of statistics.

Change-Id: Ie6758bdcd5df03b58dc5521bf07fa5f9693c30bf
Signed-off-by: Matthew Khouzam <matthew.khouzam@ericsson.com>
Reviewed-on: https://git.eclipse.org/r/79763
Reviewed-by: Genevieve Bastien <gbastien+lttng@versatic.net>
Tested-by: Genevieve Bastien <gbastien+lttng@versatic.net>
Reviewed-by: Hudson CI
analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/OfflineStatisticsCalculator.java
analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/SegmentStoreStatisticsTest.java
analysis/org.eclipse.tracecompass.analysis.timing.core/src/org/eclipse/tracecompass/analysis/timing/core/segmentstore/statistics/SegmentStoreStatistics.java

index 398e0a597f33458962dada94525dd97b384694a8..432eb6e4d892da209cfc6d345c8703ce2dfd4a3c 100644 (file)
@@ -12,7 +12,6 @@ package org.eclipse.tracecompass.analysis.timing.core.tests.segmentstore.statist
 import java.util.Collection;
 
 import org.eclipse.jdt.annotation.NonNull;
-import org.eclipse.tracecompass.segmentstore.core.BasicSegment;
 import org.eclipse.tracecompass.segmentstore.core.ISegment;
 
 /**
@@ -23,7 +22,7 @@ import org.eclipse.tracecompass.segmentstore.core.ISegment;
  *
  */
 public class OfflineStatisticsCalculator {
-    private final Collection<@NonNull BasicSegment> fSs;
+    private final Collection<@NonNull ISegment> fSs;
 
     /**
      * Constructor
@@ -31,7 +30,7 @@ public class OfflineStatisticsCalculator {
      * @param ss
      *            segment store, fully build
      */
-    public OfflineStatisticsCalculator(Collection<@NonNull BasicSegment> ss) {
+    public OfflineStatisticsCalculator(Collection<@NonNull ISegment> ss) {
         fSs = ss;
     }
 
@@ -75,7 +74,7 @@ public class OfflineStatisticsCalculator {
     }
 
     /**
-     * Get the standard deviation for a window.
+     * Get the standard deviation.
      *
      * @return the standard deviation
      */
@@ -92,4 +91,25 @@ public class OfflineStatisticsCalculator {
         }
         return Math.sqrt(totalVariance);
     }
+
+    /**
+     * Get the total
+     *
+     * @return the total
+     */
+    public long getTotal() {
+        long total = 0;
+        for (ISegment interval : fSs) {
+            total += interval.getLength();
+        }
+        return total;
+    }
+
+    /**
+     * Get the # of intervals
+     * @return the # of intervals
+     */
+    public int count() {
+        return fSs.size();
+    }
 }
index 893d1232486cca3f3638eb9650e7e03ebf3525d7..3be02e39194671f4390d05a6dd765161f8d3f0a1 100644 (file)
@@ -39,16 +39,10 @@ public class SegmentStoreStatisticsTest {
 
     private static final double NO_ERROR = 0.0;
     private static final double ERROR = 0.000001;
+    private static final double APPROX_ERROR = 0.0001;
 
-    private static void testOnlineVsOffline(List<@NonNull BasicSegment> fixture) {
-        SegmentStoreStatistics sss = getSegStoreStat(fixture);
-        OfflineStatisticsCalculator osc = new OfflineStatisticsCalculator(fixture);
-        assertEquals("Average", osc.getAvg(), sss.getAverage(), ERROR);
-        assertEquals("Standard Deviation", osc.getStdDev(), sss.getStdDev(), ERROR);
-        assertEquals("Min", osc.getMin(), sss.getMin());
-        assertEquals("Max", osc.getMax(), sss.getMax());
-        assertEquals("Min Segment", osc.getMin(), sss.getMinSegment().getLength());
-        assertEquals("Max Segment", osc.getMax(), sss.getMaxSegment().getLength());
+    private static void testOnlineVsOffline(List<@NonNull ISegment> fixture) {
+        validate(new OfflineStatisticsCalculator(fixture), getSegStoreStat(fixture));
     }
 
     /**
@@ -56,7 +50,7 @@ public class SegmentStoreStatisticsTest {
      */
     @Test
     public void climbTest() {
-        List<@NonNull BasicSegment> fixture = new ArrayList<>();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
         for (int i = 0; i < MEDIUM_AMOUNT_OF_SEGMENTS; i++) {
             fixture.add(createDummySegment(i, i * 2));
         }
@@ -70,7 +64,7 @@ public class SegmentStoreStatisticsTest {
         testOnlineVsOffline(fixture);
     }
 
-    private static SegmentStoreStatistics getSegStoreStat(List<@NonNull BasicSegment> fixture) {
+    private static SegmentStoreStatistics getSegStoreStat(List<@NonNull ISegment> fixture) {
         SegmentStoreStatistics sss = new SegmentStoreStatistics();
         for (ISegment seg : fixture) {
             sss.update(seg);
@@ -83,7 +77,7 @@ public class SegmentStoreStatisticsTest {
      */
     @Test
     public void decrementingTest() {
-        List<@NonNull BasicSegment> fixture = new ArrayList<>();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
         for (int i = MEDIUM_AMOUNT_OF_SEGMENTS; i >= 0; i--) {
             fixture.add(createDummySegment(i, i * 2));
         }
@@ -102,7 +96,7 @@ public class SegmentStoreStatisticsTest {
      */
     @Test
     public void smallTest() {
-        List<@NonNull BasicSegment> fixture = new ArrayList<>();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
         for (int i = 1; i >= 0; i--) {
             fixture.add(createDummySegment(i, i * 2));
         }
@@ -114,7 +108,7 @@ public class SegmentStoreStatisticsTest {
      */
     @Test
     public void largeTest() {
-        List<@NonNull BasicSegment> fixture = new ArrayList<>();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
         for (int i = 1; i <= LARGE_AMOUNT_OF_SEGMENTS; i++) {
             fixture.add(createDummySegment(i, i * 2));
         }
@@ -128,7 +122,7 @@ public class SegmentStoreStatisticsTest {
     public void noiseTest() {
         Random rnd = new Random();
         rnd.setSeed(1234);
-        List<@NonNull BasicSegment> fixture = new ArrayList<>();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
         for (int i = 1; i <= LARGE_AMOUNT_OF_SEGMENTS; i++) {
             int start = Math.abs(rnd.nextInt(100000000));
             int end = start + Math.abs(rnd.nextInt(1000000));
@@ -144,7 +138,7 @@ public class SegmentStoreStatisticsTest {
     public void gaussianNoiseTest() {
         Random rnd = new Random();
         rnd.setSeed(1234);
-        List<@NonNull BasicSegment> fixture = new ArrayList<>();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
         for (int i = 1; i <= LARGE_AMOUNT_OF_SEGMENTS; i++) {
             int start = Math.abs(rnd.nextInt(100000000));
             final int delta = Math.abs(rnd.nextInt(1000));
@@ -154,6 +148,185 @@ public class SegmentStoreStatisticsTest {
         testOnlineVsOffline(fixture);
     }
 
+    /**
+     * Test statistics nodes being merged. Two contiguous blocks.
+     */
+    @Test
+    public void mergeStatisticsNodesTest() {
+        // calculates stats for all the segments
+        SegmentStoreStatistics expected = new SegmentStoreStatistics();
+        // calculates stats for half of the segments
+        SegmentStoreStatistics a = new SegmentStoreStatistics();
+        // calculates stats for another half of the segments
+        SegmentStoreStatistics b = new SegmentStoreStatistics();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
+        for (int i = 0; i < 10; i++) {
+            ISegment seg = new BasicSegment(i, i * 2 + 2);
+            expected.update(seg);
+            a.update(seg);
+            fixture.add(seg);
+        }
+        for (int i = 0; i < 10; i++) {
+            ISegment seg = new BasicSegment(i, i * 2 + 2);
+            expected.update(seg);
+            b.update(seg);
+            fixture.add(seg);
+        }
+        a.merge(b);
+        OfflineStatisticsCalculator offlineExpected = new OfflineStatisticsCalculator(fixture);
+        // Compare the expected stats with the offline algorithm
+        validate(offlineExpected, expected);
+        // Compare the results of the merge with the expected results
+        validate(expected, a);
+    }
+
+    /**
+     * Test statistics nodes being merged. Two random blocks.
+     */
+    @Test
+    public void mergeStatisticsRandomNodesTest() {
+        // calculates stats for all the segments
+        SegmentStoreStatistics expected = new SegmentStoreStatistics();
+        // calculates stats for half of the segments, randomly
+        SegmentStoreStatistics a = new SegmentStoreStatistics();
+        // calculates stats for the other half of the segments
+        SegmentStoreStatistics b = new SegmentStoreStatistics();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
+        Random rnd = new Random();
+        rnd.setSeed(10);
+        int size = rnd.nextInt(1000);
+        int size2 = rnd.nextInt(1000);
+        for (int i = 0; i < size; i++) {
+            int start = Math.abs(rnd.nextInt(100000000));
+            final int delta = Math.abs(rnd.nextInt(1000));
+            int end = start + delta * delta;
+            ISegment seg = new BasicSegment(start, end);
+            expected.update(seg);
+            a.update(seg);
+            fixture.add(seg);
+        }
+        for (int i = 0; i < size2; i++) {
+            int start = Math.abs(rnd.nextInt(100000000));
+            final int delta = Math.abs(rnd.nextInt(1000));
+            int end = start + delta * delta;
+            ISegment seg = new BasicSegment(start, end);
+            expected.update(seg);
+            b.update(seg);
+            fixture.add(seg);
+        }
+        a.merge(b);
+        assertEquals(size + size2, a.getNbSegments());
+        OfflineStatisticsCalculator offlineExpected = new OfflineStatisticsCalculator(fixture);
+        // Compare the expected stats with the offline algorithm
+        validate(offlineExpected, expected);
+        // Compare the results of the merge with the expected results
+        validate(expected, a);
+    }
+
+    /**
+     * Test statistics nodes being merged. Two overlapping blocks.
+     */
+    @Test
+    public void mergeStatisticsOverlappingNodesTest() {
+        // calculates stats for all the segments
+        SegmentStoreStatistics expected = new SegmentStoreStatistics();
+        // calculates stats for half of the segments
+        SegmentStoreStatistics a = new SegmentStoreStatistics();
+        // calculates stats for the other half of the segments
+        SegmentStoreStatistics b = new SegmentStoreStatistics();
+        List<@NonNull ISegment> fixture = new ArrayList<>();
+        for (int i = 0; i < 100; i++) {
+            BasicSegment seg = new BasicSegment(i, i * 2 + 2);
+            expected.update(seg);
+            if ((i & 2) != 0) {
+                a.update(seg);
+            } else {
+                b.update(seg);
+            }
+            fixture.add(seg);
+        }
+        a.merge(b);
+        OfflineStatisticsCalculator offlineExpected = new OfflineStatisticsCalculator(fixture);
+        validate(offlineExpected, expected);
+        validate(expected, a);
+    }
+
+    private static @NonNull SegmentStoreStatistics fillSmallStatistics() {
+        SegmentStoreStatistics stats = new SegmentStoreStatistics();
+        for (int i = 0; i < 10; i++) {
+            BasicSegment seg = new BasicSegment(i, i * 2 + 2);
+            stats.update(seg);
+        }
+        return stats;
+    }
+
+    /**
+     * Test statistics nodes being merged. corner cases.
+     */
+    @Test
+    public void mergeStatisticsCorenerCaseNodesTest() {
+        ISegment segment = new BasicSegment(1, 5);
+
+        // Control statistics, not to be modified
+        SegmentStoreStatistics noSegments = new SegmentStoreStatistics();
+        SegmentStoreStatistics oneSegment = new SegmentStoreStatistics();
+        oneSegment.update(segment);
+
+        // The segment store statistics to test
+        SegmentStoreStatistics testStats = new SegmentStoreStatistics();
+        SegmentStoreStatistics testStats2 = new SegmentStoreStatistics();
+
+        // Test merging empty stats on a non-empty one
+        testStats.update(segment);
+        testStats.merge(testStats2);
+        validate(oneSegment, testStats);
+        validate(noSegments, testStats2);
+
+        // Test merging on an empty stats
+        testStats2.merge(testStats);
+        validate(oneSegment, testStats);
+        validate(oneSegment, testStats2);
+
+        // Fill a small segment store and add the one extra segment to it
+        SegmentStoreStatistics expected = fillSmallStatistics();
+        expected.update(segment);
+
+        // Test merging stats with only 1 segment
+        testStats = fillSmallStatistics();
+        testStats.merge(testStats2);
+        validate(oneSegment, testStats2);
+        validate(expected, testStats);
+
+        // Test merging on stats with only 1 segment
+        testStats = fillSmallStatistics();
+        testStats2.merge(testStats);
+        validate(fillSmallStatistics(), testStats);
+        validate(expected, testStats2);
+
+    }
+
+    private static void validate(SegmentStoreStatistics expected, SegmentStoreStatistics toBeTested) {
+        assertEquals("# of Segments", expected.getNbSegments(), toBeTested.getNbSegments());
+        assertEquals("Total duration", expected.getTotal(), toBeTested.getTotal(), ERROR * expected.getTotal());
+        assertEquals("Average", expected.getAverage(), toBeTested.getAverage(), ERROR * expected.getAverage());
+        assertEquals("Min", expected.getMin(), toBeTested.getMin());
+        assertEquals("Max", expected.getMax(), toBeTested.getMax());
+        assertEquals("Min Segment", expected.getMinSegment().getLength(), toBeTested.getMinSegment().getLength());
+        assertEquals("Max Segment", expected.getMaxSegment().getLength(), toBeTested.getMaxSegment().getLength());
+        assertEquals("Standard Deviation", expected.getStdDev(), toBeTested.getStdDev(), APPROX_ERROR * expected.getStdDev());
+    }
+
+    private static void validate(OfflineStatisticsCalculator osc, SegmentStoreStatistics sss) {
+        assertEquals("# of Segments", osc.count(), sss.getNbSegments());
+        assertEquals("Total duration", osc.getTotal(), sss.getTotal(), ERROR * osc.getTotal());
+        assertEquals("Average", osc.getAvg(), sss.getAverage(), ERROR * osc.getAvg());
+        assertEquals("Min", osc.getMin(), sss.getMin());
+        assertEquals("Max", osc.getMax(), sss.getMax());
+        assertEquals("Min Segment", osc.getMin(), sss.getMinSegment().getLength());
+        assertEquals("Max Segment", osc.getMax(), sss.getMaxSegment().getLength());
+        assertEquals("Standard Deviation", osc.getStdDev(), sss.getStdDev(), ERROR * osc.getStdDev());
+    }
+
     private static @NonNull BasicSegment createDummySegment(int start, int end) {
         return new BasicSegment(start, end);
     }
index f71c65198469148ac37c7d1ae8959b13362b6d9f..8b80610b995a66d660bd8ac4102e732d21c6968a 100644 (file)
@@ -24,6 +24,9 @@ public class SegmentStoreStatistics {
     private ISegment fMax;
     private long fNbSegments;
     private double fAverage;
+    /**
+     * reminder, this is the variance * nb elem, as per the online algorithm
+     */
     private double fVariance;
     private double fTotal;
 
@@ -106,6 +109,16 @@ public class SegmentStoreStatistics {
         return fNbSegments > 2 ? Math.sqrt(fVariance / (fNbSegments - 1)) : Double.NaN;
     }
 
+    /**
+     * Get total value
+     *
+     * @return total value
+     * @since 1.1
+     */
+    public double getTotal() {
+        return fTotal;
+    }
+
     /**
      * Update the statistics based on a given segment
      * <p>
@@ -135,12 +148,79 @@ public class SegmentStoreStatistics {
     }
 
     /**
-     * Get total value
+     * Merge two statistics sets. If the pools are large, there may be a slight
+     * approximation error (empirically, the error is at most 0.001 but usually
+     * around 1e-5 for the standard deviation as this uses pooled variance.
      *
-     * @return total value
+     * @param other
+     *            The other segment store statistics
      * @since 1.1
      */
-    public double getTotal() {
-        return fTotal;
+    public void merge(SegmentStoreStatistics other) {
+        if (other.fNbSegments == 0) {
+            return;
+        } else if (fNbSegments == 0) {
+            copy(other);
+        } else if (other.fNbSegments == 1) {
+            update(other.fMax);
+        } else if (fNbSegments == 1) {
+            SegmentStoreStatistics copyOther = new SegmentStoreStatistics();
+            copyOther.copy(other);
+            copyOther.update(fMax);
+            copy(copyOther);
+        } else {
+            internalMerge(other);
+        }
+    }
+
+    private void internalMerge(SegmentStoreStatistics other) {
+        /*
+         * Min and max are trivial, as well as number of segments
+         */
+        long min = fMin.getLength();
+        long max = fMax.getLength();
+        fMin = min <= other.getMin() ? fMin : other.getMinSegment();
+        fMax = max >= other.getMax() ? fMax : other.getMaxSegment();
+
+        long oldNbSeg = fNbSegments;
+        double oldAverage = fAverage;
+        long otherSegments = other.getNbSegments();
+        double otherAverage = other.getAverage();
+        fNbSegments += otherSegments;
+        fTotal += other.getTotal();
+
+        /*
+         * Average is a weighted average
+         */
+        fAverage = ((oldNbSeg * oldAverage) + (otherAverage * otherSegments)) / fNbSegments;
+
+        /*
+         * This one is a bit tricky.
+         *
+         * The variance is the sum of the deltas from a mean squared.
+         *
+         * So if we add the old mean squared back to to variance and remove the
+         * new mean, the standard deviation can be easily calculated.
+         */
+        double avg1Sq = oldAverage * oldAverage;
+        double avg2sq = otherAverage * otherAverage;
+        double avgtSq = fAverage * fAverage;
+        /*
+         * This is a tricky part, bear in mind that the set is not continuous but discrete,
+         * Therefore, we have for n elements, n-1 intervals between them.
+         * Ergo, n-1 intervals are used for divisions and multiplications.
+         */
+        double variance1 = fVariance / (oldNbSeg - 1);
+        double variance2 = other.fVariance / (otherSegments - 1);
+        fVariance = ((variance1 + avg1Sq - avgtSq) * (oldNbSeg - 1) + (variance2 + avg2sq - avgtSq) * (otherSegments - 1));
+    }
+
+    private void copy(SegmentStoreStatistics copyOther) {
+        fAverage = copyOther.fAverage;
+        fMax = copyOther.fMax;
+        fMin = copyOther.fMin;
+        fNbSegments = copyOther.fNbSegments;
+        fTotal = copyOther.fTotal;
+        fVariance = copyOther.fVariance;
     }
 }
This page took 0.030086 seconds and 5 git commands to generate.