From: Matthew Khouzam Date: Thu, 25 Aug 2016 19:08:11 +0000 (-0400) Subject: timing.core: add merge function to segment store X-Git-Url: http://git.efficios.com/?a=commitdiff_plain;h=381e1541e35d26195bc8672879e152cae09aa69e;p=deliverable%2Ftracecompass.git timing.core: add merge function to segment store This allows merging statistics node. This will make statistics work in a more streamlined way. Note: merging introduces a slight error to standard deviation, This is due to the pooled variance algorithm used. Potential use-cases for this: * Map-reduce statistics on a segment store. * Merging trees of statistics. Change-Id: Ie6758bdcd5df03b58dc5521bf07fa5f9693c30bf Signed-off-by: Matthew Khouzam Reviewed-on: https://git.eclipse.org/r/79763 Reviewed-by: Genevieve Bastien Tested-by: Genevieve Bastien Reviewed-by: Hudson CI --- diff --git a/analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/OfflineStatisticsCalculator.java b/analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/OfflineStatisticsCalculator.java index 398e0a597f..432eb6e4d8 100644 --- a/analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/OfflineStatisticsCalculator.java +++ b/analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/OfflineStatisticsCalculator.java @@ -12,7 +12,6 @@ package org.eclipse.tracecompass.analysis.timing.core.tests.segmentstore.statist import java.util.Collection; import org.eclipse.jdt.annotation.NonNull; -import org.eclipse.tracecompass.segmentstore.core.BasicSegment; import org.eclipse.tracecompass.segmentstore.core.ISegment; /** @@ -23,7 +22,7 @@ import org.eclipse.tracecompass.segmentstore.core.ISegment; * */ public class OfflineStatisticsCalculator { - private final Collection<@NonNull BasicSegment> fSs; + private final Collection<@NonNull ISegment> fSs; /** * Constructor @@ -31,7 +30,7 @@ public class OfflineStatisticsCalculator { * @param ss * segment store, fully build */ - public OfflineStatisticsCalculator(Collection<@NonNull BasicSegment> ss) { + public OfflineStatisticsCalculator(Collection<@NonNull ISegment> ss) { fSs = ss; } @@ -75,7 +74,7 @@ public class OfflineStatisticsCalculator { } /** - * Get the standard deviation for a window. + * Get the standard deviation. * * @return the standard deviation */ @@ -92,4 +91,25 @@ public class OfflineStatisticsCalculator { } return Math.sqrt(totalVariance); } + + /** + * Get the total + * + * @return the total + */ + public long getTotal() { + long total = 0; + for (ISegment interval : fSs) { + total += interval.getLength(); + } + return total; + } + + /** + * Get the # of intervals + * @return the # of intervals + */ + public int count() { + return fSs.size(); + } } diff --git a/analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/SegmentStoreStatisticsTest.java b/analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/SegmentStoreStatisticsTest.java index 893d123248..3be02e3919 100644 --- a/analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/SegmentStoreStatisticsTest.java +++ b/analysis/org.eclipse.tracecompass.analysis.timing.core.tests/src/org/eclipse/tracecompass/analysis/timing/core/tests/segmentstore/statistics/SegmentStoreStatisticsTest.java @@ -39,16 +39,10 @@ public class SegmentStoreStatisticsTest { private static final double NO_ERROR = 0.0; private static final double ERROR = 0.000001; + private static final double APPROX_ERROR = 0.0001; - private static void testOnlineVsOffline(List<@NonNull BasicSegment> fixture) { - SegmentStoreStatistics sss = getSegStoreStat(fixture); - OfflineStatisticsCalculator osc = new OfflineStatisticsCalculator(fixture); - assertEquals("Average", osc.getAvg(), sss.getAverage(), ERROR); - assertEquals("Standard Deviation", osc.getStdDev(), sss.getStdDev(), ERROR); - assertEquals("Min", osc.getMin(), sss.getMin()); - assertEquals("Max", osc.getMax(), sss.getMax()); - assertEquals("Min Segment", osc.getMin(), sss.getMinSegment().getLength()); - assertEquals("Max Segment", osc.getMax(), sss.getMaxSegment().getLength()); + private static void testOnlineVsOffline(List<@NonNull ISegment> fixture) { + validate(new OfflineStatisticsCalculator(fixture), getSegStoreStat(fixture)); } /** @@ -56,7 +50,7 @@ public class SegmentStoreStatisticsTest { */ @Test public void climbTest() { - List<@NonNull BasicSegment> fixture = new ArrayList<>(); + List<@NonNull ISegment> fixture = new ArrayList<>(); for (int i = 0; i < MEDIUM_AMOUNT_OF_SEGMENTS; i++) { fixture.add(createDummySegment(i, i * 2)); } @@ -70,7 +64,7 @@ public class SegmentStoreStatisticsTest { testOnlineVsOffline(fixture); } - private static SegmentStoreStatistics getSegStoreStat(List<@NonNull BasicSegment> fixture) { + private static SegmentStoreStatistics getSegStoreStat(List<@NonNull ISegment> fixture) { SegmentStoreStatistics sss = new SegmentStoreStatistics(); for (ISegment seg : fixture) { sss.update(seg); @@ -83,7 +77,7 @@ public class SegmentStoreStatisticsTest { */ @Test public void decrementingTest() { - List<@NonNull BasicSegment> fixture = new ArrayList<>(); + List<@NonNull ISegment> fixture = new ArrayList<>(); for (int i = MEDIUM_AMOUNT_OF_SEGMENTS; i >= 0; i--) { fixture.add(createDummySegment(i, i * 2)); } @@ -102,7 +96,7 @@ public class SegmentStoreStatisticsTest { */ @Test public void smallTest() { - List<@NonNull BasicSegment> fixture = new ArrayList<>(); + List<@NonNull ISegment> fixture = new ArrayList<>(); for (int i = 1; i >= 0; i--) { fixture.add(createDummySegment(i, i * 2)); } @@ -114,7 +108,7 @@ public class SegmentStoreStatisticsTest { */ @Test public void largeTest() { - List<@NonNull BasicSegment> fixture = new ArrayList<>(); + List<@NonNull ISegment> fixture = new ArrayList<>(); for (int i = 1; i <= LARGE_AMOUNT_OF_SEGMENTS; i++) { fixture.add(createDummySegment(i, i * 2)); } @@ -128,7 +122,7 @@ public class SegmentStoreStatisticsTest { public void noiseTest() { Random rnd = new Random(); rnd.setSeed(1234); - List<@NonNull BasicSegment> fixture = new ArrayList<>(); + List<@NonNull ISegment> fixture = new ArrayList<>(); for (int i = 1; i <= LARGE_AMOUNT_OF_SEGMENTS; i++) { int start = Math.abs(rnd.nextInt(100000000)); int end = start + Math.abs(rnd.nextInt(1000000)); @@ -144,7 +138,7 @@ public class SegmentStoreStatisticsTest { public void gaussianNoiseTest() { Random rnd = new Random(); rnd.setSeed(1234); - List<@NonNull BasicSegment> fixture = new ArrayList<>(); + List<@NonNull ISegment> fixture = new ArrayList<>(); for (int i = 1; i <= LARGE_AMOUNT_OF_SEGMENTS; i++) { int start = Math.abs(rnd.nextInt(100000000)); final int delta = Math.abs(rnd.nextInt(1000)); @@ -154,6 +148,185 @@ public class SegmentStoreStatisticsTest { testOnlineVsOffline(fixture); } + /** + * Test statistics nodes being merged. Two contiguous blocks. + */ + @Test + public void mergeStatisticsNodesTest() { + // calculates stats for all the segments + SegmentStoreStatistics expected = new SegmentStoreStatistics(); + // calculates stats for half of the segments + SegmentStoreStatistics a = new SegmentStoreStatistics(); + // calculates stats for another half of the segments + SegmentStoreStatistics b = new SegmentStoreStatistics(); + List<@NonNull ISegment> fixture = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + ISegment seg = new BasicSegment(i, i * 2 + 2); + expected.update(seg); + a.update(seg); + fixture.add(seg); + } + for (int i = 0; i < 10; i++) { + ISegment seg = new BasicSegment(i, i * 2 + 2); + expected.update(seg); + b.update(seg); + fixture.add(seg); + } + a.merge(b); + OfflineStatisticsCalculator offlineExpected = new OfflineStatisticsCalculator(fixture); + // Compare the expected stats with the offline algorithm + validate(offlineExpected, expected); + // Compare the results of the merge with the expected results + validate(expected, a); + } + + /** + * Test statistics nodes being merged. Two random blocks. + */ + @Test + public void mergeStatisticsRandomNodesTest() { + // calculates stats for all the segments + SegmentStoreStatistics expected = new SegmentStoreStatistics(); + // calculates stats for half of the segments, randomly + SegmentStoreStatistics a = new SegmentStoreStatistics(); + // calculates stats for the other half of the segments + SegmentStoreStatistics b = new SegmentStoreStatistics(); + List<@NonNull ISegment> fixture = new ArrayList<>(); + Random rnd = new Random(); + rnd.setSeed(10); + int size = rnd.nextInt(1000); + int size2 = rnd.nextInt(1000); + for (int i = 0; i < size; i++) { + int start = Math.abs(rnd.nextInt(100000000)); + final int delta = Math.abs(rnd.nextInt(1000)); + int end = start + delta * delta; + ISegment seg = new BasicSegment(start, end); + expected.update(seg); + a.update(seg); + fixture.add(seg); + } + for (int i = 0; i < size2; i++) { + int start = Math.abs(rnd.nextInt(100000000)); + final int delta = Math.abs(rnd.nextInt(1000)); + int end = start + delta * delta; + ISegment seg = new BasicSegment(start, end); + expected.update(seg); + b.update(seg); + fixture.add(seg); + } + a.merge(b); + assertEquals(size + size2, a.getNbSegments()); + OfflineStatisticsCalculator offlineExpected = new OfflineStatisticsCalculator(fixture); + // Compare the expected stats with the offline algorithm + validate(offlineExpected, expected); + // Compare the results of the merge with the expected results + validate(expected, a); + } + + /** + * Test statistics nodes being merged. Two overlapping blocks. + */ + @Test + public void mergeStatisticsOverlappingNodesTest() { + // calculates stats for all the segments + SegmentStoreStatistics expected = new SegmentStoreStatistics(); + // calculates stats for half of the segments + SegmentStoreStatistics a = new SegmentStoreStatistics(); + // calculates stats for the other half of the segments + SegmentStoreStatistics b = new SegmentStoreStatistics(); + List<@NonNull ISegment> fixture = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + BasicSegment seg = new BasicSegment(i, i * 2 + 2); + expected.update(seg); + if ((i & 2) != 0) { + a.update(seg); + } else { + b.update(seg); + } + fixture.add(seg); + } + a.merge(b); + OfflineStatisticsCalculator offlineExpected = new OfflineStatisticsCalculator(fixture); + validate(offlineExpected, expected); + validate(expected, a); + } + + private static @NonNull SegmentStoreStatistics fillSmallStatistics() { + SegmentStoreStatistics stats = new SegmentStoreStatistics(); + for (int i = 0; i < 10; i++) { + BasicSegment seg = new BasicSegment(i, i * 2 + 2); + stats.update(seg); + } + return stats; + } + + /** + * Test statistics nodes being merged. corner cases. + */ + @Test + public void mergeStatisticsCorenerCaseNodesTest() { + ISegment segment = new BasicSegment(1, 5); + + // Control statistics, not to be modified + SegmentStoreStatistics noSegments = new SegmentStoreStatistics(); + SegmentStoreStatistics oneSegment = new SegmentStoreStatistics(); + oneSegment.update(segment); + + // The segment store statistics to test + SegmentStoreStatistics testStats = new SegmentStoreStatistics(); + SegmentStoreStatistics testStats2 = new SegmentStoreStatistics(); + + // Test merging empty stats on a non-empty one + testStats.update(segment); + testStats.merge(testStats2); + validate(oneSegment, testStats); + validate(noSegments, testStats2); + + // Test merging on an empty stats + testStats2.merge(testStats); + validate(oneSegment, testStats); + validate(oneSegment, testStats2); + + // Fill a small segment store and add the one extra segment to it + SegmentStoreStatistics expected = fillSmallStatistics(); + expected.update(segment); + + // Test merging stats with only 1 segment + testStats = fillSmallStatistics(); + testStats.merge(testStats2); + validate(oneSegment, testStats2); + validate(expected, testStats); + + // Test merging on stats with only 1 segment + testStats = fillSmallStatistics(); + testStats2.merge(testStats); + validate(fillSmallStatistics(), testStats); + validate(expected, testStats2); + + } + + private static void validate(SegmentStoreStatistics expected, SegmentStoreStatistics toBeTested) { + assertEquals("# of Segments", expected.getNbSegments(), toBeTested.getNbSegments()); + assertEquals("Total duration", expected.getTotal(), toBeTested.getTotal(), ERROR * expected.getTotal()); + assertEquals("Average", expected.getAverage(), toBeTested.getAverage(), ERROR * expected.getAverage()); + assertEquals("Min", expected.getMin(), toBeTested.getMin()); + assertEquals("Max", expected.getMax(), toBeTested.getMax()); + assertEquals("Min Segment", expected.getMinSegment().getLength(), toBeTested.getMinSegment().getLength()); + assertEquals("Max Segment", expected.getMaxSegment().getLength(), toBeTested.getMaxSegment().getLength()); + assertEquals("Standard Deviation", expected.getStdDev(), toBeTested.getStdDev(), APPROX_ERROR * expected.getStdDev()); + } + + private static void validate(OfflineStatisticsCalculator osc, SegmentStoreStatistics sss) { + assertEquals("# of Segments", osc.count(), sss.getNbSegments()); + assertEquals("Total duration", osc.getTotal(), sss.getTotal(), ERROR * osc.getTotal()); + assertEquals("Average", osc.getAvg(), sss.getAverage(), ERROR * osc.getAvg()); + assertEquals("Min", osc.getMin(), sss.getMin()); + assertEquals("Max", osc.getMax(), sss.getMax()); + assertEquals("Min Segment", osc.getMin(), sss.getMinSegment().getLength()); + assertEquals("Max Segment", osc.getMax(), sss.getMaxSegment().getLength()); + assertEquals("Standard Deviation", osc.getStdDev(), sss.getStdDev(), ERROR * osc.getStdDev()); + } + private static @NonNull BasicSegment createDummySegment(int start, int end) { return new BasicSegment(start, end); } diff --git a/analysis/org.eclipse.tracecompass.analysis.timing.core/src/org/eclipse/tracecompass/analysis/timing/core/segmentstore/statistics/SegmentStoreStatistics.java b/analysis/org.eclipse.tracecompass.analysis.timing.core/src/org/eclipse/tracecompass/analysis/timing/core/segmentstore/statistics/SegmentStoreStatistics.java index f71c651984..8b80610b99 100644 --- a/analysis/org.eclipse.tracecompass.analysis.timing.core/src/org/eclipse/tracecompass/analysis/timing/core/segmentstore/statistics/SegmentStoreStatistics.java +++ b/analysis/org.eclipse.tracecompass.analysis.timing.core/src/org/eclipse/tracecompass/analysis/timing/core/segmentstore/statistics/SegmentStoreStatistics.java @@ -24,6 +24,9 @@ public class SegmentStoreStatistics { private ISegment fMax; private long fNbSegments; private double fAverage; + /** + * reminder, this is the variance * nb elem, as per the online algorithm + */ private double fVariance; private double fTotal; @@ -106,6 +109,16 @@ public class SegmentStoreStatistics { return fNbSegments > 2 ? Math.sqrt(fVariance / (fNbSegments - 1)) : Double.NaN; } + /** + * Get total value + * + * @return total value + * @since 1.1 + */ + public double getTotal() { + return fTotal; + } + /** * Update the statistics based on a given segment *

@@ -135,12 +148,79 @@ public class SegmentStoreStatistics { } /** - * Get total value + * Merge two statistics sets. If the pools are large, there may be a slight + * approximation error (empirically, the error is at most 0.001 but usually + * around 1e-5 for the standard deviation as this uses pooled variance. * - * @return total value + * @param other + * The other segment store statistics * @since 1.1 */ - public double getTotal() { - return fTotal; + public void merge(SegmentStoreStatistics other) { + if (other.fNbSegments == 0) { + return; + } else if (fNbSegments == 0) { + copy(other); + } else if (other.fNbSegments == 1) { + update(other.fMax); + } else if (fNbSegments == 1) { + SegmentStoreStatistics copyOther = new SegmentStoreStatistics(); + copyOther.copy(other); + copyOther.update(fMax); + copy(copyOther); + } else { + internalMerge(other); + } + } + + private void internalMerge(SegmentStoreStatistics other) { + /* + * Min and max are trivial, as well as number of segments + */ + long min = fMin.getLength(); + long max = fMax.getLength(); + fMin = min <= other.getMin() ? fMin : other.getMinSegment(); + fMax = max >= other.getMax() ? fMax : other.getMaxSegment(); + + long oldNbSeg = fNbSegments; + double oldAverage = fAverage; + long otherSegments = other.getNbSegments(); + double otherAverage = other.getAverage(); + fNbSegments += otherSegments; + fTotal += other.getTotal(); + + /* + * Average is a weighted average + */ + fAverage = ((oldNbSeg * oldAverage) + (otherAverage * otherSegments)) / fNbSegments; + + /* + * This one is a bit tricky. + * + * The variance is the sum of the deltas from a mean squared. + * + * So if we add the old mean squared back to to variance and remove the + * new mean, the standard deviation can be easily calculated. + */ + double avg1Sq = oldAverage * oldAverage; + double avg2sq = otherAverage * otherAverage; + double avgtSq = fAverage * fAverage; + /* + * This is a tricky part, bear in mind that the set is not continuous but discrete, + * Therefore, we have for n elements, n-1 intervals between them. + * Ergo, n-1 intervals are used for divisions and multiplications. + */ + double variance1 = fVariance / (oldNbSeg - 1); + double variance2 = other.fVariance / (otherSegments - 1); + fVariance = ((variance1 + avg1Sq - avgtSq) * (oldNbSeg - 1) + (variance2 + avg2sq - avgtSq) * (otherSegments - 1)); + } + + private void copy(SegmentStoreStatistics copyOther) { + fAverage = copyOther.fAverage; + fMax = copyOther.fMax; + fMin = copyOther.fMin; + fNbSegments = copyOther.fNbSegments; + fTotal = copyOther.fTotal; + fVariance = copyOther.fVariance; } }