From 2d626d38309943a9b40a819909804d8a4a17beaf Mon Sep 17 00:00:00 2001 From: Matthew Khouzam Date: Wed, 2 Dec 2015 17:15:55 -0500 Subject: [PATCH] analysis: add standard deviation to the segment store statistics This patch will be useful for extracting more key metrics and flagging outlier segments. Some reminders: The standard deviation is the square root of the variance. The variance is the sum of the squared deviations from the mean divided by the cardinality of a list. To calculate the standard deviation incrementally, we keep an accumulator of the squared deviations of each latency from the running mean. This is then divided when needed by the number of elements (segments) minus one and square rooted. Also, this patch calculates an online mean with fewer rounding errors. Change-Id: Ia918f08f2351d7086bd05aac1ad645cfff13eb58 Signed-off-by: Matthew Khouzam Reviewed-on: https://git.eclipse.org/r/61824 Reviewed-by: Hudson CI Reviewed-by: Bernd Hufmann Tested-by: Bernd Hufmann --- .../statistics/SegmentStoreStatistics.java | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/analysis/org.eclipse.tracecompass.analysis.os.linux.core/src/org/eclipse/tracecompass/internal/analysis/os/linux/core/latency/statistics/SegmentStoreStatistics.java b/analysis/org.eclipse.tracecompass.analysis.os.linux.core/src/org/eclipse/tracecompass/internal/analysis/os/linux/core/latency/statistics/SegmentStoreStatistics.java index c70c3f6e24..4644dc811e 100644 --- a/analysis/org.eclipse.tracecompass.analysis.os.linux.core/src/org/eclipse/tracecompass/internal/analysis/os/linux/core/latency/statistics/SegmentStoreStatistics.java +++ b/analysis/org.eclipse.tracecompass.analysis.os.linux.core/src/org/eclipse/tracecompass/internal/analysis/os/linux/core/latency/statistics/SegmentStoreStatistics.java @@ -21,17 +21,19 @@ import org.eclipse.tracecompass.segmentstore.core.ISegment; public class SegmentStoreStatistics { private long fMin; private long fMax; - private long fSum; private long fNbSegments; + private double 
fAverage; + private double fVariance; /** * Constructor */ public SegmentStoreStatistics() { - this.fMin = Long.MAX_VALUE; - this.fMax = Long.MIN_VALUE; - this.fSum = 0; - this.fNbSegments = 0; + fMin = Long.MAX_VALUE; + fMax = Long.MIN_VALUE; + fNbSegments = 0; + fAverage = 0.0; + fVariance = 0.0; } /** @@ -67,20 +69,44 @@ public class SegmentStoreStatistics { * @return arithmetic average */ public double getAverage() { - return ((double) fSum) / fNbSegments; + return fAverage; + } + + /** + * Gets the standard deviation of the segments, uses the online algorithm + * shown here + * Wikipedia article of dec 3 2015 + * + * @return the standard deviation of the segment store, will return NaN if + * there are less than 3 elements + */ + public double getStdDev() { + return fNbSegments > 2 ? Math.sqrt(fVariance / (fNbSegments - 1)) : Double.NaN; } /** * Update the statistics based on a given segment + *

+ * This is an online algorithm and must retain a complexity of O(1) * * @param segment * the segment used for the update */ - public void update (ISegment segment) { + public void update(ISegment segment) { long value = segment.getLength(); + /* + * Min and max are trivial, as well as number of segments + */ fMin = Math.min(fMin, value); fMax = Math.max(fMax, value); - fSum += value; + fNbSegments++; + /* + * The running mean is not trivial, see proof in javadoc. + */ + double delta = value - fAverage; + fAverage += delta / fNbSegments; + fVariance += delta * (value - fAverage); } } -- 2.34.1