[PATCH] issue748: Change StandardDeviation implementation to what BFG calls Standard Deviation

Wald Commits scm-commit at wald.intevation.org
Tue Jul 30 17:32:44 CEST 2013


# HG changeset patch
# User Andre Heinecke <aheinecke at intevation.de>
# Date 1375198348 -7200
# Node ID b265cd6cfda5fa03a9396ac1a951b3a26c3b8218
# Parent  5ce1b675517463946271e42c2f615f7de52bdd71
issue748: Change StandardDeviation implementation to what BFG calls Standard Deviation

    Which is actually a calculation that removes outliers based on
    Standard Error

    Developed and analyzed together with Tom.

diff -r 5ce1b6755174 -r b265cd6cfda5 artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java
--- a/artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java	Tue Jul 30 16:24:59 2013 +0200
+++ b/artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java	Tue Jul 30 17:32:28 2013 +0200
@@ -10,9 +10,13 @@
 
 import java.util.List;
 
-import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+import org.apache.log4j.Logger;
 
-import org.apache.log4j.Logger;
+/* XXX:
+ * Warning: This class is called StdDevOutlier because it calculates the
+ * Standard Deviation method for outlier removal as the BFG calls it.
+ * But the actual calculation used to remove the outliers calculates
+ * the Standard Error and not the Standard Deviation! */
 
 public class StdDevOutlier
 {
@@ -30,12 +34,12 @@
     public static Integer findOutlier(
         List<Double> values,
         double       factor,
-        double []    stdDevResult
+        double []    stdErrResult
     ) {
         boolean debug = log.isDebugEnabled();
 
         if (debug) {
-            log.debug("factor for std dev: " + factor);
+            log.debug("factor for std dev test (that calculates std err): " + factor);
         }
 
         int N = values.size();
@@ -48,31 +52,34 @@
             return null;
         }
 
-        StandardDeviation stdDev = new StandardDeviation();
-
         double maxValue = -Double.MAX_VALUE;
         int    maxIndex = -1;
+
+        double squareSumResiduals = 0;
+        for (Double db: values) {
+            squareSumResiduals += Math.pow(db, 2);
+        }
+
+        double stdErr = Math.sqrt(squareSumResiduals / (N - 2));
+
+        double accepted = factor * stdErr;
+
         for (int i = N-1; i >= 0; --i) {
             double value = Math.abs(values.get(i));
-            stdDev.increment(value);
             if (value > maxValue) {
                 maxValue = value;
                 maxIndex = i;
             }
         }
 
-        double sd = stdDev.getResult();
-
-        double accepted = factor * sd;
-
         if (debug) {
-            log.debug("std dev: " + stdDev);
+            log.debug("std err: " + stdErr);
             log.debug("accepted: " + accepted);
             log.debug("max value: " + maxValue);
         }
 
-        if (stdDevResult != null) {
-            stdDevResult[0] = sd;
+        if (stdErrResult != null) {
+            stdErrResult[0] = stdErr;
         }
 
         return maxValue > accepted ? maxIndex : null;


More information about the Dive4elements-commits mailing list