[Dive4elements-commits] [PATCH] MINFO: Allow two methods for outlier test in SQ relation
Wald Commits
scm-commit at wald.intevation.org
Fri Jan 11 13:58:15 CET 2013
# HG changeset patch
# User Raimund Renkert <rrenkert at intevation.de>
# Date 1357909058 -3600
# Node ID a7d080347ac3e2d82af94f78b1518f711ce602d8
# Parent c0d6391bec6fb7a993f3a5460ca5b29add81a58c
MINFO: Allow two methods for outlier test in SQ relation.
* Methods can be switched as option in conf.xml.
* Methods:
- Find outliers via multiples of the standard deviation.
- Grubbs (used in Fix-Analysis)
diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/doc/conf/conf.xml
--- a/flys-artifacts/doc/conf/conf.xml Wed Jan 09 13:17:09 2013 +0100
+++ b/flys-artifacts/doc/conf/conf.xml Fri Jan 11 13:57:38 2013 +0100
@@ -403,5 +403,9 @@
<zoom-scale river="Elbe" range="100" radius="5" />
<zoom-scale river="Elbe" range="500" radius="10" />
</zoom-scales>
+ <minfo-sq>
+ <!-- valid names: grubbs or std-dev -->
+ <outlier-method name="grubbs"/>
+ </minfo-sq>
</options>
</artifact-database>
diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/GrubbsOutlier.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/GrubbsOutlier.java Fri Jan 11 13:57:38 2013 +0100
@@ -0,0 +1,113 @@
+package de.intevation.flys.artifacts.math;
+
+import java.util.List;
+
+import org.apache.commons.math.MathException;
+
+import org.apache.commons.math.distribution.TDistributionImpl;
+
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+
+import org.apache.log4j.Logger;
+
+public class GrubbsOutlier
+{
+ public static final double EPSILON = 1e-5;
+
+ public static final double DEFAULT_ALPHA = 0.05;
+
+ private static Logger log = Logger.getLogger(GrubbsOutlier.class);
+
+ protected GrubbsOutlier() {
+ }
+
+ public static Integer findOutlier(List<Double> values) {
+ return findOutlier(values, DEFAULT_ALPHA, null);
+ }
+
+ public static Integer findOutlier(
+ List<Double> values,
+ double alpha,
+ double[] stdDevResult
+ ) {
+ boolean debug = log.isDebugEnabled();
+
+ if (debug) {
+ log.debug("outliers significance: " + alpha);
+ }
+
+ alpha = 1d - alpha;
+
+ int N = values.size();
+
+ if (debug) {
+ log.debug("Values to check: " + N);
+ }
+
+ if (N < 3) {
+ return null;
+ }
+
+ Mean mean = new Mean();
+ StandardDeviation std = new StandardDeviation();
+
+ for (Double value: values) {
+ double v = value.doubleValue();
+ mean.increment(v);
+ std .increment(v);
+ }
+
+ double m = mean.getResult();
+ double s = std.getResult();
+
+ if (debug) {
+ log.debug("mean: " + m);
+ log.debug("std dev: " + s);
+ }
+
+ double maxZ = -Double.MAX_VALUE;
+ int iv = -1;
+ for (int i = N-1; i >= 0; --i) {
+ double v = values.get(i).doubleValue();
+ double z = Math.abs(v - m);
+ if (z > maxZ) {
+ maxZ = z;
+ iv = i;
+ }
+ }
+
+ if (Math.abs(s) < EPSILON) {
+ return null;
+ }
+
+ maxZ /= s;
+
+ TDistributionImpl tdist = new TDistributionImpl(N-2);
+
+ double t;
+
+ try {
+ t = tdist.inverseCumulativeProbability(alpha/(N+N));
+ }
+ catch (MathException me) {
+ log.error(me);
+ return null;
+ }
+
+ t *= t;
+
+ double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
+
+ if (debug) {
+ log.debug("max: " + maxZ + " crit: " + za);
+ }
+ if (stdDevResult != null) {
+ stdDevResult[0] = std.getResult();
+ }
+ return maxZ > za
+ ? Integer.valueOf(iv)
+ : null;
+ }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java Wed Jan 09 13:17:09 2013 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-package de.intevation.flys.artifacts.math;
-
-import java.util.List;
-
-import org.apache.commons.math.MathException;
-
-import org.apache.commons.math.distribution.TDistributionImpl;
-
-import org.apache.commons.math.stat.descriptive.moment.Mean;
-import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
-
-import org.apache.log4j.Logger;
-
-public class Outlier
-{
- public static final double EPSILON = 1e-5;
-
- public static final double DEFAULT_ALPHA = 0.05;
-
- private static Logger log = Logger.getLogger(Outlier.class);
-
- protected Outlier() {
- }
-
- public static Integer findOutlier(List<Double> values) {
- return findOutlier(values, DEFAULT_ALPHA);
- }
-
- public static Integer findOutlier(List<Double> values, double alpha) {
- boolean debug = log.isDebugEnabled();
-
- if (debug) {
- log.debug("outliers significance: " + alpha);
- }
-
- alpha = 1d - alpha;
-
- int N = values.size();
-
- if (debug) {
- log.debug("Values to check: " + N);
- }
-
- if (N < 3) {
- return null;
- }
-
- Mean mean = new Mean();
- StandardDeviation std = new StandardDeviation();
-
- for (Double value: values) {
- double v = value.doubleValue();
- mean.increment(v);
- std .increment(v);
- }
-
- double m = mean.getResult();
- double s = std.getResult();
-
- if (debug) {
- log.debug("mean: " + m);
- log.debug("std dev: " + s);
- }
-
- double maxZ = -Double.MAX_VALUE;
- int iv = -1;
- for (int i = N-1; i >= 0; --i) {
- double v = values.get(i).doubleValue();
- double z = Math.abs(v - m);
- if (z > maxZ) {
- maxZ = z;
- iv = i;
- }
- }
-
- if (Math.abs(s) < EPSILON) {
- return null;
- }
-
- maxZ /= s;
-
- TDistributionImpl tdist = new TDistributionImpl(N-2);
-
- double t;
-
- try {
- t = tdist.inverseCumulativeProbability(alpha/(N+N));
- }
- catch (MathException me) {
- log.error(me);
- return null;
- }
-
- t *= t;
-
- double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
-
- if (debug) {
- log.debug("max: " + maxZ + " crit: " + za);
- }
-
- return maxZ > za
- ? Integer.valueOf(iv)
- : null;
- }
-}
-// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/StdDevOutlier.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/StdDevOutlier.java Fri Jan 11 13:57:38 2013 +0100
@@ -0,0 +1,78 @@
+package de.intevation.flys.artifacts.math;
+
+import java.util.List;
+
+import org.apache.commons.math.MathException;
+
+import org.apache.commons.math.distribution.TDistributionImpl;
+
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+
+import org.apache.log4j.Logger;
+
+import de.intevation.flys.artifacts.model.sq.SQ;
+
+public class StdDevOutlier
+{
+ public static final double EPSILON = 1e-5;
+
+ public static final double DEFAULT_FACTOR = 3;
+
+ private static Logger log = Logger.getLogger(StdDevOutlier.class);
+
+ protected StdDevOutlier() {
+ }
+
+ public static Integer findOutlier(List<Double> values) {
+ return findOutlier(values, DEFAULT_FACTOR, null);
+ }
+
+ public static Integer findOutlier(List<Double> values, double factor, double[] stdDevResult) {
+ boolean debug = log.isDebugEnabled();
+
+ if (debug) {
+ log.debug("factor for std dev: " + factor);
+ }
+
+ int N = values.size();
+
+ if (debug) {
+ log.debug("Values to check: " + N);
+ }
+
+ if (values.size() < 3) {
+ return null;
+ }
+
+ StandardDeviation stdDev = new StandardDeviation();
+
+ double maxValue = -Double.MAX_VALUE;
+ int maxIndex = -1;
+ int ndx = 0;
+ for (int i = values.size()-1; i >= 0; --i) {
+ double value = Math.abs(values.get(i));
+ stdDev.increment(value);
+ if (value > maxValue) {
+ maxValue = value;
+ maxIndex = ndx;
+ }
+ ++ndx;
+ }
+
+ double sd = stdDev.getResult();
+
+ double accepted = factor * sd;
+
+ if (debug) {
+ log.debug("std dev: " + stdDev);
+ log.debug("accepted: " + accepted);
+ log.debug("max value: " + maxValue);
+ }
+ if (stdDevResult != null) {
+ stdDevResult[0] = sd;
+ }
+ return maxValue > accepted ? maxIndex : null;
+ }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java Wed Jan 09 13:17:09 2013 +0100
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java Fri Jan 11 13:57:38 2013 +0100
@@ -1,23 +1,18 @@
package de.intevation.flys.artifacts.model.fixings;
-import de.intevation.flys.artifacts.math.Outlier;
-
-import de.intevation.flys.artifacts.math.fitting.Function;
-
import gnu.trove.TDoubleArrayList;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math.MathException;
+import org.apache.commons.math.optimization.fitting.CurveFitter;
+import org.apache.commons.math.optimization.general.LevenbergMarquardtOptimizer;
+import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+import org.apache.log4j.Logger;
-import org.apache.commons.math.optimization.fitting.CurveFitter;
-
-import org.apache.commons.math.optimization.general.LevenbergMarquardtOptimizer;
-
-import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
-
-import org.apache.log4j.Logger;
+import de.intevation.flys.artifacts.math.GrubbsOutlier;
+import de.intevation.flys.artifacts.math.fitting.Function;
public class Fitting
{
@@ -206,7 +201,7 @@
inputs.add(Double.valueOf(ys.getQuick(i) - y));
}
- Integer outlier = Outlier.findOutlier(inputs);
+ Integer outlier = GrubbsOutlier.findOutlier(inputs);
if (outlier == null) {
break;
diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java Wed Jan 09 13:17:09 2013 +0100
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java Fri Jan 11 13:57:38 2013 +0100
@@ -9,10 +9,23 @@
import org.apache.log4j.Logger;
+import de.intevation.artifacts.GlobalContext;
+import de.intevation.artifacts.common.utils.Config;
+import de.intevation.flys.artifacts.context.FLYSContext;
+import de.intevation.flys.artifacts.math.GrubbsOutlier;
+import de.intevation.flys.artifacts.math.StdDevOutlier;
+
public class Outlier
{
private static Logger log = Logger.getLogger(Outlier.class);
+ private static final String OUTLIER_METHOD =
+ "/artifact-database/options/minfo-sq/outlier-method/@name";
+
+ private static final String GRUBBS = "grubbs";
+
+ private static final String STD_DEV = "std-dev";
+
public interface Callback {
void initialize(List<SQ> sqs) throws MathException;
@@ -38,46 +51,39 @@
if (debug) {
log.debug("stdDevFactor: " + stdDevFactor);
}
-
+ String method = Config.getStringXPath(OUTLIER_METHOD);
+ log.debug("method: " + method);
+ if (method == null) {
+ method = "std-dev";
+ }
List<SQ> data = new ArrayList<SQ>(sqs);
while (data.size() > 2) {
callback.initialize(data);
- StandardDeviation stdDev = new StandardDeviation();
-
- double maxValue = -Double.MAX_VALUE;
- int maxIndex = -1;
-
- for (int i = data.size()-1; i >= 0; --i) {
- double value = Math.abs(callback.eval(data.get(i)));
- stdDev.increment(value);
- if (value > maxValue) {
- maxValue = value;
- maxIndex = i;
- }
+ List<Double> values = new ArrayList<Double>();
+ for (SQ sq: data) {
+ values.add(callback.eval(sq));
}
- double sd = stdDev.getResult();
-
- double accepted = stdDevFactor * sd;
-
- if (debug) {
- log.debug("std dev: " + stdDev);
- log.debug("accepted: " + accepted);
- log.debug("max value: " + maxValue);
+ Integer ndx = null;
+ double[] stdDev = new double[1];
+ if (method.equals(GRUBBS)) {
+ ndx = GrubbsOutlier.findOutlier(values, stdDevFactor/100, stdDev);
+ }
+ else {
+ ndx = StdDevOutlier.findOutlier(values, stdDevFactor, stdDev);
+ }
+ if (ndx == null) {
+ callback.iterationFinished(stdDev[0], null, data);
+ break;
}
- SQ outlier = maxValue > accepted
- ? data.remove(maxIndex)
- : null;
-
- callback.iterationFinished(sd, outlier, data);
-
- if (outlier == null) {
- break;
- }
+ SQ outlier = data.remove((int)ndx);
+ log.debug("stdDev: " + stdDev[0]);
+ log.debug("removed " + ndx + "; S: " + outlier.getS() + " Q: " + outlier.getQ());
+ callback.iterationFinished(stdDev[0], outlier, data);
}
}
}
More information about the Dive4elements-commits
mailing list