[Dive4elements-commits] [PATCH 2 of 2] Include improved DA66Parser into importer

Wald Commits scm-commit at wald.intevation.org
Wed Dec 19 13:58:34 CET 2012


# HG changeset patch
# User Felix Wolfsteller <felix.wolfsteller at intevation.de>
# Date 1355922252 -3600
# Node ID 70842db72ee49e14b36ae80edd81ce1262859da8
# Parent  35dd03e04e38c445d794296e46d66f7111c63335
Include improved DA66Parser into importer.

diff -r 35dd03e04e38 -r 70842db72ee4 flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java	Wed Dec 19 13:59:57 2012 +0100
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java	Wed Dec 19 14:04:12 2012 +0100
@@ -228,6 +228,7 @@
         parseGauges();
         parseAnnotations();
         parsePRFs();
+        parseDA66s();
         parseHYKs();
         parseWst();
         parseExtraWsts();
@@ -885,6 +886,67 @@
         });
     }
 
+
+    /** Create a DA66 Parser and parse the da66 files found. */
+    // TODO this is a copy of parsePRFs, extract interfaces (e.g. CrossSectionParser).
+    public void parseDA66s() {
+        if (Config.INSTANCE.skipDA66s()) {
+            log.info("skip parsing DA66s");
+            return;
+        }
+
+        log.info("looking for DA66 files");
+        DA66Parser parser = new DA66Parser();
+        File riverDir = wstFile
+            .getParentFile()  // Basisdaten
+            .getParentFile()  // Hydrologie
+            .getParentFile(); // <river>
+
+        parser.parseDA66s(riverDir, new DA66Parser.Callback() {
+
+            Set<HashedFile> da66s = new HashSet<HashedFile>();
+
+            @Override
+            public boolean da66Accept(File file) {
+                HashedFile hf = new HashedFile(file);
+                boolean success = da66s.add(hf);
+                return true;
+                // TODO for some reason they are all duplicates, reenable the check!
+                /*
+                if (!success) {
+                    log.warn("DA66 file '" + file + "' seems to be a duplicate.");
+                }
+                return success;
+                */
+            }
+
+            @Override
+            public void da66Parsed(DA66Parser parser) {
+                log.debug("callback from DA66 parser");
+
+                // TODO populate with real-world data
+                String  description = "dummy";//parser.getDescription();
+                Integer year        = 2012;//parser.getYear();
+                ImportTimeInterval ti = year != null
+                    ? new ImportTimeInterval(yearToDate(year))
+                    : null;
+
+                List<ImportCrossSectionLine> lines =
+                    new ArrayList<ImportCrossSectionLine>();
+
+                for (Map.Entry<Double, List<XY>> entry: parser.getData().entrySet()) {
+                    Double   km     = entry.getKey();
+                    List<XY> points = entry.getValue();
+                    lines.add(new ImportCrossSectionLine(km, points));
+                }
+
+                crossSections.add(new ImportCrossSection(
+                    ImportRiver.this, description, ti, lines));
+            }
+        });
+    }
+
+    /** Create a PRFParser and let it parse the prf files found. */
     public void parsePRFs() {
         if (Config.INSTANCE.skipPRFs()) {
             log.info("skip parsing PRFs");
@@ -988,7 +1050,7 @@
     }
 
     public void storeCrossSections() {
-        if (!Config.INSTANCE.skipPRFs()) {
+        if (!Config.INSTANCE.skipPRFs() || !Config.INSTANCE.skipDA66s()) {
             log.info("store cross sections");
             getPeer();
             for (ImportCrossSection crossSection: crossSections) {
diff -r 35dd03e04e38 -r 70842db72ee4 flys-backend/src/main/java/de/intevation/flys/importer/parsers/DA66Parser.java
--- a/flys-backend/src/main/java/de/intevation/flys/importer/parsers/DA66Parser.java	Wed Dec 19 13:59:57 2012 +0100
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/DA66Parser.java	Wed Dec 19 14:04:12 2012 +0100
@@ -28,32 +28,65 @@
     /** Private logger. */
     private static Logger logger = Logger.getLogger(DA66Parser.class);
 
-    // TODO: Most of the Point/y/z group matches are optional!
-    public static final Pattern LINE_PATTERN =
-        Pattern.compile("^([0-9 -]{2})" + // Type
+    private static String HEAD_HEAD = "00";
+    private static String HEAD_GEOM = "66"; // "Values"
+    private static String HEAD_ENDG = "88"; // Probably never used.
+
+    private static final Pattern LINE_PATTERN =
+        Pattern.compile("^([0-9 -]{2})" + // Type (00|66|88)
                         "([0-9 -]{5})" + // unset
                         "([0-9 -]{2})" + // id
                         "([0-9 -]{9})" + // station
                         "([0-9 -]{2})" + // running number
-                        "([0-9 -]{1})" + // point id
+                        "([0-9 -]{1})?" + // point id
                         /*
                         Would be great if we could express the pattern as this:
                         ([0-9 -]{1})([0-9 -JKMLMNOPQR]{7})([0-9 -]{7})+
                         */
-                        "([0-9 -JKMLMNOPQR]{7})" + // y
-                        "([0-9 -]{7})" + // z
-                        "([0-9 -]{1})" + // point id
-                        "([0-9 -JKMLMNOPQR]{7})" + // y
-                        "([0-9 -]{7})" + // z
-                        "([0-9 -]{1})" + // point id
-                        "([0-9 -JKMLMNOPQR]{7})" + // y
-                        "([0-9 -]{7})" + // z
-                        "([0-9 -]{1})" + // point id
-                        "([0-9 -JKMLMNOPQR]{7})" + // y
-                        "([0-9 -]{7})" // z
+                        "([0-9 -JKMLMNOPQR]{7})?" + // y
+                        "([0-9 -]{7})?" + // z
+                        "([0-9 -]{1})?" + // point id
+                        "([0-9 -JKMLMNOPQR]{7})?" + // y
+                        "([0-9 -]{7})?" + // z
+                        "([0-9 -]{1})?" + // point id
+                        "([0-9 -JKMLMNOPQR]{7})?" + // y
+                        "([0-9 -]{7})?" + // z
+                        "([0-9 -]{1})?" + // point id
+                        "([0-9 -JKMLMNOPQR]{7})?" + // y
+                        "([0-9 -]{7})?" // z
                         );
         //Pattern.compile("^([0-9 -]{2})");
 
+    /** Indices to match group of main regex. */
+    private static enum FIELD {
+        HEAD      ( 1),
+        UNSET     ( 2),
+        ID        ( 3),
+        STATION   ( 4),
+        RUNNR     ( 5),
+        POINT_1_ID( 6),
+        POINT_1_Y ( 7),
+        POINT_1_Z ( 8),
+        POINT_2_ID( 9),
+        POINT_2_Y (10),
+        POINT_2_Z (11),
+        POINT_3_ID(12),
+        POINT_3_Y (13),
+        POINT_3_Z (14),
+        POINT_4_ID(15),
+        POINT_4_Y (16),
+        POINT_4_Z (17);
+ 
+        private int idx;
+        FIELD(int idx) {
+            this.idx = idx;
+        }
+        int getIdx() {
+            return idx;
+        }
+    }
+
+    /** Header lines of da66 can define a type. */
     private static enum Type {
         DATE                     ( 0),
         HEKTOSTONE_LEFT          ( 1), //grm. "Standlinie"
@@ -117,15 +150,24 @@
         for (Type t: Type.values()) {
             typeMap.put(new Integer(t.getId()), t);
         }
+        // TODO populate and respect header type.
         implementedTypes = new ArrayList<Type>();
         //implementedTypes.add(..);
     }
 
+
+    /** The current line to which points are added. */
+    private List<XY> currentLine;
+
+
+    // TODO refactor, it's shared with PRFParser.
     public interface Callback {
-        boolean da60Accept(File file);
-        void    da60Parsed(DA66Parser parser);
+        boolean da66Accept(File file);
+        void    da66Parsed(DA66Parser parser);
     } // interface Parser
 
+
+    /** Data collected so far, last element will be currentLine. */
     protected Map<Double, List<XY>> data;
 
 
@@ -148,24 +190,21 @@
             : name.substring(0, index);
     }
 
-    public void reset() {
-        data.clear();
-    }
-
     public void parseDA66s(File root, final Callback callback) {
 
+        // TODO use the removeExtension/guess description and date.
         FileTools.walkTree(root, new FileTools.FileVisitor() {
             @Override
             public boolean visit(File file) {
                 if (file.isFile() && file.canRead()
                 && file.getName().toLowerCase().endsWith(".d66")
-                && (callback == null || callback.da60Accept(file))) {
+                && (callback == null || callback.da66Accept(file))) {
                     reset();
                     try {
                         parse(file);
                         logger.info("parsing done");
                         if (callback != null) {
-                            callback.da60Parsed(DA66Parser.this);
+                            callback.da66Parsed(DA66Parser.this);
                         }
                     }
                     catch (IOException ioe) {
@@ -178,30 +217,162 @@
         });
     }
 
-    // LineParser
-    @Override
-    protected void finish() {}
 
-    /** Called for each line. Try to extract info from a da66 line. */
+    /**
+     * Get the index of the last point of the current cross-section line.
+     * @return last points index, -1 if not available.
+     */
+    private int lastPointIdx() {
+        if (currentLine == null || currentLine.isEmpty()) {
+            return -1;
+        }
+        XY lastPoint = this.currentLine.get(currentLine.size()-1);
+        return lastPoint.getIndex();
+    }
+
+
+    /**
+     * Add a Point (YZ,Index) to the current cross section line.
+     * @param y The y coordinate of new point.
+     * @param z The z coordinate of new point.
+     * @param idx Ignored, the parameter of new point.
+     * @return true if the point could be added, false otherwise (e.g. not
+     *         parsable y or z values).
+     */
+    private boolean addPoint(String y, String z, String idx) {
+        if (z == null || y == null || idx == null) {
+            logger.error("Incomplete point definition");
+            return false;
+        }
+
+        Double iy;
+        Double iz;
+        try {
+            iy = Double.parseDouble(y);
+            iz = Double.parseDouble(z);
+        }
+        catch(java.lang.NumberFormatException nfe) {
+            logger.error("Could not parse Number: " + nfe.getMessage());
+            return false;
+        }
+
+        // We ignore idx, and increment instead.
+        Integer index;
+        int lastPointIdx = lastPointIdx();
+        if (lastPointIdx <= 0) {
+            index = 1;
+        } else {
+            index = lastPointIdx + 1;
+        }
+
+        currentLine.add(new XY(iy, iz, index));
+        return true;
+    }
+
+
+    /** Called before consuming first line of file. */
+    public void reset() {
+        data.clear();
+        currentLine = new ArrayList<XY>();
+    }
+
+
+    /**
+     * Called for each line. Dispatches on the two-character record head (00, 66 or 88).
+     */
     @Override
     protected void handleLine(int lineNum, String line) {
-        if (line.substring(0,2).equals("00")) {
-                logger.warn("Hit a 00");
+        String head = line.substring(0,2);
+        if (HEAD_HEAD.equals(head)) {
+                logger.debug("Hit a 00");
+                Matcher m = LINE_PATTERN.matcher(line);
+                if (m.find()) {
+                    // Actually matches!
+                    currentLine = new ArrayList<XY>();
+                    data.put(Double.parseDouble(m.group(FIELD.STATION.getIdx())),
+                        currentLine);
+                }
+                else {
+                    logger.error("HEAD line bad.");
+                }
         }
-        else if (line.substring(0,2).equals("66")) {
-            String station = line.substring(10,18);
-            logger.info(station);
+        else if (HEAD_GEOM.equals(head)) {
             Matcher m = LINE_PATTERN.matcher(line);
-            if(m.find())
-            logger.warn("Group1: " + m.group(1));
-            else
-                    logger.warn("no match in " + line);
-            //logger.warn("Hit a 66");
+            if (m.find()) {
+                //logger.info("Station: " + m.group(FIELD.STATION.getIdx()));
+                // TODO if last station differs, error and abort
+                if (m.group(FIELD.POINT_1_ID.getIdx()) != null) {
+                    // Point 1
+                    if(addPoint(
+                        m.group(FIELD.POINT_1_Y.getIdx()),
+                        m.group(FIELD.POINT_1_Z.getIdx()),
+                        m.group(FIELD.POINT_1_ID.getIdx()))) {
+                        // Point added.
+                    }
+                    else {
+                        // Problematic point.
+                        logger.error("A point could not be added");
+                    }
+                }
+                if (m.group(FIELD.POINT_2_ID.getIdx()) != null) {
+                    // Point 2
+                    if(addPoint(
+                        m.group(FIELD.POINT_2_Y.getIdx()),
+                        m.group(FIELD.POINT_2_Z.getIdx()),
+                        m.group(FIELD.POINT_2_ID.getIdx()))) {
+                        // Point added.
+                    }
+                    else {
+                        // Problematic point.
+                        logger.error("A point could not be added");
+                    }
+                }
+                if (m.group(FIELD.POINT_3_ID.getIdx()) != null) {
+                    // Point 3
+                    if(addPoint(
+                        m.group(FIELD.POINT_3_Y.getIdx()),
+                        m.group(FIELD.POINT_3_Z.getIdx()),
+                        m.group(FIELD.POINT_3_ID.getIdx()))) {
+                        // Point added.
+                    }
+                    else {
+                        // Problematic point.
+                        logger.error("A point could not be added");
+                    }
+                }
+                if (m.group(FIELD.POINT_4_ID.getIdx()) != null) {
+                    // Point 4
+                    if(addPoint(
+                        m.group(FIELD.POINT_4_Y.getIdx()),
+                        m.group(FIELD.POINT_4_Z.getIdx()),
+                        m.group(FIELD.POINT_4_ID.getIdx()))) {
+                        // Point added.
+                    }
+                    else {
+                        // Problematic point.
+                        logger.error("A point could not be added");
+                    }
+                }
+            }
+            else {
+                logger.warn("Line could not be parsed: ");
+                logger.warn(line);
+            }
         }
-        else if (line.substring(0,2).equals("88"))
-                logger.warn("Hit a 88");
-        else
-            logger.error("Do not know how to treat da66 line.");
+        else if (HEAD_ENDG.equals(head)) {
+            logger.debug("Hit a 88");
+        }
+        else {
+            logger.error("Do not know how to treat da66 line:");
+            logger.error(line);
+        }
+    }
+
+
+    /** Called when file is fully consumed. */
+    @Override
+    protected void finish() {
+        logger.info("Parsed " + data.size() + " lines");
     }
 
 
@@ -215,7 +386,7 @@
             parser.parseDA66s(new File(arg), null);
             logger.warn("Parsing a file.");
         }
-        logger.error("Stopped Parsing files.");
+        logger.error("Finished Parsing files.");
     }
 }
 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :


More information about the Dive4elements-commits mailing list