[PATCH 09 of 12] Importer (s/u-info) extensions: daily discharge: detecting, logging and skipping lines with missing date or q, or duplicate date

Wald Commits scm-commit at wald.intevation.org
Mon Mar 23 16:38:43 CET 2020


# HG changeset patch
# User mschaefer
# Date 1584974020 -3600
#      Mon Mar 23 15:33:40 2020 +0100
# Node ID d86c7cb68b41c2d694d894cd23d346ebf8d6eaa6
# Parent  a79881a892c91333ba32ae89407d77f2e5847013
Importer (s/u-info) extensions: daily discharge: detecting, logging and skipping lines with missing date or q, or duplicate date,
detecting wrong column titles and cancelling the import,
specific error message if gauge not found

diff -r a79881a892c9 -r d86c7cb68b41 backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeDayLineImport.java
--- a/backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeDayLineImport.java	Mon Mar 23 15:26:50 2020 +0100
+++ b/backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeDayLineImport.java	Mon Mar 23 15:33:40 2020 +0100
@@ -46,7 +46,7 @@
 
     @Override
     protected DailyDischargeValue queryValueItem(final Session session, final DailyDischarge parent) {
-        final Query query = session.createQuery("FROM DailyDischargeValue WHERE (DailyDischarge=:parent) AND (day=:day)");
+        final Query query = session.createQuery("FROM DailyDischargeValue WHERE (dailyDischarge=:parent) AND (day=:day)");
         query.setParameter("parent", parent);
         query.setParameter("day", this.day);
         final List rows = query.list();
diff -r a79881a892c9 -r d86c7cb68b41 backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeSeriesImport.java
--- a/backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeSeriesImport.java	Mon Mar 23 15:26:50 2020 +0100
+++ b/backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeSeriesImport.java	Mon Mar 23 15:33:40 2020 +0100
@@ -79,7 +79,7 @@
     }
 
     @Override
-    public List<DailyDischarge> querySeriesItem(final Session session, final River river) {
+    public List<DailyDischarge> querySeriesItem(final Session session, final River river, final boolean doQueryParent) {
         final Query query = session.createQuery("FROM DailyDischarge WHERE gauge=:gauge");
         query.setParameter("gauge", this.gauge);
         return query.list();
diff -r a79881a892c9 -r d86c7cb68b41 backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java
--- a/backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java	Mon Mar 23 15:26:50 2020 +0100
+++ b/backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java	Mon Mar 23 15:33:40 2020 +0100
@@ -11,7 +11,6 @@
 package org.dive4elements.river.importer.sinfo.parsers;
 
 import java.io.File;
-import java.io.FilenameFilter;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
@@ -42,21 +41,31 @@
 
     private static final Logger log = Logger.getLogger(DailyDischargeParser.class);
 
-    static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss.csv", Pattern.CASE_INSENSITIVE);
+    static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss\\.csv", Pattern.CASE_INSENSITIVE);
 
     private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE);
 
-    private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE);
+    // private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*",
+    // Pattern.CASE_INSENSITIVE);
 
-    private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*Datum\\s*;\\s*Q.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_BETREIBER = Pattern.compile("^#\\s*Betreiber:.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_PARAMETER = Pattern.compile("^#\\s*Parameter-Name:.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_ZEITREIHE = Pattern.compile("^#\\s*# Zeitreihe.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_REIHEBEGINN = Pattern.compile("^#\\s*Beginn der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_REIHEENDE = Pattern.compile("^#\\s*Ende der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);
+
+    private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*(Datum)\\s*;\\s*(Q[^;]*)", Pattern.CASE_INSENSITIVE);
 
     private static final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy");
 
+    private final List<Date> dates;
+
 
     /***** CONSTRUCTORS *****/
 
     public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
         super(importPath, rootRelativePath, river);
+        this.dates = new ArrayList<>();
     }
 
 
@@ -80,13 +89,7 @@
     public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
         final List<DailyDischargeParser> parsers = new ArrayList<>();
         if (importDir.exists()) {
-            final File[] files = importDir.listFiles(new FilenameFilter() {
-                @Override
-                public boolean accept(final File dir, final String name) {
-                    return IMPORT_FILENAME.matcher(name).matches();
-                }
-            });
-            for (final File file : files)
+            for (final File file : listFiles(importDir, IMPORT_FILENAME))
                 parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river));
         }
         return parsers;
@@ -109,21 +112,37 @@
             day = dateFormat.parse(values[0]);
         }
         catch (final Exception e) {
-            logError("Invalid date in line " + this.in.getLineNumber());
+            logLineWarning("Invalid date");
             return null;
         }
-        if (parseDoubleWithNull(values[1]) == null) {
-            logError("Invalid discharge value in line " + this.in.getLineNumber());
+        final Number q = parseDoubleCheckNull(values, 1);
+        if ((q == null) || Double.isNaN(q.doubleValue())) {
+            logLineWarning(INVALID_VALUE_ERROR_FORMAT, "discharge");
             return null;
         }
-        return new DailyDischargeDayLineImport(day, Double.valueOf(parseDoubleWithNull(values[1]).doubleValue()));
+        if (this.dates.contains(day)) {
+            logLineWarning("Duplicate date");
+            return null;
+        }
+        this.dates.add(day);
+        return new DailyDischargeDayLineImport(day, q.doubleValue());
     }
 
     @Override
     protected boolean handleMetaOther() {
         if (handleMetaGaugeName())
             return true;
-        else if (handleMetaGaugeNumber())
+        // else if (handleMetaGaugeNumber())
+        // return true;
+        else if (META_BETREIBER.matcher(this.currentLine).matches())
+            return true;
+        else if (META_PARAMETER.matcher(this.currentLine).matches())
+            return true;
+        else if (META_ZEITREIHE.matcher(this.currentLine).matches())
+            return true;
+        else if (META_REIHEBEGINN.matcher(this.currentLine).matches())
+            return true;
+        else if (META_REIHEENDE.matcher(this.currentLine).matches())
             return true;
         else
             return false;
@@ -139,30 +158,50 @@
         return false;
     }
 
-    private boolean handleMetaGaugeNumber() {
-        final Matcher m = META_GAUGENUMBER.matcher(this.currentLine);
-        if (m.matches()) {
-            this.metaPatternsMatched.add(META_GAUGENUMBER);
-            this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1)));
-            return true;
-        }
-        return false;
-    }
+    // private boolean handleMetaGaugeNumber() {
+    // final Matcher m = META_GAUGENUMBER.matcher(this.currentLine);
+    // if (m.matches()) {
+    // this.metaPatternsMatched.add(META_GAUGENUMBER);
+    // this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1)));
+    // return true;
+    // }
+    // return false;
+    // }
 
     @Override
     protected boolean handleMetaColumnTitles() {
-        if (!META_COLUMNTITLES.matcher(this.currentLine).matches())
+        final Matcher m = META_COLUMNTITLES.matcher(this.currentLine);
+        if (!m.matches()) {
             return false;
+        }
         this.metaPatternsMatched.add(META_COLUMNTITLES);
         this.columnTitles.clear();
-        final String[] titles = this.currentLine.split(SEPARATOR_CHAR, 0);
-        for (int i = 0; i <= titles.length - 1; i++)
-            this.columnTitles.add(titles[i].trim());
+        this.columnTitles.add(m.group(1));
+        this.columnTitles.add(m.group(2));
+        return true;
+    }
+
+    /**
+     * Check meta data after all meta lines (#) have been read
+     */
+    @Override
+    protected boolean checkMetaData() {
+        if (!super.checkRiverExists())
+            return false;
         this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName()));
         if (this.seriesHeader.getGauge() == null) {
-            logError("Gauge not found, file skipped");
+            logError("Gauge not found (%s)", this.seriesHeader.getGaugeName());
             this.headerParsingState = ParsingState.STOP;
+            return false;
+        }
+        if (super.checkMetaData() == false)
+            return false;
+        if (this.columnTitles.size() <= 1) {
+            logError("No valid column title line (Datum, Q) found");
+            this.headerParsingState = ParsingState.STOP;
+            return false;
         }
         return true;
     }
+
 }


More information about the Dive4Elements-commits mailing list