[PATCH] Fixing Analysis: Significantly (8-10x) improved the performance of the generation of the overview data. The data is now prefetched in batches

Wald Commits scm-commit at wald.intevation.org
Sat Aug 17 20:09:54 CEST 2013


# HG changeset patch
# User Sascha L. Teichmann <teichmann at intevation.de>
# Date 1376762931 -7200
# Node ID b0c88ac9f63bbbcfbadcc65178fe1aa32d92238b
# Parent  b87b20c71407fdb8b8f142b14e5aae5993d69930
Fixing Analysis: Significantly (8-10x) improved the performance of the generation of the overview data. The data is now prefetched in batches.

diff -r b87b20c71407 -r b0c88ac9f63b artifacts/src/main/java/org/dive4elements/river/artifacts/model/FixingsOverview.java
--- a/artifacts/src/main/java/org/dive4elements/river/artifacts/model/FixingsOverview.java	Sat Aug 17 12:17:42 2013 +0200
+++ b/artifacts/src/main/java/org/dive4elements/river/artifacts/model/FixingsOverview.java	Sat Aug 17 20:08:51 2013 +0200
@@ -28,8 +28,10 @@
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 
+import org.dive4elements.river.utils.BatchLoader;
 
-/** Generate Fixings Table chart. */
+
+/** Generate Fixings Table overview data structure to be stored in cache. */
 public class FixingsOverview
 implements   Serializable
 {
@@ -56,39 +58,154 @@
         "WHERE" +
         "    river_id = :river_id AND kind = 2";
 
-    /** All columns from given wst. */
-    public static final String SQL_FIXING_COLUMNS =
-        "SELECT" +
-        "    wc.id         AS wst_column_id," +
-        "    ti.start_time AS start_time," +
-        "    wc.name       AS name " +
-        "FROM wst_columns wc" +
-        "    JOIN time_intervals ti ON wc.time_interval_id = ti.id " +
-        "WHERE" +
-        "    wc.wst_id = :wst_id " +
-        "ORDER BY position";
+    public static final String SQL_FIXING_COLUMNS_BATCH =
+        "SELECT " +
+            "wc.wst_id     AS wst_id," +
+            "wc.id         AS wst_column_id," +
+            "ti.start_time AS start_time," +
+            "wc.name       AS name " +
+        "FROM wst_columns wc " +
+            "JOIN time_intervals ti ON wc.time_interval_id = ti.id " +
+        "WHERE " +
+            "wc.wst_id IN ($IDS) " +
+        "ORDER BY wc.wst_id, position";
 
-    public static final String SQL_FIXING_COLUMN_Q_RANGES =
-        "SELECT" +
-        "    wqr.q AS q," +
-        "    r.a   AS start_km," +
-        "    r.b   AS stop_km " +
-        "FROM wst_column_q_ranges wcqr" +
-        "    JOIN wst_q_ranges wqr ON wcqr.wst_q_range_id = wqr.id" +
-        "    JOIN ranges       r   ON wqr.range_id        = r.id " +
-        "WHERE" +
-        "    wcqr.wst_column_id = :column_id " +
-        "ORDER BY r.a";
+    public static final String SQL_FIXING_COLUMN_Q_RANGES_BATCH =
+        "SELECT " +
+            "wcqr.wst_column_id AS wst_column_id," +
+            "wqr.q              AS q," +
+            "r.a                AS start_km," +
+            "r.b                AS stop_km " +
+        "FROM wst_column_q_ranges wcqr " +
+            "JOIN wst_q_ranges wqr ON wcqr.wst_q_range_id = wqr.id " +
+            "JOIN ranges       r   ON wqr.range_id        = r.id " +
+        "WHERE " +
+            "wcqr.wst_column_id IN ($IDS) " +
+        "ORDER BY wcqr.wst_column_id, r.a";
 
-    public static final String SQL_FIXING_COLUMN_KM_RANGE =
-        "SELECT" +
-        "    MIN(position) AS start_km," +
-        "    MAX(position) AS stop_km " +
-        "FROM" +
-        "    wst_column_values " +
-        "WHERE" +
-        "    wst_column_id = :column_id";
+    public static final String SQL_FIXING_COLUMN_KM_RANGE_BATCH =
+        "SELECT " +
+            "wst_column_id," +
+            "MIN(position) AS start_km," +
+            "MAX(position) AS stop_km " +
+        "FROM " +
+            "wst_column_values " +
+        "WHERE " +
+            "wst_column_id IN ($IDS) " +
+        "GROUP BY wst_column_id";
 
+    public static final class KMRangeLoader extends BatchLoader<double []> {
+
+        public KMRangeLoader(List<Integer> columns, Session session) {
+            super(columns, session, SQL_FIXING_COLUMN_KM_RANGE_BATCH);
+        }
+
+        @Override
+        protected void fill(SQLQuery query) {
+            query
+                .addScalar("wst_column_id", StandardBasicTypes.INTEGER)
+                .addScalar("start_km",      StandardBasicTypes.DOUBLE)
+                .addScalar("stop_km",       StandardBasicTypes.DOUBLE);
+
+            List<Object []> ranges = query.list();
+            for (Object [] r: ranges) {
+                Integer cid = (Integer)r[0];
+                double [] vs = new double [] { (Double)r[1], (Double)r[2] };
+                cache(cid, vs);
+            }
+        }
+    } // class KMRangeLoader
+
+    public static final class ColumnQRangeLoader extends BatchLoader<List<double []>> {
+
+        public ColumnQRangeLoader(List<Integer> columns, Session session) {
+            super(columns, session, SQL_FIXING_COLUMN_Q_RANGES_BATCH);
+        }
+
+        @Override
+        protected void fill(SQLQuery query) {
+            query
+                .addScalar("wst_column_id", StandardBasicTypes.INTEGER)
+                .addScalar("q",             StandardBasicTypes.DOUBLE)
+                .addScalar("start_km",      StandardBasicTypes.DOUBLE)
+                .addScalar("stop_km",       StandardBasicTypes.DOUBLE);
+
+            int lastId = Integer.MIN_VALUE;
+            List<double []> column = new ArrayList<double []>();
+
+            List<Object []> ranges = query.list();
+            for (Object [] r: ranges) {
+                int cid = (Integer)r[0];
+
+                if (cid != lastId && !column.isEmpty()) {
+                    cache(lastId, column);
+                    column = new ArrayList<double []>();
+                }
+                column.add(new double [] {
+                    (Double)r[1],
+                    (Double)r[2],
+                    (Double)r[3]
+                });
+
+                lastId = cid;
+            }
+
+            if (!column.isEmpty()) {
+                cache(lastId, column);
+            }
+        }
+    } // class ColumnQRangeLoader
+
+    /** Helper class to store data from batching fixing columns. */
+    private static final class FixColumn {
+        int    columnId;
+        Date   startTime;
+        String name;
+
+        FixColumn(int columnId, Date startTime, String name) {
+            this.columnId  = columnId;
+            this.startTime = startTime;
+            this.name      = name;
+        }
+    } // class FixColumn
+
+    public static final class FixColumnLoader extends BatchLoader<List<FixColumn>> {
+
+        public FixColumnLoader(List<Integer> columns, Session session) {
+            super(columns, session, SQL_FIXING_COLUMNS_BATCH);
+        }
+
+        @Override
+        protected void fill(SQLQuery query) {
+            query
+                .addScalar("wst_id",        StandardBasicTypes.INTEGER)
+                .addScalar("wst_column_id", StandardBasicTypes.INTEGER)
+                .addScalar("start_time",    StandardBasicTypes.DATE)
+                .addScalar("name",          StandardBasicTypes.STRING);
+
+            int lastId = Integer.MIN_VALUE;
+            List<FixColumn> cols = new ArrayList<FixColumn>();
+
+            List<Object []> columns = query.list();
+            for (Object [] c: columns) {
+                int wid = (Integer)c[0];
+
+                if (wid != lastId && !cols.isEmpty()) {
+                    cache(lastId, cols);
+                    cols = new ArrayList<FixColumn>();
+                }
+                cols.add(new FixColumn(
+                    (Integer)c[1],
+                    (Date)   c[2],
+                    (String) c[3]));
+
+                lastId = wid;
+            }
+            if (!cols.isEmpty()) {
+                cache(lastId, cols);
+            }
+        }
+    } // class FixColumnLoader
 
     public static class QRange extends Range {
 
@@ -255,34 +372,35 @@
                 } // for all Q ranges
             }
 
-            public void loadKmRange(SQLQuery query) {
-                query.setInteger("column_id", columnId);
+            public void loadKmRange(KMRangeLoader loader) {
 
-                List<Object []> kms = query.list();
+                double [] range = loader.get(columnId);
 
-                if (kms.isEmpty()) {
+                if (range == null) {
                     log.warn("No km range for column " + columnId + ".");
+                    return;
                 }
-                else {
-                    Object [] obj = kms.get(0);
-                    start = (Double)obj[0];
-                    end   = (Double)obj[1];
-                }
+                start = range[0];
+                end   = range[1];
             }
 
             public void loadQRanges(
-                SQLQuery    query,
-                GaugeFinder gaugeFinder
+                ColumnQRangeLoader loader,
+                GaugeFinder        gaugeFinder
             ) {
-                query.setInteger("column_id", columnId);
-                List<Object []> list = query.list();
+                List<double []> qrs = loader.get(columnId);
+                if (qrs == null) {
+                    log.warn("No q ranges found for column " + columnId);
+                    return;
+                }
 
-                List<QRange> qRanges = new ArrayList<QRange>(list.size());
+                List<QRange> qRanges = new ArrayList<QRange>(qrs.size());
 
-                for (Object [] row: list) {
-                    double q     = (Double)row[0];
-                    double start = (Double)row[1];
-                    double end   = (Double)row[2];
+                for (double [] qr: qrs) {
+                    double q     = qr[0];
+                    double start = qr[1];
+                    double end   = qr[2];
+
                     QRange qRange = new QRange(start, end, q);
                     if (qRange.clip(this)) {
                         qRanges.add(qRange);
@@ -314,20 +432,26 @@
             columns = new ArrayList<Column>();
         }
 
-        public void loadColumns(SQLQuery query) {
-            query.setInteger("wst_id", wstId);
-            List<Object []> list = query.list();
-            for (Object [] row: list) {
-                int    columnId  = (Integer)row[0];
-                Date   startTime = (Date)   row[1];
-                String name      = (String) row[2];
-                columns.add(new Column(columnId, startTime, name));
+        public void allColumnIds(List<Integer> cIds) {
+            for (Column column: columns) {
+                cIds.add(column.columnId);
             }
         }
 
-        public void loadColumnsKmRange(SQLQuery query) {
+        public void loadColumns(FixColumnLoader loader) {
+            List<FixColumn> fcs = loader.get(wstId);
+            if (fcs == null) {
+                log.warn("No columns for wst " + wstId);
+                return;
+            }
+            for (FixColumn fc: fcs) {
+                columns.add(new Column(fc.columnId, fc.startTime, fc.name));
+            }
+        }
+
+        public void loadColumnsKmRange(KMRangeLoader loader) {
             for (Column column: columns) {
-                column.loadKmRange(query);
+                column.loadKmRange(loader);
             }
         }
 
@@ -338,11 +462,11 @@
         }
 
         public void loadColumnsQRanges(
-            SQLQuery    query,
-            GaugeFinder gaugeFinder
+            ColumnQRangeLoader loader,
+            GaugeFinder        gaugeFinder
         ) {
             for (Column column: columns) {
-                column.loadQRanges(query, gaugeFinder);
+                column.loadQRanges(loader, gaugeFinder);
             }
         }
 
@@ -427,23 +551,40 @@
     }
 
     protected void loadFixingsColumns(Session session) {
-        SQLQuery query = session.createSQLQuery(SQL_FIXING_COLUMNS)
-            .addScalar("wst_column_id", StandardBasicTypes.INTEGER)
-            .addScalar("start_time",    StandardBasicTypes.DATE)
-            .addScalar("name",          StandardBasicTypes.STRING);
+
+        FixColumnLoader loader = new FixColumnLoader(
+            allFixingIds(),
+            session);
 
         for (Fixing fixing: fixings) {
-            fixing.loadColumns(query);
+            fixing.loadColumns(loader);
         }
     }
 
+    protected List<Integer> allFixingIds() {
+        List<Integer> ids = new ArrayList<Integer>(fixings.size());
+        for (Fixing fixing: fixings) {
+            ids.add(fixing.getId());
+        }
+        return ids;
+    }
+
+    protected List<Integer> allColumnIds() {
+        List<Integer> cIds = new ArrayList<Integer>();
+        for (Fixing fixing: fixings) {
+            fixing.allColumnIds(cIds);
+        }
+        return cIds;
+    }
+
     protected void loadFixingsColumnsKmRange(Session session) {
-        SQLQuery query = session.createSQLQuery(SQL_FIXING_COLUMN_KM_RANGE)
-            .addScalar("start_km", StandardBasicTypes.DOUBLE)
-            .addScalar("stop_km",  StandardBasicTypes.DOUBLE);
+
+        KMRangeLoader loader = new KMRangeLoader(
+            allColumnIds(),
+            session);
 
         for (Fixing fixing: fixings) {
-            fixing.loadColumnsKmRange(query);
+            fixing.loadColumnsKmRange(loader);
         }
     }
 
@@ -451,13 +592,13 @@
         Session     session,
         GaugeFinder gaugeFinder
     ) {
-        SQLQuery query = session.createSQLQuery(SQL_FIXING_COLUMN_Q_RANGES)
-            .addScalar("q",        StandardBasicTypes.DOUBLE)
-            .addScalar("start_km", StandardBasicTypes.DOUBLE)
-            .addScalar("stop_km",  StandardBasicTypes.DOUBLE);
+
+        ColumnQRangeLoader loader = new ColumnQRangeLoader(
+            allColumnIds(),
+            session);
 
         for (Fixing fixing: fixings) {
-            fixing.loadColumnsQRanges(query, gaugeFinder);
+            fixing.loadColumnsQRanges(loader, gaugeFinder);
         }
     }
 
diff -r b87b20c71407 -r b0c88ac9f63b artifacts/src/main/java/org/dive4elements/river/utils/BatchLoader.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/artifacts/src/main/java/org/dive4elements/river/utils/BatchLoader.java	Sat Aug 17 20:08:51 2013 +0200
@@ -0,0 +1,116 @@
+/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
+ * Software engineering by Intevation GmbH
+ *
+ * This file is Free Software under the GNU AGPL (>=v3)
+ * and comes with ABSOLUTELY NO WARRANTY! Check out the
+ * documentation coming with Dive4Elements River for details.
+ */
+
+package org.dive4elements.river.utils;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+import org.hibernate.SQLQuery;
+import org.hibernate.Session;
+
+/** To reduce the number of SQL queries send to the backend
+ *  (mainly by the fixings overviews) we execute them in batches of ids
+ *  and store the results in a small cache.
+ *  TODO: It currently relies on dynamic SQL.
+ *  Is there a way to use Hibernate with java.sql.Array
+ *  in cross database compatible manner?
+ */
+public abstract class BatchLoader<T> {
+
+    private static Logger log = Logger.getLogger(BatchLoader.class);
+
+    public static final int BATCH_SIZE = 100;
+
+    private Map<Integer, T> loaded;
+    private List<Integer>   rest;
+    private Session         session;
+    private String          sqlTemplate;
+
+    public BatchLoader(
+        List<Integer> columns,
+        Session       session,
+        String        sqlTemplate
+    ) {
+        rest             = new ArrayList<Integer>(columns.size());
+        loaded           = new HashMap<Integer, T>();
+        this.session     = session;
+        this.sqlTemplate = sqlTemplate;
+
+        // Insert in reverse order to minize searching.
+        for (int i = columns.size()-1; i >= 0; --i) {
+            rest.add(columns.get(i));
+        }
+    }
+
+    /** Searches for id and fill a batch to load containing the found id. */
+    private List<Integer> prepareBatch(int id) {
+        List<Integer> batch = new ArrayList<Integer>(BATCH_SIZE);
+
+        boolean found = false;
+
+        for (int i = rest.size()-1; batch.size() < BATCH_SIZE && i >= 0; --i) {
+            Integer cid = rest.get(i);
+            if (cid == id) {
+                found = true;
+                batch.add(cid);
+                rest.remove(i);
+            }
+            else if ((found && batch.size() < BATCH_SIZE)
+                 || (!found && batch.size() < BATCH_SIZE-1)) {
+                batch.add(cid);
+                rest.remove(i);
+            }
+        }
+
+        return batch;
+    }
+
+    /** Converts id to a list of comma separated ints. */
+    private static String idsAsString(List<Integer> ids) {
+        StringBuilder sb = new StringBuilder();
+        for (Iterator<Integer> i = ids.iterator(); i.hasNext();) {
+            sb.append(i.next());
+            if (i.hasNext()) {
+                sb.append(',');
+            }
+        }
+        return sb.toString();
+    }
+
+    /** Get data for id. */
+    public T get(int id) {
+        T already = loaded.get(id);
+        if (already != null) {
+            return already;
+        }
+
+        List<Integer> batch = prepareBatch(id);
+        if (batch.isEmpty()) {
+            return null;
+        }
+        String sql = sqlTemplate.replace("$IDS", idsAsString(batch));
+        if (log.isDebugEnabled()) {
+            log.debug(sql + " " + sql.length());
+        }
+        fill(session.createSQLQuery(sql));
+        return get(id);
+    }
+
+    /** Call this from fill() to store data in the cache. */
+    protected void cache(int key, T data) {
+        loaded.put(key, data);
+    }
+
+    /** Override this to fill the cache */
+    protected abstract void fill(SQLQuery query);
+}


More information about the Dive4elements-commits mailing list