[Inteproxy-commits] r144 - in trunk: . inteproxy test

scm-commit@wald.intevation.org scm-commit at wald.intevation.org
Thu Jun 12 20:36:41 CEST 2008


Author: bh
Date: 2008-06-12 20:36:39 +0200 (Thu, 12 Jun 2008)
New Revision: 144

Modified:
   trunk/ChangeLog
   trunk/inteproxy/proxycore.py
   trunk/inteproxy/transcoder.py
   trunk/test/test_inteproxy.py
Log:
* inteproxy/proxycore.py (InteProxyHTTPRequestHandler.rewrite_urls):
New method to rewrite URLs in response bodies.
(InteProxyHTTPRequestHandler.handle_proxy_request): Call
rewrite_urls on the response.
(MasterWorkerServer.__init__): New parameter and attribute:
rewrite_urls.  If true, the request handler should rewrite the
URLs.

* inteproxy/transcoder.py (TranscoderMap.rewrite_urls): Actual URL
rewriting.  It's a method of the transcoder map because that's
where the information about the remote hosts is stored.

* test/test_inteproxy.py (ServerTest.create_transcoder_map): New
method to create a transcoder map.
(ServerTest.setUp): Extend to create transcoder maps.
(TestInteProxyURLRewriting): New.  Some URL rewriting tests.


Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog	2008-06-12 17:17:43 UTC (rev 143)
+++ trunk/ChangeLog	2008-06-12 18:36:39 UTC (rev 144)
@@ -1,5 +1,24 @@
 2008-06-12  Bernhard Herzog  <bh at intevation.de>
 
+	* inteproxy/proxycore.py (InteProxyHTTPRequestHandler.rewrite_urls):
+	New method to rewrite URLs in response bodies.
+	(InteProxyHTTPRequestHandler.handle_proxy_request): Call
+	rewrite_urls on the response.
+	(MasterWorkerServer.__init__): New parameter and attribute:
+	rewrite_urls.  If true, the request handler should rewrite the
+	URLs.
+
+	* inteproxy/transcoder.py (TranscoderMap.rewrite_urls): Actual URL
+	rewriting.  It's a method of the transcoder map because that's
+	where the information about the remote hosts is stored.
+
+	* test/test_inteproxy.py (ServerTest.create_transcoder_map): New
+	method to create a transcoder map.
+	(ServerTest.setUp): Extend to create transcoder maps.
+	(TestInteProxyURLRewriting): New.  Some URL rewriting tests.
+
+2008-06-12  Bernhard Herzog  <bh at intevation.de>
+
 	* inteproxy/httpmessage.py: New module with abstractions of http
 	messages.
 

Modified: trunk/inteproxy/proxycore.py
===================================================================
--- trunk/inteproxy/proxycore.py	2008-06-12 17:17:43 UTC (rev 143)
+++ trunk/inteproxy/proxycore.py	2008-06-12 18:36:39 UTC (rev 144)
@@ -16,7 +16,6 @@
 import BaseHTTPServer
 import socket
 
-import inteproxy.transcoder
 from inteproxy.threadpool import ThreadPool
 from inteproxy.feesdialog import handle_fees_and_access_constraints
 from inteproxy.httpserver import HTTPServer
@@ -116,6 +115,7 @@
             response.debug_log_message(self.log_debug)
             # check for fees and access constraints and run a dialog
             handle_fees_and_access_constraints(remote_url, response)
+            self.rewrite_urls(response)
             self.handle_response(response)
 
         self.log_debug("request finished")
@@ -276,6 +276,21 @@
         if chunked:
             write("0\r\n\r\n")
 
+    def rewrite_urls(self, response):
+        """Rewrites URLs in the response if enabled in the server
+
+        This method rewrites URLs in the response if the request is an
+        old-style InteProxy request -- one where InteProxy is not used
+        as a normal HTTP proxy -- and rewriting has been activated in
+        the server.  The actual rewriting is done by the server's
+        transcoder_map.
+        """
+        if not urlparse.urlsplit(self.path)[0] and self.server.rewrite_urls:
+            transcoder_map = self.server.transcoder_map
+            prefix = self.server.get_inteproxy_url()
+            response.body = transcoder_map.rewrite_urls(response.body, prefix,
+                                                        self.log_debug)
+
     def log_exception(self, exc_info):
         """Log an exception
 
@@ -301,11 +316,13 @@
     do_shutdown = False
 
     def __init__(self, server_address, RequestHandlerClass, num_workers,
-                 http_proxy_url, https_proxy_url, transcoder_map):
+                 http_proxy_url, https_proxy_url, transcoder_map,
+                 rewrite_urls=False):
         HTTPServer.__init__(self, server_address, RequestHandlerClass)
         self.http_proxy_url = http_proxy_url
         self.https_proxy_url = https_proxy_url
         self.transcoder_map = transcoder_map
+        self.rewrite_urls = rewrite_urls
         self.thread_pool = ThreadPool(num_workers, lambda f: f())
         sys.stderr.write("[%s] starting %d worker threads\n" \
                          % (log_date_time_string(), num_workers))

Modified: trunk/inteproxy/transcoder.py
===================================================================
--- trunk/inteproxy/transcoder.py	2008-06-12 17:17:43 UTC (rev 143)
+++ trunk/inteproxy/transcoder.py	2008-06-12 18:36:39 UTC (rev 144)
@@ -7,6 +7,7 @@
 
 """Classes to modify HTTP requests."""
 
+import re
 import base64
 import urlparse
 from urllib import quote_plus
@@ -246,7 +247,50 @@
             cls = parser.get(section, "class")
             self.add_host(host, path, cls)
 
+    def rewrite_urls(self, data, prefix, log_debug):
+        """Prefix all known URLs in data with prefix.
 
+        This method looks for occurrences of all urls that this
+        transcoder map is responsible for in data and replaces them with
+        equivalent urls that refer to this inteproxy instance explicitly
+        by prefixing them with the given prefix.  The prefix should be
+        the URL of the InteProxy instance.  E.g. if the proxy is
+        responsible for connections to http://example.com/wms, and this
+        URL occurs in the body, it will be replaced by
+        http://localhost:64609/example.com/wms (this example assumes the
+        default port for inteproxy).
+
+        The log_debug parameter should be a callable for debug log
+        messages, usually the log_debug method of the
+        InteProxyHTTPRequestHandler.
+        """
+        pairs = dict()
+        for (host, path), cls in self.hostmap.items():
+            if not host:
+                log_debug("rewrite_urls: ignoring host %r", host)
+                continue
+            remote_url = "http://%s%s" % (host, path)
+            local_url = "%s%s%s" % (prefix, host, path)
+            pairs[remote_url] = local_url
+        regex = re.compile("(" + "|".join([re.escape(url)
+                                           for url in pairs.keys()])
+                           + ")")
+        rewritten = StringIO()
+        while data:
+            match = regex.search(data)
+            if match:
+                log_debug("rewriting %r to %r at %d",
+                          match.group(0), pairs[match.group(0)], match.start(0))
+                rewritten.write(data[:match.start()])
+                rewritten.write(pairs[match.group(0)])
+                data = data[match.end():]
+            else:
+                rewritten.write(data)
+                data = ""
+
+        return rewritten.getvalue()
+
+
 def create_transcoder_map():
     return TranscoderMap([
         ("identity", IdentityTranscoder, IdentityTranscoder),

Modified: trunk/test/test_inteproxy.py
===================================================================
--- trunk/test/test_inteproxy.py	2008-06-12 17:17:43 UTC (rev 143)
+++ trunk/test/test_inteproxy.py	2008-06-12 18:36:39 UTC (rev 144)
@@ -29,6 +29,12 @@
     # serversupport.HTTPServer.  Override in derived classes.
     remote_contents = []
 
+    # transcoders to use
+    transcoder_definitions = []
+
+    # whether the server should rewrite urls
+    rewrite_urls = False
+
     def setUp(self):
         self.old_stderr = sys.stderr
         sys.stderr = open(os.path.devnull, "w")
@@ -45,7 +51,8 @@
         proxyserver = MasterWorkerServer(("127.0.0.1", 0),
                                          InteProxyHTTPRequestHandler, 5,
                                          self.http_proxy_url, None,
-                                         transcoder_map=create_transcoder_map())
+                                   transcoder_map=self.create_transcoder_map(),
+                                         rewrite_urls=self.rewrite_urls)
         self.server = httpserver.ServerThread(proxyserver)
         self.server.start(daemon=True)
         remote_server = serversupport.HTTPServer(self.remote_contents)
@@ -54,6 +61,12 @@
         self.remote_server_base_url = "http://localhost:%d/" \
                                       % (self.remote_server.server_port,)
 
+    def create_transcoder_map(self):
+        transcoder_map = transcoder_map=create_transcoder_map()
+        for host, path, cls in self.transcoder_definitions:
+            transcoder_map.add_host(host, path, cls)
+        return transcoder_map
+
     def create_http_proxy(self):
         """Override in derived classes to create a proxy upstream from inteproxy
         The method should return the proxy server object.
@@ -97,6 +110,59 @@
         self.assertEquals(data, "some text")
 
 
+class TestInteProxyURLRewriting(ServerTest):
+
+    remote_contents = [
+        ("/rewrite-01", [("Content-Type", "text/plain")],
+         "An url that may be rewritten: http://example.com/a/wms"),
+        ("/rewrite-02", [("Content-Type", "text/plain")],
+         "An URL that may be rewritten: http://example.com/a/wms\n"
+         "and one that may not: http://example.com/foo\n"
+         "and one that may: http://example.com/another/wms\n"),
+        ]
+
+
+    transcoder_definitions = [
+        ("example.com", "/a/wms", "owsproxy"),
+        ("example.com", "/another/wms", "owsproxy"),
+        ]
+
+    rewrite_urls = True
+
+    def test_httpproxy_url_rewriting_single_url(self):
+        http = httplib.HTTPConnection("localhost", self.server.server_port)
+        http.request("GET",
+                     "http://localhost:%d/localhost:%d/rewrite-01"
+                     % (self.server.server_port,
+                        self.remote_server.server_port))
+        response = http.getresponse()
+        self.assertEquals(response.status, 200)
+        data = response.read()
+        self.assertEquals(data,
+                          "An url that may be rewritten:"
+                          " http://localhost:%d/example.com/a/wms"
+                          % (self.server.server_port,))
+
+    def test_httpproxy_url_rewriting_multiple_urls(self):
+        http = httplib.HTTPConnection("localhost", self.server.server_port)
+        http.request("GET",
+                     "http://localhost:%d/localhost:%d/rewrite-02"
+                     % (self.server.server_port,
+                        self.remote_server.server_port))
+        response = http.getresponse()
+        self.assertEquals(response.status, 200)
+        data = response.read()
+        self.assertEquals(data,
+                          "An URL that may be rewritten:"
+                          " http://localhost:%d/example.com/a/wms\n"
+                          "and one that may not: http://example.com/foo\n"
+                          "and one that may:"
+                          " http://localhost:%d/example.com/another/wms\n"
+                          % (self.server.server_port,
+                             self.server.server_port))
+
+
+
 class TestInteProxyWithExtraProxy(ServerTest):
 
     remote_contents = [



More information about the Inteproxy-commits mailing list