[Inteproxy-commits] r188 - in trunk: . inteproxy

scm-commit@wald.intevation.org scm-commit at wald.intevation.org
Wed Mar 11 21:12:35 CET 2009


Author: bh
Date: 2009-03-11 21:12:33 +0100 (Wed, 11 Mar 2009)
New Revision: 188

Modified:
   trunk/ChangeLog
   trunk/inteproxy/proxyconnection.py
   trunk/inteproxy/proxycore.py
Log:
Make connections via https-proxy more correct.

* inteproxy/proxycore.py
(InteProxyHTTPRequestHandler.open_http_connection): Pass the Host
header explicitly to make sure it always refers to the real remote
host (connections using the HTTP CONNECT method omitted it under
some circumstances but Host is required in HTTP/1.1 requests).

* inteproxy/proxyconnection.py (parse_netloc): New method to parse
the netloc into host and port
(HTTPSProxyConnection.putrequest): Omit scheme and host from the
url used in the actual request.  Check parameters more thoroughly
and only accept callers that want to specify their own Host
header.
(HTTPSProxyConnection.connect): Adapt to some changes in
putrequest


Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog	2009-03-11 17:24:39 UTC (rev 187)
+++ trunk/ChangeLog	2009-03-11 20:12:33 UTC (rev 188)
@@ -1,5 +1,24 @@
 2009-03-11  Bernhard Herzog  <bh at intevation.de>
 
+	Make connections via https-proxy more correct.
+
+	* inteproxy/proxycore.py
+	(InteProxyHTTPRequestHandler.open_http_connection): Pass the Host
+	header explicitly to make sure it always refers to the real remote
+	host (connections using the HTTP CONNECT method omitted it under
+	some circumstances but Host is required in HTTP/1.1 requests).
+
+	* inteproxy/proxyconnection.py (parse_netloc): New method to parse
+	the netloc into host and port
+	(HTTPSProxyConnection.putrequest): Omit scheme and host from the
+	url used in the actual request.  Check parameters more thoroughly
+	and only accept callers that want to specify their own Host
+	header.
+	(HTTPSProxyConnection.connect): Adapt to some changes in
+	putrequest
+
+2009-03-11  Bernhard Herzog  <bh at intevation.de>
+
 	* test/test_inteproxy.py: Doc-string and minor formatting fixes
 
 2009-03-11  Bernhard Herzog  <bh at intevation.de>

Modified: trunk/inteproxy/proxyconnection.py
===================================================================
--- trunk/inteproxy/proxyconnection.py	2009-03-11 17:24:39 UTC (rev 187)
+++ trunk/inteproxy/proxyconnection.py	2009-03-11 20:12:33 UTC (rev 188)
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 by Intevation GmbH
+# Copyright (C) 2008, 2009 by Intevation GmbH
 #
 # Based on code from
 # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/456195 which
@@ -18,32 +18,60 @@
 import socket
 
 
+default_ports = {'http' : 80, 'https' : 443}
+
+
+def parse_netloc(scheme, netloc):
+    """Parses netloc and returns (hostname, port).
+
+    If the netloc string doesn't contain a port, the default port of
+    scheme (either 'http' or 'https') is used.  If the port cannot be
+    determined, a ValueError is raised.
+    """
+    host, port = urllib.splitport(netloc)
+    if port is None:
+        port = default_ports.get(scheme)
+        if port is None:
+            raise ValueError("Cannot determine port for URL scheme %r"
+                             % scheme)
+    else:
+        port = int(port)
+    return host, port
+
+
 class HTTPSProxyConnection(httplib.HTTPConnection):
 
-    _ports = {'http' : 80, 'https' : 443}
 
     debuglevel = 2
 
     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
-        #print "HTTPSProxyConnection.putrequest", method, url
+        if not skip_host:
+            raise ValueError("HTTPSProxyConnection.putrequest must be called"
+                             " with skip_host=True, because the correct Host"
+                             " request header field will not be generated"
+                             " automatically")
+
         scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
-        host, port = urllib.splitport(netloc)
-        if port is None:
-            port = self._ports.get(scheme)
-            if port is None:
-                raise ValueError("Cannot determine port for URL scheme %r"
-                                 % scheme)
-        self._real_host = host
-        self._real_port = port
+        if not netloc or not scheme:
+            raise ValueError("HTTPSProxyConnection.putrequest must be called"
+                             " with a full url including a scheme and a"
+                             " hostname.")
+
+        self._real_address = parse_netloc(scheme, netloc)
+
+        # Make sure the actual request url only contains the abspath
+        # part of the URL, and not the scheme and hostname.  HTTP/1.1
+        # servers are supposed to accept such URLs in requests, but
+        # clients should not send them.
+        url = urlparse.urlunsplit(("", "", path, query, fragment))
+
         httplib.HTTPConnection.putrequest(self, method, url,
                                           skip_host=skip_host,
                                      skip_accept_encoding=skip_accept_encoding)
 
     def connect(self):
         httplib.HTTPConnection.connect(self)
-        #print "HTTPSProxyConnection.connect", self._real_host, self._real_port
-        self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self._real_host,
-                                                      self._real_port))
+        self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % self._real_address)
         #expect a HTTP/1.0 200 Connection established
         response = self.response_class(self.sock, strict=self.strict,
                                        method=self._method)

Modified: trunk/inteproxy/proxycore.py
===================================================================
--- trunk/inteproxy/proxycore.py	2009-03-11 17:24:39 UTC (rev 187)
+++ trunk/inteproxy/proxycore.py	2009-03-11 20:12:33 UTC (rev 188)
@@ -1,5 +1,5 @@
 #! /usr/bin/python
-# Copyright (C) 2006, 2007, 2008 by Intevation GmbH
+# Copyright (C) 2006, 2007, 2008, 2009 by Intevation GmbH
 # Authors:
 # Bernhard Herzog <bh at intevation.de>
 #
@@ -21,7 +21,7 @@
 from inteproxy.feesdialog import handle_fees_and_access_constraints
 from inteproxy.httpserver import HTTPServer
 from inteproxy.httpmessage import HTTPRequestMessage, HTTPResponseMessage
-from inteproxy.proxyconnection import HTTPSProxyConnection
+from inteproxy.proxyconnection import HTTPSProxyConnection, parse_netloc
 
 
 # same as the BaseHTTPRequestHandler method, but as a standalone function:
@@ -114,12 +114,14 @@
         containing the remote server's or proxy's response.
         """
         scheme, netloc, path, query, fragment = urlparse.urlsplit(remote_url)
+        remote_address = connect_address = parse_netloc(scheme, netloc)
+
         # the URI used in the request.  Usually, it's like remote_url
         # but with schem and netloc removed.
         request_uri = urlparse.urlunsplit(("", "", path, query, fragment))
 
         proxy = None
-        extra_headers = []
+        extra_headers = [("Host", "%s:%d" % remote_address)]
 
         if scheme == "http":
             connection_class = httplib.HTTPConnection
@@ -131,7 +133,7 @@
                 connection_class = HTTPSProxyConnection
 
         if proxy is not None:
-            netloc = "%s:%d" % (proxy.host, proxy.port)
+            connect_address = (proxy.host, proxy.port)
             request_uri = remote_url
             if proxy.username and proxy.password:
                 userpass = base64.b64encode("%s:%s" % (proxy.username,
@@ -139,20 +141,19 @@
                 extra_headers.append(("Proxy-Authorization",
                                       "Basic %s" % userpass))
 
-        connection = connection_class(netloc)
+        connection = connection_class(*connect_address)
         if self.request_version == "HTTP/1.0":
             connection._http_vsn = 10
 
         connection.putrequest(client_request.method, request_uri,
-                              skip_accept_encoding=True)
+                              skip_accept_encoding=True, skip_host=True)
 
         for header, value in client_request.headers.items():
             if header.lower() == "host":
-                # the host header will be set by httplib and it should
-                # not be set twice, so we omit it.  The value we
-                # received from the client may have been wrong anyway as
-                # in the old inteproxy scheme it refers to the host this
-                # proxy is running on.
+                # the host header is already in extra_headers.  The
+                # value we received from the client may have been wrong
+                # anyway as in the old inteproxy scheme it refers to the
+                # host this proxy is running on.
                 continue
             else:
                 connection.putheader(header, value)



More information about the Inteproxy-commits mailing list