[Inteproxy-commits] r188 - in trunk: . inteproxy
scm-commit@wald.intevation.org
scm-commit at wald.intevation.org
Wed Mar 11 21:12:35 CET 2009
Author: bh
Date: 2009-03-11 21:12:33 +0100 (Wed, 11 Mar 2009)
New Revision: 188
Modified:
trunk/ChangeLog
trunk/inteproxy/proxyconnection.py
trunk/inteproxy/proxycore.py
Log:
Make connections via https-proxy more correct.
* inteproxy/proxycore.py
(InteProxyHTTPRequestHandler.open_http_connection): Pass the Host
header explicitly to make sure it always refers to the real remote
host (connections using the HTTP CONNECT method omitted it under
some circumstances but Host is required in HTTP/1.1 requests).
* inteproxy/proxyconnection.py (parse_netloc): New method to parse
the netloc into host and port
(HTTPSProxyConnection.putrequest): Omit scheme and host from the
url used in the actual request. Check parameters more thoroughly
and only accept callers that want to specify their own Host
header.
(HTTPSProxyConnection.connect): Adapt to some changes in
putrequest
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2009-03-11 17:24:39 UTC (rev 187)
+++ trunk/ChangeLog 2009-03-11 20:12:33 UTC (rev 188)
@@ -1,5 +1,24 @@
2009-03-11 Bernhard Herzog <bh at intevation.de>
+ Make connections via https-proxy more correct.
+
+ * inteproxy/proxycore.py
+ (InteProxyHTTPRequestHandler.open_http_connection): Pass the Host
+ header explicitly to make sure it always refers to the real remote
+ host (connections using the HTTP CONNECT method omitted it under
+ some circumstances but Host is required in HTTP/1.1 requests).
+
+ * inteproxy/proxyconnection.py (parse_netloc): New method to parse
+ the netloc into host and port
+ (HTTPSProxyConnection.putrequest): Omit scheme and host from the
+ url used in the actual request. Check parameters more thoroughly
+ and only accept callers that want to specify their own Host
+ header.
+ (HTTPSProxyConnection.connect): Adapt to some changes in
+ putrequest
+
+2009-03-11 Bernhard Herzog <bh at intevation.de>
+
* test/test_inteproxy.py: Doc-string and minor formatting fixes
2009-03-11 Bernhard Herzog <bh at intevation.de>
Modified: trunk/inteproxy/proxyconnection.py
===================================================================
--- trunk/inteproxy/proxyconnection.py 2009-03-11 17:24:39 UTC (rev 187)
+++ trunk/inteproxy/proxyconnection.py 2009-03-11 20:12:33 UTC (rev 188)
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 by Intevation GmbH
+# Copyright (C) 2008, 2009 by Intevation GmbH
#
# Based on code from
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/456195 which
@@ -18,32 +18,60 @@
import socket
+default_ports = {'http' : 80, 'https' : 443}
+
+
+def parse_netloc(scheme, netloc):
+ """Parses netloc and returns (hostname, port).
+
+ If the netloc string doesn't contain a port, the default port of
+ scheme (either 'http' or 'https') is used. If the port cannot be
+ determined, a ValueError is raised.
+ """
+ host, port = urllib.splitport(netloc)
+ if port is None:
+ port = default_ports.get(scheme)
+ if port is None:
+ raise ValueError("Cannot determine port for URL scheme %r"
+ % scheme)
+ else:
+ port = int(port)
+ return host, port
+
+
class HTTPSProxyConnection(httplib.HTTPConnection):
- _ports = {'http' : 80, 'https' : 443}
debuglevel = 2
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
- #print "HTTPSProxyConnection.putrequest", method, url
+ if not skip_host:
+ raise ValueError("HTTPSProxyConnection.putrequest must be called"
+ " with skip_host=True, because the correct Host"
+ " request header field will not be generated"
+ " automatically")
+
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
- host, port = urllib.splitport(netloc)
- if port is None:
- port = self._ports.get(scheme)
- if port is None:
- raise ValueError("Cannot determine port for URL scheme %r"
- % scheme)
- self._real_host = host
- self._real_port = port
+ if not netloc or not scheme:
+ raise ValueError("HTTPSProxyConnection.putrequest must be called"
+ " with a full url including a scheme and a"
+ " hostname.")
+
+ self._real_address = parse_netloc(scheme, netloc)
+
+ # Make sure the actual request url only contains the abspath
+ # part of the URL, and not the scheme and hostname. HTTP/1.1
+ # servers are supposed to accept such URLs in requests, but
+ # clients should not send them.
+ url = urlparse.urlunsplit(("", "", path, query, fragment))
+
httplib.HTTPConnection.putrequest(self, method, url,
skip_host=skip_host,
skip_accept_encoding=skip_accept_encoding)
def connect(self):
httplib.HTTPConnection.connect(self)
- #print "HTTPSProxyConnection.connect", self._real_host, self._real_port
- self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self._real_host,
- self._real_port))
+ self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % self._real_address)
#expect a HTTP/1.0 200 Connection established
response = self.response_class(self.sock, strict=self.strict,
method=self._method)
Modified: trunk/inteproxy/proxycore.py
===================================================================
--- trunk/inteproxy/proxycore.py 2009-03-11 17:24:39 UTC (rev 187)
+++ trunk/inteproxy/proxycore.py 2009-03-11 20:12:33 UTC (rev 188)
@@ -1,5 +1,5 @@
#! /usr/bin/python
-# Copyright (C) 2006, 2007, 2008 by Intevation GmbH
+# Copyright (C) 2006, 2007, 2008, 2009 by Intevation GmbH
# Authors:
# Bernhard Herzog <bh at intevation.de>
#
@@ -21,7 +21,7 @@
from inteproxy.feesdialog import handle_fees_and_access_constraints
from inteproxy.httpserver import HTTPServer
from inteproxy.httpmessage import HTTPRequestMessage, HTTPResponseMessage
-from inteproxy.proxyconnection import HTTPSProxyConnection
+from inteproxy.proxyconnection import HTTPSProxyConnection, parse_netloc
# same as the BaseHTTPRequestHandler method, but as a standalone function:
@@ -114,12 +114,14 @@
containing the remote server's or proxy's response.
"""
scheme, netloc, path, query, fragment = urlparse.urlsplit(remote_url)
+ remote_address = connect_address = parse_netloc(scheme, netloc)
+
# the URI used in the request. Usually, it's like remote_url
# but with scheme and netloc removed.
request_uri = urlparse.urlunsplit(("", "", path, query, fragment))
proxy = None
- extra_headers = []
+ extra_headers = [("Host", "%s:%d" % remote_address)]
if scheme == "http":
connection_class = httplib.HTTPConnection
@@ -131,7 +133,7 @@
connection_class = HTTPSProxyConnection
if proxy is not None:
- netloc = "%s:%d" % (proxy.host, proxy.port)
+ connect_address = (proxy.host, proxy.port)
request_uri = remote_url
if proxy.username and proxy.password:
userpass = base64.b64encode("%s:%s" % (proxy.username,
@@ -139,20 +141,19 @@
extra_headers.append(("Proxy-Authorization",
"Basic %s" % userpass))
- connection = connection_class(netloc)
+ connection = connection_class(*connect_address)
if self.request_version == "HTTP/1.0":
connection._http_vsn = 10
connection.putrequest(client_request.method, request_uri,
- skip_accept_encoding=True)
+ skip_accept_encoding=True, skip_host=True)
for header, value in client_request.headers.items():
if header.lower() == "host":
- # the host header will be set by httplib and it should
- # not be set twice, so we omit it. The value we
- # received from the client may have been wrong anyway as
- # in the old inteproxy scheme it refers to the host this
- # proxy is running on.
+ # the host header is already in extra_headers. The
+ # value we received from the client may have been wrong
+ # anyway as in the old inteproxy scheme it refers to the
+ # host this proxy is running on.
continue
else:
connection.putheader(header, value)
More information about the Inteproxy-commits
mailing list