[Inteproxy-commits] r140 - in trunk: . inteproxy test
scm-commit@wald.intevation.org
scm-commit at wald.intevation.org
Thu Jun 12 15:43:43 CEST 2008
Author: bh
Date: 2008-06-12 15:43:43 +0200 (Thu, 12 Jun 2008)
New Revision: 140
Modified:
trunk/ChangeLog
trunk/inteproxy/feesdialog.py
trunk/inteproxy/main.py
trunk/inteproxy/proxyconnection.py
trunk/inteproxy/proxycore.py
trunk/test/test_inteproxy.py
Log:
Use httplib instead of urllib2. This makes the code much simpler
and we can remove the various workarounds that were added to make
urllib2 less intelligent.
* inteproxy/proxycore.py
(InteProxyHTTPRequestHandler.open_http_connection): New method
that opens a http connection using httplib and the proxies defined
by the server object, if any.
(InteProxyHTTPRequestHandler.handle_proxy_request): Use
open_http_connection and do not use urllib2 anymore.
(InteProxyHTTPRequestHandler.handle_response): Adapt to httplib
response objects
(MasterWorkerServer.__init__): Instead of urllib2 openers, takes
http_proxy and https_proxy urls as parameters now and makes them
available as instance variables
* inteproxy/main.py (HTTPRedirectHandler, eat_https_proxy_envvar)
(build_opener): Removed. Only needed for urllib2
(run_server): Instead of the urllib2 openers, pass the proxy urls
to the MasterWorkerServer
* inteproxy/proxyconnection.py (ProxyHTTPConnection)
(ProxyHTTPSConnection, ProxyHTTPS10Connection, ConnectHTTPHandler)
(ConnectHTTPSHandler, ConnectHTTPS10Handler): Removed. Only
needed for urllib2
(HTTPSProxyConnection): New. Connection class for use with httplib
that uses the proxy's CONNECT method for connections.
* inteproxy/feesdialog.py (handle_fees_and_access_constraints):
Adapt to httplib
* test/test_inteproxy.py (ServerTest.create_http_proxy): New
method to be implemented by derived classes that need additional
upstream http proxies
(ServerTest.setUp): Allow self.create_http_proxy to create a http
proxy whose url is passed to the MasterWorkerServer
(ServerTest.tearDown): Stop the proxy server if one was created
(TestInteProxyWithExtraProxy): New class with test for an
InteProxy that uses another upstream http proxy to access servers
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2008-06-11 21:32:49 UTC (rev 139)
+++ trunk/ChangeLog 2008-06-12 13:43:43 UTC (rev 140)
@@ -1,3 +1,45 @@
+2008-06-12 Bernhard Herzog <bh at intevation.de>
+
+ Use httplib instead of urllib2. This makes the code much simpler
+ and we can remove the various workarounds that were added to make
+ urllib2 less intelligent.
+
+ * inteproxy/proxycore.py
+ (InteProxyHTTPRequestHandler.open_http_connection): New method
+ that opens a http connection using httplib and the proxies defined
+ by the server object, if any.
+ (InteProxyHTTPRequestHandler.handle_proxy_request): Use
+ open_http_connection and do not use urllib2 anymore.
+ (InteProxyHTTPRequestHandler.handle_response): Adapt to httplib
+ response objects
+ (MasterWorkerServer.__init__): Instead of urllib2 openers, takes
+ http_proxy and https_proxy urls as parameters now and makes them
+ available as instance variables
+
+ * inteproxy/main.py (HTTPRedirectHandler, eat_https_proxy_envvar)
+ (build_opener): Removed. Only needed for urllib2
+ (run_server): Instead of the urllib2 openers, pass the proxy urls
+ to the MasterWorkerServer
+
+ * inteproxy/proxyconnection.py (ProxyHTTPConnection)
+ (ProxyHTTPSConnection, ProxyHTTPS10Connection, ConnectHTTPHandler)
+ (ConnectHTTPSHandler, ConnectHTTPS10Handler): Removed. Only
+ needed for urllib2
+ (HTTPSProxyConnection): New. Connection class for use with httplib
+ that uses the proxy's CONNECT method for connections.
+
+ * inteproxy/feesdialog.py (handle_fees_and_access_constraints):
+ Adapt to httplib
+
+ * test/test_inteproxy.py (ServerTest.create_http_proxy): New
+ method to be implemented by derived classes that need additional
+ upstream http proxies
+ (ServerTest.setUp): Allow self.create_http_proxy to create a http
+ proxy whose url is passed to the MasterWorkerServer
+ (ServerTest.tearDown): Stop the proxy server if one was created
+ (TestInteProxyWithExtraProxy): New class with test for an
+ InteProxy that uses another upstream http proxy to access servers
+
2008-06-11 Jan-Oliver Wagner <jan-oliver.wagner at intevation.de>
* inteproxy.iss: Removed. This was the InnoSetup Windows
Modified: trunk/inteproxy/feesdialog.py
===================================================================
--- trunk/inteproxy/feesdialog.py 2008-06-11 21:32:49 UTC (rev 139)
+++ trunk/inteproxy/feesdialog.py 2008-06-12 13:43:43 UTC (rev 140)
@@ -68,7 +68,7 @@
# only try to handle anything for successful requests.
# Should we allow any other success response codes?
- if response.code != 200:
+ if response.status != 200:
return response_read
if remote_url in dialog_shown_for:
Modified: trunk/inteproxy/main.py
===================================================================
--- trunk/inteproxy/main.py 2008-06-11 21:32:49 UTC (rev 139)
+++ trunk/inteproxy/main.py 2008-06-12 13:43:43 UTC (rev 140)
@@ -24,59 +24,6 @@
inteproxy_version = "0.3.1"
-class HTTPRedirectHandler(urllib2.HTTPRedirectHandler):
-
- """A version of urllib2's HTTPRedirectHandler that does not redirect
-
- The class urllib2.HTTPRedirectHandler implements error handlers for
- the redirect error codes sent by the server and then starts a new
- request. In InteProxy we want to let InteProxy's client to handle
- the redirects themselves, so we overrride the error handling and do
- not handle any of the errors. urrlib2 then falls back on the real
- http error handler so that InteProxy handles them like other errors
- such as 404.
- """
-
- def http_error_302(self, *args):
- """Overrides the inherited method and simply returns None.
- None indicates to urllib2 that the method could not handle the
- error and that urllib2 should look for another handler.
- """
- return None
- http_error_301 = http_error_303 = http_error_307 = http_error_302
-
-
-def eat_https_proxy_envvar():
- if os.environ.has_key('https_proxy'):
- https_proxy = urlparse.urlsplit(os.getenv('https_proxy'))[1]
- del os.environ['https_proxy']
- sys.stderr.write("[%s] Using HTTPS proxy: %s\n"
- % (log_date_time_string(), https_proxy))
- return https_proxy
- return None
-
-def build_opener(https_proxy = None, debuglevel = 0, http_vsn = 11):
- handlers = []
- if https_proxy:
- sys.stderr.write("[%s] Using HTTPS proxy: %s\n"
- % (log_date_time_string(), https_proxy))
- if http_vsn == 10:
- proxy_handler = proxyconnection.ConnectHTTPS10Handler(
- proxy=https_proxy, debuglevel=debuglevel)
- else:
- proxy_handler = proxyconnection.ConnectHTTPSHandler(
- proxy=https_proxy, debuglevel=debuglevel)
- handlers.append(proxy_handler)
- else:
- #FIXME: We would need HTTP10*Handlers here optionally to be complete
- httphandler = urllib2.HTTPHandler(debuglevel=debuglevel)
- httpshandler = urllib2.HTTPSHandler(debuglevel=debuglevel)
- handlers.extend((httphandler, httpshandler))
-
- handlers.append(HTTPRedirectHandler)
- return(urllib2.build_opener(*handlers))
-
-
def setup_logging(opts):
"""Sets up logging according to opts.
@@ -106,6 +53,7 @@
elif inteproxy.resources.in_py2exe():
sys.stderr = open(os.devnull, "w")
+
def setup_language():
"""Sets the language environment if it's not set yet.
This function checks if any of the common language environment
@@ -164,17 +112,15 @@
setup_logging(opts)
- https_proxy = eat_https_proxy_envvar()
- httpopener = build_opener(https_proxy, opts.debug_level)
- http10opener = build_opener(https_proxy, opts.debug_level, 10)
-
server_address = ('localhost', opts.port)
sys.stderr.write("InteProxy Version %s\n" % inteproxy_version)
sys.stderr.write("[%s] server starting up\n" % log_date_time_string())
+ http_proxy = os.environ.get("http_proxy")
+ https_proxy = os.environ.get("https_proxy")
httpd = ServerClass(server_address, HandlerClass, opts.workers,
- httpopener, http10opener, transcoder_map=transcoder_map)
+ http_proxy, https_proxy, transcoder_map=transcoder_map)
# import the gtkapp here instead of at top-level to avoid loading
# the gtk module. The gtk module requires an Xserver connection
Modified: trunk/inteproxy/proxyconnection.py
===================================================================
--- trunk/inteproxy/proxyconnection.py 2008-06-11 21:32:49 UTC (rev 139)
+++ trunk/inteproxy/proxyconnection.py 2008-06-12 13:43:43 UTC (rev 140)
@@ -1,127 +1,65 @@
-# Copyright (C) 2006 by Alessandro Budai
-# from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/456195
+# Copyright (C) 2008 by Intevation GmbH
#
+# Based on code from
+# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/456195 which
+# is Copyright (C) 2006 by Alessandro Budai
+#
# This software may be used and distributed according to the terms
# of the Python License 2.3 or newer, see http://www.python.org/license
"""
-urrlib2 opener for SSL proxy (CONNECT method)
-
-This small module builds an urllib2 opener that can be used to make a
-connection through a proxy using the http CONNECT method (that can be used to
-proxy SSLconnections). The current urrlib2 seems to not support this method.
-
-tested with python 2.4
+Variant of httplib.HTTPConnection that connects through a proxy using
+the http CONNECT method (that can be used to proxy SSL connections).
"""
-import urllib2
+import urlparse
import urllib
import httplib
import socket
-class ProxyHTTPConnection(httplib.HTTPConnection):
+class HTTPSProxyConnection(httplib.HTTPConnection):
_ports = {'http' : 80, 'https' : 443}
+ debuglevel = 2
- def request(self, method, url, body=None, headers={}):
- #request is called before connect, so can interpret url and get
- #real host/port to be used to make CONNECT request to proxy
- proto, rest = urllib.splittype(url)
- if proto is None:
- raise ValueError, "unknown URL type: %s" % url
- #get host
- host, rest = urllib.splithost(rest)
- #try to get port
- host, port = urllib.splitport(host)
- #if port is not defined try to get from proto
+ def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
+ #print "HTTPSProxyConnection.putrequest", method, url
+ scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
+ host, port = urllib.splitport(netloc)
if port is None:
- try:
- port = self._ports[proto]
- except KeyError:
- raise ValueError, "unknown protocol for: %s" % url
+ port = self._ports.get(scheme)
+ if port is None:
+ raise ValueError("Cannot determine port for URL scheme %r"
+ % scheme)
self._real_host = host
- self._real_port = int(port)
- httplib.HTTPConnection.request(self, method, rest or '/', body, headers)
+ self._real_port = port
+ httplib.HTTPConnection.putrequest(self, method, url,
+ skip_host=skip_host,
+ skip_accept_encoding=skip_accept_encoding)
-
def connect(self):
httplib.HTTPConnection.connect(self)
- #send proxy CONNECT request
- self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self._real_host, self._real_port))
+ #print "HTTPSProxyConnection.connect", self._real_host, self._real_port
+ self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self._real_host,
+ self._real_port))
#expect a HTTP/1.0 200 Connection established
- response = self.response_class(self.sock, strict=self.strict, method=self._method)
+ response = self.response_class(self.sock, strict=self.strict,
+ method=self._method)
(version, code, message) = response._read_status()
#probably here we can handle auth requests...
if code != 200:
#proxy returned and error, abort connection, and raise exception
self.close()
- raise socket.error, "Proxy connection failed: %d %s" % (code, message.strip())
+ raise socket.error, ("Proxy connection failed: %d %s"
+ % (code, message.strip()))
#eat up header block from proxy....
while True:
#should not use directly fp probably
line = response.fp.readline()
- if line == '\r\n': break
+ if line == '\r\n':
+ break
-
-class ProxyHTTPSConnection(ProxyHTTPConnection):
-
- default_port = 443
-
- def __init__(self, host, port = None, key_file = None, cert_file = None, strict = None):
- ProxyHTTPConnection.__init__(self, host, port)
- self.key_file = key_file
- self.cert_file = cert_file
-
- def connect(self):
- ProxyHTTPConnection.connect(self)
- #make the sock ssl-aware
- ssl = socket.ssl(self.sock, self.key_file, self.cert_file)
+ ssl = socket.ssl(self.sock, None, None)
self.sock = httplib.FakeSocket(self.sock, ssl)
-
-class ProxyHTTPS10Connection(ProxyHTTPSConnection):
- _http_vsn = 10
- _http_vsn_str = 'HTTP/1.0'
-
-class ConnectHTTPHandler(urllib2.HTTPHandler):
-
- def __init__(self, proxy=None, debuglevel=0):
- self.proxy = proxy
- urllib2.HTTPHandler.__init__(self, debuglevel)
-
- def do_open(self, http_class, req):
- if self.proxy is not None:
- req.set_proxy(self.proxy, 'http')
- return urllib2.HTTPHandler.do_open(self, ProxyHTTPConnection, req)
-
-
-class ConnectHTTPSHandler(urllib2.HTTPSHandler):
-
- def __init__(self, proxy=None, debuglevel=0):
- self.proxy = proxy
- urllib2.HTTPSHandler.__init__(self, debuglevel)
-
- def do_open(self, http_class, req):
- if self.proxy is not None:
- req.set_proxy(self.proxy, 'https')
- return urllib2.HTTPSHandler.do_open(self, ProxyHTTPSConnection, req)
-
-class ConnectHTTPS10Handler(ConnectHTTPSHandler):
-
- def do_open(self, http_class, req):
- if self.proxy is not None:
- req.set_proxy(self.proxy, 'https')
- return urllib2.HTTPSHandler.do_open(self, ProxyHTTPS10Connection, req)
-
-
-if __name__ == '__main__':
-
- import sys
-
- opener = urllib2.build_opener(ConnectHTTPHandler(sys.argv[2]),
- ConnectHTTPSHandler(sys.argv[2]))
- urllib2.install_opener(opener)
- req = urllib2.Request(url=sys.argv[1])
- f = urllib2.urlopen(req)
- print f.read()
Modified: trunk/inteproxy/proxycore.py
===================================================================
--- trunk/inteproxy/proxycore.py 2008-06-11 21:32:49 UTC (rev 139)
+++ trunk/inteproxy/proxycore.py 2008-06-12 13:43:43 UTC (rev 140)
@@ -12,7 +12,7 @@
import httplib
import traceback
import time
-import urllib2
+import urlparse
import BaseHTTPServer
import socket
@@ -20,6 +20,7 @@
from inteproxy.threadpool import ThreadPool
from inteproxy.feesdialog import handle_fees_and_access_constraints
from inteproxy.httpserver import HTTPServer
+from inteproxy.proxyconnection import HTTPSProxyConnection
# same as the BaseHTTPRequestHandler method, but as a standalone function:
@@ -101,66 +102,18 @@
#
remote_url = transcoder.get_url()
self.log_debug("Converted url: %r", remote_url)
- request = urllib2.Request(remote_url)
- for header, value in client_request.headers.items():
- if header.lower() == "host":
- # the host header will be set by httplib and it should
- # not be set twice, so we omit it. The value we
- # received from the client would have been wrong anyway
- # as it refers to the host this proxy is running on.
- continue
- elif header.lower() in ("content-length", "content-type"):
- # When a post request is redirected with the 302 code,
- # urllib2 will do a GET request for the new url even
- # when the original request was POST. This does not
- # conform to RFC 2616, but it's apparently what many
- # clients do and some servers expect (e.g. a POST
- # request to http://intevation.de/roundup/kolab/ is
- # redirected to
- # https://intevation.de/roundup/kolab/issue<num> and the
- # server expects a GET request for that.). However,
- # when doing the new request urllib2 copies all headers
- # except for those added with add_unredirected_header.
- # The content-length and content-type headers are only
- # needed for POST request so we have to make sure
- # they're not sent when redirection occurs.
- request.add_unredirected_header(header, value)
- else:
- request.add_header(header, value)
-
- if client_request.body is not None:
- request.add_data(client_request.body)
-
- self.log_debug("request sent")
-
- #
- # Retrieve the url described by the request and pass everything
- # through to the client.
- #
response = None
try:
- # We need to make sure that urllib2 uses HTTP 1.0 when we
- # got asked as HTTP 1.0 from the client, so we need to use
- # derived classes
- if self.request_version == "HTTP/1.0":
- self.log_debug("Trying http10opener.")
- response = self.server.http10_opener.open(request)
- else:
- response = self.server.http11_opener.open(request)
+ response = self.open_http_connection(remote_url, client_request)
+ except socket.error, exc:
+ # some low level error occurred (e.g. the connection to an
+ # upstream proxy could not be established). Send a Bad
+ # Gateway error to the client
+ self.send_error(502)
- except urllib2.HTTPError, err:
- # a HTTPError is a valid http response object, so we can
- # treat it like a normal response.
- response = err
- except urllib2.URLError, err:
- # a more generic error and probably more low-level error.
- # Could be an error raised by ProxyHTTPConnection, for
- # instance. We cannot do much here, so we simply send the
- # client a 502 (Bad Gateway) error
- self.send_error(502, str(err))
if response is not None:
- self.log_debug("received response: %s: %r", response.code,
- response.msg)
+ self.log_debug("received response: %s: %r", response.status,
+ response.reason)
# check for fees and access constraints and run a dialog
response_read = handle_fees_and_access_constraints(remote_url,
response)
@@ -184,6 +137,52 @@
return inteproxy.transcoder.TranscoderRequest(self.command, self.path,
self.headers, body)
+ def open_http_connection(self, remote_url, client_request):
+ """Open a HTTP connection to remote_url and send client_request
+
+ This method handles both http and https URLs as well as proxies
+ for both. The return value is the httplib response object.
+ """
+ scheme, netloc, path, query, fragment = urlparse.urlsplit(remote_url)
+ if scheme == "http":
+ connection_class = httplib.HTTPConnection
+ if self.server.http_proxy_url:
+ netloc = urlparse.urlsplit(self.server.http_proxy_url)[1]
+ path = remote_url
+ elif scheme == "https":
+ connection_class = httplib.HTTPSConnection
+ if self.server.https_proxy_url:
+ netloc = urlparse.urlsplit(self.server.https_proxy_url)[1]
+ path = remote_url
+ connection_class = HTTPSProxyConnection
+
+ connection = connection_class(netloc)
+ if self.request_version == "HTTP/1.0":
+ connection._http_vsn = 10
+
+ connection.putrequest(client_request.method, path,
+ skip_accept_encoding=True)
+
+ for header, value in client_request.headers.items():
+ if header.lower() == "host":
+ # the host header will be set by httplib and it should
+ # not be set twice, so we omit it. The value we
+ # received from the client may have been wrong anyway as
+ # in the old inteproxy scheme it refers to the host this
+ # proxy is running on.
+ continue
+ else:
+ connection.putheader(header, value)
+ connection.endheaders()
+
+ if client_request.body is not None:
+ connection.send(client_request.body)
+
+ self.log_debug("request sent")
+
+ return connection.getresponse()
+
+
def handle_response(self, response, response_read):
# Ideally, the HTTP version in our reply to the client should be
# what the remote server used in its reply. Unfortunately,
@@ -201,14 +200,14 @@
# The HTTP version in the reply generated by send_response is
# taken from self.protocol_version.
self.protocol_version = self.request_version
- self.send_response(response.code, response.msg)
+ self.send_response(response.status, response.reason)
- for header, value in response.info().items():
+ for header, value in response.msg.items():
self.log_debug("received header: %s:%r", header, value)
self.send_header(header, value)
self.end_headers()
- transfer_encoding = response.info().get("Transfer-encoding")
+ transfer_encoding = response.msg.get("Transfer-encoding")
self.transfer_data(response_read, self.wfile.write,
chunked = (transfer_encoding == "chunked"))
@@ -291,10 +290,10 @@
do_shutdown = False
def __init__(self, server_address, RequestHandlerClass, num_workers,
- http11_opener, http10_opener, transcoder_map):
+ http_proxy_url, https_proxy_url, transcoder_map):
HTTPServer.__init__(self, server_address, RequestHandlerClass)
- self.http11_opener = http11_opener
- self.http10_opener = http10_opener
+ self.http_proxy_url = http_proxy_url
+ self.https_proxy_url = https_proxy_url
self.transcoder_map = transcoder_map
self.thread_pool = ThreadPool(num_workers, lambda f: f())
sys.stderr.write("[%s] starting %d worker threads\n" \
Modified: trunk/test/test_inteproxy.py
===================================================================
--- trunk/test/test_inteproxy.py 2008-06-11 21:32:49 UTC (rev 139)
+++ trunk/test/test_inteproxy.py 2008-06-12 13:43:43 UTC (rev 140)
@@ -16,7 +16,6 @@
from inteproxy.proxycore import MasterWorkerServer, InteProxyHTTPRequestHandler
from inteproxy.transcoder import create_transcoder_map
-from inteproxy.main import build_opener
import inteproxy.httpserver as httpserver
@@ -25,6 +24,7 @@
"""Base class for tests that run the InteProxy in a thread"""
+
remote_contents = [
("/wms", [("Content-Type", "text/plain")], "wms data"),
]
@@ -32,10 +32,19 @@
def setUp(self):
self.old_stderr = sys.stderr
sys.stderr = open(os.path.devnull, "w")
+
+ http_proxy = self.create_http_proxy()
+ if http_proxy:
+ self.http_proxy = httpserver.ServerThread(http_proxy)
+ self.http_proxy.start(daemon=True)
+ self.http_proxy_url = "http://localhost:%d/" \
+ % (self.http_proxy.server_port,)
+ else:
+ self.http_proxy = self.http_proxy_url = None
+
proxyserver = MasterWorkerServer(("127.0.0.1", 0),
InteProxyHTTPRequestHandler, 5,
- build_opener(None, 0),
- build_opener(None, 0, 10),
+ self.http_proxy_url, None,
transcoder_map=create_transcoder_map())
self.server = httpserver.ServerThread(proxyserver)
self.server.start(daemon=True)
@@ -45,10 +54,18 @@
self.remote_server_base_url = "http://localhost:%d/" \
% (self.remote_server.server_port,)
+ def create_http_proxy(self):
+ """Override in derived classes to create a proxy upstream from inteproxy
+ The method should return the proxy server object.
+ """
+ return None
+
def tearDown(self):
try:
- self.remote_server.stop()
- self.server.stop()
+ for server in [self.remote_server, self.http_proxy,
+ self.server]:
+ if server is not None:
+ server.stop()
finally:
sys.stderr = self.old_stderr
@@ -64,3 +81,19 @@
data = response.read()
self.assertEquals(data, "wms data")
+
+class TestInteProxyWithExtraProxy(ServerTest):
+
+ def create_http_proxy(self):
+ return MasterWorkerServer(("127.0.0.1", 0),
+ InteProxyHTTPRequestHandler, 5,
+ None, None,
+ transcoder_map=create_transcoder_map())
+
+ def test(self):
+ http = httplib.HTTPConnection("localhost", self.server.server_port)
+ http.request("GET", self.remote_server_base_url + "wms")
+ response = http.getresponse()
+ self.assertEquals(response.status, 200)
+ data = response.read()
+ self.assertEquals(data, "wms data")
More information about the Inteproxy-commits
mailing list