[Inteproxy-commits] r63 - in trunk: . inteproxy
scm-commit@wald.intevation.org
scm-commit at wald.intevation.org
Tue Apr 24 16:16:15 CEST 2007
Author: bh
Date: 2007-04-24 16:16:15 +0200 (Tue, 24 Apr 2007)
New Revision: 63
Added:
trunk/inteproxy/proxycore.py
Modified:
trunk/ChangeLog
trunk/InteProxy.py
Log:
* inteproxy/proxycore.py: New. Core classes and functions of
InteProxy.
* InteProxy.py (MasterWorkerServer, InteProxyHTTPRequestHandler)
(log_date_time_string): Moved to inteproxy/proxycore.py
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2007-04-24 12:55:09 UTC (rev 62)
+++ trunk/ChangeLog 2007-04-24 14:16:15 UTC (rev 63)
@@ -1,5 +1,13 @@
2007-04-24 Bernhard Herzog <bh at intevation.de>
+ * inteproxy/proxycore.py: New. Core classes and functions of
+ InteProxy.
+
+ * InteProxy.py (MasterWorkerServer, InteProxyHTTPRequestHandler)
+ (log_date_time_string): Moved to inteproxy/proxycore.py
+
+2007-04-24 Bernhard Herzog <bh at intevation.de>
+
* inteproxy/httpserver.py: New. Move parts of InteProxy.py here
to form a new base class for HTTP servers that can be run in a
thread and stopped from other threads.
Modified: trunk/InteProxy.py
===================================================================
--- trunk/InteProxy.py 2007-04-24 12:55:09 UTC (rev 62)
+++ trunk/InteProxy.py 2007-04-24 14:16:15 UTC (rev 63)
@@ -16,301 +16,18 @@
import os
import sys
import optparse
-import traceback
-import time
import urlparse
import urllib2
-import BaseHTTPServer
-import socket
import inteproxy.proxyconnection as proxyconnection
from inteproxy.transcoder import transcoder_map
from inteproxy.getpassword import getpassword
-from inteproxy.threadpool import ThreadPool
-from inteproxy.feesdialog import handle_fees_and_access_constraints
-from inteproxy.httpserver import HTTPServer
+from inteproxy.proxycore import (InteProxyHTTPRequestHandler,
+ MasterWorkerServer,
+ log_date_time_string)
inteproxy_version = "0.1.2"
-class InteProxyHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
-
- # Whether the SHUTDOWN method is allowed. If allowed, anybody who can
- # connect to the server can shutdown the server.
- allow_shutdown = False
-
- # Whether debug output should be logged. Supported values:
- # 0 no debug output
- # 1 normal debug level with abbreviated log of the data
- # transferred
- # >=2 Also logs all the data transferred by the proxy
- #
- debug_level = 0
-
- def do_SHUTDOWN(self):
- """Shutdown the server if self.allow_shutdown is True (default False).
-
- To shutdown the server, this method calls the server's
- server_close method.
- """
- if self.allow_shutdown:
- self.server.server_close()
- self.send_response(200)
- # somewhat ugly hack: connect to this server, to unblock the
- # accept call.
- s = socket.socket(socket.AF_INET)
- s.connect(("localhost", self.server.server_port))
- s.close()
- else:
- self.send_error(400)
-
- def do_GET(self):
- """Handle a GET request"""
- self.handle_proxy_request("GET")
-
- def do_POST(self):
- """Handle a POST request"""
- self.handle_proxy_request("POST")
-
- def handle_proxy_request(self, method):
- """Handle a request that needs to be 'proxied'
-
- Method should be either 'POST' or 'GET'. Both methods are
- handled in the same way. The real host to connect to is
- determined from the url: The path component starts with the real
- hostname. The method strips that hostname from the path and
- connects to the new host. All headers from the HTTP request and
- any extra data are passed through to the remote host and the
- response is likewise simply copied through. The only exception
- is the Host: header from the original request. It would be
- wrong, so it is removed and a new one is generated pointing to
- the real remote host.
- """
- self.log_debug("Got %s request for %s", method, self.path)
- for header, value in self.headers.items():
- self.log_debug("header from client: %s:%r", header, value)
-
- #
- # Create a http request for the real location
- #
- transcoder = transcoder_map.get_transcoder(method, self.path)
- remote_url = transcoder.get_url()
- self.log_debug("Converted url: %r", remote_url)
-
- # If any data is associated with the request read it
- length = int(self.headers.getheader("Content-Length", "0"))
- if length:
- # FIXME: check whether length bytes were read
- data = self.rfile.read(length)
- self.log_debug("body of client request (%d bytes):\n%r",
- length, data)
- content_type = self.headers.getheader("Content-Type")
- content_type, data = transcoder.convert_body(content_type, data)
- self.log_debug("modified body of client request (%d bytes):\n%r",
- len(data), data)
- if len(data) != length:
- self.headers["Content-length"] = str(len(data))
- if content_type is not None:
- self.headers["Content-type"] = content_type
- else:
- self.log_debug("client request has no body")
- data = None
-
- request = urllib2.Request(remote_url)
- for header, value in self.headers.items():
- if header.lower() == "host":
- # the host header will be set by httplib and it should
- # not be set twice, so we omit it. The value we
- # received from the client would have been wrong anyway
- # as it refers to the host this proxy is running on.
- continue
- elif header.lower() in ("content-length", "content-type"):
- # When a post request is redirected with the 302 code,
- # urllib2 will do a GET request for the new url even
- # when the original request was POST. This does not
- # conform to RFC 2616, but it's apparently what many
- # clients do and some servers expect (e.g. a POST
- # request to http://intevation.de/roundup/kolab/ is
- # redirected to
- # https://intevation.de/roundup/kolab/issue<num> and the
- # server expects a GET request for that.). However,
- # when doing the new request urllib2 copies all headers
- # except for those added with add_unredirected_header.
- # The content-length and content-type headers are only
- # needed for POST request so we have to make sure
- # they're not sent when redirection occurs.
- request.add_unredirected_header(header, value)
- else:
- request.add_header(header, value)
-
- if data is not None:
- request.add_data(data)
-
- self.log_debug("request sent")
-
- #
- # Retrieve the url described by the request and pass everything
- # through to the client.
- #
- try:
- response = urllib2.urlopen(request)
- except urllib2.HTTPError, err:
- # a HTTPError is a valid http response object, so we can
- # treat it like a normal response.
- response = err
- self.log_debug("received response: %s: %r", response.code,
- response.msg)
-
- # check for fees and access constraints and run a dialog
- response_read = handle_fees_and_access_constraints(remote_url, response)
-
- # Ideally, the HTTP version in our reply to the client should be
- # what the remote server used in its reply. Unfortunately,
- # there doesn't seem to be a way to get that information from
- # urllib2. So we pretend that the reply uses the same version
- # as the request we got from the client.
- #
- # If we do not set the version of the reply,
- # BaseHTTPRequestHandler will use HTTP/1.0 which can lead to
- # problems if the real version is HTTP/1.1, because the server
- # might send the data using the chunked transfer encoding which
- # is not allowed in HTTP/1.0. Some clients, including Firefox 1.5,
- # produce errors in that case.
- #
- # The HTTP version in the reply generated by send_response is
- # taken from self.protocol_version.
- self.protocol_version = self.request_version
- self.send_response(response.code, response.msg)
-
- for header, value in response.info().items():
- self.log_debug("received header: %s:%r", header, value)
- self.send_header(header, value)
- self.end_headers()
-
- transfer_encoding = response.info().get("Transfer-encoding")
- self.transfer_data(response_read, self.wfile.write,
- chunked = (transfer_encoding == "chunked"))
- self.log_debug("request finished")
-
- def transfer_data(self, read, write, length=None, chunked=False):
- """Transfer data from one 'file' to another in chunks
-
- The files are given by their read and write methods so it
- doesn't have to be a file. The read parameter must be callable
- with an integer argument indicating the maximum number of bytes
- to read and the write parameter must be callable with a string.
- If the parameter chunked is true, the method uses the 'chunked'
- transfer encoding when writing the data.
- """
-
- # wrap the read/write functions if debug logging is active so
- # that the data read from the server and written to the client
- # is logged.
- if self.debug_level > 0:
- orig_read = read
- orig_write = write
-
- def limit_length(data):
- """Returns a shortened version of the string data"""
- if self.debug_level < 2 and len(data) > 13:
- data = "%s ... %x bytes ... %s" % (data[:5],
- len(data) - 10,
- data[-5:])
- return data
-
- def read(length):
- data = orig_read(length)
- self.log_debug("from server: %r", limit_length(data))
- return data
- def write(data):
- self.log_debug("to client: %r", limit_length(data))
- orig_write(data)
-
- # Now transfer the data in blocks of max_chunk_size
- max_chunk_size = 4096
- while 1:
- if length is not None:
- chunk_size = min(length, max_chunk_size)
- else:
- chunk_size = max_chunk_size
- chunk = read(chunk_size)
- if not chunk:
- break
- if length is not None:
- length -= len(chunk)
- if chunked:
- write("%x\r\n" % len(chunk))
- write(chunk)
- if chunked:
- write("\r\n")
- if chunked:
- write("0\r\n\r\n")
-
- def log_exception(self, exc_info):
- """Log an exception
-
- This method produces a log message via self.log_message and then
- prints the exception given by exc_info, which should be the
- return value of sys.exc_info, to stderr. By default log_message
- also writes to stderr, so both the message and the traceback
- should go to the same place.
- """
- self.log_message("Exception for previous 500 code\n")
- traceback.print_exception(*(exc_info + (None, sys.stderr)))
- sys.stderr.flush()
-
- def log_debug(self, template, *args):
- if self.debug_level > 0:
- message = "DEBUG: " + template % args
- message = message.replace("%", "%%")
- self.log_message(message)
-
-
-# same as the BaseHTTPRequestHandler method, but as a standalone function:
-def log_date_time_string():
- """Return the current time formatted for logging."""
- now = time.time()
- year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
- s = "%02d/%3s/%04d %02d:%02d:%02d" % (
- day, BaseHTTPServer.BaseHTTPRequestHandler.monthname[month],
- year, hh, mm, ss)
- return s
-
-
-class MasterWorkerServer(HTTPServer):
-
- do_shutdown = False
-
- def __init__(self, server_address, RequestHandlerClass, num_workers):
- HTTPServer.__init__(self, server_address, RequestHandlerClass)
- self.thread_pool = ThreadPool(num_workers, lambda f: f())
- sys.stderr.write("[%s] starting %d worker threads\n" \
- % (log_date_time_string(), num_workers))
- self.thread_pool.start()
-
- def process_request(self, request, client_address):
- """Put the request into the queue to be handled by the worker thread
- """
- def process_in_worker():
- try:
- self.finish_request(request, client_address)
- self.close_request(request)
- except:
- self.handle_error(request, client_address)
- self.close_request(request)
- sys.stderr.write("[%s] queue contains %d items\n" %
- (log_date_time_string(),
- self.thread_pool.queue.qsize()))
- self.thread_pool.put(process_in_worker)
-
- def handle_error(self, request, client_address):
- """Override to integrate the error reporting better with the other logs
- """
- sys.stderr.write("[%s] Exception while handling request from %r\n"
- % (log_date_time_string(), client_address))
- traceback.print_exception(*(sys.exc_info() + (None, sys.stderr)))
- sys.stderr.flush()
-
-
class IntePasswordManager(urllib2.HTTPPasswordMgr):
def find_user_password(self, realm, authuri):
Added: trunk/inteproxy/proxycore.py
===================================================================
--- trunk/inteproxy/proxycore.py 2007-04-24 12:55:09 UTC (rev 62)
+++ trunk/inteproxy/proxycore.py 2007-04-24 14:16:15 UTC (rev 63)
@@ -0,0 +1,300 @@
+#! /usr/bin/python
+# Copyright (C) 2006, 2007 by Intevation GmbH
+# Authors:
+# Bernhard Herzog <bh at intevation.de>
+#
+# This program is free software under the GPL (>=v2)
+# Read the file COPYING coming with the software for details.
+
+"""The HTTP proxy at the core of InteProxy"""
+
+import sys
+import traceback
+import time
+import urllib2
+import BaseHTTPServer
+import socket
+
+from inteproxy.transcoder import transcoder_map
+from inteproxy.threadpool import ThreadPool
+from inteproxy.feesdialog import handle_fees_and_access_constraints
+from inteproxy.httpserver import HTTPServer
+
+
+# same as the BaseHTTPRequestHandler method, but as a standalone function:
+def log_date_time_string():
+ """Return the current time formatted for logging."""
+ now = time.time()
+ year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
+ s = "%02d/%3s/%04d %02d:%02d:%02d" % (
+ day, BaseHTTPServer.BaseHTTPRequestHandler.monthname[month],
+ year, hh, mm, ss)
+ return s
+
+
+class InteProxyHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+
+ # Whether the SHUTDOWN method is allowed. If allowed, anybody who can
+ # connect to the server can shutdown the server.
+ allow_shutdown = False
+
+ # Whether debug output should be logged. Supported values:
+ # 0 no debug output
+ # 1 normal debug level with abbreviated log of the data
+ # transferred
+ # >=2 Also logs all the data transferred by the proxy
+ #
+ debug_level = 0
+
+ def do_SHUTDOWN(self):
+ """Shutdown the server if self.allow_shutdown is True (default False).
+
+ To shutdown the server, this method calls the server's
+ server_close method.
+ """
+ if self.allow_shutdown:
+ self.server.server_close()
+ self.send_response(200)
+ # somewhat ugly hack: connect to this server, to unblock the
+ # accept call.
+ s = socket.socket(socket.AF_INET)
+ s.connect(("localhost", self.server.server_port))
+ s.close()
+ else:
+ self.send_error(400)
+
+ def do_GET(self):
+ """Handle a GET request"""
+ self.handle_proxy_request("GET")
+
+ def do_POST(self):
+ """Handle a POST request"""
+ self.handle_proxy_request("POST")
+
+ def handle_proxy_request(self, method):
+ """Handle a request that needs to be 'proxied'
+
+ Method should be either 'POST' or 'GET'. Both methods are
+ handled in the same way. The real host to connect to is
+ determined from the url: The path component starts with the real
+ hostname. The method strips that hostname from the path and
+ connects to the new host. All headers from the HTTP request and
+ any extra data are passed through to the remote host and the
+ response is likewise simply copied through. The only exception
+ is the Host: header from the original request. It would be
+ wrong, so it is removed and a new one is generated pointing to
+ the real remote host.
+ """
+ self.log_debug("Got %s request for %s", method, self.path)
+ for header, value in self.headers.items():
+ self.log_debug("header from client: %s:%r", header, value)
+
+ #
+ # Create a http request for the real location
+ #
+ transcoder = transcoder_map.get_transcoder(method, self.path)
+ remote_url = transcoder.get_url()
+ self.log_debug("Converted url: %r", remote_url)
+
+ # If any data is associated with the request read it
+ length = int(self.headers.getheader("Content-Length", "0"))
+ if length:
+ # FIXME: check whether length bytes were read
+ data = self.rfile.read(length)
+ self.log_debug("body of client request (%d bytes):\n%r",
+ length, data)
+ content_type = self.headers.getheader("Content-Type")
+ content_type, data = transcoder.convert_body(content_type, data)
+ self.log_debug("modified body of client request (%d bytes):\n%r",
+ len(data), data)
+ if len(data) != length:
+ self.headers["Content-length"] = str(len(data))
+ if content_type is not None:
+ self.headers["Content-type"] = content_type
+ else:
+ self.log_debug("client request has no body")
+ data = None
+
+ request = urllib2.Request(remote_url)
+ for header, value in self.headers.items():
+ if header.lower() == "host":
+ # the host header will be set by httplib and it should
+ # not be set twice, so we omit it. The value we
+ # received from the client would have been wrong anyway
+ # as it refers to the host this proxy is running on.
+ continue
+ elif header.lower() in ("content-length", "content-type"):
+ # When a post request is redirected with the 302 code,
+ # urllib2 will do a GET request for the new url even
+ # when the original request was POST. This does not
+ # conform to RFC 2616, but it's apparently what many
+ # clients do and some servers expect (e.g. a POST
+ # request to http://intevation.de/roundup/kolab/ is
+ # redirected to
+ # https://intevation.de/roundup/kolab/issue<num> and the
+ # server expects a GET request for that.). However,
+ # when doing the new request urllib2 copies all headers
+ # except for those added with add_unredirected_header.
+ # The content-length and content-type headers are only
+ # needed for POST request so we have to make sure
+ # they're not sent when redirection occurs.
+ request.add_unredirected_header(header, value)
+ else:
+ request.add_header(header, value)
+
+ if data is not None:
+ request.add_data(data)
+
+ self.log_debug("request sent")
+
+ #
+ # Retrieve the url described by the request and pass everything
+ # through to the client.
+ #
+ try:
+ response = urllib2.urlopen(request)
+ except urllib2.HTTPError, err:
+ # a HTTPError is a valid http response object, so we can
+ # treat it like a normal response.
+ response = err
+ self.log_debug("received response: %s: %r", response.code,
+ response.msg)
+
+ # check for fees and access constraints and run a dialog
+ response_read = handle_fees_and_access_constraints(remote_url, response)
+
+ # Ideally, the HTTP version in our reply to the client should be
+ # what the remote server used in its reply. Unfortunately,
+ # there doesn't seem to be a way to get that information from
+ # urllib2. So we pretend that the reply uses the same version
+ # as the request we got from the client.
+ #
+ # If we do not set the version of the reply,
+ # BaseHTTPRequestHandler will use HTTP/1.0 which can lead to
+ # problems if the real version is HTTP/1.1, because the server
+ # might send the data using the chunked transfer encoding which
+ # is not allowed in HTTP/1.0. Some clients, including Firefox 1.5,
+ # produce errors in that case.
+ #
+ # The HTTP version in the reply generated by send_response is
+ # taken from self.protocol_version.
+ self.protocol_version = self.request_version
+ self.send_response(response.code, response.msg)
+
+ for header, value in response.info().items():
+ self.log_debug("received header: %s:%r", header, value)
+ self.send_header(header, value)
+ self.end_headers()
+
+ transfer_encoding = response.info().get("Transfer-encoding")
+ self.transfer_data(response_read, self.wfile.write,
+ chunked = (transfer_encoding == "chunked"))
+ self.log_debug("request finished")
+
+ def transfer_data(self, read, write, length=None, chunked=False):
+ """Transfer data from one 'file' to another in chunks
+
+ The files are given by their read and write methods so it
+ doesn't have to be a file. The read parameter must be callable
+ with an integer argument indicating the maximum number of bytes
+ to read and the write parameter must be callable with a string.
+ If the parameter chunked is true, the method uses the 'chunked'
+ transfer encoding when writing the data.
+ """
+
+ # wrap the read/write functions if debug logging is active so
+ # that the data read from the server and written to the client
+ # is logged.
+ if self.debug_level > 0:
+ orig_read = read
+ orig_write = write
+
+ def limit_length(data):
+ """Returns a shortened version of the string data"""
+ if self.debug_level < 2 and len(data) > 13:
+ data = "%s ... %x bytes ... %s" % (data[:5],
+ len(data) - 10,
+ data[-5:])
+ return data
+
+ def read(length):
+ data = orig_read(length)
+ self.log_debug("from server: %r", limit_length(data))
+ return data
+ def write(data):
+ self.log_debug("to client: %r", limit_length(data))
+ orig_write(data)
+
+ # Now transfer the data in blocks of max_chunk_size
+ max_chunk_size = 4096
+ while 1:
+ if length is not None:
+ chunk_size = min(length, max_chunk_size)
+ else:
+ chunk_size = max_chunk_size
+ chunk = read(chunk_size)
+ if not chunk:
+ break
+ if length is not None:
+ length -= len(chunk)
+ if chunked:
+ write("%x\r\n" % len(chunk))
+ write(chunk)
+ if chunked:
+ write("\r\n")
+ if chunked:
+ write("0\r\n\r\n")
+
+ def log_exception(self, exc_info):
+ """Log an exception
+
+ This method produces a log message via self.log_message and then
+ prints the exception given by exc_info, which should be the
+ return value of sys.exc_info, to stderr. By default log_message
+ also writes to stderr, so both the message and the traceback
+ should go to the same place.
+ """
+ self.log_message("Exception for previous 500 code\n")
+ traceback.print_exception(*(exc_info + (None, sys.stderr)))
+ sys.stderr.flush()
+
+ def log_debug(self, template, *args):
+ if self.debug_level > 0:
+ message = "DEBUG: " + template % args
+ message = message.replace("%", "%%")
+ self.log_message(message)
+
+
+class MasterWorkerServer(HTTPServer):
+
+ do_shutdown = False
+
+ def __init__(self, server_address, RequestHandlerClass, num_workers):
+ HTTPServer.__init__(self, server_address, RequestHandlerClass)
+ self.thread_pool = ThreadPool(num_workers, lambda f: f())
+ sys.stderr.write("[%s] starting %d worker threads\n" \
+ % (log_date_time_string(), num_workers))
+ self.thread_pool.start()
+
+ def process_request(self, request, client_address):
+ """Put the request into the queue to be handled by the worker thread
+ """
+ def process_in_worker():
+ try:
+ self.finish_request(request, client_address)
+ self.close_request(request)
+ except:
+ self.handle_error(request, client_address)
+ self.close_request(request)
+ sys.stderr.write("[%s] queue contains %d items\n" %
+ (log_date_time_string(),
+ self.thread_pool.queue.qsize()))
+ self.thread_pool.put(process_in_worker)
+
+ def handle_error(self, request, client_address):
+ """Override to integrate the error reporting better with the other logs
+ """
+ sys.stderr.write("[%s] Exception while handling request from %r\n"
+ % (log_date_time_string(), client_address))
+ traceback.print_exception(*(sys.exc_info() + (None, sys.stderr)))
+ sys.stderr.flush()
Property changes on: trunk/inteproxy/proxycore.py
___________________________________________________________________
Name: svn:keywords
+ Id Revision
Name: svn:eol-style
+ native
More information about the Inteproxy-commits
mailing list