[Inteproxy-commits] r63 - in trunk: . inteproxy
scm-commit@wald.intevation.org
scm-commit at wald.intevation.org
Tue Apr 24 16:16:15 CEST 2007
Author: bh
Date: 2007-04-24 16:16:15 +0200 (Tue, 24 Apr 2007)
New Revision: 63
Added:
trunk/inteproxy/proxycore.py
Modified:
trunk/ChangeLog
trunk/InteProxy.py
Log:
* inteproxy/proxycore.py: New. Core classes and functions of
InteProxy.
* InteProxy.py (MasterWorkerServer, InteProxyHTTPRequestHandler)
(log_date_time_string): Moved to inteproxy/proxycore.py
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2007-04-24 12:55:09 UTC (rev 62)
+++ trunk/ChangeLog 2007-04-24 14:16:15 UTC (rev 63)
@@ -1,5 +1,13 @@
2007-04-24 Bernhard Herzog <bh at intevation.de>
+ * inteproxy/proxycore.py: New. Core classes and functions of
+ InteProxy.
+
+ * InteProxy.py (MasterWorkerServer, InteProxyHTTPRequestHandler)
+ (log_date_time_string): Moved to inteproxy/proxycore.py
+
+2007-04-24 Bernhard Herzog <bh at intevation.de>
+
* inteproxy/httpserver.py: New. Move parts of InteProxy.py here
to form a new base class for HTTP servers that can be run in a
thread and stopped from other threads.
Modified: trunk/InteProxy.py
===================================================================
--- trunk/InteProxy.py 2007-04-24 12:55:09 UTC (rev 62)
+++ trunk/InteProxy.py 2007-04-24 14:16:15 UTC (rev 63)
@@ -16,301 +16,18 @@
import os
import sys
import optparse
-import traceback
-import time
import urlparse
import urllib2
-import BaseHTTPServer
-import socket
import inteproxy.proxyconnection as proxyconnection
from inteproxy.transcoder import transcoder_map
from inteproxy.getpassword import getpassword
-from inteproxy.threadpool import ThreadPool
-from inteproxy.feesdialog import handle_fees_and_access_constraints
-from inteproxy.httpserver import HTTPServer
+from inteproxy.proxycore import (InteProxyHTTPRequestHandler,
+ MasterWorkerServer,
+ log_date_time_string)
inteproxy_version = "0.1.2"
-class InteProxyHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
-
- # Whether the SHUTDOWN method is allowed. If allowed, anybody who can
- # connect to the server can shutdown the server.
- allow_shutdown = False
-
- # Whether debug output should be logged. Supported values:
- # 0 no debug output
- # 1 normal debug level with abbreviated log of the data
- # transferred
- # >=2 Also logs all the data transferred by the proxy
- #
- debug_level = 0
-
- def do_SHUTDOWN(self):
- """Shutdown the server if self.allow_shutdown is True (default False).
-
- To shutdown the server, this method calls the server's
- server_close method.
- """
- if self.allow_shutdown:
- self.server.server_close()
- self.send_response(200)
- # somewhat ugly hack: connect to this server, to unblock the
- # accept call.
- s = socket.socket(socket.AF_INET)
- s.connect(("localhost", self.server.server_port))
- s.close()
- else:
- self.send_error(400)
-
- def do_GET(self):
- """Handle a GET request"""
- self.handle_proxy_request("GET")
-
- def do_POST(self):
- """Handle a POST request"""
- self.handle_proxy_request("POST")
-
- def handle_proxy_request(self, method):
- """Handle a request that needs to be 'proxied'
-
- Method should be either 'POST' or 'GET'. Both methods are
- handled in the same way. The real host to connect to is
- determined from the url: The path component starts with the real
- hostname. The method strips that hostname from the path and
- connects to the new host. All headers from the HTTP request and
- any extra data are passed through to the remote host and the
- response is likewise simply copied through. The only exception
- is the Host: header from the original request. It would be
- wrong, so it is removed and a new one is generated pointing to
- the real remote host.
- """
- self.log_debug("Got %s request for %s", method, self.path)
- for header, value in self.headers.items():
- self.log_debug("header from client: %s:%r", header, value)
-
- #
- # Create a http request for the real location
- #
- transcoder = transcoder_map.get_transcoder(method, self.path)
- remote_url = transcoder.get_url()
- self.log_debug("Converted url: %r", remote_url)
-
- # If any data is associated with the request read it
- length = int(self.headers.getheader("Content-Length", "0"))
- if length:
- # FIXME: check whether length bytes were read
- data = self.rfile.read(length)
- self.log_debug("body of client request (%d bytes):\n%r",
- length, data)
- content_type = self.headers.getheader("Content-Type")
- content_type, data = transcoder.convert_body(content_type, data)
- self.log_debug("modified body of client request (%d bytes):\n%r",
- len(data), data)
- if len(data) != length:
- self.headers["Content-length"] = str(len(data))
- if content_type is not None:
- self.headers["Content-type"] = content_type
- else:
- self.log_debug("client request has no body")
- data = None
-
- request = urllib2.Request(remote_url)
- for header, value in self.headers.items():
- if header.lower() == "host":
- # the host header will be set by httplib and it should
- # not be set twice, so we omit it. The value we
- # received from the client would have been wrong anyway
- # as it refers to the host this proxy is running on.
- continue
- elif header.lower() in ("content-length", "content-type"):
- # When a post request is redirected with the 302 code,
- # urllib2 will do a GET request for the new url even
- # when the original request was POST. This does not
- # conform to RFC 2616, but it's apparently what many
- # clients do and some servers expect (e.g. a POST
- # request to http://intevation.de/roundup/kolab/ is
- # redirected to
- # https://intevation.de/roundup/kolab/issue<num> and the
- # server expects a GET request for that.). However,
- # when doing the new request urllib2 copies all headers
- # except for those added with add_unredirected_header.
- # The content-length and content-type headers are only
- # needed for POST request so we have to make sure
- # they're not sent when redirection occurs.
- request.add_unredirected_header(header, value)
- else:
- request.add_header(header, value)
-
- if data is not None:
- request.add_data(data)
-
- self.log_debug("request sent")
-
- #
- # Retrieve the url described by the request and pass everything
- # through to the client.
- #
- try:
- response = urllib2.urlopen(request)
- except urllib2.HTTPError, err:
- # a HTTPError is a valid http response object, so we can
- # treat it like a normal response.
- response = err
- self.log_debug("received response: %s: %r", response.code,
- response.msg)
-
- # check for fees and access constraints and run a dialog
- response_read = handle_fees_and_access_constraints(remote_url, response)
-
- # Ideally, the HTTP version in our reply to the client should be
- # what the remote server used in its reply. Unfortunately,
- # there doesn't seem to be a way to get that information from
- # urllib2. So we pretend that the reply uses the same version
- # as the request we got from the client.
- #
- # If we do not set the version of the reply,
- # BaseHTTPRequestHandler will use HTTP/1.0 which can lead to
- # problems if the real version is HTTP/1.1, because the server
- # might send the data using the chunked transfer encoding which
- # is not allowed in HTTP/1.0. Some clients, including Firefox 1.5,
- # produce errors in that case.
- #
- # The HTTP version in the reply generated by send_response is
- # taken from self.protocol_version.
- self.protocol_version = self.request_version
- self.send_response(response.code, response.msg)
-
- for header, value in response.info().items():
- self.log_debug("received header: %s:%r", header, value)
- self.send_header(header, value)
- self.end_headers()
-
- transfer_encoding = response.info().get("Transfer-encoding")
- self.transfer_data(response_read, self.wfile.write,
- chunked = (transfer_encoding == "chunked"))
- self.log_debug("request finished")
-
- def transfer_data(self, read, write, length=None, chunked=False):
- """Transfer data from one 'file' to another in chunks
-
- The files are given by their read and write methods so it
- doesn't have to be a file. The read parameter must be callable
- with an integer argument indicating the maximum number of bytes
- to read and the write parameter must be callable with a string.
- If the parameter chunked is true, the method uses the 'chunked'
- transfer encoding when writing the data.
- """
-
- # wrap the read/write functions if debug logging is active so
- # that the data read from the server and written to the client
- # is logged.
- if self.debug_level > 0:
- orig_read = read
- orig_write = write
-
- def limit_length(data):
- """Returns a shortened version of the string data"""
- if self.debug_level < 2 and len(data) > 13:
- data = "%s ... %x bytes ... %s" % (data[:5],
- len(data) - 10,
- data[-5:])
- return data
-
- def read(length):
- data = orig_read(length)
- self.log_debug("from server: %r", limit_length(data))
- return data
- def write(data):
- self.log_debug("to client: %r", limit_length(data))
- orig_write(data)
-
- # Now transfer the data in blocks of max_chunk_size
- max_chunk_size = 4096
- while 1:
- if length is not None:
- chunk_size = min(length, max_chunk_size)
- else:
- chunk_size = max_chunk_size
- chunk = read(chunk_size)
- if not chunk:
- break
- if length is not None:
- length -= len(chunk)
- if chunked:
- write("%x\r\n" % len(chunk))
- write(chunk)
- if chunked:
- write("\r\n")
- if chunked:
- write("0\r\n\r\n")
-
- def log_exception(self, exc_info):
- """Log an exception
-
- This method produces a log message via self.log_message and then
- prints the exception given by exc_info, which should be the
- return value of sys.exc_info, to stderr. By default log_message
- also writes to stderr, so both the message and the traceback
- should go to the same place.
- """
- self.log_message("Exception for previous 500 code\n")
- traceback.print_exception(*(exc_info + (None, sys.stderr)))
- sys.stderr.flush()
-
- def log_debug(self, template, *args):
- if self.debug_level > 0:
- message = "DEBUG: " + template % args
- message = message.replace("%", "%%")
- self.log_message(message)
-
-
-# same as the BaseHTTPRequestHandler method, but as a standalone function:
-def log_date_time_string():
- """Return the current time formatted for logging."""
- now = time.time()
- year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
- s = "%02d/%3s/%04d %02d:%02d:%02d" % (
- day, BaseHTTPServer.BaseHTTPRequestHandler.monthname[month],
- year, hh, mm, ss)
- return s
-
-
-class MasterWorkerServer(HTTPServer):
-
- do_shutdown = False
-
- def __init__(self, server_address, RequestHandlerClass, num_workers):
- HTTPServer.__init__(self, server_address, RequestHandlerClass)
- self.thread_pool = ThreadPool(num_workers, lambda f: f())
- sys.stderr.write("[%s] starting %d worker threads\n" \
- % (log_date_time_string(), num_workers))
- self.thread_pool.start()
-
- def process_request(self, request, client_address):
- """Put the request into the queue to be handled by the worker thread
- """
- def process_in_worker():
- try:
- self.finish_request(request, client_address)
- self.close_request(request)
- except:
- self.handle_error(request, client_address)
- self.close_request(request)
- sys.stderr.write("[%s] queue contains %d items\n" %
- (log_date_time_string(),
- self.thread_pool.queue.qsize()))
- self.thread_pool.put(process_in_worker)
-
- def handle_error(self, request, client_address):
- """Override to integrate the error reporting better with the other logs
- """
- sys.stderr.write("[%s] Exception while handling request from %r\n"
- % (log_date_time_string(), client_address))
- traceback.print_exception(*(sys.exc_info() + (None, sys.stderr)))
- sys.stderr.flush()
-
-
class IntePasswordManager(urllib2.HTTPPasswordMgr):
def find_user_password(self, realm, authuri):
Added: trunk/inteproxy/proxycore.py
===================================================================
--- trunk/inteproxy/proxycore.py 2007-04-24 12:55:09 UTC (rev 62)
+++ trunk/inteproxy/proxycore.py 2007-04-24 14:16:15 UTC (rev 63)
@@ -0,0 +1,300 @@
+#! /usr/bin/python
+# Copyright (C) 2006, 2007 by Intevation GmbH
+# Authors:
+# Bernhard Herzog <bh at intevation.de>
+#
+# This program is free software under the GPL (>=v2)
+# Read the file COPYING coming with the software for details.
+
+"""The HTTP proxy at the core of InteProxy"""
+
+import sys
+import traceback
+import time
+import urllib2
+import BaseHTTPServer
+import socket
+
+from inteproxy.transcoder import transcoder_map
+from inteproxy.threadpool import ThreadPool
+from inteproxy.feesdialog import handle_fees_and_access_constraints
+from inteproxy.httpserver import HTTPServer
+
+
+# same as the BaseHTTPRequestHandler method, but as a standalone function:
+def log_date_time_string():
+ """Return the current time formatted for logging."""
+ now = time.time()
+ year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
+ s = "%02d/%3s/%04d %02d:%02d:%02d" % (
+ day, BaseHTTPServer.BaseHTTPRequestHandler.monthname[month],
+ year, hh, mm, ss)
+ return s
+
+
+class InteProxyHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+
+ # Whether the SHUTDOWN method is allowed. If allowed, anybody who can
+ # connect to the server can shutdown the server.
+ allow_shutdown = False
+
+ # Whether debug output should be logged. Supported values:
+ # 0 no debug output
+ # 1 normal debug level with abbreviated log of the data
+ # transferred
+ # >=2 Also logs all the data transferred by the proxy
+ #
+ debug_level = 0
+
+ def do_SHUTDOWN(self):
+ """Shutdown the server if self.allow_shutdown is True (default False).
+
+ To shutdown the server, this method calls the server's
+ server_close method.
+ """
+ if self.allow_shutdown:
+ self.server.server_close()
+ self.send_response(200)
+ # somewhat ugly hack: connect to this server, to unblock the
+ # accept call.
+ s = socket.socket(socket.AF_INET)
+ s.connect(("localhost", self.server.server_port))
+ s.close()
+ else:
+ self.send_error(400)
+
+ def do_GET(self):
+ """Handle a GET request"""
+ self.handle_proxy_request("GET")
+
+ def do_POST(self):
+ """Handle a POST request"""
+ self.handle_proxy_request("POST")
+
+ def handle_proxy_request(self, method):
+ """Handle a request that needs to be 'proxied'
+
+ Method should be either 'POST' or 'GET'. Both methods are
+ handled in the same way. The real host to connect to is
+ determined from the url: The path component starts with the real
+ hostname. The method strips that hostname from the path and
+ connects to the new host. All headers from the HTTP request and
+ any extra data are passed through to the remote host and the
+ response is likewise simply copied through. The only exception
+ is the Host: header from the original request. It would be
+ wrong, so it is removed and a new one is generated pointing to
+ the real remote host.
+ """
+ self.log_debug("Got %s request for %s", method, self.path)
+ for header, value in self.headers.items():
+ self.log_debug("header from client: %s:%r", header, value)
+
+ #
+ # Create a http request for the real location
+ #
+ transcoder = transcoder_map.get_transcoder(method, self.path)
+ remote_url = transcoder.get_url()
+ self.log_debug("Converted url: %r", remote_url)
+
+ # If any data is associated with the request read it
+ length = int(self.headers.getheader("Content-Length", "0"))
+ if length:
+ # FIXME: check whether length bytes were read
+ data = self.rfile.read(length)
+ self.log_debug("body of client request (%d bytes):\n%r",
+ length, data)
+ content_type = self.headers.getheader("Content-Type")
+ content_type, data = transcoder.convert_body(content_type, data)
+ self.log_debug("modified body of client request (%d bytes):\n%r",
+ len(data), data)
+ if len(data) != length:
+ self.headers["Content-length"] = str(len(data))
+ if content_type is not None:
+ self.headers["Content-type"] = content_type
+ else:
+ self.log_debug("client request has no body")
+ data = None
+
+ request = urllib2.Request(remote_url)
+ for header, value in self.headers.items():
+ if header.lower() == "host":
+ # the host header will be set by httplib and it should
+ # not be set twice, so we omit it. The value we
+ # received from the client would have been wrong anyway
+ # as it refers to the host this proxy is running on.
+ continue
+ elif header.lower() in ("content-length", "content-type"):
+ # When a post request is redirected with the 302 code,
+ # urllib2 will do a GET request for the new url even
+ # when the original request was POST. This does not
+ # conform to RFC 2616, but it's apparently what many
+ # clients do and some servers expect (e.g. a POST
+ # request to http://intevation.de/roundup/kolab/ is
+ # redirected to
+ # https://intevation.de/roundup/kolab/issue<num> and the
+ # server expects a GET request for that.). However,
+ # when doing the new request urllib2 copies all headers
+ # except for those added with add_unredirected_header.
+ # The content-length and content-type headers are only
+ # needed for POST request so we have to make sure
+ # they're not sent when redirection occurs.
+ request.add_unredirected_header(header, value)
+ else:
+ request.add_header(header, value)
+
+ if data is not None:
+ request.add_data(data)
+
+ self.log_debug("request sent")
+
+ #
+ # Retrieve the url described by the request and pass everything
+ # through to the client.
+ #
+ try:
+ response = urllib2.urlopen(request)
+ except urllib2.HTTPError, err:
+ # a HTTPError is a valid http response object, so we can
+ # treat it like a normal response.
+ response = err
+ self.log_debug("received response: %s: %r", response.code,
+ response.msg)
+
+ # check for fees and access constraints and run a dialog
+ response_read = handle_fees_and_access_constraints(remote_url, response)
+
+ # Ideally, the HTTP version in our reply to the client should be
+ # what the remote server used in its reply. Unfortunately,
+ # there doesn't seem to be a way to get that information from
+ # urllib2. So we pretend that the reply uses the same version
+ # as the request we got from the client.
+ #
+ # If we do not set the version of the reply,
+ # BaseHTTPRequestHandler will use HTTP/1.0 which can lead to
+ # problems if the real version is HTTP/1.1, because the server
+ # might send the data using the chunked transfer encoding which
+ # is not allowed in HTTP/1.0. Some clients, including Firefox 1.5,
+ # produce errors in that case.
+ #
+ # The HTTP version in the reply generated by send_response is
+ # taken from self.protocol_version.
+ self.protocol_version = self.request_version
+ self.send_response(response.code, response.msg)
+
+ for header, value in response.info().items():
+ self.log_debug("received header: %s:%r", header, value)
+ self.send_header(header, value)
+ self.end_headers()
+
+ transfer_encoding = response.info().get("Transfer-encoding")
+ self.transfer_data(response_read, self.wfile.write,
+ chunked = (transfer_encoding == "chunked"))
+ self.log_debug("request finished")
+
+ def transfer_data(self, read, write, length=None, chunked=False):
+ """Transfer data from one 'file' to another in chunks
+
+ The files are given by their read and write methods so it
+ doesn't have to be a file. The read parameter must be callable
+ with an integer argument indicating the maximum number of bytes
+ to read and the write parameter must be callable with a string.
+ If the parameter chunked is true, the method uses the 'chunked'
+ transfer encoding when writing the data.
+ """
+
+ # wrap the read/write functions if debug logging is active so
+ # that the data read from the server and written to the client
+ # is logged.
+ if self.debug_level > 0:
+ orig_read = read
+ orig_write = write
+
+ def limit_length(data):
+ """Returns a shortened version of the string data"""
+ if self.debug_level < 2 and len(data) > 13:
+ data = "%s ... %x bytes ... %s" % (data[:5],
+ len(data) - 10,
+ data[-5:])
+ return data
+
+ def read(length):
+ data = orig_read(length)
+ self.log_debug("from server: %r", limit_length(data))
+ return data
+ def write(data):
+ self.log_debug("to client: %r", limit_length(data))
+ orig_write(data)
+
+ # Now transfer the data in blocks of max_chunk_size
+ max_chunk_size = 4096
+ while 1:
+ if length is not None:
+ chunk_size = min(length, max_chunk_size)
+ else:
+ chunk_size = max_chunk_size
+ chunk = read(chunk_size)
+ if not chunk:
+ break
+ if length is not None:
+ length -= len(chunk)
+ if chunked:
+ write("%x\r\n" % len(chunk))
+ write(chunk)
+ if chunked:
+ write("\r\n")
+ if chunked:
+ write("0\r\n\r\n")
+
+ def log_exception(self, exc_info):
+ """Log an exception
+
+ This method produces a log message via self.log_message and then
+ prints the exception given by exc_info, which should be the
+ return value of sys.exc_info, to stderr. By default log_message
+ also writes to stderr, so both the message and the traceback
+ should go to the same place.
+ """
+ self.log_message("Exception for previous 500 code\n")
+ traceback.print_exception(*(exc_info + (None, sys.stderr)))
+ sys.stderr.flush()
+
+ def log_debug(self, template, *args):
+ if self.debug_level > 0:
+ message = "DEBUG: " + template % args
+ message = message.replace("%", "%%")
+ self.log_message(message)
+
+
+class MasterWorkerServer(HTTPServer):
+
+ do_shutdown = False
+
+ def __init__(self, server_address, RequestHandlerClass, num_workers):
+ HTTPServer.__init__(self, server_address, RequestHandlerClass)
+ self.thread_pool = ThreadPool(num_workers, lambda f: f())
+ sys.stderr.write("[%s] starting %d worker threads\n" \
+ % (log_date_time_string(), num_workers))
+ self.thread_pool.start()
+
+ def process_request(self, request, client_address):
+ """Put the request into the queue to be handled by the worker thread
+ """
+ def process_in_worker():
+ try:
+ self.finish_request(request, client_address)
+ self.close_request(request)
+ except:
+ self.handle_error(request, client_address)
+ self.close_request(request)
+ sys.stderr.write("[%s] queue contains %d items\n" %
+ (log_date_time_string(),
+ self.thread_pool.queue.qsize()))
+ self.thread_pool.put(process_in_worker)
+
+ def handle_error(self, request, client_address):
+ """Override to integrate the error reporting better with the other logs
+ """
+ sys.stderr.write("[%s] Exception while handling request from %r\n"
+ % (log_date_time_string(), client_address))
+ traceback.print_exception(*(sys.exc_info() + (None, sys.stderr)))
+ sys.stderr.flush()
Property changes on: trunk/inteproxy/proxycore.py
___________________________________________________________________
Name: svn:keywords
+ Id Revision
Name: svn:eol-style
+ native
More information about the Inteproxy-commits
mailing list