Simplify the Request class. The basic components of the parsed
Request are now available as public attributes, e.g. full_url and
host.  The accessor methods are deprecated.  The implementation
replaces the complicated __getattr__ machinery with a _parse() method.

The response from an HTTP request is now an HTTPResponse instance
instead of an addinfourl() wrapper instance.  The wrapper provided
minimal extra functionality and was undocumented.  The API of
addinfourl() was preserved, except for close hooks, by adding a few
methods and public attributes to the HTTPResponse class.
jeremyhylton committed Mar 31, 2009
1 parent 16caab0 commit 6c5e28c
Showing 4 changed files with 138 additions and 125 deletions.
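As a rough usage sketch of the new surface described in the commit message (illustrative only: the URL is hypothetical, a network connection is assumed, and the comments simply restate what the diff below adds), the parsed pieces of a Request are plain attributes and urlopen() now hands back an HTTPResponse that keeps the old addinfourl()-style accessors:

import urllib.request

req = urllib.request.Request("http://www.example.com/index.html")
print(req.full_url)    # public attribute; the get_full_url() accessor is deprecated
print(req.host)        # public attribute; the get_host() accessor is deprecated

resp = urllib.request.urlopen(req)   # requires network access
# resp is an http.client.HTTPResponse, not an addinfourl() wrapper,
# but the wrapper's API is still available:
print(resp.geturl())   # addinfourl-style accessor, backed by resp.url
print(resp.getcode())  # addinfourl-style accessor, backed by resp.status
headers = resp.info()  # returns resp.headers
body = resp.read()
resp.close()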
88 changes: 60 additions & 28 deletions Lib/http/client.py
@@ -204,6 +204,12 @@
MAXAMOUNT = 1048576

class HTTPMessage(email.message.Message):
+ # XXX The only usage of this method is in
+ # http.server.CGIHTTPRequestHandler. Maybe move the code there so
+ # that it doesn't need to be part of the public API. The API has
+ # never been defined so this could cause backwards compatibility
+ # issues.

def getallmatchingheaders(self, name):
"""Find all header lines matching a given header name.
@@ -261,20 +267,26 @@ class HTTPResponse(io.RawIOBase):
# text following RFC 2047. The basic status line parsing only
# accepts iso-8859-1.

- def __init__(self, sock, debuglevel=0, strict=0, method=None):
- # If the response includes a content-length header, we
- # need to make sure that the client doesn't read more than the
+ def __init__(self, sock, debuglevel=0, strict=0, method=None, url=None):
+ # If the response includes a content-length header, we need to
+ # make sure that the client doesn't read more than the
# specified number of bytes. If it does, it will block until
- # the server times out and closes the connection. (The only
- # applies to HTTP/1.1 connections.) This will happen if a self.fp.read()
- # is done (without a size) whether self.fp is buffered or not.
- # So, no self.fp.read() by clients unless they know what they are doing.
+ # the server times out and closes the connection. This will
+ # happen if a self.fp.read() is done (without a size) whether
+ # self.fp is buffered or not. So, no self.fp.read() by
+ # clients unless they know what they are doing.
self.fp = sock.makefile("rb")
self.debuglevel = debuglevel
self.strict = strict
self._method = method

- self.msg = None
+ # The HTTPResponse object is returned via urllib. The clients
+ # of http and urllib expect different attributes for the
+ # headers. headers is used here and supports urllib. msg is
+ # provided as a backwards compatibility layer for http
+ # clients.

+ self.headers = self.msg = None

# from the Status-Line of the response
self.version = _UNKNOWN # HTTP-Version
@@ -326,7 +338,7 @@ def _read_status(self):
return version, status, reason

def begin(self):
- if self.msg is not None:
+ if self.headers is not None:
# we've already started reading the response
return

@@ -343,7 +355,7 @@ def begin(self):
if self.debuglevel > 0:
print("header:", skip)

- self.status = status
+ self.code = self.status = status
self.reason = reason.strip()
if version == "HTTP/1.0":
self.version = 10
@@ -358,17 +370,17 @@
self.length = None
self.chunked = False
self.will_close = True
- self.msg = email.message_from_string('')
+ self.headers = self.msg = email.message_from_string('')
return

- self.msg = parse_headers(self.fp)
+ self.headers = self.msg = parse_headers(self.fp)

if self.debuglevel > 0:
- for hdr in self.msg:
+ for hdr in self.headers:
print("header:", hdr, end=" ")

# are we using the chunked-style of transfer encoding?
- tr_enc = self.msg.get("transfer-encoding")
+ tr_enc = self.headers.get("transfer-encoding")
if tr_enc and tr_enc.lower() == "chunked":
self.chunked = True
self.chunk_left = None
@@ -381,10 +393,10 @@ def begin(self):
# do we have a Content-Length?
# NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
self.length = None
- length = self.msg.get("content-length")
+ length = self.headers.get("content-length")

# are we using the chunked-style of transfer encoding?
- tr_enc = self.msg.get("transfer-encoding")
+ tr_enc = self.headers.get("transfer-encoding")
if length and not self.chunked:
try:
self.length = int(length)
@@ -411,11 +423,11 @@ def begin(self):
self.will_close = True

def _check_close(self):
- conn = self.msg.get("connection")
+ conn = self.headers.get("connection")
if self.version == 11:
# An HTTP/1.1 proxy is assumed to stay open unless
# explicitly closed.
- conn = self.msg.get("connection")
+ conn = self.headers.get("connection")
if conn and "close" in conn.lower():
return True
return False
@@ -424,7 +436,7 @@ def _check_close(self):
# connections, using rules different than HTTP/1.1.

# For older HTTP, Keep-Alive indicates persistent connection.
- if self.msg.get("keep-alive"):
+ if self.headers.get("keep-alive"):
return False

# At least Akamai returns a "Connection: Keep-Alive" header,
@@ -433,7 +445,7 @@ def _check_close(self):
return False

# Proxy-Connection is a netscape hack.
- pconn = self.msg.get("proxy-connection")
+ pconn = self.headers.get("proxy-connection")
if pconn and "keep-alive" in pconn.lower():
return False

@@ -584,21 +596,31 @@ def fileno(self):
return self.fp.fileno()

def getheader(self, name, default=None):
- if self.msg is None:
+ if self.headers is None:
raise ResponseNotReady()
- return ', '.join(self.msg.get_all(name, default))
+ return ', '.join(self.headers.get_all(name, default))

def getheaders(self):
"""Return list of (header, value) tuples."""
- if self.msg is None:
+ if self.headers is None:
raise ResponseNotReady()
- return list(self.msg.items())
+ return list(self.headers.items())

# We override IOBase.__iter__ so that it doesn't check for closed-ness

def __iter__(self):
return self

+ # For compatibility with old-style urllib responses.

+ def info(self):
+ return self.headers

+ def geturl(self):
+ return self.url

+ def getcode(self):
+ return self.status

class HTTPConnection:

@@ -757,7 +779,7 @@ def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
if self.__state == _CS_IDLE:
self.__state = _CS_REQ_STARTED
else:
- raise CannotSendRequest()
+ raise CannotSendRequest(self.__state)

# Save the method we use, we need it later in the response phase
self._method = method
@@ -906,13 +928,23 @@ def _send_request(self, method, url, body, headers):
self.endheaders(body)

def getresponse(self):
"""Get the response from the server."""
"""Get the response from the server.
If the HTTPConnection is in the correct state, returns an
instance of HTTPResponse or of whatever object is returned by
class the response_class variable.
If a request has not been sent or if a previous response has
not be handled, ResponseNotReady is raised. If the HTTP
response indicates that the connection should be closed, then
it will be closed before the response is returned. When the
connection is closed, the underlying socket is closed.
"""

# if a prior response has been completed, then forget about it.
if self.__response and self.__response.isclosed():
self.__response = None

#
# if a prior response exists, then it must be completed (otherwise, we
# cannot read this response's header to determine the connection-close
# behavior)
@@ -929,7 +961,7 @@ def getresponse(self):
# isclosed() status to become true.
#
if self.__state != _CS_REQ_SENT or self.__response:
- raise ResponseNotReady()
+ raise ResponseNotReady(self.__state)

if self.debuglevel > 0:
response = self.response_class(self.sock, self.debuglevel,
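As a sketch of how the reworked getresponse() behaves after this change (the host is hypothetical, the GET itself assumes network access, and the exact exception text depends on the module's internal state names):

import http.client

conn = http.client.HTTPConnection("www.example.com")

# Calling getresponse() before a request has been sent leaves the
# connection in its idle state, so ResponseNotReady is raised; it now
# carries that state instead of an empty message.
try:
    conn.getresponse()
except http.client.ResponseNotReady as exc:
    print("not ready:", exc)

conn.request("GET", "/")
resp = conn.getresponse()
print(resp.status, resp.reason)
print(resp.headers.get("Content-Type"))  # resp.headers is the new alias of resp.msg
resp.read()
resp.close()
conn.close()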
5 changes: 0 additions & 5 deletions Lib/test/test_http_cookiejar.py
@@ -583,11 +583,6 @@ def test_request_host(self):
req = urllib.request.Request("http://www.acme.com/",
headers={"Host": "irrelevant.com"})
self.assertEquals(request_host(req), "www.acme.com")
- # not actually sure this one is valid Request object, so maybe should
- # remove test for no host in url in request_host function?
- req = urllib.request.Request("/resource.html",
- headers={"Host": "www.acme.com"})
- self.assertEquals(request_host(req), "www.acme.com")
# port shouldn't be in request-host
req = urllib.request.Request("http://www.acme.com:2345/resource.html",
headers={"Host": "www.acme.com:5432"})
5 changes: 5 additions & 0 deletions Lib/test/test_urllib2.py
@@ -683,8 +683,13 @@ def __init__(self, fp, msg, status, reason):
self.msg = msg
self.status = status
self.reason = reason
+ self.code = 200
def read(self):
return ''
+ def info(self):
+ return {}
+ def geturl(self):
+ return self.url
class MockHTTPClass:
def __init__(self):
self.level = 0
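The additions to MockHTTPResponse above mirror the small duck-typed surface that urllib.request handlers expect from a response object, which HTTPResponse itself now provides. A minimal stand-in (names are illustrative, not taken from the test suite) looks like this:

class FakeResponse:
    """Bare-bones response exposing the attributes and accessors urllib relies on."""

    def __init__(self, url, code=200, headers=None):
        self.url = url
        self.code = self.status = code
        self.headers = headers if headers is not None else {}

    def read(self):
        return b""

    def info(self):        # addinfourl-style accessor for the headers
        return self.headers

    def geturl(self):      # addinfourl-style accessor for the URL
        return self.url

    def getcode(self):
        return self.code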