Skip to content

Commit

Permalink
Fix some string encoding issues with entity bodies in HTTP requests.
Browse files Browse the repository at this point in the history
RFC 2616 says that iso-8859-1 is the default charset for HTTP entity
bodies, but we encoded strings using ascii.  See
http://bugs.python.org/issue5314.  Changed docs and code to use
iso-8859-1.

Also fix some brokenness with passing a file as the body instead of a
string.

Add tests to show that some of this behavior actually works.
  • Loading branch information
jeremyhylton committed Mar 27, 2009
1 parent 98eb6c2 commit 236654b
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 19 deletions.
24 changes: 16 additions & 8 deletions Doc/library/http.client.rst
Original file line number Diff line number Diff line change
Expand Up @@ -351,14 +351,22 @@ HTTPConnection Objects

.. method:: HTTPConnection.request(method, url[, body[, headers]])

This will send a request to the server using the HTTP request method *method*
and the selector *url*. If the *body* argument is present, it should be a
string of data to send after the headers are finished. Alternatively, it may
be an open file object, in which case the contents of the file is sent; this
file object should support ``fileno()`` and ``read()`` methods. The header
Content-Length is automatically set to the correct value. The *headers*
argument should be a mapping of extra HTTP headers to send with the request.

This will send a request to the server using the HTTP request
method *method* and the selector *url*. If the *body* argument is
present, it should be a string or bytes object of data to send after
the headers are finished. Strings are encoded as ISO-8859-1, the
default charset for HTTP. To use other encodings, pass a bytes
object. The Content-Length header is set to the length of the
string.

The *body* may also be an open file object, in which case the
contents of the file are sent; this file object should support
``fileno()`` and ``read()`` methods. The header Content-Length is
automatically set to the length of the file as reported by
stat.

The *headers* argument should be a mapping of extra HTTP
headers to send with the request.

.. method:: HTTPConnection.getresponse()

Expand Down
30 changes: 20 additions & 10 deletions Lib/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ def parse_headers(fp, _class=HTTPMessage):
if line in (b'\r\n', b'\n', b''):
break
hstring = b''.join(headers).decode('iso-8859-1')

return email.parser.Parser(_class=_class).parsestr(hstring)

class HTTPResponse(io.RawIOBase):
Expand Down Expand Up @@ -675,13 +674,22 @@ def send(self, str):
if self.debuglevel > 0:
print("send:", repr(str))
try:
blocksize=8192
if hasattr(str,'read') :
if self.debuglevel > 0: print("sendIng a read()able")
data=str.read(blocksize)
while data:
blocksize = 8192
if hasattr(str, "read") :
if self.debuglevel > 0:
print("sendIng a read()able")
encode = False
if "b" not in str.mode:
encode = True
if self.debuglevel > 0:
print("encoding file using iso-8859-1")
while 1:
data = str.read(blocksize)
if not data:
break
if encode:
data = data.encode("iso-8859-1")
self.sock.sendall(data)
data=str.read(blocksize)
else:
self.sock.sendall(str)
except socket.error as v:
Expand Down Expand Up @@ -713,8 +721,8 @@ def _send_output(self, message_body=None):
message_body = None
self.send(msg)
if message_body is not None:
#message_body was not a string (i.e. it is a file) and
#we must run the risk of Nagle
# message_body was not a string (i.e. it is a file), and
# we must run the risk of Nagle.
self.send(message_body)

def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
Expand Down Expand Up @@ -904,7 +912,9 @@ def _send_request(self, method, url, body, headers):
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
body = body.encode('ascii')
# RFC 2616 Section 3.7.1 says that text media types have a
# default charset of iso-8859-1.
body = body.encode('iso-8859-1')
self.endheaders(body)

def getresponse(self):
Expand Down
73 changes: 72 additions & 1 deletion Lib/test/test_httplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,80 @@ def test_attributes(self):
h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30)
self.assertEqual(h.timeout, 30)

class RequestBodyTest(TestCase):
    """Test cases where a request includes a message body."""

    def setUp(self):
        # Swap the connection's socket for a FakeSocket; the tests read
        # the request back from self.sock.data (presumably FakeSocket
        # records everything sent through it -- confirm against its
        # definition).
        self.conn = httplib.HTTPConnection('example.com')
        self.sock = FakeSocket("")
        self.conn.sock = self.sock

    def get_headers_and_fp(self):
        """Return (message, fp) for the request held in self.sock.data.

        The request line is consumed and the headers are parsed, so the
        remaining content of *fp* is the message body.
        """
        f = io.BytesIO(self.sock.data)
        f.readline()  # read the request line
        message = httplib.parse_headers(f)
        return message, f

    def test_manual_content_length(self):
        # Set an incorrect content-length so that we can verify that
        # it will not be over-ridden by the library.
        self.conn.request("PUT", "/url", "body",
                          {"Content-Length": "42"})
        message, f = self.get_headers_and_fp()
        self.assertEqual("42", message.get("content-length"))
        self.assertEqual(4, len(f.read()))

    def test_ascii_body(self):
        # A pure-ASCII str body is sent unchanged, one byte per character.
        self.conn.request("PUT", "/url", "body")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("4", message.get("content-length"))
        self.assertEqual(b'body', f.read())

    def test_latin1_body(self):
        # A str body containing a non-ASCII Latin-1 character must be
        # encoded as a single byte per character.
        self.conn.request("PUT", "/url", "body\xc1")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

    def test_bytes_body(self):
        # A bytes body is sent as-is.
        self.conn.request("PUT", "/url", b"body\xc1")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

    def test_file_body(self):
        # Body supplied as a file opened in text mode.
        with open(support.TESTFN, "w") as out:
            out.write("body")
        # Keep the file open only for the duration of the request so the
        # handle is closed even if the request raises.
        with open(support.TESTFN) as body:
            self.conn.request("PUT", "/url", body)
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("4", message.get("content-length"))
        self.assertEqual(b'body', f.read())

    def test_binary_file_body(self):
        # Body supplied as a file opened in binary mode.
        with open(support.TESTFN, "wb") as out:
            out.write(b"body\xc1")
        # Keep the file open only for the duration of the request so the
        # handle is closed even if the request raises.
        with open(support.TESTFN, "rb") as body:
            self.conn.request("PUT", "/url", body)
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

def test_main(verbose=None):
support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest,
HTTPSTimeoutTest)
HTTPSTimeoutTest, RequestBodyTest)

if __name__ == '__main__':
test_main()

0 comments on commit 236654b

Please sign in to comment.