Skip to content

Commit

Permalink
bpo-27397: Make email module properly handle invalid-length base64 st…
Browse files Browse the repository at this point in the history
…rings (python#7583)

When attempting to base64-decode a payload of invalid length (1 mod 4),
properly recognize and handle it.  The given data will be returned as-is,
i.e. not decoded, along with a new defect, InvalidBase64LengthDefect.
  • Loading branch information
taleinat authored Jun 12, 2018
1 parent 5a98209 commit c3f55be
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 18 deletions.
4 changes: 4 additions & 0 deletions Doc/library/email.errors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,7 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
* :class:`InvalidBase64CharactersDefect` -- When decoding a block of base64
encoded bytes, characters outside the base64 alphabet were encountered.
The characters are ignored, but the resulting decoded bytes may be invalid.

* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
bytes, the number of non-padding base64 characters was invalid (1 more than
a multiple of 4). The encoded block was kept as-is.
48 changes: 30 additions & 18 deletions Lib/email/_encoded_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,30 +98,42 @@ def len_q(bstring):
#

def decode_b(encoded):
defects = []
# First try decoding with validate=True, fixing the padding if needed.
# This will succeed only if encoded includes no invalid characters.
pad_err = len(encoded) % 4
if pad_err:
defects.append(errors.InvalidBase64PaddingDefect())
padded_encoded = encoded + b'==='[:4-pad_err]
else:
padded_encoded = encoded
missing_padding = b'==='[:4-pad_err] if pad_err else b''
try:
return base64.b64decode(padded_encoded, validate=True), defects
return (
base64.b64decode(encoded + missing_padding, validate=True),
[errors.InvalidBase64PaddingDefect()] if pad_err else [],
)
except binascii.Error:
# Since we had correct padding, this must be an invalid char error.
defects = [errors.InvalidBase64CharactersDefect()]
# Since we had correct padding, this is likely an invalid char error.
#
# The non-alphabet characters are ignored as far as padding
# goes, but we don't know how many there are. So we'll just
# try various padding lengths until something works.
for i in 0, 1, 2, 3:
# goes, but we don't know how many there are. So try without adding
# padding to see if it works.
try:
return (
base64.b64decode(encoded, validate=False),
[errors.InvalidBase64CharactersDefect()],
)
except binascii.Error:
# Add as much padding as could possibly be necessary (extra padding
# is ignored).
try:
return base64.b64decode(encoded+b'='*i, validate=False), defects
return (
base64.b64decode(encoded + b'==', validate=False),
[errors.InvalidBase64CharactersDefect(),
errors.InvalidBase64PaddingDefect()],
)
except binascii.Error:
if i==0:
defects.append(errors.InvalidBase64PaddingDefect())
else:
# This should never happen.
raise AssertionError("unexpected binascii.Error")
# This only happens when the encoded string's length is 1 more
# than a multiple of 4, which is invalid.
#
# bpo-27397: Just return the encoded string since there's no
# way to decode.
return encoded, [errors.InvalidBase64LengthDefect()]

def encode_b(bstring):
return base64.b64encode(bstring).decode('ascii')
Expand Down
3 changes: 3 additions & 0 deletions Lib/email/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ class InvalidBase64PaddingDefect(MessageDefect):
class InvalidBase64CharactersDefect(MessageDefect):
"""base64 encoded sequence had characters not in base64 alphabet"""

class InvalidBase64LengthDefect(MessageDefect):
"""base64 encoded sequence had invalid length (1 mod 4)"""

# These errors are specific to header parsing.

class HeaderDefect(MessageDefect):
Expand Down
6 changes: 6 additions & 0 deletions Lib/test/test_email/test__encoded_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ def test_simple(self):
self._test(b'Zm9v', b'foo')

def test_missing_padding(self):
# 1 missing padding character
self._test(b'dmk', b'vi', [errors.InvalidBase64PaddingDefect])
# 2 missing padding characters
self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect])

def test_invalid_character(self):
self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect])
Expand All @@ -42,6 +45,9 @@ def test_invalid_character_and_bad_padding(self):
self._test(b'dm\x01k', b'vi', [errors.InvalidBase64CharactersDefect,
errors.InvalidBase64PaddingDefect])

def test_invalid_length(self):
self._test(b'abcde', b'abcde', [errors.InvalidBase64LengthDefect])


class TestDecode(TestEmailBase):

Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,15 @@ def test_get_unstructured_invalid_base64_character_and_bad_padding(self):
errors.InvalidBase64PaddingDefect],
'')

def test_get_unstructured_invalid_base64_length(self):
# bpo-27397: Return the encoded string since there's no way to decode.
self._test_get_x(self._get_unst,
'=?utf-8?b?abcde?=',
'abcde',
'abcde',
[errors.InvalidBase64LengthDefect],
'')

def test_get_unstructured_no_whitespace_between_ews(self):
self._test_get_x(self._get_unst,
'=?utf-8?q?foo?==?utf-8?q?bar?=',
Expand Down
17 changes: 17 additions & 0 deletions Lib/test/test_email/test_defect_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,23 @@ def test_invalid_chars_in_base64_payload(self):
self.assertDefectsEqual(self.get_defects(msg),
[errors.InvalidBase64CharactersDefect])

def test_invalid_length_of_base64_payload(self):
source = textwrap.dedent("""\
Subject: test
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
abcde
""")
msg = self._str_msg(source)
with self._raise_point(errors.InvalidBase64LengthDefect):
payload = msg.get_payload(decode=True)
if self.raise_expected: return
self.assertEqual(payload, b'abcde')
self.assertDefectsEqual(self.get_defects(msg),
[errors.InvalidBase64LengthDefect])

def test_missing_ending_boundary(self):
source = textwrap.dedent("""\
To: [email protected]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make email module properly handle invalid-length base64 strings.

0 comments on commit c3f55be

Please sign in to comment.