mirror of
https://github.com/python/cpython.git
synced 2024-11-24 02:15:30 +08:00
gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) (#105127)
Detect email address parsing errors and return empty tuple to indicate the parsing error (old API). This fixes or at least ameliorates CVE-2023-27043. --------- Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
parent
6782fc0502
commit
18dfbd0357
@ -65,6 +65,11 @@ of the new API.
|
||||
*email address* parts. Returns a tuple of that information, unless the parse
|
||||
fails, in which case a 2-tuple of ``('', '')`` is returned.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
For security reasons, addresses that were ambiguous and could parse into
|
||||
multiple different addresses now cause ``('', '')`` to be returned
|
||||
instead of only one of the *potential* addresses.
|
||||
|
||||
|
||||
.. function:: formataddr(pair, charset='utf-8')
|
||||
|
||||
@ -87,7 +92,7 @@ of the new API.
|
||||
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
|
||||
*fieldvalues* is a sequence of header field values as might be returned by
|
||||
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
|
||||
example that gets all the recipients of a message::
|
||||
example that gets all the recipients of a message::
|
||||
|
||||
from email.utils import getaddresses
|
||||
|
||||
@ -97,6 +102,25 @@ of the new API.
|
||||
resent_ccs = msg.get_all('resent-cc', [])
|
||||
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
|
||||
|
||||
When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
|
||||
is returned in its place. Other errors in parsing the list of
|
||||
addresses such as a fieldvalue seemingly parsing into multiple
|
||||
addresses may result in a list containing a single empty 2-tuple
|
||||
``[('', '')]`` being returned rather than returning potentially
|
||||
invalid output.
|
||||
|
||||
Example malformed input parsing:
|
||||
|
||||
.. doctest::
|
||||
|
||||
>>> from email.utils import getaddresses
|
||||
>>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
|
||||
[('', '')]
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
The 2-tuple of ``('', '')`` in the returned values when parsing
|
||||
fails was added to address a security issue.
|
||||
|
||||
|
||||
.. function:: parsedate(date)
|
||||
|
||||
|
@ -570,6 +570,14 @@ dis
|
||||
:data:`~dis.hasarg` collection instead.
|
||||
(Contributed by Irit Katriel in :gh:`94216`.)
|
||||
|
||||
email
|
||||
-----
|
||||
|
||||
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
|
||||
``('', '')`` 2-tuples in more situations where invalid email addresses are
|
||||
encountered instead of potentially inaccurate values.
|
||||
(Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)
|
||||
|
||||
fractions
|
||||
---------
|
||||
|
||||
|
@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'):
|
||||
return address
|
||||
|
||||
|
||||
def _pre_parse_validation(email_header_fields):
|
||||
accepted_values = []
|
||||
for v in email_header_fields:
|
||||
s = v.replace('\\(', '').replace('\\)', '')
|
||||
if s.count('(') != s.count(')'):
|
||||
v = "('', '')"
|
||||
accepted_values.append(v)
|
||||
|
||||
return accepted_values
|
||||
|
||||
|
||||
def _post_parse_validation(parsed_email_header_tuples):
|
||||
accepted_values = []
|
||||
# The parser would have parsed a correctly formatted domain-literal
|
||||
# The existence of an [ after parsing indicates a parsing failure
|
||||
for v in parsed_email_header_tuples:
|
||||
if '[' in v[1]:
|
||||
v = ('', '')
|
||||
accepted_values.append(v)
|
||||
|
||||
return accepted_values
|
||||
|
||||
|
||||
def _count_unquoted_commas(value):
    """Count the commas in *value* that lie outside double-quoted spans."""
    total = 0
    in_quotes = False
    escaped = False
    for ch in value:
        if escaped:
            escaped = False
        elif in_quotes and ch == '\\':
            # Inside quotes a backslash escapes the next character, so an
            # escaped '"' does not close the quoted span.
            escaped = True
        elif ch == '"':
            in_quotes = not in_quotes
        elif ch == ',' and not in_quotes:
            total += 1
    return total


def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) or ('', '') for each fieldvalue.

    Every item of *fieldvalues* is converted with str(), screened by
    _pre_parse_validation(), joined with COMMASPACE and parsed as a single
    address list; the parsed tuples are then screened by
    _post_parse_validation().

    The number of parsed entries must equal the number of addresses the
    input appears to contain: one per field value plus one per comma that
    is not inside a double-quoted string (a comma in a quoted display name
    such as '"Doe, John" <jd@example.com>' is not a separator).  When the
    two numbers differ a parsing error has occurred and a list containing
    a single empty 2-tuple, [('', '')], is returned to avoid invalid
    output.
    """
    fieldvalues = _pre_parse_validation([str(v) for v in fieldvalues])
    joined = COMMASPACE.join(fieldvalues)
    result = _post_parse_validation(_AddressList(joined).addresslist)

    expected = sum(_count_unquoted_commas(v) + 1 for v in fieldvalues)
    if len(result) != expected:
        return [('', '')]

    return result
|
||||
|
||||
|
||||
def _format_timetuple_and_zone(timetuple, zone):
|
||||
@ -212,9 +254,18 @@ def parseaddr(addr):
|
||||
Return a tuple of realname and email address, unless the parse fails, in
|
||||
which case return a 2-tuple of ('', '').
|
||||
"""
|
||||
addrs = _AddressList(addr).addresslist
|
||||
if not addrs:
|
||||
return '', ''
|
||||
if isinstance(addr, list):
|
||||
addr = addr[0]
|
||||
|
||||
if not isinstance(addr, str):
|
||||
return ('', '')
|
||||
|
||||
addr = _pre_parse_validation([addr])[0]
|
||||
addrs = _post_parse_validation(_AddressList(addr).addresslist)
|
||||
|
||||
if not addrs or len(addrs) > 1:
|
||||
return ('', '')
|
||||
|
||||
return addrs[0]
|
||||
|
||||
|
||||
|
@ -3319,15 +3319,90 @@ Foo
|
||||
[('Al Person', 'aperson@dom.ain'),
|
||||
('Bud Person', 'bperson@dom.ain')])
|
||||
|
||||
def test_getaddresses_parsing_errors(self):
    """Test for parsing errors from CVE-2023-27043"""
    # Only the comma is a legitimate separator between the two addresses;
    # every other character placed there must collapse the whole result
    # to a single empty 2-tuple.
    rejected = [('', '')]
    cases = (
        (['alice@example.org(<bob@example.com>'], rejected),
        (['alice@example.org)<bob@example.com>'], rejected),
        (['alice@example.org<<bob@example.com>'], rejected),
        (['alice@example.org><bob@example.com>'], rejected),
        (['alice@example.org@<bob@example.com>'], rejected),
        (['alice@example.org,<bob@example.com>'],
         [('', 'alice@example.org'), ('', 'bob@example.com')]),
        (['alice@example.org;<bob@example.com>'], rejected),
        (['alice@example.org:<bob@example.com>'], rejected),
        (['alice@example.org.<bob@example.com>'], rejected),
        (['alice@example.org"<bob@example.com>'], rejected),
        (['alice@example.org[<bob@example.com>'], rejected),
        (['alice@example.org]<bob@example.com>'], rejected),
    )
    for fieldvalues, expected in cases:
        self.assertEqual(utils.getaddresses(fieldvalues), expected)
|
||||
|
||||
def test_parseaddr_parsing_errors(self):
    """Test for parsing errors from CVE-2023-27043"""
    # parseaddr() yields exactly one address, so every one of these
    # two-address inputs (the comma-separated one included) must be
    # reported as a failed parse.
    malformed = (
        ['alice@example.org(<bob@example.com>'],
        ['alice@example.org)<bob@example.com>'],
        ['alice@example.org<<bob@example.com>'],
        ['alice@example.org><bob@example.com>'],
        ['alice@example.org@<bob@example.com>'],
        ['alice@example.org,<bob@example.com>'],
        ['alice@example.org;<bob@example.com>'],
        ['alice@example.org:<bob@example.com>'],
        ['alice@example.org.<bob@example.com>'],
        ['alice@example.org"<bob@example.com>'],
        ['alice@example.org[<bob@example.com>'],
        ['alice@example.org]<bob@example.com>'],
    )
    for addr in malformed:
        self.assertEqual(utils.parseaddr(addr), ('', ''))
|
||||
|
||||
def test_getaddresses_nasty(self):
    # Each case pairs a list of awkward header values with the exact list
    # getaddresses() must return for it.  '[]*-- =~$' has a single
    # expectation, [('', '')]; the older three-tuple expectation
    # contradicted it and is not repeated here.
    cases = (
        (['foo: ;'], [('', '')]),
        (['[]*-- =~$'], [('', '')]),
        (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
         [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
        ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
         [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
        (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
         [('', '')]),
        (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
         [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
        (['John Doe <jdoe@machine(comment). example>'],
         [('John Doe (comment)', 'jdoe@machine.example')]),
        (['"Mary Smith: Personal Account" <smith@home.example>'],
         [('Mary Smith: Personal Account', 'smith@home.example')]),
        (['Undisclosed recipients:;'], [('', '')]),
        ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
         [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
    )
    for fieldvalues, expected in cases:
        # subTest keeps going after a failure so every bad case is reported.
        with self.subTest(fieldvalues=fieldvalues):
            self.assertEqual(utils.getaddresses(fieldvalues), expected)
|
||||
|
||||
def test_getaddresses_embedded_comment(self):
|
||||
"""Test proper handling of a nested comment"""
|
||||
|
@ -0,0 +1,4 @@
|
||||
CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
|
||||
and :func:`email.utils.getaddresses` from returning the realname portion of an
|
||||
invalid RFC2822 email header in the email address portion of the 2-tuple
|
||||
returned after being parsed by :class:`email._parseaddr.AddressList`.
|
Loading…
Reference in New Issue
Block a user