#665194: support roundtripping RFC2822 date stamps in the email.utils module

This commit is contained in:
R David Murray 2011-07-20 11:41:21 -04:00
parent 3e44612726
commit 875048bd4c
5 changed files with 133 additions and 8 deletions

View File

@ -81,6 +81,20 @@ There are several useful utilities provided in the :mod:`email.utils` module:
indexes 6, 7, and 8 of the result tuple are not usable. indexes 6, 7, and 8 of the result tuple are not usable.
.. function:: parsedate_to_datetime(date)
The inverse of :func:`format_datetime`. Performs the same function as
:func:`parsedate`, but on success returns a :class:`~datetime.datetime`. If
the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
``datetime``, and if the date is conforming to the RFCs it will represent a
time in UTC but with no indication of the actual source timezone of the
message the date comes from. If the input date has any other valid timezone
offset, the ``datetime`` will be an aware ``datetime`` with the
corresponding :class:`~datetime.timezone` :class:`~datetime.tzinfo`.
.. versionadded:: 3.3
.. function:: mktime_tz(tuple) .. function:: mktime_tz(tuple)
Turn a 10-tuple as returned by :func:`parsedate_tz` into a UTC timestamp. It Turn a 10-tuple as returned by :func:`parsedate_tz` into a UTC timestamp. It
@ -112,6 +126,20 @@ There are several useful utilities provided in the :mod:`email.utils` module:
``False``. The default is ``False``. ``False``. The default is ``False``.
.. function:: format_datetime(dt, usegmt=False)
Like ``formatdate``, but the input is a :class:`~datetime.datetime` instance. If it is
a naive datetime, it is assumed to be "UTC with no information about the
source timezone", and the conventional ``-0000`` is used for the timezone.
If it is an aware ``datetime``, then the numeric timezone offset is used.
If it is an aware ``datetime`` with offset zero, then *usegmt* may be set to
``True``, in which case the string ``GMT`` is used instead of the numeric
timezone offset. This provides a way to generate standards conformant HTTP
date headers.
.. versionadded:: 3.3
.. function:: make_msgid(idstring=None, domain=None) .. function:: make_msgid(idstring=None, domain=None)
Returns a string suitable for an :rfc:`2822`\ -compliant Returns a string suitable for an :rfc:`2822`\ -compliant

View File

@ -46,6 +46,21 @@ def parsedate_tz(data):
"""Convert a date string to a time tuple. """Convert a date string to a time tuple.
Accounts for military timezones. Accounts for military timezones.
"""
res = _parsedate_tz(data)
if res[9] is None:
res[9] = 0
return tuple(res)
def _parsedate_tz(data):
"""Convert date to extended time tuple.
The last (additional) element is the time zone offset in seconds, except if
the timezone was specified as -0000. In that case the last element is
None. This indicates a UTC timestamp that explicitly disclaims knowledge of
the source timezone, as opposed to a +0000 timestamp that indicates the
source timezone really was UTC.
""" """
data = data.split() data = data.split()
# The FWS after the comma after the day-of-week is optional, so search and # The FWS after the comma after the day-of-week is optional, so search and
@ -138,6 +153,8 @@ def parsedate_tz(data):
tzoffset = int(tz) tzoffset = int(tz)
except ValueError: except ValueError:
pass pass
if tzoffset==0 and tz.startswith('-'):
tzoffset = None
# Convert a timezone offset into seconds ; -0500 -> -18000 # Convert a timezone offset into seconds ; -0500 -> -18000
if tzoffset: if tzoffset:
if tzoffset < 0: if tzoffset < 0:
@ -147,7 +164,7 @@ def parsedate_tz(data):
tzsign = 1 tzsign = 1
tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
# Daylight Saving Time flag is set to -1, since DST is unknown. # Daylight Saving Time flag is set to -1, since DST is unknown.
return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
def parsedate(data): def parsedate(data):

View File

@ -11,12 +11,14 @@ __all__ = [
'encode_rfc2231', 'encode_rfc2231',
'formataddr', 'formataddr',
'formatdate', 'formatdate',
'format_datetime',
'getaddresses', 'getaddresses',
'make_msgid', 'make_msgid',
'mktime_tz', 'mktime_tz',
'parseaddr', 'parseaddr',
'parsedate', 'parsedate',
'parsedate_tz', 'parsedate_tz',
'parsedate_to_datetime',
'unquote', 'unquote',
] ]
@ -26,6 +28,7 @@ import time
import base64 import base64
import random import random
import socket import socket
import datetime
import urllib.parse import urllib.parse
import warnings import warnings
from io import StringIO from io import StringIO
@ -37,6 +40,7 @@ from email._parseaddr import mktime_tz
# We need workarounds for bugs in these methods in older Pythons (see below) # We need workarounds for bugs in these methods in older Pythons (see below)
from email._parseaddr import parsedate as _parsedate from email._parseaddr import parsedate as _parsedate
from email._parseaddr import parsedate_tz as _parsedate_tz from email._parseaddr import parsedate_tz as _parsedate_tz
from email._parseaddr import _parsedate_tz as __parsedate_tz
from quopri import decodestring as _qdecode from quopri import decodestring as _qdecode
@ -110,6 +114,14 @@ ecre = re.compile(r'''
''', re.VERBOSE | re.IGNORECASE) ''', re.VERBOSE | re.IGNORECASE)
def _format_timetuple_and_zone(timetuple, zone):
return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
timetuple[2],
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
timetuple[0], timetuple[3], timetuple[4], timetuple[5],
zone)
def formatdate(timeval=None, localtime=False, usegmt=False): def formatdate(timeval=None, localtime=False, usegmt=False):
"""Returns a date string as specified by RFC 2822, e.g.: """Returns a date string as specified by RFC 2822, e.g.:
@ -154,14 +166,25 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
zone = 'GMT' zone = 'GMT'
else: else:
zone = '-0000' zone = '-0000'
return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( return _format_timetuple_and_zone(now, zone)
['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
now[2],
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
now[0], now[3], now[4], now[5],
zone)
def format_datetime(dt, usegmt=False):
    """Turn a datetime into a date string as specified in RFC 2822.

    A naive datetime is rendered with the conventional '-0000' zone, and an
    aware datetime is rendered with its numeric UTC offset.

    If usegmt is True, dt must be an aware datetime with an offset of zero.  In
    this case 'GMT' will be rendered instead of the normal +0000 required by
    RFC2822.  This is to support HTTP headers involving date stamps.

    Raises ValueError if usegmt is True and dt is naive or has a non-zero
    UTC offset.
    """
    now = dt.timetuple()
    # Decide on the actual UTC offset rather than on the identity of the
    # tzinfo object: any zero-offset zone qualifies for 'GMT', and a datetime
    # whose utcoffset() is None is naive regardless of its tzinfo attribute.
    offset = dt.utcoffset()
    if usegmt:
        if offset is None or offset != datetime.timedelta(0):
            raise ValueError("usegmt option requires a UTC datetime")
        zone = 'GMT'
    elif offset is None:
        zone = '-0000'
    else:
        zone = dt.strftime("%z")
    return _format_timetuple_and_zone(now, zone)
def make_msgid(idstring=None, domain=None): def make_msgid(idstring=None, domain=None):
@ -203,6 +226,15 @@ def parsedate_tz(data):
return None return None
return _parsedate_tz(data) return _parsedate_tz(data)
def parsedate_to_datetime(data):
    """Convert an RFC 2822 date string into a datetime.

    This is the inverse of format_datetime.  Returns None when data is empty
    or when the parser produces no result.  When the parser reports the zone
    offset as None (the -0000 case), a naive datetime is returned; otherwise
    the result is an aware datetime carrying a fixed-offset timezone built
    from the offset in seconds.
    """
    if not data:
        return None
    parsed = __parsedate_tz(data)
    if not parsed:
        # NOTE(review): assumes the parser signals unparseable input with
        # None, like the sibling parsedate_tz wrapper does -- unpacking None
        # below would raise TypeError.
        return None
    *dtuple, tz = parsed
    if tz is None:
        return datetime.datetime(*dtuple[:6])
    return datetime.datetime(
        *dtuple[:6],
        tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
def parseaddr(addr): def parseaddr(addr):
addrs = _AddressList(addr).addresslist addrs = _AddressList(addr).addresslist

View File

@ -0,0 +1,45 @@
import datetime
from email import utils
import unittest
class DateTimeTests(unittest.TestCase):
    """Round-trip checks for email.utils date formatting and parsing."""

    # One reference instant, expressed as header text, as constructor
    # arguments, and as naive / aware datetime objects.
    datestring = 'Sun, 23 Sep 2001 20:10:55'
    dateargs = (2001, 9, 23, 20, 10, 55)
    offsetstring = ' -0700'
    utcoffset = datetime.timedelta(hours=-7)
    tz = datetime.timezone(utcoffset)
    naive_dt = datetime.datetime(*dateargs)
    aware_dt = datetime.datetime(*dateargs, tzinfo=tz)

    def test_naive_datetime(self):
        # A naive datetime is stamped with the '-0000' zone.
        expected = self.datestring + ' -0000'
        formatted = utils.format_datetime(self.naive_dt)
        self.assertEqual(formatted, expected)

    def test_aware_datetime(self):
        # An aware datetime is stamped with its numeric offset.
        expected = self.datestring + self.offsetstring
        formatted = utils.format_datetime(self.aware_dt)
        self.assertEqual(formatted, expected)

    def test_usegmt(self):
        # A UTC datetime may be rendered with the literal 'GMT' zone.
        utc_dt = self.naive_dt.replace(tzinfo=datetime.timezone.utc)
        formatted = utils.format_datetime(utc_dt, usegmt=True)
        self.assertEqual(formatted, self.datestring + ' GMT')

    def test_usegmt_with_naive_datetime_raises(self):
        # usegmt is rejected when the datetime carries no zone at all.
        self.assertRaises(ValueError, utils.format_datetime,
                          self.naive_dt, usegmt=True)

    def test_usegmt_with_non_utc_datetime_raises(self):
        # usegmt is rejected when the datetime's zone is not UTC.
        self.assertRaises(ValueError, utils.format_datetime,
                          self.aware_dt, usegmt=True)

    def test_parsedate_to_datetime(self):
        # A date with a real offset parses to the matching aware datetime.
        header = self.datestring + self.offsetstring
        parsed = utils.parsedate_to_datetime(header)
        self.assertEqual(parsed, self.aware_dt)

    def test_parsedate_to_datetime_naive(self):
        # A '-0000' date parses to a naive datetime.
        header = self.datestring + ' -0000'
        parsed = utils.parsedate_to_datetime(header)
        self.assertEqual(parsed, self.naive_dt)

View File

@ -234,6 +234,9 @@ Core and Builtins
Library Library
------- -------
- Issue #665194: email.utils now has format_datetime and parsedate_to_datetime
functions, allowing for round tripping of RFC2822 format dates.
- Issue #12571: Add a plat-linux3 directory mirroring the plat-linux2 - Issue #12571: Add a plat-linux3 directory mirroring the plat-linux2
directory, so that "import DLFCN" and other similar imports work on directory, so that "import DLFCN" and other similar imports work on
Linux 3.0. Linux 3.0.