mirror of
https://github.com/python/cpython.git
synced 2024-11-27 20:04:41 +08:00
gh-80010: Expand fromisoformat to include most of ISO-8601 (#92177)
This expands `fromisoformat` to cover most of the common uses of ISO 8601. We may expand the scope more in the future.
This commit is contained in:
parent
ada8b6d1b1
commit
1303f8c927
@ -526,18 +526,20 @@ Other constructors, all class methods:
|
||||
|
||||
.. classmethod:: date.fromisoformat(date_string)
|
||||
|
||||
Return a :class:`date` corresponding to a *date_string* given in the format
|
||||
``YYYY-MM-DD``::
|
||||
Return a :class:`date` corresponding to a *date_string* given in any valid
|
||||
ISO 8601 format, except ordinal dates (e.g. ``YYYY-DDD``)::
|
||||
|
||||
>>> from datetime import date
|
||||
>>> date.fromisoformat('2019-12-04')
|
||||
datetime.date(2019, 12, 4)
|
||||
|
||||
This is the inverse of :meth:`date.isoformat`. It only supports the format
|
||||
``YYYY-MM-DD``.
|
||||
>>> date.fromisoformat('20191204')
|
||||
datetime.date(2019, 12, 4)
|
||||
>>> date.fromisoformat('2021-W01-1')
|
||||
datetime.date(2021, 1, 4)
|
||||
|
||||
.. versionadded:: 3.7
|
||||
|
||||
.. versionchanged:: 3.11
|
||||
Previously, this method only supported the format ``YYYY-MM-DD``.
|
||||
|
||||
.. classmethod:: date.fromisocalendar(year, week, day)
|
||||
|
||||
@ -710,8 +712,6 @@ Instance methods:
|
||||
>>> date(2002, 12, 4).isoformat()
|
||||
'2002-12-04'
|
||||
|
||||
This is the inverse of :meth:`date.fromisoformat`.
|
||||
|
||||
.. method:: date.__str__()
|
||||
|
||||
For a date *d*, ``str(d)`` is equivalent to ``d.isoformat()``.
|
||||
@ -994,31 +994,29 @@ Other constructors, all class methods:
|
||||
|
||||
.. classmethod:: datetime.fromisoformat(date_string)
|
||||
|
||||
Return a :class:`.datetime` corresponding to a *date_string* in one of the
|
||||
formats emitted by :meth:`date.isoformat` and :meth:`datetime.isoformat`.
|
||||
Return a :class:`.datetime` corresponding to a *date_string* in any valid
|
||||
ISO 8601 format, with the following exceptions:
|
||||
|
||||
Specifically, this function supports strings in the format:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
YYYY-MM-DD[*HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]]
|
||||
|
||||
where ``*`` can match any single character.
|
||||
|
||||
.. caution::
|
||||
|
||||
This does *not* support parsing arbitrary ISO 8601 strings - it is only intended
|
||||
as the inverse operation of :meth:`datetime.isoformat`. A more full-featured
|
||||
ISO 8601 parser, ``dateutil.parser.isoparse`` is available in the third-party package
|
||||
`dateutil <https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.isoparse>`__.
|
||||
1. Time zone offsets may have fractional seconds.
|
||||
2. The ``T`` separator may be replaced by any single Unicode character.
|
||||
3. Ordinal dates are not currently supported.
|
||||
4. Fractional hours and minutes are not supported.
|
||||
|
||||
Examples::
|
||||
|
||||
>>> from datetime import datetime
|
||||
>>> datetime.fromisoformat('2011-11-04')
|
||||
datetime.datetime(2011, 11, 4, 0, 0)
|
||||
>>> datetime.fromisoformat('20111104')
|
||||
datetime.datetime(2011, 11, 4, 0, 0)
|
||||
>>> datetime.fromisoformat('2011-11-04T00:05:23')
|
||||
datetime.datetime(2011, 11, 4, 0, 5, 23)
|
||||
>>> datetime.fromisoformat('2011-11-04T00:05:23Z')
|
||||
datetime.datetime(2011, 11, 4, 0, 5, 23, tzinfo=datetime.timezone.utc)
|
||||
>>> datetime.fromisoformat('20111104T000523')
|
||||
datetime.datetime(2011, 11, 4, 0, 5, 23)
|
||||
>>> datetime.fromisoformat('2011-W01-2T00:05:23.283')
|
||||
datetime.datetime(2011, 1, 4, 0, 5, 23, 283000)
|
||||
>>> datetime.fromisoformat('2011-11-04 00:05:23.283')
|
||||
datetime.datetime(2011, 11, 4, 0, 5, 23, 283000)
|
||||
>>> datetime.fromisoformat('2011-11-04 00:05:23.283+00:00')
|
||||
@ -1028,6 +1026,10 @@ Other constructors, all class methods:
|
||||
tzinfo=datetime.timezone(datetime.timedelta(seconds=14400)))
|
||||
|
||||
.. versionadded:: 3.7
|
||||
.. versionchanged:: 3.11
|
||||
Previously, this method only supported formats that could be emitted by
|
||||
:meth:`date.isoformat()` or :meth:`datetime.isoformat()`.
|
||||
|
||||
|
||||
.. classmethod:: datetime.fromisocalendar(year, week, day)
|
||||
|
||||
@ -1763,30 +1765,41 @@ Other constructor:
|
||||
|
||||
.. classmethod:: time.fromisoformat(time_string)
|
||||
|
||||
Return a :class:`.time` corresponding to a *time_string* in one of the
|
||||
formats emitted by :meth:`time.isoformat`. Specifically, this function supports
|
||||
strings in the format:
|
||||
Return a :class:`.time` corresponding to a *time_string* in any valid
|
||||
ISO 8601 format, with the following exceptions:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]
|
||||
|
||||
.. caution::
|
||||
|
||||
This does *not* support parsing arbitrary ISO 8601 strings. It is only
|
||||
intended as the inverse operation of :meth:`time.isoformat`.
|
||||
1. Time zone offsets may have fractional seconds.
|
||||
2. The leading ``T``, normally required in cases where there may be ambiguity between
|
||||
a date and a time, is not required.
|
||||
3. Fractional seconds may have any number of digits (anything beyond 6 will
|
||||
be truncated).
|
||||
4. Fractional hours and minutes are not supported.
|
||||
|
||||
Examples::
|
||||
|
||||
>>> from datetime import time
|
||||
>>> time.fromisoformat('04:23:01')
|
||||
datetime.time(4, 23, 1)
|
||||
>>> time.fromisoformat('T04:23:01')
|
||||
datetime.time(4, 23, 1)
|
||||
>>> time.fromisoformat('T042301')
|
||||
datetime.time(4, 23, 1)
|
||||
>>> time.fromisoformat('04:23:01.000384')
|
||||
datetime.time(4, 23, 1, 384)
|
||||
>>> time.fromisoformat('04:23:01,000384')
|
||||
datetime.time(4, 23, 1, 384)
|
||||
>>> time.fromisoformat('04:23:01+04:00')
|
||||
datetime.time(4, 23, 1, tzinfo=datetime.timezone(datetime.timedelta(seconds=14400)))
|
||||
>>> time.fromisoformat('04:23:01Z')
|
||||
datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc)
|
||||
>>> time.fromisoformat('04:23:01+00:00')
|
||||
datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc)
|
||||
|
||||
|
||||
.. versionadded:: 3.7
|
||||
.. versionchanged:: 3.11
|
||||
Previously, this method only supported formats that could be emitted by
|
||||
:meth:`time.isoformat()`.
|
||||
|
||||
|
||||
Instance methods:
|
||||
|
@ -425,6 +425,14 @@ asyncio
|
||||
existing stream-based connections to TLS. (Contributed by Ian Good in
|
||||
:issue:`34975`.)
|
||||
|
||||
datetime
|
||||
--------
|
||||
|
||||
* :meth:`datetime.date.fromisoformat`, :meth:`datetime.time.fromisoformat` and
|
||||
:meth:`datetime.datetime.fromisoformat` can now be used to parse most ISO 8601
|
||||
formats (barring only those that support fractional hours and minutes).
|
||||
(Contributed by Paul Ganssle in :gh:`80010`.)
|
||||
|
||||
fractions
|
||||
---------
|
||||
|
||||
|
252
Lib/datetime.py
252
Lib/datetime.py
@ -262,58 +262,150 @@ def _wrap_strftime(object, format, timetuple):
|
||||
return _time.strftime(newformat, timetuple)
|
||||
|
||||
# Helpers for parsing the result of isoformat()
|
||||
def _is_ascii_digit(c):
    """Return True if *c* is exactly one ASCII decimal digit ("0" to "9").

    A bare substring test (``c in "0123456789"``) also succeeds for the
    empty string and for multi-character runs such as "12", so the length
    is checked explicitly.  Characters outside the ASCII digit range,
    including other Unicode decimal digits, yield False.
    """
    return len(c) == 1 and c in "0123456789"
|
||||
|
||||
def _find_isoformat_datetime_separator(dtstr):
    """Return the index of the character separating the date from the time.

    *dtstr* is an ISO 8601-style date or datetime string of at least 7
    characters.  The result is 7, 8 or 10, i.e. the length of the date
    portion (YYYYWww, YYYYWwwd / YYYYMMDD / YYYY-Www, or YYYY-MM-DD /
    YYYY-Www-d).  For a date-only string the result may equal
    ``len(dtstr)``.

    Raises ValueError if *dtstr* is shorter than 7 characters or is of the
    form ``YYYY-Www-`` with nothing after the second hyphen.

    See the comment in _datetimemodule.c:_find_isoformat_datetime_separator
    """
    len_dtstr = len(dtstr)
    # Validate explicitly rather than with ``assert``: asserts are stripped
    # under -O, and callers only translate ValueError into their own error.
    if len_dtstr < 7:
        raise ValueError("Invalid ISO string")
    if len_dtstr == 7:
        return 7

    date_separator = "-"
    week_indicator = "W"

    if dtstr[4] == date_separator:
        if dtstr[5] != week_indicator:
            # YYYY-MM-DD (10)
            return 10

        if len_dtstr > 8 and dtstr[8] == date_separator:
            if len_dtstr == 9:
                # "YYYY-Www-" with no weekday digit following.
                raise ValueError("Invalid ISO string")
            if len_dtstr > 10 and _is_ascii_digit(dtstr[10]):
                # This is as far as we need to resolve the ambiguity for
                # the moment - if we have YYYY-Www-##, the separator is
                # either a hyphen at 8 or a number at 10.
                #
                # We'll assume it's a hyphen at 8 because it's way more
                # likely that someone will use a hyphen as a separator than
                # a number, but at this point it's really best effort
                # because this is an extension of the spec anyway.
                # TODO(pganssle): Document this
                return 8
            # YYYY-Www-d (10)
            return 10

        # YYYY-Www (8)
        return 8

    if dtstr[4] != week_indicator:
        # YYYYMMDD (8)
        return 8

    # YYYYWww (7) or YYYYWwwd (8): find the end of the run of digits that
    # starts right after the two week digits.
    idx = 7
    while idx < len_dtstr and _is_ascii_digit(dtstr[idx]):
        idx += 1

    if idx < 9:
        # The run stops at 7 or 8, so a non-digit (or the end of the string)
        # marks the separator position directly.
        return idx

    # The separator is itself a digit.  An even end index means the date
    # part is YYYYWww (separator at 7); an odd one means YYYYWwwd
    # (separator at 8).  Presumably this relies on the time digits coming
    # in two-digit pairs - see the C implementation's comment.
    return 7 if idx % 2 == 0 else 8
|
||||
|
||||
|
||||
def _parse_isoformat_date(dtstr):
|
||||
# It is assumed that this function will only be called with a
|
||||
# string of length exactly 10, and (though this is not used) ASCII-only
|
||||
# It is assumed that this is an ASCII-only string of lengths 7, 8 or 10,
|
||||
# see the comment on Modules/_datetimemodule.c:_find_isoformat_datetime_separator
|
||||
assert len(dtstr) in (7, 8, 10)
|
||||
year = int(dtstr[0:4])
|
||||
if dtstr[4] != '-':
|
||||
raise ValueError('Invalid date separator: %s' % dtstr[4])
|
||||
has_sep = dtstr[4] == '-'
|
||||
|
||||
month = int(dtstr[5:7])
|
||||
pos = 4 + has_sep
|
||||
if dtstr[pos:pos + 1] == "W":
|
||||
# YYYY-?Www-?D?
|
||||
pos += 1
|
||||
weekno = int(dtstr[pos:pos + 2])
|
||||
pos += 2
|
||||
|
||||
if dtstr[7] != '-':
|
||||
raise ValueError('Invalid date separator')
|
||||
dayno = 1
|
||||
if len(dtstr) > pos:
|
||||
if (dtstr[pos:pos + 1] == '-') != has_sep:
|
||||
raise ValueError("Inconsistent use of dash separator")
|
||||
|
||||
day = int(dtstr[8:10])
|
||||
pos += has_sep
|
||||
|
||||
dayno = int(dtstr[pos:pos + 1])
|
||||
|
||||
return list(_isoweek_to_gregorian(year, weekno, dayno))
|
||||
else:
|
||||
month = int(dtstr[pos:pos + 2])
|
||||
pos += 2
|
||||
if (dtstr[pos:pos + 1] == "-") != has_sep:
|
||||
raise ValueError("Inconsistent use of dash separator")
|
||||
|
||||
pos += has_sep
|
||||
day = int(dtstr[pos:pos + 2])
|
||||
|
||||
return [year, month, day]
|
||||
|
||||
|
||||
_FRACTION_CORRECTION = [100000, 10000, 1000, 100, 10]
|
||||
|
||||
return [year, month, day]
|
||||
|
||||
def _parse_hh_mm_ss_ff(tstr):
|
||||
# Parses things of the form HH[:MM[:SS[.fff[fff]]]]
|
||||
# Parses things of the form HH[:?MM[:?SS[{.,}fff[fff]]]]
|
||||
len_str = len(tstr)
|
||||
|
||||
time_comps = [0, 0, 0, 0]
|
||||
pos = 0
|
||||
for comp in range(0, 3):
|
||||
if (len_str - pos) < 2:
|
||||
raise ValueError('Incomplete time component')
|
||||
raise ValueError("Incomplete time component")
|
||||
|
||||
time_comps[comp] = int(tstr[pos:pos+2])
|
||||
|
||||
pos += 2
|
||||
next_char = tstr[pos:pos+1]
|
||||
|
||||
if comp == 0:
|
||||
has_sep = next_char == ':'
|
||||
|
||||
if not next_char or comp >= 2:
|
||||
break
|
||||
|
||||
if next_char != ':':
|
||||
raise ValueError('Invalid time separator: %c' % next_char)
|
||||
if has_sep and next_char != ':':
|
||||
raise ValueError("Invalid time separator: %c" % next_char)
|
||||
|
||||
pos += 1
|
||||
pos += has_sep
|
||||
|
||||
if pos < len_str:
|
||||
if tstr[pos] != '.':
|
||||
raise ValueError('Invalid microsecond component')
|
||||
if tstr[pos] not in '.,':
|
||||
raise ValueError("Invalid microsecond component")
|
||||
else:
|
||||
pos += 1
|
||||
|
||||
len_remainder = len_str - pos
|
||||
if len_remainder not in (3, 6):
|
||||
raise ValueError('Invalid microsecond component')
|
||||
|
||||
time_comps[3] = int(tstr[pos:])
|
||||
if len_remainder == 3:
|
||||
time_comps[3] *= 1000
|
||||
if len_remainder >= 6:
|
||||
to_parse = 6
|
||||
else:
|
||||
to_parse = len_remainder
|
||||
|
||||
time_comps[3] = int(tstr[pos:(pos+to_parse)])
|
||||
if to_parse < 6:
|
||||
time_comps[3] *= _FRACTION_CORRECTION[to_parse-1]
|
||||
if (len_remainder > to_parse
|
||||
and not all(map(_is_ascii_digit, tstr[(pos+to_parse):]))):
|
||||
raise ValueError("Non-digit values in unparsed fraction")
|
||||
|
||||
return time_comps
|
||||
|
||||
@ -321,27 +413,34 @@ def _parse_isoformat_time(tstr):
|
||||
# Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]
|
||||
len_str = len(tstr)
|
||||
if len_str < 2:
|
||||
raise ValueError('Isoformat time too short')
|
||||
raise ValueError("Isoformat time too short")
|
||||
|
||||
# This is equivalent to re.search('[+-]', tstr), but faster
|
||||
tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1)
|
||||
# This is equivalent to re.search('[-+Z]', tstr), but faster
|
||||
tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1)
|
||||
timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr
|
||||
|
||||
time_comps = _parse_hh_mm_ss_ff(timestr)
|
||||
|
||||
tzi = None
|
||||
if tz_pos > 0:
|
||||
if tz_pos == len_str and tstr[-1] == 'Z':
|
||||
tzi = timezone.utc
|
||||
elif tz_pos > 0:
|
||||
tzstr = tstr[tz_pos:]
|
||||
|
||||
# Valid time zone strings are:
|
||||
# HH len: 2
|
||||
# HHMM len: 4
|
||||
# HH:MM len: 5
|
||||
# HHMMSS len: 6
|
||||
# HHMMSS.f+ len: 7+
|
||||
# HH:MM:SS len: 8
|
||||
# HH:MM:SS.ffffff len: 15
|
||||
# HH:MM:SS.f+ len: 10+
|
||||
|
||||
if len(tzstr) not in (5, 8, 15):
|
||||
raise ValueError('Malformed time zone string')
|
||||
if len(tzstr) in (0, 1, 3):
|
||||
raise ValueError("Malformed time zone string")
|
||||
|
||||
tz_comps = _parse_hh_mm_ss_ff(tzstr)
|
||||
|
||||
if all(x == 0 for x in tz_comps):
|
||||
tzi = timezone.utc
|
||||
else:
|
||||
@ -356,6 +455,38 @@ def _parse_isoformat_time(tstr):
|
||||
|
||||
return time_comps
|
||||
|
||||
# tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar
def _isoweek_to_gregorian(year, week, day):
    """Convert an ISO (year, week, weekday) triple to Gregorian (y, m, d).

    Raises ValueError when the year is outside [MINYEAR, MAXYEAR], when the
    week does not exist in that ISO year, or when the weekday is not in
    the range 1 to 7.
    """
    # Year is bounded this way because 9999-12-31 is (9999, 52, 5)
    if not MINYEAR <= year <= MAXYEAR:
        raise ValueError(f"Year is out of range: {year}")

    if week == 53:
        # ISO years have 53 weeks in them on years starting with a
        # Thursday and leap years starting on a Wednesday
        jan1_weekday = _ymd2ord(year, 1, 1) % 7
        has_week_53 = (jan1_weekday == 4 or
                       (jan1_weekday == 3 and _is_leap(year)))
        if not has_week_53:
            raise ValueError(f"Invalid week: {week}")
    elif not 0 < week < 53:
        raise ValueError(f"Invalid week: {week}")

    if not 0 < day < 8:
        raise ValueError(f"Invalid weekday: {day} (range is [1, 7])")

    # Days elapsed since the Monday of ISO week 1 of this year.
    days_since_week1_monday = (week - 1) * 7 + (day - 1)

    # Ordinal of the Monday of week 1, shifted to the requested day.
    week1_monday_ordinal = _isoweek1monday(year)
    return _ord2ymd(week1_monday_ordinal + days_since_week1_monday)
|
||||
|
||||
|
||||
# Just raise TypeError if the arg isn't None or a string.
|
||||
def _check_tzname(name):
|
||||
@ -847,12 +978,14 @@ class date:
|
||||
|
||||
@classmethod
|
||||
def fromisoformat(cls, date_string):
|
||||
"""Construct a date from the output of date.isoformat()."""
|
||||
"""Construct a date from a string in ISO 8601 format."""
|
||||
if not isinstance(date_string, str):
|
||||
raise TypeError('fromisoformat: argument must be str')
|
||||
|
||||
if len(date_string) not in (7, 8, 10):
|
||||
raise ValueError(f'Invalid isoformat string: {date_string!r}')
|
||||
|
||||
try:
|
||||
assert len(date_string) == 10
|
||||
return cls(*_parse_isoformat_date(date_string))
|
||||
except Exception:
|
||||
raise ValueError(f'Invalid isoformat string: {date_string!r}')
|
||||
@ -862,35 +995,7 @@ class date:
|
||||
"""Construct a date from the ISO year, week number and weekday.
|
||||
|
||||
This is the inverse of the date.isocalendar() function"""
|
||||
# Year is bounded this way because 9999-12-31 is (9999, 52, 5)
|
||||
if not MINYEAR <= year <= MAXYEAR:
|
||||
raise ValueError(f"Year is out of range: {year}")
|
||||
|
||||
if not 0 < week < 53:
|
||||
out_of_range = True
|
||||
|
||||
if week == 53:
|
||||
# ISO years have 53 weeks in them on years starting with a
|
||||
# Thursday and leap years starting on a Wednesday
|
||||
first_weekday = _ymd2ord(year, 1, 1) % 7
|
||||
if (first_weekday == 4 or (first_weekday == 3 and
|
||||
_is_leap(year))):
|
||||
out_of_range = False
|
||||
|
||||
if out_of_range:
|
||||
raise ValueError(f"Invalid week: {week}")
|
||||
|
||||
if not 0 < day < 8:
|
||||
raise ValueError(f"Invalid weekday: {day} (range is [1, 7])")
|
||||
|
||||
# Now compute the offset from (Y, 1, 1) in days:
|
||||
day_offset = (week - 1) * 7 + (day - 1)
|
||||
|
||||
# Calculate the ordinal day for monday, week 1
|
||||
day_1 = _isoweek1monday(year)
|
||||
ord_day = day_1 + day_offset
|
||||
|
||||
return cls(*_ord2ymd(ord_day))
|
||||
return cls(*_isoweek_to_gregorian(year, week, day))
|
||||
|
||||
# Conversions to string
|
||||
|
||||
@ -1427,10 +1532,15 @@ class time:
|
||||
|
||||
@classmethod
|
||||
def fromisoformat(cls, time_string):
|
||||
"""Construct a time from the output of isoformat()."""
|
||||
"""Construct a time from a string in one of the ISO 8601 formats."""
|
||||
if not isinstance(time_string, str):
|
||||
raise TypeError('fromisoformat: argument must be str')
|
||||
|
||||
# The spec actually requires that time-only ISO 8601 strings start with
|
||||
# T, but the extended format allows this to be omitted as long as there
|
||||
# is no ambiguity with date strings.
|
||||
time_string = time_string.removeprefix('T')
|
||||
|
||||
try:
|
||||
return cls(*_parse_isoformat_time(time_string))
|
||||
except Exception:
|
||||
@ -1711,24 +1821,30 @@ class datetime(date):
|
||||
|
||||
@classmethod
|
||||
def fromisoformat(cls, date_string):
|
||||
"""Construct a datetime from the output of datetime.isoformat()."""
|
||||
"""Construct a datetime from a string in one of the ISO 8601 formats."""
|
||||
if not isinstance(date_string, str):
|
||||
raise TypeError('fromisoformat: argument must be str')
|
||||
|
||||
# Split this at the separator
|
||||
dstr = date_string[0:10]
|
||||
tstr = date_string[11:]
|
||||
if len(date_string) < 7:
|
||||
raise ValueError(f'Invalid isoformat string: {date_string!r}')
|
||||
|
||||
# Split this at the separator
|
||||
try:
|
||||
separator_location = _find_isoformat_datetime_separator(date_string)
|
||||
dstr = date_string[0:separator_location]
|
||||
tstr = date_string[(separator_location+1):]
|
||||
|
||||
date_components = _parse_isoformat_date(dstr)
|
||||
except ValueError:
|
||||
raise ValueError(f'Invalid isoformat string: {date_string!r}')
|
||||
raise ValueError(
|
||||
f'Invalid isoformat string: {date_string!r}') from None
|
||||
|
||||
if tstr:
|
||||
try:
|
||||
time_components = _parse_isoformat_time(tstr)
|
||||
except ValueError:
|
||||
raise ValueError(f'Invalid isoformat string: {date_string!r}')
|
||||
raise ValueError(
|
||||
f'Invalid isoformat string: {date_string!r}') from None
|
||||
else:
|
||||
time_components = [0, 0, 0, 0, None]
|
||||
|
||||
@ -2509,7 +2625,9 @@ else:
|
||||
_format_time, _format_offset, _index, _is_leap, _isoweek1monday, _math,
|
||||
_ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord,
|
||||
_divide_and_round, _parse_isoformat_date, _parse_isoformat_time,
|
||||
_parse_hh_mm_ss_ff, _IsoCalendarDate)
|
||||
_parse_hh_mm_ss_ff, _IsoCalendarDate, _isoweek_to_gregorian,
|
||||
_find_isoformat_datetime_separator, _FRACTION_CORRECTION,
|
||||
_is_ascii_digit)
|
||||
# XXX Since import * above excludes names that start with _,
|
||||
# docstring does not get overwritten. In the future, it may be
|
||||
# appropriate to maintain a single module level docstring and
|
||||
|
@ -7,6 +7,7 @@ import itertools
|
||||
import bisect
|
||||
import copy
|
||||
import decimal
|
||||
import functools
|
||||
import sys
|
||||
import os
|
||||
import pickle
|
||||
@ -1840,6 +1841,41 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
|
||||
|
||||
self.assertEqual(dt, dt_rt)
|
||||
|
||||
def test_fromisoformat_date_examples(self):
    # Table-driven check of date-only inputs: basic (YYYYMMDD), extended
    # (YYYY-MM-DD) and ISO week forms with and without a weekday digit.
    make = self.theclass
    cases = [
        # Calendar dates, basic format
        ('00010101', make(1, 1, 1)),
        ('20000101', make(2000, 1, 1)),
        ('20250102', make(2025, 1, 2)),
        ('99991231', make(9999, 12, 31)),
        # Calendar dates, extended format
        ('0001-01-01', make(1, 1, 1)),
        ('2000-01-01', make(2000, 1, 1)),
        ('2025-01-02', make(2025, 1, 2)),
        ('9999-12-31', make(9999, 12, 31)),
        # Week dates; when the weekday is omitted the expected value is
        # the Monday of that week
        ('2025W01', make(2024, 12, 30)),
        ('2025-W01', make(2024, 12, 30)),
        ('2025W014', make(2025, 1, 2)),
        ('2025-W01-4', make(2025, 1, 2)),
        ('2026W01', make(2025, 12, 29)),
        ('2026-W01', make(2025, 12, 29)),
        ('2026W013', make(2025, 12, 31)),
        ('2026-W01-3', make(2025, 12, 31)),
        ('2022W52', make(2022, 12, 26)),
        ('2022-W52', make(2022, 12, 26)),
        ('2022W527', make(2023, 1, 1)),
        ('2022-W52-7', make(2023, 1, 1)),
        ('2015W534', make(2015, 12, 31)),       # Has week 53
        ('2015-W53-4', make(2015, 12, 31)),     # Has week 53
        ('2015-W53-5', make(2016, 1, 1)),
        ('2020W531', make(2020, 12, 28)),       # Leap year
        ('2020-W53-1', make(2020, 12, 28)),     # Leap year
        ('2020-W53-6', make(2021, 1, 2)),
    ]

    for iso_text, want in cases:
        with self.subTest(input_str=iso_text):
            self.assertEqual(self.theclass.fromisoformat(iso_text), want)
|
||||
|
||||
def test_fromisoformat_subclass(self):
|
||||
class DateSubclass(self.theclass):
|
||||
pass
|
||||
@ -1862,7 +1898,8 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
|
||||
'2009-12-0a', # Invalid character in day
|
||||
'2009-01-32', # Invalid day
|
||||
'2009-02-29', # Invalid leap day
|
||||
'20090228', # Valid ISO8601 output not from isoformat()
|
||||
'2019-W53-1', # No week 53 in 2019
|
||||
'2020-W54-1', # No week 54
|
||||
'2009\ud80002\ud80028', # Separators are surrogate codepoints
|
||||
]
|
||||
|
||||
@ -3003,6 +3040,140 @@ class TestDateTime(TestDate):
|
||||
dt_rt = self.theclass.fromisoformat(dtstr)
|
||||
self.assertEqual(dt, dt_rt)
|
||||
|
||||
def test_fromisoformat_datetime_examples(self):
    # Table-driven check that fromisoformat() accepts each input string and
    # returns the paired expected value.  Every row runs in its own subTest
    # so that all failing inputs are reported.
    UTC = timezone.utc
    BST = timezone(timedelta(hours=1), 'BST')
    EST = timezone(timedelta(hours=-5), 'EST')
    EDT = timezone(timedelta(hours=-4), 'EDT')
    examples = [
        # Extended-format calendar date, with progressively more time
        # fields and with '.' or ',' as the fraction mark
        ('2025-01-02', self.theclass(2025, 1, 2, 0, 0)),
        ('2025-01-02T03', self.theclass(2025, 1, 2, 3, 0)),
        ('2025-01-02T03:04', self.theclass(2025, 1, 2, 3, 4)),
        ('2025-01-02T0304', self.theclass(2025, 1, 2, 3, 4)),
        ('2025-01-02T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
        ('2025-01-02T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
        ('2025-01-02T03:04:05.6',
         self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
        ('2025-01-02T03:04:05,6',
         self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
        ('2025-01-02T03:04:05.678',
         self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
        ('2025-01-02T03:04:05.678901',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        ('2025-01-02T03:04:05,678901',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        ('2025-01-02T030405.678901',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        ('2025-01-02T030405,678901',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        # Fractions of other lengths: fewer than six digits are scaled up,
        # digits past the sixth do not affect the expected microseconds
        ('2025-01-02T03:04:05.6789010',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        ('2009-04-19T03:15:45.2345',
         self.theclass(2009, 4, 19, 3, 15, 45, 234500)),
        ('2009-04-19T03:15:45.1234567',
         self.theclass(2009, 4, 19, 3, 15, 45, 123456)),
        ('2025-01-02T03:04:05,678',
         self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
        # Basic-format calendar date (YYYYMMDD)
        ('20250102', self.theclass(2025, 1, 2, 0, 0)),
        ('20250102T03', self.theclass(2025, 1, 2, 3, 0)),
        ('20250102T03:04', self.theclass(2025, 1, 2, 3, 4)),
        ('20250102T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
        ('20250102T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
        ('20250102T03:04:05.6',
         self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
        ('20250102T03:04:05,6',
         self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
        ('20250102T03:04:05.678',
         self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
        ('20250102T03:04:05,678',
         self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
        ('20250102T03:04:05.678901',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        ('20250102T030405.678901',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        ('20250102T030405,678901',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        ('20250102T030405.6789010',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        # Basic-format week dates, including rows where the character in
        # the separator position is itself a digit
        ('2022W01', self.theclass(2022, 1, 3)),
        ('2022W52520', self.theclass(2022, 12, 26, 20, 0)),
        ('2022W527520', self.theclass(2023, 1, 1, 20, 0)),
        ('2026W01516', self.theclass(2025, 12, 29, 16, 0)),
        ('2026W013516', self.theclass(2025, 12, 31, 16, 0)),
        ('2025W01503', self.theclass(2024, 12, 30, 3, 0)),
        ('2025W014503', self.theclass(2025, 1, 2, 3, 0)),
        ('2025W01512', self.theclass(2024, 12, 30, 12, 0)),
        ('2025W014512', self.theclass(2025, 1, 2, 12, 0)),
        ('2025W014T121431', self.theclass(2025, 1, 2, 12, 14, 31)),
        ('2026W013T162100', self.theclass(2025, 12, 31, 16, 21)),
        ('2026W013 162100', self.theclass(2025, 12, 31, 16, 21)),
        ('2022W527T202159', self.theclass(2023, 1, 1, 20, 21, 59)),
        ('2022W527 202159', self.theclass(2023, 1, 1, 20, 21, 59)),
        ('2025W014 121431', self.theclass(2025, 1, 2, 12, 14, 31)),
        ('2025W014T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
        ('2025W014 030405', self.theclass(2025, 1, 2, 3, 4, 5)),
        # Extended-format week dates
        ('2020-W53-6T03:04:05', self.theclass(2021, 1, 2, 3, 4, 5)),
        ('2020W537 03:04:05', self.theclass(2021, 1, 3, 3, 4, 5)),
        ('2025-W01-4T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
        ('2025-W01-4T03:04:05.678901',
         self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
        ('2025-W01-4T12:14:31', self.theclass(2025, 1, 2, 12, 14, 31)),
        ('2025-W01-4T12:14:31.012345',
         self.theclass(2025, 1, 2, 12, 14, 31, 12345)),
        ('2026-W01-3T16:21:00', self.theclass(2025, 12, 31, 16, 21)),
        ('2026-W01-3T16:21:00.000000', self.theclass(2025, 12, 31, 16, 21)),
        ('2022-W52-7T20:21:59',
         self.theclass(2023, 1, 1, 20, 21, 59)),
        ('2022-W52-7T20:21:59.999999',
         self.theclass(2023, 1, 1, 20, 21, 59, 999999)),
        # UTC offsets: 'Z', hours only, HHMM, HH:MM, and offsets with
        # seconds or fractional seconds
        ('2025-W01003+00',
         self.theclass(2024, 12, 30, 3, 0, tzinfo=UTC)),
        ('2025-01-02T03:04:05+00',
         self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
        ('2025-01-02T03:04:05Z',
         self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
        ('2025-01-02003:04:05,6+00:00:00.00',
         self.theclass(2025, 1, 2, 3, 4, 5, 600000, tzinfo=UTC)),
        ('2000-01-01T00+21',
         self.theclass(2000, 1, 1, 0, 0, tzinfo=timezone(timedelta(hours=21)))),
        ('2025-01-02T03:05:06+0300',
         self.theclass(2025, 1, 2, 3, 5, 6,
                       tzinfo=timezone(timedelta(hours=3)))),
        ('2025-01-02T03:05:06-0300',
         self.theclass(2025, 1, 2, 3, 5, 6,
                       tzinfo=timezone(timedelta(hours=-3)))),
        ('2025-01-02T03:04:05+0000',
         self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
        ('2025-01-02T03:05:06+03',
         self.theclass(2025, 1, 2, 3, 5, 6,
                       tzinfo=timezone(timedelta(hours=3)))),
        ('2025-01-02T03:05:06-03',
         self.theclass(2025, 1, 2, 3, 5, 6,
                       tzinfo=timezone(timedelta(hours=-3)))),
        ('2020-01-01T03:05:07.123457-05:00',
         self.theclass(2020, 1, 1, 3, 5, 7, 123457, tzinfo=EST)),
        ('2020-01-01T03:05:07.123457-0500',
         self.theclass(2020, 1, 1, 3, 5, 7, 123457, tzinfo=EST)),
        ('2020-06-01T04:05:06.111111-04:00',
         self.theclass(2020, 6, 1, 4, 5, 6, 111111, tzinfo=EDT)),
        ('2020-06-01T04:05:06.111111-0400',
         self.theclass(2020, 6, 1, 4, 5, 6, 111111, tzinfo=EDT)),
        ('2021-10-31T01:30:00.000000+01:00',
         self.theclass(2021, 10, 31, 1, 30, tzinfo=BST)),
        ('2021-10-31T01:30:00.000000+0100',
         self.theclass(2021, 10, 31, 1, 30, tzinfo=BST)),
        ('2025-01-02T03:04:05,6+000000.00',
         self.theclass(2025, 1, 2, 3, 4, 5, 600000, tzinfo=UTC)),
        ('2025-01-02T03:04:05,678+00:00:10',
         self.theclass(2025, 1, 2, 3, 4, 5, 678000,
                       tzinfo=timezone(timedelta(seconds=10)))),
    ]

    for input_str, expected in examples:
        with self.subTest(input_str=input_str):
            actual = self.theclass.fromisoformat(input_str)
            self.assertEqual(actual, expected)
|
||||
|
||||
def test_fromisoformat_fails_datetime(self):
|
||||
# Test that fromisoformat() fails on invalid values
|
||||
bad_strs = [
|
||||
@ -3016,8 +3187,6 @@ class TestDateTime(TestDate):
|
||||
'2009-04-19T03;15:45', # Bad first time separator
|
||||
'2009-04-19T03:15;45', # Bad second time separator
|
||||
'2009-04-19T03:15:4500:00', # Bad time zone separator
|
||||
'2009-04-19T03:15:45.2345', # Too many digits for milliseconds
|
||||
'2009-04-19T03:15:45.1234567', # Too many digits for microseconds
|
||||
'2009-04-19T03:15:45.123456+24:30', # Invalid time zone offset
|
||||
'2009-04-19T03:15:45.123456-24:30', # Invalid negative offset
|
||||
'2009-04-10ᛇᛇᛇᛇᛇ12:15', # Too many unicode separators
|
||||
@ -3962,6 +4131,76 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
|
||||
t_rt = self.theclass.fromisoformat(tstr)
|
||||
self.assertEqual(t, t_rt)
|
||||
|
||||
def test_fromisoformat_fractions(self):
    # Fractional seconds of 1 to 8 digits.  Fewer than six digits are
    # scaled up to microseconds; the expected values for the 7- and
    # 8-digit rows show the extra digits being dropped, not rounded.
    strs = [
        ('12:30:45.1', (12, 30, 45, 100000)),
        ('12:30:45.12', (12, 30, 45, 120000)),
        ('12:30:45.123', (12, 30, 45, 123000)),
        ('12:30:45.1234', (12, 30, 45, 123400)),
        ('12:30:45.12345', (12, 30, 45, 123450)),
        ('12:30:45.123456', (12, 30, 45, 123456)),
        ('12:30:45.1234567', (12, 30, 45, 123456)),
        ('12:30:45.12345678', (12, 30, 45, 123456)),
    ]

    for time_str, time_comps in strs:
        # One subTest per row, as in the sibling example tests, so a
        # failure names its input and the remaining rows still run.
        with self.subTest(time_str=time_str):
            expected = self.theclass(*time_comps)
            actual = self.theclass.fromisoformat(time_str)

            self.assertEqual(actual, expected)
|
||||
|
||||
def test_fromisoformat_time_examples(self):
    # Table-driven check of time-only inputs in basic (HHMM[SS]) and
    # extended (HH:MM[:SS]) form, with optional fractions and UTC offsets.
    make = self.theclass
    utc = timezone.utc
    cases = [
        # Midnight
        ('0000', make(0, 0)),
        ('00:00', make(0, 0)),
        ('000000', make(0, 0)),
        ('00:00:00', make(0, 0)),
        ('000000.0', make(0, 0)),
        ('00:00:00.0', make(0, 0)),
        ('000000.000', make(0, 0)),
        ('00:00:00.000', make(0, 0)),
        ('000000.000000', make(0, 0)),
        ('00:00:00.000000', make(0, 0)),
        # Noon
        ('1200', make(12, 0)),
        ('12:00', make(12, 0)),
        ('120000', make(12, 0)),
        ('12:00:00', make(12, 0)),
        ('120000.0', make(12, 0)),
        ('12:00:00.0', make(12, 0)),
        ('120000.000', make(12, 0)),
        ('12:00:00.000', make(12, 0)),
        ('120000.000000', make(12, 0)),
        ('12:00:00.000000', make(12, 0)),
        # Largest field values
        ('2359', make(23, 59)),
        ('23:59', make(23, 59)),
        ('235959', make(23, 59, 59)),
        ('23:59:59', make(23, 59, 59)),
        ('235959.9', make(23, 59, 59, 900000)),
        ('23:59:59.9', make(23, 59, 59, 900000)),
        ('235959.999', make(23, 59, 59, 999000)),
        ('23:59:59.999', make(23, 59, 59, 999000)),
        ('235959.999999', make(23, 59, 59, 999999)),
        ('23:59:59.999999', make(23, 59, 59, 999999)),
        # UTC offsets
        ('00:00:00Z', make(0, 0, tzinfo=utc)),
        ('12:00:00+0000', make(12, 0, tzinfo=utc)),
        ('12:00:00+00:00', make(12, 0, tzinfo=utc)),
        ('00:00:00+05',
         make(0, 0, tzinfo=timezone(timedelta(hours=5)))),
        ('00:00:00+05:30',
         make(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30)))),
        ('12:00:00-05:00',
         make(12, 0, tzinfo=timezone(timedelta(hours=-5)))),
        ('12:00:00-0500',
         make(12, 0, tzinfo=timezone(timedelta(hours=-5)))),
        ('00:00:00,000-23:59:59.999999',
         make(0, 0, tzinfo=timezone(-timedelta(
             hours=23, minutes=59, seconds=59, microseconds=999999)))),
    ]

    for iso_text, want in cases:
        with self.subTest(input_str=iso_text):
            self.assertEqual(self.theclass.fromisoformat(iso_text), want)
|
||||
|
||||
def test_fromisoformat_fails(self):
|
||||
bad_strs = [
|
||||
'', # Empty string
|
||||
@ -3975,15 +4214,17 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
|
||||
'1a:30:45.334034', # Invalid character in hours
|
||||
'12:a0:45.334034', # Invalid character in minutes
|
||||
'12:30:a5.334034', # Invalid character in seconds
|
||||
'12:30:45.1234', # Too many digits for milliseconds
|
||||
'12:30:45.1234567', # Too many digits for microseconds
|
||||
'12:30:45.123456+24:30', # Invalid time zone offset
|
||||
'12:30:45.123456-24:30', # Invalid negative offset
|
||||
'12:30:45', # Uses full-width unicode colons
|
||||
'12:30:45.123456a', # Non-numeric data after 6 components
|
||||
'12:30:45.123456789a', # Non-numeric data after 9 components
|
||||
'12:30:45․123456', # Uses \u2024 in place of decimal point
|
||||
'12:30:45a', # Extra at end of basic time
|
||||
'12:30:45.123a', # Extra at end of millisecond time
|
||||
'12:30:45.123456a', # Extra at end of microsecond time
|
||||
'12:30:45.123456-', # Dangling '-' with no offset after microsecond time
|
||||
'12:30:45.123456+', # Dangling '+' with no offset after microsecond time
|
||||
'12:30:45.123456+12:00:30a', # Extra at end of full time
|
||||
]
|
||||
|
||||
|
@ -0,0 +1,3 @@
|
||||
Add support for generalized ISO 8601 parsing to
|
||||
:meth:`datetime.datetime.fromisoformat`, :meth:`datetime.date.fromisoformat`
|
||||
and :meth:`datetime.time.fromisoformat`. Patch by Paul Ganssle.
|
@ -395,6 +395,39 @@ iso_week1_monday(int year)
|
||||
return week1_monday;
|
||||
}
|
||||
|
||||
static int
|
||||
iso_to_ymd(const int iso_year, const int iso_week, const int iso_day,
|
||||
int *year, int *month, int *day) {
|
||||
if (iso_week <= 0 || iso_week >= 53) {
|
||||
int out_of_range = 1;
|
||||
if (iso_week == 53) {
|
||||
// ISO years have 53 weeks in them on years starting with a Thursday
|
||||
// and on leap years starting on Wednesday
|
||||
int first_weekday = weekday(iso_year, 1, 1);
|
||||
if (first_weekday == 3 || (first_weekday == 2 && is_leap(iso_year))) {
|
||||
out_of_range = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (out_of_range) {
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
|
||||
if (iso_day <= 0 || iso_day >= 8) {
|
||||
return -3;
|
||||
}
|
||||
|
||||
// Convert (Y, W, D) to (Y, M, D) in-place
|
||||
int day_1 = iso_week1_monday(iso_year);
|
||||
|
||||
int day_offset = (iso_week - 1)*7 + iso_day - 1;
|
||||
|
||||
ord_to_ymd(day_1 + day_offset, year, month, day);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------------
|
||||
* Range checkers.
|
||||
*/
|
||||
@ -680,6 +713,11 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d)
|
||||
* String parsing utilities and helper functions
|
||||
*/
|
||||
|
||||
static unsigned char
|
||||
is_digit(const char c) {
|
||||
return ((unsigned int)(c - '0')) < 10;
|
||||
}
|
||||
|
||||
static const char *
|
||||
parse_digits(const char *ptr, int *var, size_t num_digits)
|
||||
{
|
||||
@ -696,14 +734,17 @@ parse_digits(const char *ptr, int *var, size_t num_digits)
|
||||
}
|
||||
|
||||
static int
|
||||
parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
|
||||
parse_isoformat_date(const char *dtstr, const size_t len, int *year, int *month, int *day)
|
||||
{
|
||||
/* Parse the date components of the result of date.isoformat()
|
||||
*
|
||||
* Return codes:
|
||||
* 0: Success
|
||||
* -1: Failed to parse date component
|
||||
* -2: Failed to parse dateseparator
|
||||
* -2: Inconsistent date separator usage
|
||||
* -3: Failed to parse ISO week.
|
||||
* -4: Failed to parse ISO day.
|
||||
* -5, -6: Failure in iso_to_ymd
|
||||
*/
|
||||
const char *p = dtstr;
|
||||
p = parse_digits(p, year, 4);
|
||||
@ -711,8 +752,42 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (*(p++) != '-') {
|
||||
return -2;
|
||||
const unsigned char uses_separator = (*p == '-');
|
||||
if (uses_separator) {
|
||||
++p;
|
||||
}
|
||||
|
||||
if(*p == 'W') {
|
||||
// This is an isocalendar-style date string
|
||||
p++;
|
||||
int iso_week = 0;
|
||||
int iso_day = 0;
|
||||
|
||||
p = parse_digits(p, &iso_week, 2);
|
||||
if (NULL == p) {
|
||||
return -3;
|
||||
}
|
||||
|
||||
assert(p > dtstr);
|
||||
if ((size_t)(p - dtstr) < len) {
|
||||
if (uses_separator && *(p++) != '-') {
|
||||
return -2;
|
||||
}
|
||||
|
||||
p = parse_digits(p, &iso_day, 1);
|
||||
if (NULL == p) {
|
||||
return -4;
|
||||
}
|
||||
} else {
|
||||
iso_day = 1;
|
||||
}
|
||||
|
||||
int rv = iso_to_ymd(*year, iso_week, iso_day, year, month, day);
|
||||
if (rv) {
|
||||
return -3 + rv;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
p = parse_digits(p, month, 2);
|
||||
@ -720,15 +795,13 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (*(p++) != '-') {
|
||||
if (uses_separator && *(p++) != '-') {
|
||||
return -2;
|
||||
}
|
||||
|
||||
p = parse_digits(p, day, 2);
|
||||
if (p == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -736,11 +809,14 @@ static int
|
||||
parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour,
|
||||
int *minute, int *second, int *microsecond)
|
||||
{
|
||||
*hour = *minute = *second = *microsecond = 0;
|
||||
const char *p = tstr;
|
||||
const char *p_end = tstr_end;
|
||||
int *vals[3] = {hour, minute, second};
|
||||
// This is initialized to satisfy an erroneous compiler warning.
|
||||
unsigned char has_separator = 1;
|
||||
|
||||
// Parse [HH[:MM[:SS]]]
|
||||
// Parse [HH[:?MM[:?SS]]]
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
p = parse_digits(p, vals[i], 2);
|
||||
if (NULL == p) {
|
||||
@ -748,33 +824,47 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour,
|
||||
}
|
||||
|
||||
char c = *(p++);
|
||||
if (i == 0) {
|
||||
has_separator = (c == ':');
|
||||
}
|
||||
|
||||
if (p >= p_end) {
|
||||
return c != '\0';
|
||||
}
|
||||
else if (c == ':') {
|
||||
else if (has_separator && (c == ':')) {
|
||||
continue;
|
||||
}
|
||||
else if (c == '.') {
|
||||
else if (c == '.' || c == ',') {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
} else if (!has_separator) {
|
||||
--p;
|
||||
} else {
|
||||
return -4; // Malformed time separator
|
||||
}
|
||||
}
|
||||
|
||||
// Parse .fff[fff]
|
||||
// Parse fractional components
|
||||
size_t len_remains = p_end - p;
|
||||
if (!(len_remains == 6 || len_remains == 3)) {
|
||||
return -3;
|
||||
size_t to_parse = len_remains;
|
||||
if (len_remains >= 6) {
|
||||
to_parse = 6;
|
||||
}
|
||||
|
||||
p = parse_digits(p, microsecond, len_remains);
|
||||
p = parse_digits(p, microsecond, to_parse);
|
||||
if (NULL == p) {
|
||||
return -3;
|
||||
}
|
||||
|
||||
if (len_remains == 3) {
|
||||
*microsecond *= 1000;
|
||||
static int correction[] = {
|
||||
100000, 10000, 1000, 100, 10
|
||||
};
|
||||
|
||||
if (to_parse < 6) {
|
||||
*microsecond *= correction[to_parse-1];
|
||||
}
|
||||
|
||||
while (is_digit(*p)){
|
||||
++p; // skip truncated digits
|
||||
}
|
||||
|
||||
// Return 1 if it's not the end of the string
|
||||
@ -800,7 +890,7 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute,
|
||||
|
||||
const char *tzinfo_pos = p;
|
||||
do {
|
||||
if (*tzinfo_pos == '+' || *tzinfo_pos == '-') {
|
||||
if (*tzinfo_pos == 'Z' || *tzinfo_pos == '+' || *tzinfo_pos == '-') {
|
||||
break;
|
||||
}
|
||||
} while (++tzinfo_pos < p_end);
|
||||
@ -822,14 +912,16 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute,
|
||||
}
|
||||
}
|
||||
|
||||
// Parse time zone component
|
||||
// Valid formats are:
|
||||
// - +HH:MM (len 6)
|
||||
// - +HH:MM:SS (len 9)
|
||||
// - +HH:MM:SS.ffffff (len 16)
|
||||
size_t tzlen = p_end - tzinfo_pos;
|
||||
if (!(tzlen == 6 || tzlen == 9 || tzlen == 16)) {
|
||||
return -5;
|
||||
// Special case UTC / Zulu time.
|
||||
if (*tzinfo_pos == 'Z') {
|
||||
*tzoffset = 0;
|
||||
*tzmicrosecond = 0;
|
||||
|
||||
if (*(tzinfo_pos + 1) != '\0') {
|
||||
return -5;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int tzsign = (*tzinfo_pos == '-') ? -1 : 1;
|
||||
@ -2983,8 +3075,8 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr)
|
||||
int year = 0, month = 0, day = 0;
|
||||
|
||||
int rv;
|
||||
if (len == 10) {
|
||||
rv = parse_isoformat_date(dt_ptr, &year, &month, &day);
|
||||
if (len == 7 || len == 8 || len == 10) {
|
||||
rv = parse_isoformat_date(dt_ptr, len, &year, &month, &day);
|
||||
}
|
||||
else {
|
||||
rv = -1;
|
||||
@ -3027,37 +3119,21 @@ date_fromisocalendar(PyObject *cls, PyObject *args, PyObject *kw)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (week <= 0 || week >= 53) {
|
||||
int out_of_range = 1;
|
||||
if (week == 53) {
|
||||
// ISO years have 53 weeks in it on years starting with a Thursday
|
||||
// and on leap years starting on Wednesday
|
||||
int first_weekday = weekday(year, 1, 1);
|
||||
if (first_weekday == 3 || (first_weekday == 2 && is_leap(year))) {
|
||||
out_of_range = 0;
|
||||
}
|
||||
}
|
||||
int month;
|
||||
int rv = iso_to_ymd(year, week, day, &year, &month, &day);
|
||||
|
||||
if (out_of_range) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid week: %d", week);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (rv == -2) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid week: %d", week);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (day <= 0 || day >= 8) {
|
||||
if (rv == -3) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid day: %d (range is [1, 7])",
|
||||
day);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Convert (Y, W, D) to (Y, M, D) in-place
|
||||
int day_1 = iso_week1_monday(year);
|
||||
|
||||
int month = week;
|
||||
int day_offset = (month - 1)*7 + day - 1;
|
||||
|
||||
ord_to_ymd(day_1 + day_offset, &year, &month, &day);
|
||||
|
||||
return new_date_subclass_ex(year, month, day, cls);
|
||||
}
|
||||
|
||||
@ -3489,7 +3565,7 @@ static PyMethodDef date_methods[] = {
|
||||
|
||||
{"fromisoformat", (PyCFunction)date_fromisoformat, METH_O |
|
||||
METH_CLASS,
|
||||
PyDoc_STR("str -> Construct a date from the output of date.isoformat()")},
|
||||
PyDoc_STR("str -> Construct a date from a string in ISO 8601 format.")},
|
||||
|
||||
{"fromisocalendar", _PyCFunction_CAST(date_fromisocalendar),
|
||||
METH_VARARGS | METH_KEYWORDS | METH_CLASS,
|
||||
@ -4564,6 +4640,14 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) {
|
||||
goto invalid_string_error;
|
||||
}
|
||||
|
||||
// The spec actually requires that time-only ISO 8601 strings start with
|
||||
// T, but the extended format allows this to be omitted as long as there
|
||||
// is no ambiguity with date strings.
|
||||
if (*p == 'T') {
|
||||
++p;
|
||||
len -= 1;
|
||||
}
|
||||
|
||||
int hour = 0, minute = 0, second = 0, microsecond = 0;
|
||||
int tzoffset, tzimicrosecond = 0;
|
||||
int rv = parse_isoformat_time(p, len,
|
||||
@ -4671,7 +4755,7 @@ static PyMethodDef time_methods[] = {
|
||||
PyDoc_STR("Return time with new specified fields.")},
|
||||
|
||||
{"fromisoformat", (PyCFunction)time_fromisoformat, METH_O | METH_CLASS,
|
||||
PyDoc_STR("string -> time from time.isoformat() output")},
|
||||
PyDoc_STR("string -> time from a string in ISO 8601 format")},
|
||||
|
||||
{"__reduce_ex__", (PyCFunction)time_reduce_ex, METH_VARARGS,
|
||||
PyDoc_STR("__reduce_ex__(proto) -> (cls, state)")},
|
||||
@ -5184,19 +5268,42 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw)
|
||||
static PyObject *
|
||||
_sanitize_isoformat_str(PyObject *dtstr)
|
||||
{
|
||||
Py_ssize_t len = PyUnicode_GetLength(dtstr);
|
||||
if (len < 7) { // All valid ISO 8601 strings are at least 7 characters long
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// `fromisoformat` allows surrogate characters in exactly one position,
|
||||
// the separator; to allow datetime_fromisoformat to make the simplifying
|
||||
// assumption that all valid strings can be encoded in UTF-8, this function
|
||||
// replaces any surrogate character separators with `T`.
|
||||
//
|
||||
// The result of this, if not NULL, returns a new reference
|
||||
Py_ssize_t len = PyUnicode_GetLength(dtstr);
|
||||
if (len < 0) {
|
||||
return NULL;
|
||||
const void* const unicode_data = PyUnicode_DATA(dtstr);
|
||||
const unsigned int kind = PyUnicode_KIND(dtstr);
|
||||
|
||||
// Depending on the format of the string, the separator can only ever be
|
||||
// in positions 7, 8 or 10. We'll check each of these for a surrogate and
|
||||
// if we find one, replace it with `T`. If there is more than one surrogate,
|
||||
// we don't have to bother sanitizing it, because the function will later
|
||||
// fail when we try to encode the string as ASCII.
|
||||
static const size_t potential_separators[3] = {7, 8, 10};
|
||||
size_t surrogate_separator = 0;
|
||||
for(size_t idx = 0;
|
||||
idx < sizeof(potential_separators) / sizeof(*potential_separators);
|
||||
++idx) {
|
||||
size_t pos = potential_separators[idx];
|
||||
if (pos > (size_t)len) {
|
||||
break;
|
||||
}
|
||||
|
||||
if(Py_UNICODE_IS_SURROGATE(PyUnicode_READ(kind, unicode_data, pos))) {
|
||||
surrogate_separator = pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (len <= 10 ||
|
||||
!Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, 10))) {
|
||||
if (surrogate_separator == 0) {
|
||||
Py_INCREF(dtstr);
|
||||
return dtstr;
|
||||
}
|
||||
@ -5206,7 +5313,7 @@ _sanitize_isoformat_str(PyObject *dtstr)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (PyUnicode_WriteChar(str_out, 10, (Py_UCS4)'T')) {
|
||||
if (PyUnicode_WriteChar(str_out, surrogate_separator, (Py_UCS4)'T')) {
|
||||
Py_DECREF(str_out);
|
||||
return NULL;
|
||||
}
|
||||
@ -5214,6 +5321,106 @@ _sanitize_isoformat_str(PyObject *dtstr)
|
||||
return str_out;
|
||||
}
|
||||
|
||||
|
||||
static Py_ssize_t
|
||||
_find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) {
|
||||
// The valid date formats can all be distinguished by characters 4 and 5
|
||||
// and further narrowed down by the characters that follow them,
|
||||
// which tells us where to look for the separator character.
|
||||
// Format | As-rendered | Position
|
||||
// ---------------------------------------
|
||||
// %Y-%m-%d | YYYY-MM-DD | 10
|
||||
// %Y%m%d | YYYYMMDD | 8
|
||||
// %Y-W%V | YYYY-Www | 8
|
||||
// %YW%V | YYYYWww | 7
|
||||
// %Y-W%V-%u | YYYY-Www-d | 10
|
||||
// %YW%V%u | YYYYWwwd | 8
|
||||
// %Y-%j | YYYY-DDD | 8
|
||||
// %Y%j | YYYYDDD | 7
|
||||
//
|
||||
// Note that because we allow *any* character for the separator, in the
|
||||
// case where character 4 is W, it's not straightforward to determine where
|
||||
// the separator is — in the case of YYYY-Www-d, you have actual ambiguity,
|
||||
// e.g. 2020-W01-0000 could be YYYY-Www-D0HH or YYYY-Www-HHMM, when the
|
||||
// separator character is a number in the former case or a hyphen in the
|
||||
// latter case.
|
||||
//
|
||||
// The case of YYYYWww can be distinguished from YYYYWwwd by tracking ahead
|
||||
// to either the end of the string or the first non-numeric character —
|
||||
// since the time components all come in pairs YYYYWww#HH can be
|
||||
// distinguished from YYYYWwwd#HH by the fact that there will always be an
|
||||
// odd number of digits before the first non-digit character in the former
|
||||
// case.
|
||||
static const char date_separator = '-';
|
||||
static const char week_indicator = 'W';
|
||||
|
||||
if (len == 7) {
|
||||
return 7;
|
||||
}
|
||||
|
||||
if (dtstr[4] == date_separator) {
|
||||
// YYYY-???
|
||||
|
||||
if (dtstr[5] == week_indicator) {
|
||||
// YYYY-W??
|
||||
|
||||
if (len < 8) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (len > 8 && dtstr[8] == date_separator) {
|
||||
// YYYY-Www-D (10) or YYYY-Www-HH (8)
|
||||
if (len == 9) { return -1; }
|
||||
if (len > 10 && is_digit(dtstr[10])) {
|
||||
// This is as far as we'll try to go to resolve the
|
||||
// ambiguity for the moment — if we have YYYY-Www-##, the
|
||||
// separator is either a hyphen at 8 or a number at 10.
|
||||
//
|
||||
// We'll assume it's a hyphen at 8 because it's way more
|
||||
// likely that someone will use a hyphen as a separator
|
||||
// than a number, but at this point it's really best effort
|
||||
// because this is an extension of the spec anyway.
|
||||
return 8;
|
||||
}
|
||||
|
||||
return 10;
|
||||
} else {
|
||||
// YYYY-Www (8)
|
||||
return 8;
|
||||
}
|
||||
} else {
|
||||
// YYYY-MM-DD (10)
|
||||
return 10;
|
||||
}
|
||||
} else {
|
||||
// YYYY???
|
||||
if (dtstr[4] == week_indicator) {
|
||||
// YYYYWww (7) or YYYYWwwd (8)
|
||||
size_t idx = 7;
|
||||
for (; idx < (size_t)len; ++idx) {
|
||||
// Keep going until we run out of digits.
|
||||
if (!is_digit(dtstr[idx])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (idx < 9) {
|
||||
return idx;
|
||||
}
|
||||
|
||||
if (idx % 2 == 0) {
|
||||
// If the index just past the last digit is even, it's YYYYWww
|
||||
return 7;
|
||||
} else {
|
||||
return 8;
|
||||
}
|
||||
} else {
|
||||
// YYYYMMDD (8)
|
||||
return 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
|
||||
{
|
||||
@ -5225,9 +5432,14 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// We only need to sanitize this string if the separator is a surrogate
|
||||
// character. In the situation where the separator location is ambiguous,
|
||||
// we don't have to sanitize anything because that can only happen when
|
||||
// the separator is either '-' or a number. This should mostly be a noop
|
||||
// but it makes the reference counting easier if we still sanitize.
|
||||
PyObject *dtstr_clean = _sanitize_isoformat_str(dtstr);
|
||||
if (dtstr_clean == NULL) {
|
||||
goto error;
|
||||
goto invalid_string_error;
|
||||
}
|
||||
|
||||
Py_ssize_t len;
|
||||
@ -5243,30 +5455,35 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
|
||||
}
|
||||
}
|
||||
|
||||
const Py_ssize_t separator_location = _find_isoformat_datetime_separator(
|
||||
dt_ptr, len);
|
||||
|
||||
|
||||
const char *p = dt_ptr;
|
||||
|
||||
int year = 0, month = 0, day = 0;
|
||||
int hour = 0, minute = 0, second = 0, microsecond = 0;
|
||||
int tzoffset = 0, tzusec = 0;
|
||||
|
||||
// date has a fixed length of 10
|
||||
int rv = parse_isoformat_date(p, &year, &month, &day);
|
||||
// date runs up to separator_location
|
||||
int rv = parse_isoformat_date(p, separator_location, &year, &month, &day);
|
||||
|
||||
if (!rv && len > 10) {
|
||||
if (!rv && len > separator_location) {
|
||||
// In UTF-8, the length of multi-byte characters is encoded in the MSB
|
||||
if ((p[10] & 0x80) == 0) {
|
||||
p += 11;
|
||||
p += separator_location;
|
||||
if ((p[0] & 0x80) == 0) {
|
||||
p += 1;
|
||||
}
|
||||
else {
|
||||
switch (p[10] & 0xf0) {
|
||||
switch (p[0] & 0xf0) {
|
||||
case 0xe0:
|
||||
p += 13;
|
||||
p += 3;
|
||||
break;
|
||||
case 0xf0:
|
||||
p += 14;
|
||||
p += 4;
|
||||
break;
|
||||
default:
|
||||
p += 12;
|
||||
p += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -6327,7 +6544,7 @@ static PyMethodDef datetime_methods[] = {
|
||||
|
||||
{"fromisoformat", (PyCFunction)datetime_fromisoformat,
|
||||
METH_O | METH_CLASS,
|
||||
PyDoc_STR("string -> datetime from datetime.isoformat() output")},
|
||||
PyDoc_STR("string -> datetime from a string in most ISO 8601 formats")},
|
||||
|
||||
/* Instance methods: */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user