gh-80010: Expand fromisoformat to include most of ISO-8601 (#92177)

This expands `fromisoformat` to cover most of the common uses of ISO 8601. We may expand the scope more in the future.
This commit is contained in:
Paul Ganssle 2022-05-05 18:31:24 -06:00 committed by GitHub
parent ada8b6d1b1
commit 1303f8c927
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 778 additions and 178 deletions

View File

@ -526,18 +526,20 @@ Other constructors, all class methods:
.. classmethod:: date.fromisoformat(date_string)
Return a :class:`date` corresponding to a *date_string* given in the format
``YYYY-MM-DD``::
Return a :class:`date` corresponding to a *date_string* given in any valid
ISO 8601 format, except ordinal dates (e.g. ``YYYY-DDD``)::
>>> from datetime import date
>>> date.fromisoformat('2019-12-04')
datetime.date(2019, 12, 4)
This is the inverse of :meth:`date.isoformat`. It only supports the format
``YYYY-MM-DD``.
>>> date.fromisoformat('20191204')
datetime.date(2019, 12, 4)
>>> date.fromisoformat('2021-W01-1')
datetime.date(2021, 1, 4)
.. versionadded:: 3.7
.. versionchanged:: 3.11
Previously, this method only supported the format ``YYYY-MM-DD``.
.. classmethod:: date.fromisocalendar(year, week, day)
@ -710,8 +712,6 @@ Instance methods:
>>> date(2002, 12, 4).isoformat()
'2002-12-04'
This is the inverse of :meth:`date.fromisoformat`.
.. method:: date.__str__()
For a date *d*, ``str(d)`` is equivalent to ``d.isoformat()``.
@ -994,31 +994,29 @@ Other constructors, all class methods:
.. classmethod:: datetime.fromisoformat(date_string)
Return a :class:`.datetime` corresponding to a *date_string* in one of the
formats emitted by :meth:`date.isoformat` and :meth:`datetime.isoformat`.
Return a :class:`.datetime` corresponding to a *date_string* in any valid
ISO 8601 format, with the following exceptions:
Specifically, this function supports strings in the format:
.. code-block:: none
YYYY-MM-DD[*HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]]
where ``*`` can match any single character.
.. caution::
This does *not* support parsing arbitrary ISO 8601 strings - it is only intended
as the inverse operation of :meth:`datetime.isoformat`. A more full-featured
ISO 8601 parser, ``dateutil.parser.isoparse`` is available in the third-party package
`dateutil <https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.isoparse>`__.
1. Time zone offsets may have fractional seconds.
2. The ``T`` separator may be replaced by any single Unicode character.
3. Ordinal dates are not currently supported.
4. Fractional hours and minutes are not supported.
Examples::
>>> from datetime import datetime
>>> datetime.fromisoformat('2011-11-04')
datetime.datetime(2011, 11, 4, 0, 0)
>>> datetime.fromisoformat('20111104')
datetime.datetime(2011, 11, 4, 0, 0)
>>> datetime.fromisoformat('2011-11-04T00:05:23')
datetime.datetime(2011, 11, 4, 0, 5, 23)
>>> datetime.fromisoformat('2011-11-04T00:05:23Z')
datetime.datetime(2011, 11, 4, 0, 5, 23, tzinfo=datetime.timezone.utc)
>>> datetime.fromisoformat('20111104T000523')
datetime.datetime(2011, 11, 4, 0, 5, 23)
>>> datetime.fromisoformat('2011-W01-2T00:05:23.283')
datetime.datetime(2011, 1, 4, 0, 5, 23, 283000)
>>> datetime.fromisoformat('2011-11-04 00:05:23.283')
datetime.datetime(2011, 11, 4, 0, 5, 23, 283000)
>>> datetime.fromisoformat('2011-11-04 00:05:23.283+00:00')
@ -1028,6 +1026,10 @@ Other constructors, all class methods:
tzinfo=datetime.timezone(datetime.timedelta(seconds=14400)))
.. versionadded:: 3.7
.. versionchanged:: 3.11
Previously, this method only supported formats that could be emitted by
:meth:`date.isoformat()` or :meth:`datetime.isoformat()`.
.. classmethod:: datetime.fromisocalendar(year, week, day)
@ -1763,30 +1765,41 @@ Other constructor:
.. classmethod:: time.fromisoformat(time_string)
Return a :class:`.time` corresponding to a *time_string* in one of the
formats emitted by :meth:`time.isoformat`. Specifically, this function supports
strings in the format:
Return a :class:`.time` corresponding to a *time_string* in any valid
ISO 8601 format, with the following exceptions:
.. code-block:: none
HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]
.. caution::
This does *not* support parsing arbitrary ISO 8601 strings. It is only
intended as the inverse operation of :meth:`time.isoformat`.
1. Time zone offsets may have fractional seconds.
2. The leading ``T``, normally required in cases where there may be ambiguity between
a date and a time, is not required.
3. Fractional seconds may have any number of digits (anything beyond 6 will
be truncated).
4. Fractional hours and minutes are not supported.
Examples::
>>> from datetime import time
>>> time.fromisoformat('04:23:01')
datetime.time(4, 23, 1)
>>> time.fromisoformat('T04:23:01')
datetime.time(4, 23, 1)
>>> time.fromisoformat('T042301')
datetime.time(4, 23, 1)
>>> time.fromisoformat('04:23:01.000384')
datetime.time(4, 23, 1, 384)
>>> time.fromisoformat('04:23:01,000')
datetime.time(4, 23, 1)
>>> time.fromisoformat('04:23:01+04:00')
datetime.time(4, 23, 1, tzinfo=datetime.timezone(datetime.timedelta(seconds=14400)))
>>> time.fromisoformat('04:23:01Z')
datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc)
>>> time.fromisoformat('04:23:01+00:00')
datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc)
.. versionadded:: 3.7
.. versionchanged:: 3.11
Previously, this method only supported formats that could be emitted by
:meth:`time.isoformat()`.
Instance methods:

View File

@ -425,6 +425,14 @@ asyncio
existing stream-based connections to TLS. (Contributed by Ian Good in
:issue:`34975`.)
datetime
--------
* :meth:`datetime.date.fromisoformat`, :meth:`datetime.time.fromisoformat` and
:meth:`datetime.datetime.fromisoformat` can now be used to parse most ISO 8601
formats (barring ordinal dates and those that use fractional hours and minutes).
(Contributed by Paul Ganssle in :gh:`80010`.)
fractions
---------

View File

@ -262,58 +262,150 @@ def _wrap_strftime(object, format, timetuple):
return _time.strftime(newformat, timetuple)
# Helpers for parsing the result of isoformat()
def _is_ascii_digit(c):
    """Return True if *c* is exactly one ASCII decimal digit ("0" to "9").

    Unlike str.isdigit(), non-ASCII digits are rejected.  The length check
    is needed because ``in`` performs a substring test on strings: without
    it the empty string and digit runs such as "12" would also be accepted.
    """
    return len(c) == 1 and c in "0123456789"
def _find_isoformat_datetime_separator(dtstr):
    """Return the index in *dtstr* at which the date portion ends.

    The caller takes ``dtstr[:index]`` as the date and everything after the
    single separator character at ``dtstr[index]`` as the time.  The result
    is always 7, 8 or 10, one for each date layout recognised here:

        YYYYWww (7), YYYYWwwd (8), YYYYMMDD (8), YYYY-Www (8),
        YYYY-Www-d (10), YYYY-MM-DD (10)

    Raises ValueError if *dtstr* has fewer than 7 characters, or if it ends
    immediately after the second hyphen of a ``YYYY-Www-`` prefix.
    """
    # See the comment in _datetimemodule.c:_find_isoformat_datetime_separator
    len_dtstr = len(dtstr)
    if len_dtstr < 7:
        # Raised explicitly (rather than asserted) so that the check is
        # still performed when Python runs with -O.
        raise ValueError("Invalid ISO string")
    if len_dtstr == 7:
        return 7

    date_separator = "-"
    week_indicator = "W"

    if dtstr[4] == date_separator:
        if dtstr[5] != week_indicator:
            # YYYY-MM-DD (10)
            return 10

        # len_dtstr >= 8 from here on, so "YYYY-Www" is always complete.
        if len_dtstr > 8 and dtstr[8] == date_separator:
            if len_dtstr == 9:
                # "YYYY-Www-" with nothing after the hyphen
                raise ValueError("Invalid ISO string")
            if len_dtstr > 10 and _is_ascii_digit(dtstr[10]):
                # This is as far as we need to resolve the ambiguity for
                # the moment - if we have YYYY-Www-##, the separator is
                # either a hyphen at 8 or a number at 10.
                #
                # We'll assume it's a hyphen at 8 because it's way more
                # likely that someone will use a hyphen as a separator than
                # a number, but at this point it's really best effort
                # because this is an extension of the spec anyway.
                # TODO(pganssle): Document this
                return 8
            # YYYY-Www-d (10)
            return 10

        # YYYY-Www (8)
        return 8

    if dtstr[4] != week_indicator:
        # YYYYMMDD (8)
        return 8

    # YYYYWww (7) or YYYYWwwd (8): find the first non-digit at or after
    # index 7 (or the end of the string).
    idx = 7
    while idx < len_dtstr and _is_ascii_digit(dtstr[idx]):
        idx += 1

    if idx < 9:
        # The digit run stops at 7 (YYYYWww) or 8 (YYYYWwwd), so the
        # separator is the first non-digit character.
        return idx

    # The separator is itself a digit.  After YYYYWww the time digits begin
    # at index 8, so a run of two-digit time fields ends on an even index;
    # after YYYYWwwd they begin at index 9 and the run ends on an odd index.
    if idx % 2 == 0:
        # YYYYWww (7)
        return 7
    # YYYYWwwd (8)
    return 8
def _parse_isoformat_date(dtstr):
# It is assumed that this function will only be called with a
# string of length exactly 10, and (though this is not used) ASCII-only
# It is assumed that this is an ASCII-only string of lengths 7, 8 or 10,
# see the comment on Modules/_datetimemodule.c:_find_isoformat_datetime_separator
assert len(dtstr) in (7, 8, 10)
year = int(dtstr[0:4])
if dtstr[4] != '-':
raise ValueError('Invalid date separator: %s' % dtstr[4])
has_sep = dtstr[4] == '-'
month = int(dtstr[5:7])
pos = 4 + has_sep
if dtstr[pos:pos + 1] == "W":
# YYYY-?Www-?D?
pos += 1
weekno = int(dtstr[pos:pos + 2])
pos += 2
if dtstr[7] != '-':
raise ValueError('Invalid date separator')
dayno = 1
if len(dtstr) > pos:
if (dtstr[pos:pos + 1] == '-') != has_sep:
raise ValueError("Inconsistent use of dash separator")
day = int(dtstr[8:10])
pos += has_sep
dayno = int(dtstr[pos:pos + 1])
return list(_isoweek_to_gregorian(year, weekno, dayno))
else:
month = int(dtstr[pos:pos + 2])
pos += 2
if (dtstr[pos:pos + 1] == "-") != has_sep:
raise ValueError("Inconsistent use of dash separator")
pos += has_sep
day = int(dtstr[pos:pos + 2])
return [year, month, day]
_FRACTION_CORRECTION = [100000, 10000, 1000, 100, 10]
return [year, month, day]
def _parse_hh_mm_ss_ff(tstr):
# Parses things of the form HH[:MM[:SS[.fff[fff]]]]
# Parses things of the form HH[:?MM[:?SS[{.,}fff[fff]]]]
len_str = len(tstr)
time_comps = [0, 0, 0, 0]
pos = 0
for comp in range(0, 3):
if (len_str - pos) < 2:
raise ValueError('Incomplete time component')
raise ValueError("Incomplete time component")
time_comps[comp] = int(tstr[pos:pos+2])
pos += 2
next_char = tstr[pos:pos+1]
if comp == 0:
has_sep = next_char == ':'
if not next_char or comp >= 2:
break
if next_char != ':':
raise ValueError('Invalid time separator: %c' % next_char)
if has_sep and next_char != ':':
raise ValueError("Invalid time separator: %c" % next_char)
pos += 1
pos += has_sep
if pos < len_str:
if tstr[pos] != '.':
raise ValueError('Invalid microsecond component')
if tstr[pos] not in '.,':
raise ValueError("Invalid microsecond component")
else:
pos += 1
len_remainder = len_str - pos
if len_remainder not in (3, 6):
raise ValueError('Invalid microsecond component')
time_comps[3] = int(tstr[pos:])
if len_remainder == 3:
time_comps[3] *= 1000
if len_remainder >= 6:
to_parse = 6
else:
to_parse = len_remainder
time_comps[3] = int(tstr[pos:(pos+to_parse)])
if to_parse < 6:
time_comps[3] *= _FRACTION_CORRECTION[to_parse-1]
if (len_remainder > to_parse
and not all(map(_is_ascii_digit, tstr[(pos+to_parse):]))):
raise ValueError("Non-digit values in unparsed fraction")
return time_comps
@ -321,27 +413,34 @@ def _parse_isoformat_time(tstr):
# Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]
len_str = len(tstr)
if len_str < 2:
raise ValueError('Isoformat time too short')
raise ValueError("Isoformat time too short")
# This is equivalent to re.search('[+-]', tstr), but faster
tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1)
# This is equivalent to re.search('[-+Z]', tstr), but faster
tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1)
timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr
time_comps = _parse_hh_mm_ss_ff(timestr)
tzi = None
if tz_pos > 0:
if tz_pos == len_str and tstr[-1] == 'Z':
tzi = timezone.utc
elif tz_pos > 0:
tzstr = tstr[tz_pos:]
# Valid time zone strings are:
# HH len: 2
# HHMM len: 4
# HH:MM len: 5
# HHMMSS len: 6
# HHMMSS.f+ len: 7+
# HH:MM:SS len: 8
# HH:MM:SS.ffffff len: 15
# HH:MM:SS.f+ len: 10+
if len(tzstr) not in (5, 8, 15):
raise ValueError('Malformed time zone string')
if len(tzstr) in (0, 1, 3):
raise ValueError("Malformed time zone string")
tz_comps = _parse_hh_mm_ss_ff(tzstr)
if all(x == 0 for x in tz_comps):
tzi = timezone.utc
else:
@ -356,6 +455,38 @@ def _parse_isoformat_time(tstr):
return time_comps
# tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar
def _isoweek_to_gregorian(year, week, day):
    """Convert an ISO (year, week, weekday) triple to what _ord2ymd returns.

    Raises ValueError if *year* is outside [MINYEAR, MAXYEAR], if *week* is
    not a week of that ISO year, or if *day* is outside [1, 7].
    """
    # Year is bounded this way because 9999-12-31 is (9999, 52, 5)
    if not (MINYEAR <= year <= MAXYEAR):
        raise ValueError(f"Year is out of range: {year}")

    if not (0 < week < 53):
        # Week 53 is the only value outside 1..52 that can be valid.  ISO
        # years have 53 weeks in them on years starting with a Thursday
        # and leap years starting on a Wednesday.
        has_week_53 = False
        if week == 53:
            jan1_weekday = _ymd2ord(year, 1, 1) % 7
            has_week_53 = (jan1_weekday == 4
                           or (jan1_weekday == 3 and _is_leap(year)))
        if not has_week_53:
            raise ValueError(f"Invalid week: {week}")

    if not (0 < day < 8):
        raise ValueError(f"Invalid weekday: {day} (range is [1, 7])")

    # Ordinal of the Monday of week 1, plus the days elapsed since then
    week1_monday = _isoweek1monday(year)
    days_elapsed = (week - 1) * 7 + (day - 1)
    return _ord2ymd(week1_monday + days_elapsed)
# Just raise TypeError if the arg isn't None or a string.
def _check_tzname(name):
@ -847,12 +978,14 @@ class date:
@classmethod
def fromisoformat(cls, date_string):
"""Construct a date from the output of date.isoformat()."""
"""Construct a date from a string in ISO 8601 format."""
if not isinstance(date_string, str):
raise TypeError('fromisoformat: argument must be str')
if len(date_string) not in (7, 8, 10):
raise ValueError(f'Invalid isoformat string: {date_string!r}')
try:
assert len(date_string) == 10
return cls(*_parse_isoformat_date(date_string))
except Exception:
raise ValueError(f'Invalid isoformat string: {date_string!r}')
@ -862,35 +995,7 @@ class date:
"""Construct a date from the ISO year, week number and weekday.
This is the inverse of the date.isocalendar() function"""
# Year is bounded this way because 9999-12-31 is (9999, 52, 5)
if not MINYEAR <= year <= MAXYEAR:
raise ValueError(f"Year is out of range: {year}")
if not 0 < week < 53:
out_of_range = True
if week == 53:
# ISO years have 53 weeks in them on years starting with a
# Thursday and leap years starting on a Wednesday
first_weekday = _ymd2ord(year, 1, 1) % 7
if (first_weekday == 4 or (first_weekday == 3 and
_is_leap(year))):
out_of_range = False
if out_of_range:
raise ValueError(f"Invalid week: {week}")
if not 0 < day < 8:
raise ValueError(f"Invalid weekday: {day} (range is [1, 7])")
# Now compute the offset from (Y, 1, 1) in days:
day_offset = (week - 1) * 7 + (day - 1)
# Calculate the ordinal day for monday, week 1
day_1 = _isoweek1monday(year)
ord_day = day_1 + day_offset
return cls(*_ord2ymd(ord_day))
return cls(*_isoweek_to_gregorian(year, week, day))
# Conversions to string
@ -1427,10 +1532,15 @@ class time:
@classmethod
def fromisoformat(cls, time_string):
"""Construct a time from the output of isoformat()."""
"""Construct a time from a string in one of the ISO 8601 formats."""
if not isinstance(time_string, str):
raise TypeError('fromisoformat: argument must be str')
# The spec actually requires that time-only ISO 8601 strings start with
# T, but the extended format allows this to be omitted as long as there
# is no ambiguity with date strings.
time_string = time_string.removeprefix('T')
try:
return cls(*_parse_isoformat_time(time_string))
except Exception:
@ -1711,24 +1821,30 @@ class datetime(date):
@classmethod
def fromisoformat(cls, date_string):
"""Construct a datetime from the output of datetime.isoformat()."""
"""Construct a datetime from a string in one of the ISO 8601 formats."""
if not isinstance(date_string, str):
raise TypeError('fromisoformat: argument must be str')
# Split this at the separator
dstr = date_string[0:10]
tstr = date_string[11:]
if len(date_string) < 7:
raise ValueError(f'Invalid isoformat string: {date_string!r}')
# Split this at the separator
try:
separator_location = _find_isoformat_datetime_separator(date_string)
dstr = date_string[0:separator_location]
tstr = date_string[(separator_location+1):]
date_components = _parse_isoformat_date(dstr)
except ValueError:
raise ValueError(f'Invalid isoformat string: {date_string!r}')
raise ValueError(
f'Invalid isoformat string: {date_string!r}') from None
if tstr:
try:
time_components = _parse_isoformat_time(tstr)
except ValueError:
raise ValueError(f'Invalid isoformat string: {date_string!r}')
raise ValueError(
f'Invalid isoformat string: {date_string!r}') from None
else:
time_components = [0, 0, 0, 0, None]
@ -2509,7 +2625,9 @@ else:
_format_time, _format_offset, _index, _is_leap, _isoweek1monday, _math,
_ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord,
_divide_and_round, _parse_isoformat_date, _parse_isoformat_time,
_parse_hh_mm_ss_ff, _IsoCalendarDate)
_parse_hh_mm_ss_ff, _IsoCalendarDate, _isoweek_to_gregorian,
_find_isoformat_datetime_separator, _FRACTION_CORRECTION,
_is_ascii_digit)
# XXX Since import * above excludes names that start with _,
# docstring does not get overwritten. In the future, it may be
# appropriate to maintain a single module level docstring and

View File

@ -7,6 +7,7 @@ import itertools
import bisect
import copy
import decimal
import functools
import sys
import os
import pickle
@ -1840,6 +1841,41 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
self.assertEqual(dt, dt_rt)
def test_fromisoformat_date_examples(self):
# Each entry pairs an ISO 8601 date string with the object fromisoformat()
# must return for it.  Calendar dates and ISO week dates are each given in
# the basic (no hyphens) and extended (hyphenated) spelling.
examples = [
# Calendar dates, basic then extended
('00010101', self.theclass(1, 1, 1)),
('20000101', self.theclass(2000, 1, 1)),
('20250102', self.theclass(2025, 1, 2)),
('99991231', self.theclass(9999, 12, 31)),
('0001-01-01', self.theclass(1, 1, 1)),
('2000-01-01', self.theclass(2000, 1, 1)),
('2025-01-02', self.theclass(2025, 1, 2)),
('9999-12-31', self.theclass(9999, 12, 31)),
# ISO week dates; the entries without a day digit are expected to give
# the first day of the week (compare '2025W01' with '2025W014')
('2025W01', self.theclass(2024, 12, 30)),
('2025-W01', self.theclass(2024, 12, 30)),
('2025W014', self.theclass(2025, 1, 2)),
('2025-W01-4', self.theclass(2025, 1, 2)),
('2026W01', self.theclass(2025, 12, 29)),
('2026-W01', self.theclass(2025, 12, 29)),
('2026W013', self.theclass(2025, 12, 31)),
('2026-W01-3', self.theclass(2025, 12, 31)),
('2022W52', self.theclass(2022, 12, 26)),
('2022-W52', self.theclass(2022, 12, 26)),
('2022W527', self.theclass(2023, 1, 1)),
('2022-W52-7', self.theclass(2023, 1, 1)),
('2015W534', self.theclass(2015, 12, 31)), # Has week 53
('2015-W53-4', self.theclass(2015, 12, 31)), # Has week 53
('2015-W53-5', self.theclass(2016, 1, 1)),
('2020W531', self.theclass(2020, 12, 28)), # Leap year
('2020-W53-1', self.theclass(2020, 12, 28)), # Leap year
('2020-W53-6', self.theclass(2021, 1, 2)),
]
# subTest reports every failing input string instead of stopping at the first
for input_str, expected in examples:
with self.subTest(input_str=input_str):
actual = self.theclass.fromisoformat(input_str)
self.assertEqual(actual, expected)
def test_fromisoformat_subclass(self):
class DateSubclass(self.theclass):
pass
@ -1862,7 +1898,8 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
'2009-12-0a', # Invalid character in day
'2009-01-32', # Invalid day
'2009-02-29', # Invalid leap day
'20090228', # Valid ISO8601 output not from isoformat()
'2019-W53-1', # No week 53 in 2019
'2020-W54-1', # No week 54
'2009\ud80002\ud80028', # Separators are surrogate codepoints
]
@ -3003,6 +3040,140 @@ class TestDateTime(TestDate):
dt_rt = self.theclass.fromisoformat(dtstr)
self.assertEqual(dt, dt_rt)
def test_fromisoformat_datetime_examples(self):
# Each entry pairs an ISO 8601 date/time string with the object
# fromisoformat() must return for it.
UTC = timezone.utc
BST = timezone(timedelta(hours=1), 'BST')
EST = timezone(timedelta(hours=-5), 'EST')
EDT = timezone(timedelta(hours=-4), 'EDT')
examples = [
# Extended-format calendar dates with increasingly precise times; a
# fraction may follow '.' or ',', and digits past the sixth are dropped
('2025-01-02', self.theclass(2025, 1, 2, 0, 0)),
('2025-01-02T03', self.theclass(2025, 1, 2, 3, 0)),
('2025-01-02T03:04', self.theclass(2025, 1, 2, 3, 4)),
('2025-01-02T0304', self.theclass(2025, 1, 2, 3, 4)),
('2025-01-02T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
('2025-01-02T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
('2025-01-02T03:04:05.6',
self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
('2025-01-02T03:04:05,6',
self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
('2025-01-02T03:04:05.678',
self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
('2025-01-02T03:04:05.678901',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('2025-01-02T03:04:05,678901',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('2025-01-02T030405.678901',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('2025-01-02T030405,678901',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('2025-01-02T03:04:05.6789010',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('2009-04-19T03:15:45.2345',
self.theclass(2009, 4, 19, 3, 15, 45, 234500)),
('2009-04-19T03:15:45.1234567',
self.theclass(2009, 4, 19, 3, 15, 45, 123456)),
('2025-01-02T03:04:05,678',
self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
# Basic-format calendar dates (YYYYMMDD)
('20250102', self.theclass(2025, 1, 2, 0, 0)),
('20250102T03', self.theclass(2025, 1, 2, 3, 0)),
('20250102T03:04', self.theclass(2025, 1, 2, 3, 4)),
('20250102T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
('20250102T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
('20250102T03:04:05.6',
self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
('20250102T03:04:05,6',
self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
('20250102T03:04:05.678',
self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
('20250102T03:04:05,678',
self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
('20250102T03:04:05.678901',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('20250102T030405.678901',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('20250102T030405,678901',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('20250102T030405.6789010',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
# Basic-format ISO week dates.  In the all-digit strings the character
# between date and time is itself a digit (the '5' in '2022W52520').
('2022W01', self.theclass(2022, 1, 3)),
('2022W52520', self.theclass(2022, 12, 26, 20, 0)),
('2022W527520', self.theclass(2023, 1, 1, 20, 0)),
('2026W01516', self.theclass(2025, 12, 29, 16, 0)),
('2026W013516', self.theclass(2025, 12, 31, 16, 0)),
('2025W01503', self.theclass(2024, 12, 30, 3, 0)),
('2025W014503', self.theclass(2025, 1, 2, 3, 0)),
('2025W01512', self.theclass(2024, 12, 30, 12, 0)),
('2025W014512', self.theclass(2025, 1, 2, 12, 0)),
('2025W014T121431', self.theclass(2025, 1, 2, 12, 14, 31)),
('2026W013T162100', self.theclass(2025, 12, 31, 16, 21)),
('2026W013 162100', self.theclass(2025, 12, 31, 16, 21)),
('2022W527T202159', self.theclass(2023, 1, 1, 20, 21, 59)),
('2022W527 202159', self.theclass(2023, 1, 1, 20, 21, 59)),
('2025W014 121431', self.theclass(2025, 1, 2, 12, 14, 31)),
('2025W014T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
('2025W014 030405', self.theclass(2025, 1, 2, 3, 4, 5)),
# Week dates combined with colon-separated times
('2020-W53-6T03:04:05', self.theclass(2021, 1, 2, 3, 4, 5)),
('2020W537 03:04:05', self.theclass(2021, 1, 3, 3, 4, 5)),
('2025-W01-4T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
('2025-W01-4T03:04:05.678901',
self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
('2025-W01-4T12:14:31', self.theclass(2025, 1, 2, 12, 14, 31)),
('2025-W01-4T12:14:31.012345',
self.theclass(2025, 1, 2, 12, 14, 31, 12345)),
('2026-W01-3T16:21:00', self.theclass(2025, 12, 31, 16, 21)),
('2026-W01-3T16:21:00.000000', self.theclass(2025, 12, 31, 16, 21)),
('2022-W52-7T20:21:59',
self.theclass(2023, 1, 1, 20, 21, 59)),
('2022-W52-7T20:21:59.999999',
self.theclass(2023, 1, 1, 20, 21, 59, 999999)),
# Time zone designators: 'Z' and numeric offsets written with or without
# colons, with hours only, and with seconds or fractional seconds
('2025-W01003+00',
self.theclass(2024, 12, 30, 3, 0, tzinfo=UTC)),
('2025-01-02T03:04:05+00',
self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
('2025-01-02T03:04:05Z',
self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
('2025-01-02003:04:05,6+00:00:00.00',
self.theclass(2025, 1, 2, 3, 4, 5, 600000, tzinfo=UTC)),
('2000-01-01T00+21',
self.theclass(2000, 1, 1, 0, 0, tzinfo=timezone(timedelta(hours=21)))),
('2025-01-02T03:05:06+0300',
self.theclass(2025, 1, 2, 3, 5, 6,
tzinfo=timezone(timedelta(hours=3)))),
('2025-01-02T03:05:06-0300',
self.theclass(2025, 1, 2, 3, 5, 6,
tzinfo=timezone(timedelta(hours=-3)))),
('2025-01-02T03:04:05+0000',
self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
('2025-01-02T03:05:06+03',
self.theclass(2025, 1, 2, 3, 5, 6,
tzinfo=timezone(timedelta(hours=3)))),
('2025-01-02T03:05:06-03',
self.theclass(2025, 1, 2, 3, 5, 6,
tzinfo=timezone(timedelta(hours=-3)))),
('2020-01-01T03:05:07.123457-05:00',
self.theclass(2020, 1, 1, 3, 5, 7, 123457, tzinfo=EST)),
('2020-01-01T03:05:07.123457-0500',
self.theclass(2020, 1, 1, 3, 5, 7, 123457, tzinfo=EST)),
('2020-06-01T04:05:06.111111-04:00',
self.theclass(2020, 6, 1, 4, 5, 6, 111111, tzinfo=EDT)),
('2020-06-01T04:05:06.111111-0400',
self.theclass(2020, 6, 1, 4, 5, 6, 111111, tzinfo=EDT)),
('2021-10-31T01:30:00.000000+01:00',
self.theclass(2021, 10, 31, 1, 30, tzinfo=BST)),
('2021-10-31T01:30:00.000000+0100',
self.theclass(2021, 10, 31, 1, 30, tzinfo=BST)),
('2025-01-02T03:04:05,6+000000.00',
self.theclass(2025, 1, 2, 3, 4, 5, 600000, tzinfo=UTC)),
('2025-01-02T03:04:05,678+00:00:10',
self.theclass(2025, 1, 2, 3, 4, 5, 678000,
tzinfo=timezone(timedelta(seconds=10)))),
]
# subTest reports every failing input string instead of stopping at the first
for input_str, expected in examples:
with self.subTest(input_str=input_str):
actual = self.theclass.fromisoformat(input_str)
self.assertEqual(actual, expected)
def test_fromisoformat_fails_datetime(self):
# Test that fromisoformat() fails on invalid values
bad_strs = [
@ -3016,8 +3187,6 @@ class TestDateTime(TestDate):
'2009-04-19T03;15:45', # Bad first time separator
'2009-04-19T03:15;45', # Bad second time separator
'2009-04-19T03:15:4500:00', # Bad time zone separator
'2009-04-19T03:15:45.2345', # Too many digits for milliseconds
'2009-04-19T03:15:45.1234567', # Too many digits for microseconds
'2009-04-19T03:15:45.123456+24:30', # Invalid time zone offset
'2009-04-19T03:15:45.123456-24:30', # Invalid negative offset
'2009-04-10ᛇᛇᛇᛇᛇ12:15', # Too many unicode separators
@ -3962,6 +4131,76 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
t_rt = self.theclass.fromisoformat(tstr)
self.assertEqual(t, t_rt)
def test_fromisoformat_fractions(self):
strs = [
('12:30:45.1', (12, 30, 45, 100000)),
('12:30:45.12', (12, 30, 45, 120000)),
('12:30:45.123', (12, 30, 45, 123000)),
('12:30:45.1234', (12, 30, 45, 123400)),
('12:30:45.12345', (12, 30, 45, 123450)),
('12:30:45.123456', (12, 30, 45, 123456)),
('12:30:45.1234567', (12, 30, 45, 123456)),
('12:30:45.12345678', (12, 30, 45, 123456)),
]
for time_str, time_comps in strs:
expected = self.theclass(*time_comps)
actual = self.theclass.fromisoformat(time_str)
self.assertEqual(actual, expected)
def test_fromisoformat_time_examples(self):
# Each entry pairs an ISO 8601 time string with the object fromisoformat()
# must return for it.
examples = [
# Naive times: each value is given without colons and with colons, at
# minute, second, and fractional-second precision
('0000', self.theclass(0, 0)),
('00:00', self.theclass(0, 0)),
('000000', self.theclass(0, 0)),
('00:00:00', self.theclass(0, 0)),
('000000.0', self.theclass(0, 0)),
('00:00:00.0', self.theclass(0, 0)),
('000000.000', self.theclass(0, 0)),
('00:00:00.000', self.theclass(0, 0)),
('000000.000000', self.theclass(0, 0)),
('00:00:00.000000', self.theclass(0, 0)),
('1200', self.theclass(12, 0)),
('12:00', self.theclass(12, 0)),
('120000', self.theclass(12, 0)),
('12:00:00', self.theclass(12, 0)),
('120000.0', self.theclass(12, 0)),
('12:00:00.0', self.theclass(12, 0)),
('120000.000', self.theclass(12, 0)),
('12:00:00.000', self.theclass(12, 0)),
('120000.000000', self.theclass(12, 0)),
('12:00:00.000000', self.theclass(12, 0)),
('2359', self.theclass(23, 59)),
('23:59', self.theclass(23, 59)),
('235959', self.theclass(23, 59, 59)),
('23:59:59', self.theclass(23, 59, 59)),
('235959.9', self.theclass(23, 59, 59, 900000)),
('23:59:59.9', self.theclass(23, 59, 59, 900000)),
('235959.999', self.theclass(23, 59, 59, 999000)),
('23:59:59.999', self.theclass(23, 59, 59, 999000)),
('235959.999999', self.theclass(23, 59, 59, 999999)),
('23:59:59.999999', self.theclass(23, 59, 59, 999999)),
# Aware times: 'Z' and numeric offsets, including an offset that carries
# seconds and microseconds
('00:00:00Z', self.theclass(0, 0, tzinfo=timezone.utc)),
('12:00:00+0000', self.theclass(12, 0, tzinfo=timezone.utc)),
('12:00:00+00:00', self.theclass(12, 0, tzinfo=timezone.utc)),
('00:00:00+05',
self.theclass(0, 0, tzinfo=timezone(timedelta(hours=5)))),
('00:00:00+05:30',
self.theclass(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30)))),
('12:00:00-05:00',
self.theclass(12, 0, tzinfo=timezone(timedelta(hours=-5)))),
('12:00:00-0500',
self.theclass(12, 0, tzinfo=timezone(timedelta(hours=-5)))),
('00:00:00,000-23:59:59.999999',
self.theclass(0, 0, tzinfo=timezone(-timedelta(hours=23, minutes=59, seconds=59, microseconds=999999)))),
]
# subTest reports every failing input string instead of stopping at the first
for input_str, expected in examples:
with self.subTest(input_str=input_str):
actual = self.theclass.fromisoformat(input_str)
self.assertEqual(actual, expected)
def test_fromisoformat_fails(self):
bad_strs = [
'', # Empty string
@ -3975,15 +4214,17 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
'1a:30:45.334034', # Invalid character in hours
'12:a0:45.334034', # Invalid character in minutes
'12:30:a5.334034', # Invalid character in seconds
'12:30:45.1234', # Too many digits for milliseconds
'12:30:45.1234567', # Too many digits for microseconds
'12:30:45.123456+24:30', # Invalid time zone offset
'12:30:45.123456-24:30', # Invalid negative offset
'12\uff1a30\uff1a45', # Uses full-width unicode colons
'12:30:45.123456a', # Non-numeric data after 6 components
'12:30:45.123456789a', # Non-numeric data after 9 components
'12:30:45\u2024123456', # Uses \u2024 in place of decimal point
'12:30:45a', # Extra at end of basic time
'12:30:45.123a', # Extra at end of millisecond time
'12:30:45.123456a', # Extra at end of microsecond time
'12:30:45.123456-', # Extra at end of microsecond time
'12:30:45.123456+', # Extra at end of microsecond time
'12:30:45.123456+12:00:30a', # Extra at end of full time
]

View File

@ -0,0 +1,3 @@
Add support for generalized ISO 8601 parsing to
:meth:`datetime.datetime.fromisoformat`, :meth:`datetime.date.fromisoformat`
and :meth:`datetime.time.fromisoformat`. Patch by Paul Ganssle.

View File

@ -395,6 +395,39 @@ iso_week1_monday(int year)
return week1_monday;
}
static int
iso_to_ymd(const int iso_year, const int iso_week, const int iso_day,
           int *year, int *month, int *day)
{
    /* Convert an ISO calendar date (year, week, weekday) to a Gregorian
     * date, written through the year/month/day output pointers.
     *
     * Return codes:
     *    0: Success
     *   -2: iso_week is not a valid week of iso_year
     *   -3: iso_day is outside 1..7
     *
     * iso_year is not range-checked here.
     */
    if (iso_week < 1 || iso_week > 52) {
        // Week 53 is the only value outside 1..52 that can be valid: ISO
        // years have 53 weeks in them on years starting with a Thursday
        // and on leap years starting on a Wednesday.
        if (iso_week != 53) {
            return -2;
        }
        const int jan1_weekday = weekday(iso_year, 1, 1);
        const int has_week_53 =
            (jan1_weekday == 3) || (jan1_weekday == 2 && is_leap(iso_year));
        if (!has_week_53) {
            return -2;
        }
    }

    if (iso_day < 1 || iso_day > 7) {
        return -3;
    }

    // Convert (Y, W, D) to (Y, M, D): take the iso_week1_monday() ordinal
    // and add the days elapsed since then.
    const int week1_monday = iso_week1_monday(iso_year);
    const int days_elapsed = (iso_week - 1) * 7 + iso_day - 1;

    ord_to_ymd(week1_monday + days_elapsed, year, month, day);
    return 0;
}
/* ---------------------------------------------------------------------------
* Range checkers.
*/
@ -680,6 +713,11 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d)
* String parsing utilities and helper functions
*/
/* Return 1 if c is one of the ASCII characters '0'..'9', otherwise 0. */
static unsigned char
is_digit(const char c)
{
    return (c >= '0' && c <= '9') ? 1 : 0;
}
static const char *
parse_digits(const char *ptr, int *var, size_t num_digits)
{
@ -696,14 +734,17 @@ parse_digits(const char *ptr, int *var, size_t num_digits)
}
static int
parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
parse_isoformat_date(const char *dtstr, const size_t len, int *year, int *month, int *day)
{
/* Parse the date components of an ISO 8601 date string: a calendar date (YYYY-MM-DD or YYYYMMDD) or an ISO week date (YYYY-Www[-D] or YYYYWww[D])
*
* Return codes:
* 0: Success
* -1: Failed to parse date component
* -2: Failed to parse dateseparator
* -2: Inconsistent date separator usage
* -3: Failed to parse ISO week.
* -4: Failed to parse ISO day.
* -5, -6: Failure in iso_to_ymd
*/
const char *p = dtstr;
p = parse_digits(p, year, 4);
@ -711,8 +752,42 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
return -1;
}
if (*(p++) != '-') {
return -2;
const unsigned char uses_separator = (*p == '-');
if (uses_separator) {
++p;
}
if(*p == 'W') {
// This is an isocalendar-style date string
p++;
int iso_week = 0;
int iso_day = 0;
p = parse_digits(p, &iso_week, 2);
if (NULL == p) {
return -3;
}
assert(p > dtstr);
if ((size_t)(p - dtstr) < len) {
if (uses_separator && *(p++) != '-') {
return -2;
}
p = parse_digits(p, &iso_day, 1);
if (NULL == p) {
return -4;
}
} else {
iso_day = 1;
}
int rv = iso_to_ymd(*year, iso_week, iso_day, year, month, day);
if (rv) {
return -3 + rv;
} else {
return 0;
}
}
p = parse_digits(p, month, 2);
@ -720,15 +795,13 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
return -1;
}
if (*(p++) != '-') {
if (uses_separator && *(p++) != '-') {
return -2;
}
p = parse_digits(p, day, 2);
if (p == NULL) {
return -1;
}
return 0;
}
@ -736,11 +809,14 @@ static int
parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour,
int *minute, int *second, int *microsecond)
{
*hour = *minute = *second = *microsecond = 0;
const char *p = tstr;
const char *p_end = tstr_end;
int *vals[3] = {hour, minute, second};
// This is initialized to satisfy an erroneous compiler warning.
unsigned char has_separator = 1;
// Parse [HH[:MM[:SS]]]
// Parse [HH[:?MM[:?SS]]]
for (size_t i = 0; i < 3; ++i) {
p = parse_digits(p, vals[i], 2);
if (NULL == p) {
@ -748,33 +824,47 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour,
}
char c = *(p++);
if (i == 0) {
has_separator = (c == ':');
}
if (p >= p_end) {
return c != '\0';
}
else if (c == ':') {
else if (has_separator && (c == ':')) {
continue;
}
else if (c == '.') {
else if (c == '.' || c == ',') {
break;
}
else {
} else if (!has_separator) {
--p;
} else {
return -4; // Malformed time separator
}
}
// Parse .fff[fff]
// Parse fractional components
size_t len_remains = p_end - p;
if (!(len_remains == 6 || len_remains == 3)) {
return -3;
size_t to_parse = len_remains;
if (len_remains >= 6) {
to_parse = 6;
}
p = parse_digits(p, microsecond, len_remains);
p = parse_digits(p, microsecond, to_parse);
if (NULL == p) {
return -3;
}
if (len_remains == 3) {
*microsecond *= 1000;
static int correction[] = {
100000, 10000, 1000, 100, 10
};
if (to_parse < 6) {
*microsecond *= correction[to_parse-1];
}
while (is_digit(*p)){
++p; // skip truncated digits
}
// Return 1 if it's not the end of the string
@ -800,7 +890,7 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute,
const char *tzinfo_pos = p;
do {
if (*tzinfo_pos == '+' || *tzinfo_pos == '-') {
if (*tzinfo_pos == 'Z' || *tzinfo_pos == '+' || *tzinfo_pos == '-') {
break;
}
} while (++tzinfo_pos < p_end);
@ -822,14 +912,16 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute,
}
}
// Parse time zone component
// Valid formats are:
// - +HH:MM (len 6)
// - +HH:MM:SS (len 9)
// - +HH:MM:SS.ffffff (len 16)
size_t tzlen = p_end - tzinfo_pos;
if (!(tzlen == 6 || tzlen == 9 || tzlen == 16)) {
return -5;
// Special case UTC / Zulu time.
if (*tzinfo_pos == 'Z') {
*tzoffset = 0;
*tzmicrosecond = 0;
if (*(tzinfo_pos + 1) != '\0') {
return -5;
} else {
return 1;
}
}
int tzsign = (*tzinfo_pos == '-') ? -1 : 1;
@ -2983,8 +3075,8 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr)
int year = 0, month = 0, day = 0;
int rv;
if (len == 10) {
rv = parse_isoformat_date(dt_ptr, &year, &month, &day);
if (len == 7 || len == 8 || len == 10) {
rv = parse_isoformat_date(dt_ptr, len, &year, &month, &day);
}
else {
rv = -1;
@ -3027,37 +3119,21 @@ date_fromisocalendar(PyObject *cls, PyObject *args, PyObject *kw)
return NULL;
}
if (week <= 0 || week >= 53) {
int out_of_range = 1;
if (week == 53) {
// ISO years have 53 weeks in it on years starting with a Thursday
// and on leap years starting on Wednesday
int first_weekday = weekday(year, 1, 1);
if (first_weekday == 3 || (first_weekday == 2 && is_leap(year))) {
out_of_range = 0;
}
}
int month;
int rv = iso_to_ymd(year, week, day, &year, &month, &day);
if (out_of_range) {
PyErr_Format(PyExc_ValueError, "Invalid week: %d", week);
return NULL;
}
if (rv == -2) {
PyErr_Format(PyExc_ValueError, "Invalid week: %d", week);
return NULL;
}
if (day <= 0 || day >= 8) {
if (rv == -3) {
PyErr_Format(PyExc_ValueError, "Invalid day: %d (range is [1, 7])",
day);
return NULL;
}
// Convert (Y, W, D) to (Y, M, D) in-place
int day_1 = iso_week1_monday(year);
int month = week;
int day_offset = (month - 1)*7 + day - 1;
ord_to_ymd(day_1 + day_offset, &year, &month, &day);
return new_date_subclass_ex(year, month, day, cls);
}
@ -3489,7 +3565,7 @@ static PyMethodDef date_methods[] = {
{"fromisoformat", (PyCFunction)date_fromisoformat, METH_O |
METH_CLASS,
PyDoc_STR("str -> Construct a date from the output of date.isoformat()")},
PyDoc_STR("str -> Construct a date from a string in ISO 8601 format.")},
{"fromisocalendar", _PyCFunction_CAST(date_fromisocalendar),
METH_VARARGS | METH_KEYWORDS | METH_CLASS,
@ -4564,6 +4640,14 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) {
goto invalid_string_error;
}
// The spec actually requires that time-only ISO 8601 strings start with
// T, but the extended format allows this to be omitted as long as there
// is no ambiguity with date strings.
if (*p == 'T') {
++p;
len -= 1;
}
int hour = 0, minute = 0, second = 0, microsecond = 0;
int tzoffset, tzimicrosecond = 0;
int rv = parse_isoformat_time(p, len,
@ -4671,7 +4755,7 @@ static PyMethodDef time_methods[] = {
PyDoc_STR("Return time with new specified fields.")},
{"fromisoformat", (PyCFunction)time_fromisoformat, METH_O | METH_CLASS,
PyDoc_STR("string -> time from time.isoformat() output")},
PyDoc_STR("string -> time from a string in ISO 8601 format")},
{"__reduce_ex__", (PyCFunction)time_reduce_ex, METH_VARARGS,
PyDoc_STR("__reduce_ex__(proto) -> (cls, state)")},
@ -5184,19 +5268,42 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw)
static PyObject *
_sanitize_isoformat_str(PyObject *dtstr)
{
Py_ssize_t len = PyUnicode_GetLength(dtstr);
if (len < 7) { // All valid ISO 8601 strings are at least 7 characters long
return NULL;
}
// `fromisoformat` allows surrogate characters in exactly one position,
// the separator; to allow datetime_fromisoformat to make the simplifying
// assumption that all valid strings can be encoded in UTF-8, this function
// replaces any surrogate character separators with `T`.
//
// The result of this, if not NULL, returns a new reference
Py_ssize_t len = PyUnicode_GetLength(dtstr);
if (len < 0) {
return NULL;
const void* const unicode_data = PyUnicode_DATA(dtstr);
const unsigned int kind = PyUnicode_KIND(dtstr);
// Depending on the format of the string, the separator can only ever be
// in positions 7, 8 or 10. We'll check each of these for a surrogate and
// if we find one, replace it with `T`. If there is more than one surrogate,
// we don't have to bother sanitizing it, because the function will later
// fail when we try to encode the string as ASCII.
static const size_t potential_separators[3] = {7, 8, 10};
size_t surrogate_separator = 0;
for(size_t idx = 0;
idx < sizeof(potential_separators) / sizeof(*potential_separators);
++idx) {
size_t pos = potential_separators[idx];
if (pos > (size_t)len) {
break;
}
if(Py_UNICODE_IS_SURROGATE(PyUnicode_READ(kind, unicode_data, pos))) {
surrogate_separator = pos;
break;
}
}
if (len <= 10 ||
!Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, 10))) {
if (surrogate_separator == 0) {
Py_INCREF(dtstr);
return dtstr;
}
@ -5206,7 +5313,7 @@ _sanitize_isoformat_str(PyObject *dtstr)
return NULL;
}
if (PyUnicode_WriteChar(str_out, 10, (Py_UCS4)'T')) {
if (PyUnicode_WriteChar(str_out, surrogate_separator, (Py_UCS4)'T')) {
Py_DECREF(str_out);
return NULL;
}
@ -5214,6 +5321,106 @@ _sanitize_isoformat_str(PyObject *dtstr)
return str_out;
}
static Py_ssize_t
_find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) {
    // Work out where the date portion of an ISO 8601 string ends, i.e. the
    // offset at which the date/time separator is expected.
    //
    // dtstr: the string to inspect.
    // len:   its length. NOTE(review): dtstr[4] (and possibly dtstr[5]) is
    //        read whenever len != 7, so this presumably relies on the caller
    //        having rejected strings shorter than 7 characters; confirm.
    //
    // Returns the separator offset (7, 8 or 10), or -1 when the string starts
    // with "YYYY-W" but is too short to hold a complete week date.
    //
    // The date layout is identified from characters 4 and 5 and, for week
    // dates, from a few characters further along. Each layout fixes the
    // position of the separator:
    //
    //   Format     | As-rendered |   Position
    //   ---------------------------------------
    //   %Y-%m-%d   | YYYY-MM-DD  |    10
    //   %Y%m%d     | YYYYMMDD    |     8
    //   %Y-W%V     | YYYY-Www    |     8
    //   %YW%V      | YYYYWww     |     7
    //   %Y-W%V-%u  | YYYY-Www-d  |    10
    //   %YW%V%u    | YYYYWwwd    |     8
    //   %Y-%j      | YYYY-DDD    |     8
    //   %Y%j       | YYYYDDD     |     7
    //
    // Because *any* character is accepted as the separator, week dates can be
    // genuinely ambiguous: 2020-W01-0000 could be YYYY-Www-D0HH (the separator
    // is a digit) or YYYY-Www-HHMM (the separator is a hyphen).
    const char hyphen = '-';
    const char week_marker = 'W';

    // A 7-character string is date-only: the date runs to the end.
    if (len == 7) {
        return 7;
    }

    if (dtstr[4] != hyphen) {
        // Basic format: YYYY???
        if (dtstr[4] != week_marker) {
            // YYYYMMDD (8)
            return 8;
        }

        // YYYYWww (7) or YYYYWwwd (8): scan forward from offset 7 to the
        // first non-digit character or the end of the string.
        size_t first_non_digit = 7;
        while (first_non_digit < (size_t)len &&
               is_digit(dtstr[first_non_digit])) {
            ++first_non_digit;
        }

        // Zero or one digit follows "Www", so the run of digits ends exactly
        // where the separator (or the end of the string) is.
        if (first_non_digit < 9) {
            return first_non_digit;
        }

        // Otherwise the separator itself is a digit. Time components come in
        // pairs, so YYYYWww#HH... has an odd number of digits from offset 7
        // (the run ends at an even offset) while YYYYWwwd#HH... has an even
        // number of digits (the run ends at an odd offset).
        return (first_non_digit % 2 == 0) ? 7 : 8;
    }

    // Extended format: YYYY-???
    if (dtstr[5] != week_marker) {
        // YYYY-MM-DD (10)
        return 10;
    }

    // YYYY-W??
    if (len < 8) {
        return -1;
    }
    if (len <= 8 || dtstr[8] != hyphen) {
        // YYYY-Www (8)
        return 8;
    }

    // YYYY-Www-D (10) or YYYY-Www-HH (8)
    if (len == 9) {
        return -1;
    }
    if (len > 10 && is_digit(dtstr[10])) {
        // This is as far as the ambiguity gets resolved: for YYYY-Www-##
        // the separator is either the hyphen at 8 or a digit at 10. Assume
        // the hyphen at 8, since a hyphen is a far more likely separator
        // than a digit; this is best effort, as accepting arbitrary
        // separators is an extension of the spec anyway.
        return 8;
    }
    return 10;
}
static PyObject *
datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
{
@ -5225,9 +5432,14 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
return NULL;
}
// We only need to sanitize this string if the separator is a surrogate
// character. In the situation where the separator location is ambiguous,
// we don't have to sanitize anything because that can only happen when
// the separator is either '-' or a number. This should mostly be a no-op,
// but it makes the reference counting easier if we still sanitize.
PyObject *dtstr_clean = _sanitize_isoformat_str(dtstr);
if (dtstr_clean == NULL) {
goto error;
goto invalid_string_error;
}
Py_ssize_t len;
@ -5243,30 +5455,35 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
}
}
const Py_ssize_t separator_location = _find_isoformat_datetime_separator(
dt_ptr, len);
const char *p = dt_ptr;
int year = 0, month = 0, day = 0;
int hour = 0, minute = 0, second = 0, microsecond = 0;
int tzoffset = 0, tzusec = 0;
// date has a fixed length of 10
int rv = parse_isoformat_date(p, &year, &month, &day);
// date runs up to separator_location
int rv = parse_isoformat_date(p, separator_location, &year, &month, &day);
if (!rv && len > 10) {
if (!rv && len > separator_location) {
// In UTF-8, the length of multi-byte characters is encoded in the MSB
if ((p[10] & 0x80) == 0) {
p += 11;
p += separator_location;
if ((p[0] & 0x80) == 0) {
p += 1;
}
else {
switch (p[10] & 0xf0) {
switch (p[0] & 0xf0) {
case 0xe0:
p += 13;
p += 3;
break;
case 0xf0:
p += 14;
p += 4;
break;
default:
p += 12;
p += 2;
break;
}
}
@ -6327,7 +6544,7 @@ static PyMethodDef datetime_methods[] = {
{"fromisoformat", (PyCFunction)datetime_fromisoformat,
METH_O | METH_CLASS,
PyDoc_STR("string -> datetime from datetime.isoformat() output")},
PyDoc_STR("string -> datetime from a string in most ISO 8601 formats")},
/* Instance methods: */