mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-27 13:54:19 +08:00
libstdc++: Fix std::chrono::tzdb to work with vanguard format
I found some issues in the std::chrono::tzdb parser by testing the tzdata "vanguard" format, which uses new features that aren't enabled in the "main" and "rearguard" data formats. Since 2024a the keyword "minimum" is no longer valid for the FROM and TO fields in a Rule line, which means that "m" is now a valid abbreviation for "maximum". Previously we expected either "mi" or "ma". For backwards compatibility, a FROM field beginning with "mi" is still supported and is treated as 1900. The "maximum" keyword is only allowed in TO now, because it makes no sense in FROM. To support these changes the minmax_year and minmax_year2 classes for parsing FROM and TO are replaced with a single years_from_to class that reads both fields. The vanguard format makes use of %z in Zone FORMAT fields, which caused an exception to be thrown from ZoneInfo::set_abbrev because no % or / characters were expected when a Zone doesn't use a named Rule. The ZoneInfo::to(sys_info&) function now uses format_abbrev_str to replace any %z with the current offset. Although format_abbrev_str also checks for %s and STD/DST formats, those only make sense when a named Rule is in effect, so won't occur when ZoneInfo::to(sys_info&) is used. This change also implements a feature that has always been missing from time_zone::_M_get_sys_info: finding the Rule that is active before the specified time point, so that we can correctly handle %s in the FORMAT for the first new sys_info that gets created. This requires implementing a poorly documented feature of zic, to get the LETTERS field from a later transition, as described at https://mm.icann.org/pipermail/tz/2024-April/058891.html In order for this to work we need to be able to distinguish an empty letters field (as used by CE%sT where the variable part is either empty or "S") from "the letters field is not known for this transition". The tzdata file uses "-" for an empty letters field, which libstdc++ was previously replacing with "" when the Rule was parsed. Instead, we now preserve the "-" in the Rule object, so that "" can be used for the case where we don't know the letters (and so need to decide it). libstdc++-v3/ChangeLog: * src/c++20/tzdb.cc (minmax_year, minmax_year2): Remove. (years_from_to): New class replacing minmax_year and minmax_year2. (format_abbrev_str, select_std_or_dst_abbrev): Move earlier in the file. Handle "-" for letters. (ZoneInfo::to): Use format_abbrev_str to expand %z. (ZoneInfo::set_abbrev): Remove exception. Change parameter from reference to value. (operator>>(istream&, Rule&)): Do not clear letters when it contains "-". (time_zone::_M_get_sys_info): Add missing logic to find the Rule in effect before the time point. * testsuite/std/time/tzdb/1.cc: Adjust for vanguard format using "GMT" as the Zone name, not as a Link to "Etc/GMT". * testsuite/std/time/time_zone/sys_info_abbrev.cc: New test.
This commit is contained in:
parent
629257bcb8
commit
0ca8d56f20
@ -342,51 +342,103 @@ namespace std::chrono
|
||||
friend istream& operator>>(istream&, on_day&);
|
||||
};
|
||||
|
||||
// Wrapper for chrono::year that reads a year, or one of the keywords
|
||||
// "minimum" or "maximum", or an unambiguous prefix of a keyword.
|
||||
struct minmax_year
|
||||
// Wrapper for two chrono::year values, which reads the FROM and TO
|
||||
// fields of a Rule line. The FROM field is a year and TO is a year or
|
||||
// one of the keywords "maximum" or "only" (or an abbreviation of those).
|
||||
// For backwards compatibility, the keyword "minimum" is recognized
|
||||
// for FROM and interpreted as 1900.
|
||||
struct years_from_to
|
||||
{
|
||||
year& y;
|
||||
year& from;
|
||||
year& to;
|
||||
|
||||
friend istream& operator>>(istream& in, minmax_year&& y)
|
||||
friend istream& operator>>(istream& in, years_from_to&& yy)
|
||||
{
|
||||
if (ws(in).peek() == 'm') // keywords "minimum" or "maximum"
|
||||
string s;
|
||||
auto c = ws(in).peek();
|
||||
if (c == 'm') [[unlikely]] // keyword "minimum"
|
||||
{
|
||||
string s;
|
||||
in >> s; // extract the rest of the word, but only look at s[1]
|
||||
if (s[1] == 'a')
|
||||
y.y = year::max();
|
||||
else if (s[1] == 'i')
|
||||
y.y = year::min();
|
||||
else
|
||||
in.setstate(ios::failbit);
|
||||
in >> s; // extract the rest of the word
|
||||
yy.from = year(1900);
|
||||
}
|
||||
else if (int num = 0; in >> num) [[likely]]
|
||||
yy.from = year{num};
|
||||
|
||||
c = ws(in).peek();
|
||||
if (c == 'm') // keyword "maximum"
|
||||
{
|
||||
in >> s; // extract the rest of the word
|
||||
yy.to = year::max();
|
||||
}
|
||||
else if (c == 'o') // keyword "only"
|
||||
{
|
||||
in >> s; // extract the rest of the word
|
||||
yy.to = yy.from;
|
||||
}
|
||||
else if (int num = 0; in >> num)
|
||||
y.y = year{num};
|
||||
yy.to = year{num};
|
||||
|
||||
return in;
|
||||
}
|
||||
};
|
||||
|
||||
// As above for minmax_year, but also supports the keyword "only",
|
||||
// meaning that the TO year is the same as the FROM year.
|
||||
struct minmax_year2
|
||||
bool
|
||||
select_std_or_dst_abbrev(string& abbrev, minutes save)
|
||||
{
|
||||
minmax_year to;
|
||||
year from;
|
||||
if (size_t pos = abbrev.find('/'); pos != string::npos)
|
||||
{
|
||||
// Select one of "STD/DST" for standard or daylight.
|
||||
if (save == 0min)
|
||||
abbrev.erase(pos);
|
||||
else
|
||||
abbrev.erase(0, pos + 1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
friend istream& operator>>(istream& in, minmax_year2&& y)
|
||||
{
|
||||
if (ws(in).peek() == 'o') // keyword "only"
|
||||
{
|
||||
string s;
|
||||
in >> s; // extract the whole keyword
|
||||
y.to.y = y.from;
|
||||
}
|
||||
else
|
||||
in >> std::move(y.to);
|
||||
return in;
|
||||
}
|
||||
};
|
||||
// Set the sys_info::abbrev string by expanding any placeholders.
|
||||
void
|
||||
format_abbrev_str(sys_info& info, string_view letters = {})
|
||||
{
|
||||
if (size_t pos = info.abbrev.find('%'); pos != string::npos)
|
||||
{
|
||||
if (info.abbrev[pos + 1] == 's')
|
||||
{
|
||||
// Expand "%s" to the variable part, given by Rule::letters.
|
||||
if (letters == "-")
|
||||
info.abbrev.erase(pos, 2);
|
||||
else
|
||||
info.abbrev.replace(pos, 2, letters);
|
||||
}
|
||||
else if (info.abbrev[pos + 1] == 'z')
|
||||
{
|
||||
// Expand "%z" to the UT offset as +/-hh, +/-hhmm, or +/-hhmmss.
|
||||
hh_mm_ss<seconds> t(info.offset);
|
||||
string z(1, "+-"[t.is_negative()]);
|
||||
long val = t.hours().count();
|
||||
int digits = 2;
|
||||
if (int m = t.minutes().count())
|
||||
{
|
||||
digits = 4;
|
||||
val *= 100;
|
||||
val += m;
|
||||
if (int s = t.seconds().count())
|
||||
{
|
||||
digits = 6;
|
||||
val *= 100;
|
||||
val += s;
|
||||
}
|
||||
}
|
||||
auto sval = std::to_string(val);
|
||||
z += string(digits - sval.size(), '0');
|
||||
z += sval;
|
||||
info.abbrev.replace(pos, 2, z);
|
||||
}
|
||||
}
|
||||
else
|
||||
select_std_or_dst_abbrev(info.abbrev, info.save);
|
||||
}
|
||||
|
||||
// A time zone information record.
|
||||
// Zone NAME STDOFF RULES FORMAT [UNTIL]
|
||||
@ -462,6 +514,7 @@ namespace std::chrono
|
||||
info.offset = offset();
|
||||
info.save = minutes(m_save);
|
||||
info.abbrev = format();
|
||||
format_abbrev_str(info); // expand %z
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -469,12 +522,9 @@ namespace std::chrono
|
||||
friend class time_zone;
|
||||
|
||||
void
|
||||
set_abbrev(const string& abbrev)
|
||||
set_abbrev(string abbrev)
|
||||
{
|
||||
// In practice, the FORMAT field never needs expanding here.
|
||||
if (abbrev.find_first_of("/%") != abbrev.npos)
|
||||
__throw_runtime_error("std::chrono::time_zone: invalid data");
|
||||
m_buf = abbrev;
|
||||
m_buf = std::move(abbrev);
|
||||
m_pos = 0;
|
||||
m_expanded = true;
|
||||
}
|
||||
@ -544,9 +594,7 @@ namespace std::chrono
|
||||
|
||||
// Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
|
||||
|
||||
in >> quoted(rule.name)
|
||||
>> minmax_year{rule.from}
|
||||
>> minmax_year2{rule.to, rule.from};
|
||||
in >> quoted(rule.name) >> years_from_to{rule.from, rule.to};
|
||||
|
||||
if (char type; in >> type && type != '-')
|
||||
in.setstate(ios::failbit);
|
||||
@ -557,7 +605,7 @@ namespace std::chrono
|
||||
if (save_time.indicator != at_time::Wall)
|
||||
{
|
||||
// We don't actually store the save_time.indicator, because we
|
||||
// assume that it's always deducable from the actual offset value.
|
||||
// assume that it's always deducible from the offset value.
|
||||
auto expected = save_time.time == 0s
|
||||
? at_time::Standard
|
||||
: at_time::Daylight;
|
||||
@ -567,8 +615,6 @@ namespace std::chrono
|
||||
rule.save = save_time.time;
|
||||
|
||||
in >> rule.letters;
|
||||
if (rule.letters == "-")
|
||||
rule.letters.clear();
|
||||
return in;
|
||||
}
|
||||
|
||||
@ -719,58 +765,6 @@ namespace std::chrono
|
||||
#endif // TZDB_DISABLED
|
||||
};
|
||||
|
||||
#ifndef TZDB_DISABLED
|
||||
namespace
|
||||
{
|
||||
bool
|
||||
select_std_or_dst_abbrev(string& abbrev, minutes save)
|
||||
{
|
||||
if (size_t pos = abbrev.find('/'); pos != string::npos)
|
||||
{
|
||||
// Select one of "STD/DST" for standard or daylight.
|
||||
if (save == 0min)
|
||||
abbrev.erase(pos);
|
||||
else
|
||||
abbrev.erase(0, pos + 1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the sys_info::abbrev string by expanding any placeholders.
|
||||
void
|
||||
format_abbrev_str(sys_info& info, string_view letters = {})
|
||||
{
|
||||
if (size_t pos = info.abbrev.find("%s"); pos != string::npos)
|
||||
{
|
||||
// Expand "%s" to the variable part, given by Rule::letters.
|
||||
info.abbrev.replace(pos, 2, letters);
|
||||
}
|
||||
else if (size_t pos = info.abbrev.find("%z"); pos != string::npos)
|
||||
{
|
||||
// Expand "%z" to the UT offset as +/-hh, +/-hhmm, or +/-hhmmss.
|
||||
hh_mm_ss<seconds> t(info.offset);
|
||||
string z(1, "+-"[t.is_negative()]);
|
||||
long val = t.hours().count();
|
||||
if (minutes m = t.minutes(); m != m.zero())
|
||||
{
|
||||
val *= 100;
|
||||
val += m.count();
|
||||
if (seconds s = t.seconds(); s != s.zero())
|
||||
{
|
||||
val *= 100;
|
||||
val += s.count();
|
||||
}
|
||||
}
|
||||
z += std::to_string(val);
|
||||
info.abbrev.replace(pos, 2, z);
|
||||
}
|
||||
else
|
||||
select_std_or_dst_abbrev(info.abbrev, info.save);
|
||||
}
|
||||
}
|
||||
#endif // TZDB_DISABLED
|
||||
|
||||
// Implementation of std::chrono::time_zone::get_info(const sys_time<D>&)
|
||||
sys_info
|
||||
time_zone::_M_get_sys_info(sys_seconds tp) const
|
||||
@ -839,12 +833,72 @@ namespace std::chrono
|
||||
info.abbrev = ri.format();
|
||||
|
||||
string_view letters;
|
||||
if (i != infos.begin())
|
||||
if (i != infos.begin() && i[-1].expanded())
|
||||
letters = i[-1].next_letters();
|
||||
|
||||
if (letters.empty())
|
||||
{
|
||||
if (i[-1].expanded())
|
||||
letters = i[-1].next_letters();
|
||||
// XXX else need to find Rule active before this time and use it
|
||||
// to know the initial offset, save, and letters.
|
||||
sys_seconds t = info.begin - seconds(1);
|
||||
const year_month_day date(chrono::floor<days>(t));
|
||||
|
||||
// Try to find a Rule active before this time, to get initial
|
||||
// SAVE and LETTERS values. There may not be a Rule for the period
|
||||
// before the first DST transition, so find the earliest DST->STD
|
||||
// transition and use the LETTERS from that.
|
||||
const Rule* active_rule = nullptr;
|
||||
sys_seconds active_rule_start = sys_seconds::min();
|
||||
const Rule* first_std = nullptr;
|
||||
for (const auto& rule : rules)
|
||||
{
|
||||
if (rule.save == minutes(0))
|
||||
{
|
||||
if (!first_std)
|
||||
first_std = &rule;
|
||||
else if (rule.from < first_std->from)
|
||||
first_std = &rule;
|
||||
else if (rule.from == first_std->from)
|
||||
{
|
||||
if (rule.start_time(rule.from, {})
|
||||
< first_std->start_time(first_std->from, {}))
|
||||
first_std = &rule;
|
||||
}
|
||||
}
|
||||
|
||||
year y = date.year();
|
||||
|
||||
if (y > rule.to) // rule no longer applies at time t
|
||||
continue;
|
||||
if (y < rule.from) // rule doesn't apply yet at time t
|
||||
continue;
|
||||
|
||||
sys_seconds rule_start;
|
||||
|
||||
seconds offset{}; // appropriate for at_time::Universal
|
||||
if (rule.when.indicator == at_time::Wall)
|
||||
offset = info.offset;
|
||||
else if (rule.when.indicator == at_time::Standard)
|
||||
offset = ri.offset();
|
||||
|
||||
// Time the rule takes effect this year:
|
||||
rule_start = rule.start_time(y, offset);
|
||||
|
||||
if (rule_start >= t && rule.from < y)
|
||||
{
|
||||
// Try this rule in the previous year.
|
||||
rule_start = rule.start_time(--y, offset);
|
||||
}
|
||||
|
||||
if (active_rule_start < rule_start && rule_start < t)
|
||||
{
|
||||
active_rule_start = rule_start;
|
||||
active_rule = &rule;
|
||||
}
|
||||
}
|
||||
|
||||
if (active_rule)
|
||||
letters = active_rule->letters;
|
||||
else if (first_std)
|
||||
letters = first_std->letters;
|
||||
}
|
||||
|
||||
const Rule* curr_rule = nullptr;
|
||||
@ -2069,9 +2123,11 @@ namespace std::chrono
|
||||
istringstream in2(std::move(rules));
|
||||
in2 >> rules_time;
|
||||
inf.m_save = duration_cast<minutes>(rules_time.time);
|
||||
// If the FORMAT is "STD/DST" then we can choose the right one
|
||||
// now, so that we store a shorter string.
|
||||
select_std_or_dst_abbrev(fmt, inf.m_save);
|
||||
}
|
||||
inf.set_abbrev(fmt);
|
||||
inf.set_abbrev(std::move(fmt));
|
||||
}
|
||||
|
||||
// YEAR [MONTH [DAY [TIME]]]
|
||||
@ -2082,7 +2138,12 @@ namespace std::chrono
|
||||
abbrev_month m{January};
|
||||
int d = 1;
|
||||
at_time t{};
|
||||
// XXX DAY should support ON format, e.g. lastSun or Sun>=8
|
||||
in >> m >> d >> t;
|
||||
// XXX UNTIL field should be interpreted
|
||||
// "using the rules in effect just before the transition"
|
||||
// so might need to store as year_month_day and hh_mm_ss and only
|
||||
// convert to a sys_time once we know the offset in effect.
|
||||
inf.m_until = sys_days(year(y)/m.m/day(d)) + seconds(t.time);
|
||||
}
|
||||
else
|
||||
|
106
libstdc++-v3/testsuite/std/time/time_zone/sys_info_abbrev.cc
Normal file
106
libstdc++-v3/testsuite/std/time/time_zone/sys_info_abbrev.cc
Normal file
@ -0,0 +1,106 @@
|
||||
// { dg-do run { target c++20 } }
|
||||
// { dg-require-effective-target tzdb }
|
||||
// { dg-require-effective-target cxx11_abi }
|
||||
// { dg-xfail-run-if "no weak override on AIX" { powerpc-ibm-aix* } }
|
||||
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
static bool override_used = false;
|
||||
|
||||
namespace __gnu_cxx
|
||||
{
|
||||
const char* zoneinfo_dir_override() {
|
||||
override_used = true;
|
||||
return "./";
|
||||
}
|
||||
}
|
||||
|
||||
using namespace std::chrono;
|
||||
|
||||
void
|
||||
test_format()
|
||||
{
|
||||
std::ofstream("tzdata.zi") << R"(# version test_1
|
||||
Zone Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u
|
||||
-1 - %z 1975
|
||||
0 - GMT
|
||||
Zon Some/Zone 1:2:3 - %z 1900
|
||||
1:23:45 - %z 1950
|
||||
Zo Another/Zone 1:2:3 - AZ0 1901
|
||||
1 Roolz A%sZ 2000
|
||||
1 Roolz SAZ/DAZ 2005
|
||||
1 Roolz %z
|
||||
Rule Roolz 1950 max - April 1 2 1 D
|
||||
Rul Roolz 1950 max - Oct 1 1 0 S
|
||||
Z Strange/Zone 1 - X%sX 1980
|
||||
1 - FOO/BAR 1990
|
||||
2:00 - %zzz 1995
|
||||
0:9 - %zzz 1996
|
||||
0:8:7 - %zzz 1997
|
||||
0:6:5.5 - %zzz 1998
|
||||
)";
|
||||
|
||||
const auto& db = reload_tzdb();
|
||||
VERIFY( override_used ); // If this fails then XFAIL for the target.
|
||||
VERIFY( db.version == "test_1" );
|
||||
|
||||
// Test formatting %z as
|
||||
auto tz = locate_zone("Africa/Bissau");
|
||||
auto inf = tz->get_info(sys_days(1974y/1/1));
|
||||
VERIFY( inf.abbrev == "-01" );
|
||||
|
||||
tz = locate_zone("Some/Zone");
|
||||
inf = tz->get_info(sys_days(1899y/1/1));
|
||||
VERIFY( inf.abbrev == "+010203" );
|
||||
inf = tz->get_info(sys_days(1955y/1/1));
|
||||
VERIFY( inf.abbrev == "+012345" );
|
||||
|
||||
tz = locate_zone("Another/Zone");
|
||||
// Test formatting %s as the LETTER/S field from the active Rule.
|
||||
inf = tz->get_info(sys_days(1910y/January/1));
|
||||
VERIFY( inf.abbrev == "ASZ" );
|
||||
inf = tz->get_info(sys_days(1950y/January/1));
|
||||
VERIFY( inf.abbrev == "ASZ" );
|
||||
inf = tz->get_info(sys_days(1950y/June/1));
|
||||
VERIFY( inf.abbrev == "ADZ" );
|
||||
inf = tz->get_info(sys_days(1999y/January/1));
|
||||
VERIFY( inf.abbrev == "ASZ" );
|
||||
inf = tz->get_info(sys_days(1999y/July/1));
|
||||
VERIFY( inf.abbrev == "ADZ" );
|
||||
// Test formatting STD/DST according to the active Rule.
|
||||
inf = tz->get_info(sys_days(2000y/January/2));
|
||||
VERIFY( inf.abbrev == "SAZ" );
|
||||
inf = tz->get_info(sys_days(2001y/January/1));
|
||||
VERIFY( inf.abbrev == "SAZ" );
|
||||
inf = tz->get_info(sys_days(2001y/July/1));
|
||||
VERIFY( inf.abbrev == "DAZ" );
|
||||
// Test formatting %z as the offset determined by the active Rule.
|
||||
inf = tz->get_info(sys_days(2005y/January/2));
|
||||
VERIFY( inf.abbrev == "+01" );
|
||||
inf = tz->get_info(sys_days(2006y/January/1));
|
||||
VERIFY( inf.abbrev == "+01" );
|
||||
inf = tz->get_info(sys_days(2006y/July/1));
|
||||
VERIFY( inf.abbrev == "+02" );
|
||||
|
||||
// Test formatting %z, %s and S/D for a Zone with no associated Rules.
|
||||
tz = locate_zone("Strange/Zone");
|
||||
inf = tz->get_info(sys_days(1979y/January/1));
|
||||
VERIFY( inf.abbrev == "XX" ); // No Rule means nothing to use for %s.
|
||||
inf = tz->get_info(sys_days(1981y/July/1));
|
||||
VERIFY( inf.abbrev == "FOO" ); // Always standard time means first string.
|
||||
inf = tz->get_info(sys_days(1994y/July/1));
|
||||
VERIFY( inf.abbrev == "+02zz" );
|
||||
inf = tz->get_info(sys_days(1995y/July/1));
|
||||
VERIFY( inf.abbrev == "+0009zz" );
|
||||
inf = tz->get_info(sys_days(1996y/July/1));
|
||||
VERIFY( inf.abbrev == "+000807zz" );
|
||||
inf = tz->get_info(sys_days(1997y/July/1));
|
||||
VERIFY( inf.abbrev == "+000606zz" );
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_format();
|
||||
}
|
@ -39,11 +39,15 @@ test_locate()
|
||||
const tzdb& db = get_tzdb();
|
||||
const time_zone* tz = db.locate_zone("GMT");
|
||||
VERIFY( tz != nullptr );
|
||||
VERIFY( tz->name() == "Etc/GMT" );
|
||||
VERIFY( tz == std::chrono::locate_zone("GMT") );
|
||||
VERIFY( tz == db.locate_zone("Etc/GMT") );
|
||||
VERIFY( tz == db.locate_zone("Etc/GMT+0") );
|
||||
|
||||
// Since 2022f GMT is now a Zone and Etc/GMT a link instead of vice versa,
|
||||
// but only when using the vanguard format. As of 2024a, the main and
|
||||
// rearguard formats still have Etc/GMT as a Zone and GMT as a link.
|
||||
VERIFY( tz->name() == "GMT" || tz->name() == "Etc/GMT" );
|
||||
|
||||
VERIFY( db.locate_zone(db.current_zone()->name()) == db.current_zone() );
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user