mirror of
https://github.com/python/cpython.git
synced 2024-11-23 01:45:25 +08:00
gh-126700: pygettext: Support more gettext functions (GH-126912)
Support multi-argument gettext functions: ngettext(), pgettext(), dgettext(), etc.
This commit is contained in:
parent
f83ca6962a
commit
0a1944cda8
@ -15,53 +15,75 @@ msgstr ""
|
||||
"Generated-By: pygettext.py 1.5\n"
|
||||
|
||||
|
||||
#: messages.py:5
|
||||
#: messages.py:16
|
||||
msgid ""
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:8 messages.py:9
|
||||
#: messages.py:19 messages.py:20
|
||||
msgid "parentheses"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:12
|
||||
#: messages.py:23
|
||||
msgid "Hello, world!"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:15
|
||||
#: messages.py:26
|
||||
msgid ""
|
||||
"Hello,\n"
|
||||
" multiline!\n"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:29
|
||||
#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94
|
||||
#: messages.py:99
|
||||
msgid "foo"
|
||||
msgid_plural "foos"
|
||||
msgstr[0] ""
|
||||
msgstr[1] ""
|
||||
|
||||
#: messages.py:47
|
||||
msgid "something"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:50
|
||||
msgid "Hello, {}!"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:33
|
||||
#: messages.py:54
|
||||
msgid "1"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:33
|
||||
#: messages.py:54
|
||||
msgid "2"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:34 messages.py:35
|
||||
#: messages.py:55 messages.py:56
|
||||
msgid "A"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:34 messages.py:35
|
||||
#: messages.py:55 messages.py:56
|
||||
msgid "B"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:36
|
||||
#: messages.py:57
|
||||
msgid "set"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:42
|
||||
#: messages.py:63
|
||||
msgid "nested string"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:47
|
||||
#: messages.py:68
|
||||
msgid "baz"
|
||||
msgstr ""
|
||||
|
||||
#: messages.py:91 messages.py:92 messages.py:95 messages.py:96
|
||||
msgctxt "context"
|
||||
msgid "foo"
|
||||
msgid_plural "foos"
|
||||
msgstr[0] ""
|
||||
msgstr[1] ""
|
||||
|
||||
#: messages.py:100
|
||||
msgid "domain foo"
|
||||
msgstr ""
|
||||
|
||||
|
@ -1,5 +1,16 @@
|
||||
# Test message extraction
|
||||
from gettext import gettext as _
|
||||
from gettext import (
|
||||
gettext,
|
||||
ngettext,
|
||||
pgettext,
|
||||
npgettext,
|
||||
dgettext,
|
||||
dngettext,
|
||||
dpgettext,
|
||||
dnpgettext
|
||||
)
|
||||
|
||||
_ = gettext
|
||||
|
||||
# Empty string
|
||||
_("")
|
||||
@ -21,13 +32,23 @@ _()
|
||||
_(None)
|
||||
_(1)
|
||||
_(False)
|
||||
_(x="kwargs are not allowed")
|
||||
_(("invalid"))
|
||||
_(["invalid"])
|
||||
_({"invalid"})
|
||||
_("string"[3])
|
||||
_("string"[:3])
|
||||
_({"string": "foo"})
|
||||
|
||||
# pygettext does not allow keyword arguments, but both xgettext and pybabel do
|
||||
_(x="kwargs work!")
|
||||
|
||||
# Unusual, but valid arguments
|
||||
_("foo", "bar")
|
||||
_("something", x="something else")
|
||||
|
||||
# .format()
|
||||
_("Hello, {}!").format("world") # valid
|
||||
_("Hello, {}!".format("world")) # invalid
|
||||
_("Hello, {}!".format("world")) # invalid, but xgettext and pybabel extract the first string
|
||||
|
||||
# Nested structures
|
||||
_("1"), _("2")
|
||||
@ -62,3 +83,28 @@ def _(x):
|
||||
|
||||
def _(x="don't extract me"):
|
||||
pass
|
||||
|
||||
|
||||
# Other gettext functions
|
||||
gettext("foo")
|
||||
ngettext("foo", "foos", 1)
|
||||
pgettext("context", "foo")
|
||||
npgettext("context", "foo", "foos", 1)
|
||||
dgettext("domain", "foo")
|
||||
dngettext("domain", "foo", "foos", 1)
|
||||
dpgettext("domain", "context", "foo")
|
||||
dnpgettext("domain", "context", "foo", "foos", 1)
|
||||
|
||||
# Complex arguments
|
||||
ngettext("foo", "foos", 42 + (10 - 20))
|
||||
dgettext(["some", {"complex"}, ("argument",)], "domain foo")
|
||||
|
||||
# Invalid calls which are not extracted
|
||||
gettext()
|
||||
ngettext('foo')
|
||||
pgettext('context')
|
||||
npgettext('context', 'foo')
|
||||
dgettext('domain')
|
||||
dngettext('domain', 'foo')
|
||||
dpgettext('domain', 'context')
|
||||
dnpgettext('domain', 'context', 'foo')
|
||||
|
@ -332,14 +332,14 @@ class Test_pygettext(unittest.TestCase):
|
||||
msgids = self.extract_docstrings_from_str(dedent('''\
|
||||
f"{_('foo', 'bar')}"
|
||||
'''))
|
||||
self.assertNotIn('foo', msgids)
|
||||
self.assertIn('foo', msgids)
|
||||
self.assertNotIn('bar', msgids)
|
||||
|
||||
def test_calls_in_fstring_with_keyword_args(self):
|
||||
msgids = self.extract_docstrings_from_str(dedent('''\
|
||||
f"{_('foo', bar='baz')}"
|
||||
'''))
|
||||
self.assertNotIn('foo', msgids)
|
||||
self.assertIn('foo', msgids)
|
||||
self.assertNotIn('bar', msgids)
|
||||
self.assertNotIn('baz', msgids)
|
||||
|
||||
|
@ -8,6 +8,8 @@ argument %(argument_name)s: %(message)s
|
||||
argument '%(argument_name)s' is deprecated
|
||||
can't open '%(filename)s': %(error)s
|
||||
command '%(parser_name)s' is deprecated
|
||||
conflicting option string: %s
|
||||
expected %s argument
|
||||
expected at least one argument
|
||||
expected at most one argument
|
||||
expected one argument
|
||||
|
@ -1,3 +1,4 @@
|
||||
%(option)s option requires %(number)d argument
|
||||
%prog [options]
|
||||
%s option does not take a value
|
||||
Options
|
||||
|
@ -0,0 +1 @@
|
||||
Add support for multi-argument :mod:`gettext` functions in :program:`pygettext.py`.
|
@ -163,16 +163,13 @@ import glob
|
||||
import time
|
||||
import getopt
|
||||
import ast
|
||||
import token
|
||||
import tokenize
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from operator import itemgetter
|
||||
|
||||
__version__ = '1.5'
|
||||
|
||||
default_keywords = ['_']
|
||||
DEFAULTKEYWORDS = ', '.join(default_keywords)
|
||||
|
||||
EMPTYSTRING = ''
|
||||
|
||||
|
||||
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
|
||||
# there.
|
||||
@ -306,12 +303,64 @@ def getFilesForName(name):
|
||||
return []
|
||||
|
||||
|
||||
# Key is the function name, value is a dictionary mapping argument positions to the
|
||||
# type of the argument. The type is one of 'msgid', 'msgid_plural', or 'msgctxt'.
|
||||
DEFAULTKEYWORDS = {
|
||||
'_': {0: 'msgid'},
|
||||
'gettext': {0: 'msgid'},
|
||||
'ngettext': {0: 'msgid', 1: 'msgid_plural'},
|
||||
'pgettext': {0: 'msgctxt', 1: 'msgid'},
|
||||
'npgettext': {0: 'msgctxt', 1: 'msgid', 2: 'msgid_plural'},
|
||||
'dgettext': {1: 'msgid'},
|
||||
'dngettext': {1: 'msgid', 2: 'msgid_plural'},
|
||||
'dpgettext': {1: 'msgctxt', 2: 'msgid'},
|
||||
'dnpgettext': {1: 'msgctxt', 2: 'msgid', 3: 'msgid_plural'},
|
||||
}
|
||||
|
||||
|
||||
def matches_spec(message, spec):
    """Return True when *message* has every field named by the keyword spec.

    *spec* maps argument positions to field names; only its values are
    consulted.  *message* is any container supporting the ``in`` operator.
    An empty spec is trivially satisfied.
    """
    for required_field in spec.values():
        if required_field not in message:
            return False
    return True
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True)
class Location:
    """A source position: a file name plus a line number.

    Instances are immutable and hashable, so they can be kept in a set.
    ``order=True`` makes the dataclass generate ``<``, ``<=``, ``>`` and
    ``>=``, comparing instances as ``(filename, lineno)`` tuples in field
    declaration order.  Comparing against an object of another type yields
    ``NotImplemented`` instead of failing on a missing attribute.
    """
    filename: str
    lineno: int
|
||||
|
||||
|
||||
@dataclass
class Message:
    """One extracted message and every location where it was seen."""
    msgid: str
    msgid_plural: str | None
    msgctxt: str | None
    # Set of Location objects, one per recorded occurrence.
    locations: set[Location] = field(default_factory=set)
    # OR-ed with the flag of every recorded occurrence (see add_location).
    is_docstring: bool = False

    def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False):
        """Record one more occurrence of this message.

        The plural form is adopted only when the message has none yet; a
        plural that is already stored is never overwritten.  The docstring
        flag is sticky: once set it stays set.
        """
        has_plural = self.msgid_plural is not None
        if not has_plural:
            self.msgid_plural = msgid_plural
        occurrence = Location(filename, lineno)
        self.locations.add(occurrence)
        self.is_docstring = self.is_docstring | is_docstring
|
||||
|
||||
|
||||
def key_for(msgid, msgctxt=None):
    """Return the lookup key under which a message is stored.

    Without a context the key is the bare *msgid*; with one it is the tuple
    ``(msgctxt, msgid)``.  Only ``None`` counts as "no context" -- an empty
    string is still a context.
    """
    return msgid if msgctxt is None else (msgctxt, msgid)
|
||||
|
||||
|
||||
class TokenEater:
|
||||
def __init__(self, options):
|
||||
self.__options = options
|
||||
self.__messages = {}
|
||||
self.__state = self.__waiting
|
||||
self.__data = []
|
||||
self.__data = defaultdict(str)
|
||||
self.__curr_arg = 0
|
||||
self.__curr_keyword = None
|
||||
self.__lineno = -1
|
||||
self.__freshmodule = 1
|
||||
self.__curfile = None
|
||||
@ -331,7 +380,7 @@ class TokenEater:
|
||||
# module docstring?
|
||||
if self.__freshmodule:
|
||||
if ttype == tokenize.STRING and is_literal_string(tstring):
|
||||
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
|
||||
self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True)
|
||||
self.__freshmodule = 0
|
||||
return
|
||||
if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
|
||||
@ -346,6 +395,7 @@ class TokenEater:
|
||||
return
|
||||
if ttype == tokenize.NAME and tstring in opts.keywords:
|
||||
self.__state = self.__keywordseen
|
||||
self.__curr_keyword = tstring
|
||||
return
|
||||
if ttype == tokenize.STRING:
|
||||
maybe_fstring = ast.parse(tstring, mode='eval').body
|
||||
@ -397,7 +447,8 @@ class TokenEater:
|
||||
}, file=sys.stderr)
|
||||
continue
|
||||
if isinstance(arg.value, str):
|
||||
self.__addentry(arg.value, lineno)
|
||||
self.__curr_keyword = func_name
|
||||
self.__addentry({'msgid': arg.value}, lineno)
|
||||
|
||||
def __suiteseen(self, ttype, tstring, lineno):
|
||||
# skip over any enclosure pairs until we see the colon
|
||||
@ -413,7 +464,7 @@ class TokenEater:
|
||||
def __suitedocstring(self, ttype, tstring, lineno):
|
||||
# ignore any intervening noise
|
||||
if ttype == tokenize.STRING and is_literal_string(tstring):
|
||||
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
|
||||
self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True)
|
||||
self.__state = self.__waiting
|
||||
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
|
||||
tokenize.COMMENT):
|
||||
@ -422,44 +473,90 @@ class TokenEater:
|
||||
|
||||
def __keywordseen(self, ttype, tstring, lineno):
|
||||
if ttype == tokenize.OP and tstring == '(':
|
||||
self.__data = []
|
||||
self.__data.clear()
|
||||
self.__curr_arg = 0
|
||||
self.__enclosurecount = 0
|
||||
self.__lineno = lineno
|
||||
self.__state = self.__openseen
|
||||
else:
|
||||
self.__state = self.__waiting
|
||||
|
||||
def __openseen(self, ttype, tstring, lineno):
|
||||
if ttype == tokenize.OP and tstring == ')':
|
||||
# We've seen the last of the translatable strings. Record the
|
||||
# line number of the first line of the strings and update the list
|
||||
# of messages seen. Reset state for the next batch. If there
|
||||
# were no strings inside _(), then just ignore this entry.
|
||||
if self.__data:
|
||||
self.__addentry(EMPTYSTRING.join(self.__data))
|
||||
self.__state = self.__waiting
|
||||
elif ttype == tokenize.STRING and is_literal_string(tstring):
|
||||
self.__data.append(safe_eval(tstring))
|
||||
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
|
||||
token.NEWLINE, tokenize.NL]:
|
||||
# warn if we see anything else than STRING or whitespace
|
||||
print(_(
|
||||
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
|
||||
) % {
|
||||
'token': tstring,
|
||||
'file': self.__curfile,
|
||||
'lineno': self.__lineno
|
||||
}, file=sys.stderr)
|
||||
self.__state = self.__waiting
|
||||
spec = self.__options.keywords[self.__curr_keyword]
|
||||
arg_type = spec.get(self.__curr_arg)
|
||||
expect_string_literal = arg_type is not None
|
||||
|
||||
if ttype == tokenize.OP and self.__enclosurecount == 0:
|
||||
if tstring == ')':
|
||||
# We've seen the last of the translatable strings. Record the
|
||||
# line number of the first line of the strings and update the list
|
||||
# of messages seen. Reset state for the next batch. If there
|
||||
# were no strings inside _(), then just ignore this entry.
|
||||
if self.__data:
|
||||
self.__addentry(self.__data)
|
||||
self.__state = self.__waiting
|
||||
return
|
||||
elif tstring == ',':
|
||||
# Advance to the next argument
|
||||
self.__curr_arg += 1
|
||||
return
|
||||
|
||||
if expect_string_literal:
|
||||
if ttype == tokenize.STRING and is_literal_string(tstring):
|
||||
self.__data[arg_type] += safe_eval(tstring)
|
||||
elif ttype not in (tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT,
|
||||
tokenize.NEWLINE, tokenize.NL):
|
||||
# We are inside an argument which is a translatable string and
|
||||
# we encountered a token that is not a string. This is an error.
|
||||
self.warn_unexpected_token(tstring)
|
||||
self.__enclosurecount = 0
|
||||
self.__state = self.__waiting
|
||||
elif ttype == tokenize.OP:
|
||||
if tstring in '([{':
|
||||
self.__enclosurecount += 1
|
||||
elif tstring in ')]}':
|
||||
self.__enclosurecount -= 1
|
||||
|
||||
def __ignorenext(self, ttype, tstring, lineno):
|
||||
self.__state = self.__waiting
|
||||
|
||||
def __addentry(self, msg, lineno=None, isdocstring=0):
|
||||
def __addentry(self, msg, lineno=None, *, is_docstring=False):
|
||||
msgid = msg.get('msgid')
|
||||
if msgid in self.__options.toexclude:
|
||||
return
|
||||
if not is_docstring:
|
||||
spec = self.__options.keywords[self.__curr_keyword]
|
||||
if not matches_spec(msg, spec):
|
||||
return
|
||||
if lineno is None:
|
||||
lineno = self.__lineno
|
||||
if not msg in self.__options.toexclude:
|
||||
entry = (self.__curfile, lineno)
|
||||
self.__messages.setdefault(msg, {})[entry] = isdocstring
|
||||
msgctxt = msg.get('msgctxt')
|
||||
msgid_plural = msg.get('msgid_plural')
|
||||
key = key_for(msgid, msgctxt)
|
||||
if key in self.__messages:
|
||||
self.__messages[key].add_location(
|
||||
self.__curfile,
|
||||
lineno,
|
||||
msgid_plural,
|
||||
is_docstring=is_docstring,
|
||||
)
|
||||
else:
|
||||
self.__messages[key] = Message(
|
||||
msgid=msgid,
|
||||
msgid_plural=msgid_plural,
|
||||
msgctxt=msgctxt,
|
||||
locations={Location(self.__curfile, lineno)},
|
||||
is_docstring=is_docstring,
|
||||
)
|
||||
|
||||
def warn_unexpected_token(self, token):
    """Print a warning to stderr about an unexpected *token*.

    The warning names the current file and the line number stored on the
    instance, followed by the offending token text.
    """
    template = _(
        '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
    )
    details = {
        'token': token,
        'file': self.__curfile,
        'lineno': self.__lineno
    }
    print(template % details, file=sys.stderr)
|
||||
|
||||
def set_filename(self, filename):
|
||||
self.__curfile = filename
|
||||
@ -472,55 +569,54 @@ class TokenEater:
|
||||
print(pot_header % {'time': timestamp, 'version': __version__,
|
||||
'charset': encoding,
|
||||
'encoding': '8bit'}, file=fp)
|
||||
# Sort the entries. First sort each particular entry's keys, then
|
||||
# sort all the entries by their first item.
|
||||
reverse = {}
|
||||
for k, v in self.__messages.items():
|
||||
keys = sorted(v.keys())
|
||||
reverse.setdefault(tuple(keys), []).append((k, v))
|
||||
rkeys = sorted(reverse.keys())
|
||||
for rkey in rkeys:
|
||||
rentries = reverse[rkey]
|
||||
rentries.sort()
|
||||
for k, v in rentries:
|
||||
# If the entry was gleaned out of a docstring, then add a
|
||||
# comment stating so. This is to aid translators who may wish
|
||||
# to skip translating some unimportant docstrings.
|
||||
isdocstring = any(v.values())
|
||||
# k is the message string, v is a dictionary-set of (filename,
|
||||
# lineno) tuples. We want to sort the entries in v first by
|
||||
# file name and then by line number.
|
||||
v = sorted(v.keys())
|
||||
if not options.writelocations:
|
||||
pass
|
||||
|
||||
# Sort locations within each message by filename and lineno
|
||||
sorted_keys = [
|
||||
(key, sorted(msg.locations))
|
||||
for key, msg in self.__messages.items()
|
||||
]
|
||||
# Sort messages by locations
|
||||
# For example, a message with locations [('test.py', 1), ('test.py', 2)] will
|
||||
# appear before a message with locations [('test.py', 1), ('test.py', 3)]
|
||||
sorted_keys.sort(key=itemgetter(1))
|
||||
|
||||
for key, locations in sorted_keys:
|
||||
msg = self.__messages[key]
|
||||
if options.writelocations:
|
||||
# location comments are different b/w Solaris and GNU:
|
||||
elif options.locationstyle == options.SOLARIS:
|
||||
for filename, lineno in v:
|
||||
d = {'filename': filename, 'lineno': lineno}
|
||||
print(_(
|
||||
'# File: %(filename)s, line: %(lineno)d') % d, file=fp)
|
||||
if options.locationstyle == options.SOLARIS:
|
||||
for location in locations:
|
||||
print(f'# File: {location.filename}, line: {location.lineno}', file=fp)
|
||||
elif options.locationstyle == options.GNU:
|
||||
# fit as many locations on one line, as long as the
|
||||
# resulting line length doesn't exceed 'options.width'
|
||||
locline = '#:'
|
||||
for filename, lineno in v:
|
||||
d = {'filename': filename, 'lineno': lineno}
|
||||
s = _(' %(filename)s:%(lineno)d') % d
|
||||
for location in locations:
|
||||
s = f' {location.filename}:{location.lineno}'
|
||||
if len(locline) + len(s) <= options.width:
|
||||
locline = locline + s
|
||||
else:
|
||||
print(locline, file=fp)
|
||||
locline = "#:" + s
|
||||
locline = f'#:{s}'
|
||||
if len(locline) > 2:
|
||||
print(locline, file=fp)
|
||||
if isdocstring:
|
||||
print('#, docstring', file=fp)
|
||||
print('msgid', normalize(k, encoding), file=fp)
|
||||
if msg.is_docstring:
|
||||
# If the entry was gleaned out of a docstring, then add a
|
||||
# comment stating so. This is to aid translators who may wish
|
||||
# to skip translating some unimportant docstrings.
|
||||
print('#, docstring', file=fp)
|
||||
if msg.msgctxt is not None:
|
||||
print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
|
||||
print('msgid', normalize(msg.msgid, encoding), file=fp)
|
||||
if msg.msgid_plural is not None:
|
||||
print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
|
||||
print('msgstr[0] ""', file=fp)
|
||||
print('msgstr[1] ""\n', file=fp)
|
||||
else:
|
||||
print('msgstr ""\n', file=fp)
|
||||
|
||||
|
||||
def main():
|
||||
global default_keywords
|
||||
try:
|
||||
opts, args = getopt.getopt(
|
||||
sys.argv[1:],
|
||||
@ -557,7 +653,7 @@ def main():
|
||||
locations = {'gnu' : options.GNU,
|
||||
'solaris' : options.SOLARIS,
|
||||
}
|
||||
|
||||
no_default_keywords = False
|
||||
# parse options
|
||||
for opt, arg in opts:
|
||||
if opt in ('-h', '--help'):
|
||||
@ -573,7 +669,7 @@ def main():
|
||||
elif opt in ('-k', '--keyword'):
|
||||
options.keywords.append(arg)
|
||||
elif opt in ('-K', '--no-default-keywords'):
|
||||
default_keywords = []
|
||||
no_default_keywords = True
|
||||
elif opt in ('-n', '--add-location'):
|
||||
options.writelocations = 1
|
||||
elif opt in ('--no-location',):
|
||||
@ -613,7 +709,9 @@ def main():
|
||||
make_escapes(not options.escape)
|
||||
|
||||
# calculate all keywords
|
||||
options.keywords.extend(default_keywords)
|
||||
options.keywords = {kw: {0: 'msgid'} for kw in options.keywords}
|
||||
if not no_default_keywords:
|
||||
options.keywords |= DEFAULTKEYWORDS
|
||||
|
||||
# initialize list of strings to exclude
|
||||
if options.excludefilename:
|
||||
|
Loading…
Reference in New Issue
Block a user