From 1317b70f89606bd14597116b7ab68a968ea6c017 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 22 Apr 2022 10:39:24 +0900 Subject: [PATCH] gh-91156: Use `locale.getencoding()` instead of getpreferredencoding (GH-91732) Co-authored-by: Victor Stinner --- Doc/howto/curses.rst | 3 +-- Doc/library/csv.rst | 2 +- Doc/library/curses.rst | 18 ++---------------- Doc/library/functions.rst | 11 +++++------ Doc/library/os.rst | 6 +++--- Lib/test/libregrtest/main.py | 3 +-- Lib/test/pythoninfo.py | 2 +- Lib/test/support/__init__.py | 2 +- Lib/test/test__locale.py | 2 +- Lib/test/test_builtin.py | 2 +- Lib/test/test_cmd_line.py | 2 +- Lib/test/test_io.py | 2 +- Lib/test/test_locale.py | 10 +++++++++- Lib/test/test_mimetypes.py | 8 +------- 14 files changed, 29 insertions(+), 44 deletions(-) diff --git a/Doc/howto/curses.rst b/Doc/howto/curses.rst index c0149ffff37..26c4ece5ae6 100644 --- a/Doc/howto/curses.rst +++ b/Doc/howto/curses.rst @@ -299,8 +299,7 @@ The :meth:`~curses.window.addstr` method takes a Python string or bytestring as the value to be displayed. The contents of bytestrings are sent to the terminal as-is. Strings are encoded to bytes using the value of the window's :attr:`encoding` attribute; this defaults to -the default system encoding as returned by -:func:`locale.getpreferredencoding`. +the default system encoding as returned by :func:`locale.getencoding`. The :meth:`~curses.window.addch` methods take a character, which can be either a string of length 1, a bytestring of length 1, or an integer. diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 3a7817cfdfa..9dec7240d9c 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -542,7 +542,7 @@ The corresponding simplest possible writing example is:: Since :func:`open` is used to open a CSV file for reading, the file will by default be decoded into unicode using the system default -encoding (see :func:`locale.getpreferredencoding`). 
To decode a file +encoding (see :func:`locale.getencoding`). To decode a file using a different encoding, use the ``encoding`` argument of open:: import csv diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index 37e822c0e2b..a7cc4952778 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -27,20 +27,6 @@ Linux and the BSD variants of Unix. Whenever the documentation mentions a *character string* it can be specified as a Unicode string or a byte string. -.. note:: - - Since version 5.4, the ncurses library decides how to interpret non-ASCII data - using the ``nl_langinfo`` function. That means that you have to call - :func:`locale.setlocale` in the application and encode Unicode strings - using one of the system's available encodings. This example uses the - system's default encoding:: - - import locale - locale.setlocale(locale.LC_ALL, '') - code = locale.getpreferredencoding() - - Then use *code* as the encoding for :meth:`str.encode` calls. - .. seealso:: Module :mod:`curses.ascii` @@ -923,8 +909,8 @@ the following methods and attributes: Encoding used to encode method arguments (Unicode strings and characters). The encoding attribute is inherited from the parent window when a subwindow - is created, for example with :meth:`window.subwin`. By default, the locale - encoding is used (see :func:`locale.getpreferredencoding`). + is created, for example with :meth:`window.subwin`. + By default, current locale encoding is used (see :func:`locale.getencoding`). .. versionadded:: 3.3 diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index e6fd0bb5eee..f3b8e40babb 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1123,8 +1123,8 @@ are always available. They are listed here in alphabetical order. (which on *some* Unix systems, means that *all* writes append to the end of the file regardless of the current seek position). 
In text mode, if *encoding* is not specified the encoding used is platform-dependent: - ``locale.getpreferredencoding(False)`` is called to get the current locale - encoding. (For reading and writing raw bytes use binary mode and leave + :func:`locale.getencoding()` is called to get the current locale encoding. + (For reading and writing raw bytes use binary mode and leave *encoding* unspecified.) The available modes are: .. _filemodes: @@ -1183,10 +1183,9 @@ are always available. They are listed here in alphabetical order. *encoding* is the name of the encoding used to decode or encode the file. This should only be used in text mode. The default encoding is platform - dependent (whatever :func:`locale.getpreferredencoding` returns), but any - :term:`text encoding` supported by Python - can be used. See the :mod:`codecs` module for - the list of supported encodings. + dependent (whatever :func:`locale.getencoding` returns), but any + :term:`text encoding` supported by Python can be used. + See the :mod:`codecs` module for the list of supported encodings. *errors* is an optional string that specifies how encoding and decoding errors are to be handled—this cannot be used in binary mode. diff --git a/Doc/library/os.rst b/Doc/library/os.rst index c22bf56a9f2..471890e74c8 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -105,15 +105,15 @@ of the UTF-8 encoding: * Use UTF-8 as the :term:`filesystem encoding <filesystem encoding and error handler>`. -* :func:`sys.getfilesystemencoding()` returns ``'UTF-8'``. -* :func:`locale.getpreferredencoding()` returns ``'UTF-8'`` (the *do_setlocale* +* :func:`sys.getfilesystemencoding()` returns ``'utf-8'``. +* :func:`locale.getpreferredencoding()` returns ``'utf-8'`` (the *do_setlocale* argument has no effect). 
* :data:`sys.stdin`, :data:`sys.stdout`, and :data:`sys.stderr` all use UTF-8 as their text encoding, with the ``surrogateescape`` :ref:`error handler <error_handlers>` being enabled for :data:`sys.stdin` and :data:`sys.stdout` (:data:`sys.stderr` continues to use ``backslashreplace`` as it does in the default locale-aware mode) -* On Unix, :func:`os.device_encoding` returns ``'UTF-8'`` rather than the +* On Unix, :func:`os.device_encoding` returns ``'utf-8'`` rather than the device encoding. Note that the standard stream settings in UTF-8 mode can be overridden by diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index e7e3dde0b0a..0cacccfc0b5 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -482,8 +482,7 @@ class Regrtest: if cpu_count: print("== CPU count:", cpu_count) print("== encodings: locale=%s, FS=%s" - % (locale.getpreferredencoding(False), - sys.getfilesystemencoding())) + % (locale.getencoding(), sys.getfilesystemencoding())) def get_tests_result(self): result = [] diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py index b00830c279e..39301e6397a 100644 --- a/Lib/test/pythoninfo.py +++ b/Lib/test/pythoninfo.py @@ -155,7 +155,7 @@ def collect_platform(info_add): def collect_locale(info_add): import locale - info_add('locale.encoding', locale.getpreferredencoding(False)) + info_add('locale.getencoding', locale.getencoding()) def collect_builtins(info_add): diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index c5666d66f47..3b2f33979db 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1445,7 +1445,7 @@ def skip_if_buggy_ucrt_strfptime(test): global _buggy_ucrt if _buggy_ucrt is None: if(sys.platform == 'win32' and - locale.getpreferredencoding(False) == 'cp65001' and + locale.getencoding() == 'cp65001' and time.localtime().tm_zone == ''): _buggy_ucrt = True else: diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 
e25c92c2c82..b3bc54cd551 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -43,7 +43,7 @@ def setUpModule(): locale.setlocale(locale.LC_ALL, loc) except Error: continue - encoding = locale.getpreferredencoding(False) + encoding = locale.getencoding() try: localeconv() except Exception as err: diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index a601a524d6e..29039230201 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1204,7 +1204,7 @@ class BuiltinTest(unittest.TestCase): del os.environ[key] self.write_testfile() - current_locale_encoding = locale.getpreferredencoding(False) + current_locale_encoding = locale.getencoding() with warnings.catch_warnings(): warnings.simplefilter("ignore", EncodingWarning) fp = open(TESTFN, 'w') diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 84eab71f977..e8f1964c2a4 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -216,7 +216,7 @@ class CmdLineTest(unittest.TestCase): code = ( b'import locale; ' b'print(ascii("' + undecodable + b'"), ' - b'locale.getpreferredencoding())') + b'locale.getencoding())') p = subprocess.Popen( [sys.executable, "-c", code], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 45bf81b61f4..5528c461e58 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2726,7 +2726,7 @@ class TextIOWrapperTest(unittest.TestCase): if key in os.environ: del os.environ[key] - current_locale_encoding = locale.getpreferredencoding(False) + current_locale_encoding = locale.getencoding() b = self.BytesIO() with warnings.catch_warnings(): warnings.simplefilter("ignore", EncodingWarning) diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 774b0fcd333..5cb6edc52d7 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -363,7 +363,7 @@ class TestEnUSCollation(BaseLocalizedTest, TestCollation): locale_type = locale.LC_ALL def 
setUp(self): - enc = codecs.lookup(locale.getpreferredencoding(False) or 'ascii').name + enc = codecs.lookup(locale.getencoding() or 'ascii').name if enc not in ('utf-8', 'iso8859-1', 'cp1252'): raise unittest.SkipTest('encoding not suitable') if enc != 'iso8859-1' and (sys.platform == 'darwin' or is_android or @@ -533,6 +533,14 @@ class TestMiscellaneous(unittest.TestCase): if orig_getlocale is not None: _locale._getdefaultlocale = orig_getlocale + def test_getencoding(self): + # Invoke getencoding to make sure it does not cause exceptions. + enc = locale.getencoding() + self.assertIsInstance(enc, str) + self.assertNotEqual(enc, "") + # make sure it is valid + codecs.lookup(enc) + def test_getpreferredencoding(self): # Invoke getpreferredencoding to make sure it does not cause exceptions. enc = locale.getpreferredencoding() diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 3477b18376a..f2b103693a9 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -1,5 +1,4 @@ import io -import locale import mimetypes import pathlib import sys @@ -33,7 +32,7 @@ def tearDownModule(): class MimeTypesTestCase(unittest.TestCase): def setUp(self): self.db = mimetypes.MimeTypes() - + def test_case_sensitivity(self): eq = self.assertEqual eq(self.db.guess_type("foobar.HTML"), self.db.guess_type("foobar.html")) @@ -145,11 +144,6 @@ class MimeTypesTestCase(unittest.TestCase): self.assertNotIn('.no-such-ext', all) def test_encoding(self): - getpreferredencoding = locale.getpreferredencoding - self.addCleanup(setattr, locale, 'getpreferredencoding', - getpreferredencoding) - locale.getpreferredencoding = lambda: 'ascii' - filename = support.findfile("mime.types") mimes = mimetypes.MimeTypes([filename]) exts = mimes.guess_all_extensions('application/vnd.geocube+xml',