mirror of
https://github.com/python/cpython.git
synced 2024-11-23 18:04:37 +08:00
Create os.fsdecode(): decode from the filesystem encoding with surrogateescape
error handler, or strict error handler on Windows. * Rewrite os.fsencode() documentation * Improve os.fsencode() and os.fsdecode() tests using the new PYTHONFSENCODING environment variable
This commit is contained in:
parent
dbe6042f0a
commit
e8d5145e18
@ -155,13 +155,26 @@ process and user.
|
||||
These functions are described in :ref:`os-file-dir`.
|
||||
|
||||
|
||||
.. function:: fsencode(value)
|
||||
.. function:: fsencode(filename)
|
||||
|
||||
Encode *value* to bytes for use in the file system, environment variables or
|
||||
the command line. Use :func:`sys.getfilesystemencoding` and
|
||||
``'surrogateescape'`` error handler for strings and return bytes unchanged.
|
||||
On Windows, use ``'strict'`` error handler for strings if the file system
|
||||
encoding is ``'mbcs'`` (which is the default encoding).
|
||||
Encode *filename* to the filesystem encoding with ``'surrogateescape'``
|
||||
error handler, or return :class:`bytes` unchanged. On Windows, use ``'strict'``
|
||||
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
|
||||
encoding).
|
||||
|
||||
:func:`fsdecode` is the reverse function.
|
||||
|
||||
.. versionadded:: 3.2
|
||||
|
||||
|
||||
.. function:: fsdecode(filename)
|
||||
|
||||
Decode *filename* from the filesystem encoding with ``'surrogateescape'``
|
||||
error handler, or return :class:`str` unchanged. On Windows, use ``'strict'``
|
||||
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
|
||||
encoding).
|
||||
|
||||
:func:`fsencode` is the reverse function.
|
||||
|
||||
.. versionadded:: 3.2
|
||||
|
||||
|
@ -237,13 +237,16 @@ Major performance enhancements have been added:
|
||||
* Stub
|
||||
|
||||
|
||||
Unicode
|
||||
=======
|
||||
Filenames and Unicode
|
||||
=====================
|
||||
|
||||
The filesystem encoding can be specified by setting the
|
||||
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
|
||||
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
|
||||
|
||||
The :mod:`os` module has two new functions: :func:`os.fsencode` and
|
||||
:func:`os.fsdecode`.
|
||||
|
||||
|
||||
IDLE
|
||||
====
|
||||
|
41
Lib/os.py
41
Lib/os.py
@ -402,8 +402,7 @@ def get_exec_path(env=None):
|
||||
path_list = path_listb
|
||||
|
||||
if path_list is not None and isinstance(path_list, bytes):
|
||||
path_list = path_list.decode(sys.getfilesystemencoding(),
|
||||
'surrogateescape')
|
||||
path_list = fsdecode(path_list)
|
||||
|
||||
if path_list is None:
|
||||
path_list = defpath
|
||||
@ -536,19 +535,39 @@ if supports_bytes_environ:
|
||||
|
||||
__all__.extend(("environb", "getenvb"))
|
||||
|
||||
def fsencode(value):
|
||||
"""Encode value for use in the file system, environment variables
|
||||
or the command line."""
|
||||
if isinstance(value, bytes):
|
||||
return value
|
||||
elif isinstance(value, str):
|
||||
def fsencode(filename):
|
||||
"""
|
||||
Encode filename to the filesystem encoding with 'surrogateescape' error
|
||||
handler, return bytes unchanged. On Windows, use 'strict' error handler if
|
||||
the file system encoding is 'mbcs' (which is the default encoding).
|
||||
"""
|
||||
if isinstance(filename, bytes):
|
||||
return filename
|
||||
elif isinstance(filename, str):
|
||||
encoding = sys.getfilesystemencoding()
|
||||
if encoding == 'mbcs':
|
||||
return value.encode(encoding)
|
||||
return filename.encode(encoding)
|
||||
else:
|
||||
return value.encode(encoding, 'surrogateescape')
|
||||
return filename.encode(encoding, 'surrogateescape')
|
||||
else:
|
||||
raise TypeError("expect bytes or str, not %s" % type(value).__name__)
|
||||
raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
|
||||
|
||||
def fsdecode(filename):
|
||||
"""
|
||||
Decode filename from the filesystem encoding with 'surrogateescape' error
|
||||
handler, return str unchanged. On Windows, use 'strict' error handler if
|
||||
the file system encoding is 'mbcs' (which is the default encoding).
|
||||
"""
|
||||
if isinstance(filename, str):
|
||||
return filename
|
||||
elif isinstance(filename, bytes):
|
||||
encoding = sys.getfilesystemencoding()
|
||||
if encoding == 'mbcs':
|
||||
return filename.decode(encoding)
|
||||
else:
|
||||
return filename.decode(encoding, 'surrogateescape')
|
||||
else:
|
||||
raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
|
||||
|
||||
def _exists(name):
|
||||
return name in globals()
|
||||
|
@ -897,14 +897,6 @@ if sys.platform != 'win32':
|
||||
|
||||
class Pep383Tests(unittest.TestCase):
|
||||
def setUp(self):
|
||||
def fsdecode(filename):
|
||||
encoding = sys.getfilesystemencoding()
|
||||
if encoding == 'mbcs':
|
||||
errors = 'strict'
|
||||
else:
|
||||
errors = 'surrogateescape'
|
||||
return filename.decode(encoding, errors)
|
||||
|
||||
if support.TESTFN_UNENCODABLE:
|
||||
self.dir = support.TESTFN_UNENCODABLE
|
||||
else:
|
||||
@ -930,7 +922,7 @@ if sys.platform != 'win32':
|
||||
for fn in bytesfn:
|
||||
f = open(os.path.join(self.bdir, fn), "w")
|
||||
f.close()
|
||||
fn = fsdecode(fn)
|
||||
fn = os.fsdecode(fn)
|
||||
if fn in self.unicodefn:
|
||||
raise ValueError("duplicate filename")
|
||||
self.unicodefn.add(fn)
|
||||
@ -1139,12 +1131,43 @@ class Win32SymlinkTests(unittest.TestCase):
|
||||
self.assertNotEqual(os.lstat(link), os.stat(link))
|
||||
|
||||
|
||||
class MiscTests(unittest.TestCase):
|
||||
class FSEncodingTests(unittest.TestCase):
|
||||
def test_nop(self):
|
||||
self.assertEquals(os.fsencode(b'abc\xff'), b'abc\xff')
|
||||
self.assertEquals(os.fsdecode('abc\u0141'), 'abc\u0141')
|
||||
|
||||
@unittest.skipIf(os.name == "nt", "POSIX specific test")
|
||||
def test_fsencode(self):
|
||||
self.assertEquals(os.fsencode(b'ab\xff'), b'ab\xff')
|
||||
self.assertEquals(os.fsencode('ab\uDCFF'), b'ab\xff')
|
||||
def test_identity(self):
|
||||
# assert fsdecode(fsencode(x)) == x
|
||||
for fn in ('unicode\u0141', 'latin\xe9', 'ascii'):
|
||||
try:
|
||||
bytesfn = os.fsencode(fn)
|
||||
except UnicodeEncodeError:
|
||||
continue
|
||||
self.assertEquals(os.fsdecode(bytesfn), fn)
|
||||
|
||||
def get_output(self, fs_encoding, func):
|
||||
env = os.environ.copy()
|
||||
env['PYTHONIOENCODING'] = 'utf-8'
|
||||
env['PYTHONFSENCODING'] = fs_encoding
|
||||
code = 'import os; print(%s, end="")' % func
|
||||
process = subprocess.Popen(
|
||||
[sys.executable, "-c", code],
|
||||
stdout=subprocess.PIPE, env=env)
|
||||
stdout, stderr = process.communicate()
|
||||
self.assertEqual(process.returncode, 0)
|
||||
return stdout.decode('utf-8')
|
||||
|
||||
def test_encodings(self):
|
||||
def check(encoding, bytesfn, unicodefn):
|
||||
encoded = self.get_output(encoding, 'repr(os.fsencode(%a))' % unicodefn)
|
||||
self.assertEqual(encoded, repr(bytesfn))
|
||||
|
||||
decoded = self.get_output(encoding, 'repr(os.fsdecode(%a))' % bytesfn)
|
||||
self.assertEqual(decoded, repr(unicodefn))
|
||||
|
||||
check('ascii', b'abc\xff', 'abc\udcff')
|
||||
check('utf-8', b'\xc3\xa9\x80', '\xe9\udc80')
|
||||
check('iso-8859-15', b'\xef\xa4', '\xef\u20ac')
|
||||
|
||||
|
||||
def test_main():
|
||||
@ -1163,7 +1186,7 @@ def test_main():
|
||||
Pep383Tests,
|
||||
Win32KillTests,
|
||||
Win32SymlinkTests,
|
||||
MiscTests,
|
||||
FSEncodingTests,
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -116,6 +116,9 @@ Extensions
|
||||
Library
|
||||
-------
|
||||
|
||||
- Create os.fsdecode(): decode from the filesystem encoding with
|
||||
surrogateescape error handler, or strict error handler on Windows.
|
||||
|
||||
- Issue #3488: Provide convenient shorthand functions ``gzip.compress``
|
||||
and ``gzip.decompress``. Original patch by Anand B. Pillai.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user