bpo-27827: identify a greater range of reserved filenames on Windows. (GH-26698)

`pathlib.PureWindowsPath.is_reserved()` now identifies filenames with
trailing spaces or colons as reserved.

Co-authored-by: Barney Gale <barney.gale@foundry.com>
Co-authored-by: Eryk Sun <eryksun@gmail.com>
This commit is contained in:
Barney Gale 2021-07-28 15:28:14 +01:00 committed by GitHub
parent 531e2fbc52
commit 56c1f6d7ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 22 deletions

View File

@ -124,16 +124,25 @@ class _WindowsFlavour(_Flavour):
ext_namespace_prefix = '\\\\?\\' ext_namespace_prefix = '\\\\?\\'
reserved_names = ( reserved_names = (
{'CON', 'PRN', 'AUX', 'NUL'} | {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
{'COM%d' % i for i in range(1, 10)} | {'COM%s' % c for c in '123456789\xb9\xb2\xb3'} |
{'LPT%d' % i for i in range(1, 10)} {'LPT%s' % c for c in '123456789\xb9\xb2\xb3'}
) )
# Interesting findings about extended paths: # Interesting findings about extended paths:
# - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported # * '\\?\c:\a' is an extended path, which bypasses normal Windows API
# but '\\?\c:/a' is not # path processing. Thus relative paths are not resolved and slash is not
# - extended paths are always absolute; "relative" extended paths will # translated to backslash. It has the native NT path limit of 32767
# fail. # characters, but a bit less after resolving device symbolic links,
# such as '\??\C:' => '\Device\HarddiskVolume2'.
# * '\\?\c:/a' looks for a device named 'C:/a' because slash is a
# regular name character in the object namespace.
# * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems.
# The only path separator at the filesystem level is backslash.
# * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and
# thus limited to MAX_PATH.
# * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH,
# even with the '\\?\' prefix.
def splitroot(self, part, sep=sep): def splitroot(self, part, sep=sep):
first = part[0:1] first = part[0:1]
@ -195,15 +204,16 @@ class _WindowsFlavour(_Flavour):
def is_reserved(self, parts): def is_reserved(self, parts):
# NOTE: the rules for reserved names seem somewhat complicated # NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL"). # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# We err on the side of caution and return True for paths which are # exist). We err on the side of caution and return True for paths
# not considered reserved by Windows. # which are not considered reserved by Windows.
if not parts: if not parts:
return False return False
if parts[0].startswith('\\\\'): if parts[0].startswith('\\\\'):
# UNC paths are never reserved # UNC paths are never reserved
return False return False
return parts[-1].partition('.')[0].upper() in self.reserved_names name = parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in self.reserved_names
def make_uri(self, path): def make_uri(self, path):
# Under Windows, file URIs use the UTF-8 encoding. # Under Windows, file URIs use the UTF-8 encoding.

View File

@ -1282,19 +1282,35 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase):
self.assertIs(False, P('').is_reserved()) self.assertIs(False, P('').is_reserved())
self.assertIs(False, P('/').is_reserved()) self.assertIs(False, P('/').is_reserved())
self.assertIs(False, P('/foo/bar').is_reserved()) self.assertIs(False, P('/foo/bar').is_reserved())
self.assertIs(True, P('con').is_reserved())
self.assertIs(True, P('NUL').is_reserved())
self.assertIs(True, P('NUL.txt').is_reserved())
self.assertIs(True, P('com1').is_reserved())
self.assertIs(True, P('com9.bar').is_reserved())
self.assertIs(False, P('bar.com9').is_reserved())
self.assertIs(True, P('lpt1').is_reserved())
self.assertIs(True, P('lpt9.bar').is_reserved())
self.assertIs(False, P('bar.lpt9').is_reserved())
# Only the last component matters.
self.assertIs(False, P('c:/NUL/con/baz').is_reserved())
# UNC paths are never reserved. # UNC paths are never reserved.
self.assertIs(False, P('//my/share/nul/con/aux').is_reserved()) self.assertIs(False, P('//my/share/nul/con/aux').is_reserved())
# Case-insensitive DOS-device names are reserved.
self.assertIs(True, P('nul').is_reserved())
self.assertIs(True, P('aux').is_reserved())
self.assertIs(True, P('prn').is_reserved())
self.assertIs(True, P('con').is_reserved())
self.assertIs(True, P('conin$').is_reserved())
self.assertIs(True, P('conout$').is_reserved())
# COM/LPT + 1-9 or + superscript 1-3 are reserved.
self.assertIs(True, P('COM1').is_reserved())
self.assertIs(True, P('LPT9').is_reserved())
self.assertIs(True, P('com\xb9').is_reserved())
self.assertIs(True, P('com\xb2').is_reserved())
self.assertIs(True, P('lpt\xb3').is_reserved())
# DOS-device name matching ignores characters after a dot or
# a colon and also ignores trailing spaces.
self.assertIs(True, P('NUL.txt').is_reserved())
self.assertIs(True, P('PRN ').is_reserved())
self.assertIs(True, P('AUX .txt').is_reserved())
self.assertIs(True, P('COM1:bar').is_reserved())
self.assertIs(True, P('LPT9 :bar').is_reserved())
# DOS-device names are only matched at the beginning
# of a path component.
self.assertIs(False, P('bar.com9').is_reserved())
self.assertIs(False, P('bar.lpt9').is_reserved())
# Only the last path component matters.
self.assertIs(True, P('c:/baz/con/NUL').is_reserved())
self.assertIs(False, P('c:/NUL/con/baz').is_reserved())
class PurePathTest(_BasePurePathTest, unittest.TestCase): class PurePathTest(_BasePurePathTest, unittest.TestCase):
cls = pathlib.PurePath cls = pathlib.PurePath

View File

@ -0,0 +1,2 @@
:meth:`pathlib.PureWindowsPath.is_reserved` now identifies a greater range of
reserved filenames, including those with trailing spaces or colons.