cpython/Lib/nturl2path.py
Barney Gale 54c63a32d0
GH-126212: Fix removal of slashes in file URIs on Windows (#126214)
Adjust `urllib.request.pathname2url()` and `url2pathname()` so that they
don't remove slashes from Windows DOS drive paths and URLs. There was no
basis for this behaviour, and it conflicts with how UNC and POSIX paths are
handled.
2024-11-08 16:47:51 +00:00

64 lines
2.2 KiB
Python

"""Convert a NT pathname to a file URL and vice versa.
This module only exists to provide OS-specific code
for urllib.request, thus do not use directly.
"""
# Testing is done through test_urllib.
def url2pathname(url):
    """OS-specific conversion from a relative URL of the 'file' scheme
    to a file system path; not recommended for general use.

    Forward slashes become backslashes and percent-escapes are decoded.
    Both ':' and the older '|' are accepted as the drive separator.  A URL
    without a drive separator that starts with four slashes has its first
    two slashes dropped, leaving a UNC-style path.

    Raises OSError if the URL contains more than one drive separator, or
    if the separator is not immediately preceded by an ASCII letter.
    """
    # e.g.
    #   ///C|/foo/bar/spam.foo
    # and
    #   ///C:/foo/bar/spam.foo
    # become
    #   C:\foo\bar\spam.foo
    import string
    import urllib.parse

    # Windows itself uses ":" even in URLs, so fold it into the "|" form
    # and handle a single separator from here on.  The caller's URL is kept
    # untouched in `url` so that error messages report what was passed in.
    normalized = url.replace(':', '|')
    if '|' not in normalized:
        # No drive specifier, just convert slashes
        if normalized[:4] == '////':
            # path is something like ////host/path/on/remote/host
            # convert this to \\host\path\on\remote\host
            # (notice halving of slashes at the start of the path)
            normalized = normalized[2:]
        # Replace slashes before unquoting so that quoted slashes (%2F)
        # are not converted.
        return urllib.parse.unquote(normalized.replace('/', '\\'))
    comp = normalized.split('|')
    # An empty prefix (e.g. "|/foo") must be rejected explicitly: indexing
    # it would raise IndexError instead of the documented OSError.
    if (len(comp) != 2 or not comp[0]
            or comp[0][-1] not in string.ascii_letters):
        raise OSError('Bad URL: ' + url)
    # Only the character directly before the separator is the drive letter;
    # anything earlier (such as the leading slashes) is discarded.
    drive = comp[0][-1].upper()
    tail = urllib.parse.unquote(comp[1].replace('/', '\\'))
    return drive + ':' + tail
def pathname2url(p):
    """Turn a Windows file system path into a relative 'file' scheme URL.

    This is OS-specific glue and is not recommended for general use.
    Backslashes become forward slashes and the result is percent-quoted.
    A path with a drive comes back prefixed with three slashes; a path
    without one is only slash-converted and quoted.

    Raises OSError if the path has more than one colon, if more than one
    character precedes the colon, or if a path starting with the
    backslash-backslash-question-mark-backslash prefix is followed by
    neither 'UNC' plus a backslash nor a drive.
    """
    # For example
    #   C:\foo\bar\spam.foo
    # is returned as
    #   ///C:/foo/bar/spam.foo
    import urllib.parse

    quote = urllib.parse.quote

    # Strip the special \\?\ prefix first; the extra information it
    # carries cannot be represented in the URL anyway.
    if p.startswith('\\\\?\\'):
        remainder = p[4:]
        if remainder[:4].upper() == 'UNC\\':
            # \\?\UNC\server\share  ->  \\server\share
            p = '\\\\' + remainder[4:]
        elif remainder[1:2] == ':':
            # \\?\C:\dir  ->  C:\dir
            p = remainder
        else:
            raise OSError('Bad path: ' + remainder)

    drive, colon, rest = p.partition(':')
    if not colon:
        # No drive at all: slash conversion and quoting is all that is needed
        return quote(p.replace('\\', '/'))
    if ':' in rest or len(drive) > 1:
        # Either a second colon or more than one character before the colon
        raise OSError('Bad path: ' + p)

    quoted_drive = quote(drive.upper())
    quoted_rest = quote(rest.replace('\\', '/'))
    return '///' + quoted_drive + ':' + quoted_rest