From cae9d9d20f61cdbde0765efa340b6b596c31b67f Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 14 Nov 2024 20:22:14 +0000 Subject: [PATCH] GH-126766: `url2pathname()`: handle empty authority section. (#126767) Discard two leading slashes from the beginning of a `file:` URI if they introduce an empty authority section. As a result, file URIs like `///etc/hosts` are correctly parsed as `/etc/hosts`. --- Lib/nturl2path.py | 7 +++---- Lib/test/test_urllib.py | 10 +++++----- Lib/urllib/request.py | 4 ++++ .../2024-11-12-21-43-12.gh-issue-126766.oi2KJ7.rst | 2 ++ 4 files changed, 14 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-12-21-43-12.gh-issue-126766.oi2KJ7.rst diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 9ecabff21c3..255eb2f547c 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -19,10 +19,9 @@ def url2pathname(url): url = url.replace(':', '|') if not '|' in url: # No drive specifier, just convert slashes - if url[:4] == '////': - # path is something like ////host/path/on/remote/host - # convert this to \\host\path\on\remote\host - # (notice halving of slashes at the start of the path) + if url[:3] == '///': + # URL has an empty authority section, so the path begins on the + # third character. url = url[2:] # make sure not to convert quoted slashes :-) return urllib.parse.unquote(url.replace('/', '\\')) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 66e948fc3a0..2c53ce3f99e 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1549,7 +1549,7 @@ class Pathname_Tests(unittest.TestCase): self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir') # Round-tripping urls = ['///C:', - '///folder/test/', + '/folder/test/', '///C:/foo/bar/spam.foo'] for url in urls: self.assertEqual(fn(urllib.request.url2pathname(url)), url) @@ -1573,7 +1573,7 @@ class Pathname_Tests(unittest.TestCase): self.assertEqual(fn('/C|//'), 'C:\\\\') self.assertEqual(fn('///C|/path'), 'C:\\path') # No DOS drive - self.assertEqual(fn("///C/test/"), '\\\\\\C\\test\\') + self.assertEqual(fn("///C/test/"), '\\C\\test\\') self.assertEqual(fn("////C/test/"), '\\\\C\\test\\') # DOS drive paths self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file') @@ -1597,7 +1597,7 @@ class Pathname_Tests(unittest.TestCase): self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar') # Round-tripping paths = ['C:', - r'\\\C\test\\', + r'\C\test\\', r'C:\foo\bar\spam.foo'] for path in paths: self.assertEqual(fn(urllib.request.pathname2url(path)), path) @@ -1608,8 +1608,8 @@ class Pathname_Tests(unittest.TestCase): fn = urllib.request.url2pathname self.assertEqual(fn('/foo/bar'), '/foo/bar') self.assertEqual(fn('//foo/bar'), '//foo/bar') - self.assertEqual(fn('///foo/bar'), '///foo/bar') - self.assertEqual(fn('////foo/bar'), '////foo/bar') + self.assertEqual(fn('///foo/bar'), '/foo/bar') + self.assertEqual(fn('////foo/bar'), '//foo/bar') self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar') class Utility_Tests(unittest.TestCase): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index bc35d8a80e5..18a837dd57e 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1656,6 +1656,10 @@ else: def url2pathname(pathname): """OS-specific conversion from a relative URL of the 'file' scheme to a file system path; not recommended for general use.""" + if pathname[:3] == '///': + # URL has an empty authority section, so the path begins on the + # third character. + pathname = pathname[2:] return unquote(pathname) def pathname2url(pathname): diff --git a/Misc/NEWS.d/next/Library/2024-11-12-21-43-12.gh-issue-126766.oi2KJ7.rst b/Misc/NEWS.d/next/Library/2024-11-12-21-43-12.gh-issue-126766.oi2KJ7.rst new file mode 100644 index 00000000000..e3936305164 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-12-21-43-12.gh-issue-126766.oi2KJ7.rst @@ -0,0 +1,2 @@ +Fix issue where :func:`urllib.request.url2pathname` failed to discard two +leading slashes introducing an empty authority section.