From 60cffa265537edb86352dc386ff54dd56e83bd92 Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Sun, 28 Jun 2026 14:20:11 +0200 Subject: [PATCH] fix: decode percent-encoded unreserved chars before resolving dot segments in normalize RFC 3986 Section 6.2.2.2 recommends decoding percent-encoded unreserved characters, and Section 6.2.2.3 recommends removing dot segments. The RFC does not mandate an order between the two, but decoding has to happen first for the result to be fully normalized, because a segment such as %2e%2e only becomes a dot segment once it is decoded. The previous order (resolve-then-decode) meant that %2e%2e was not recognized as ".." during path normalization, producing incorrect results like http://example.com/../foo instead of http://example.com/foo. --- src/hyperlink/_url.py | 6 +++--- src/hyperlink/test/test_url.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/hyperlink/_url.py b/src/hyperlink/_url.py index 8797b5cc..a43a14ae 100644 --- a/src/hyperlink/_url.py +++ b/src/hyperlink/_url.py @@ -1512,9 +1512,9 @@ def _dec_unres(target): if path: if self.path: - kw["path"] = [ - _dec_unres(p) for p in _resolve_dot_segments(self.path) - ] + kw["path"] = _resolve_dot_segments( + [_dec_unres(p) for p in self.path] + ) else: kw["path"] = (u"",) if query: diff --git a/src/hyperlink/test/test_url.py b/src/hyperlink/test/test_url.py index 37c91726..22cd5312 100644 --- a/src/hyperlink/test/test_url.py +++ b/src/hyperlink/test/test_url.py @@ -1461,6 +1461,27 @@ def test_normalize(self): == "ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25" ) + def test_normalize_percent_encoded_dot_segments(self): + # type: () -> None + # RFC 3986 Section 6.2.2.2: percent-encoded unreserved characters + # should be decoded BEFORE resolving dot segments. + # %2e = '.' (unreserved), so %2e%2e should become '..' and + # be resolved to the parent directory. 
+ url = URL.from_text("http://example.com/%2e%2e/foo") + norm = url.normalize() + assert norm.path == ("foo",) + assert norm.to_text() == "http://example.com/foo" + + url2 = URL.from_text("http://example.com/%2e/foo") + norm2 = url2.normalize() + assert norm2.path == ("foo",) + assert norm2.to_text() == "http://example.com/foo" + + url3 = URL.from_text("http://example.com/foo/%2e%2e/bar") + norm3 = url3.normalize() + assert norm3.path == ("bar",) + assert norm3.to_text() == "http://example.com/bar" + def test_str(self): # type: () -> None