A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/python/cpython/commit/4655d576141ee56a69d2052431c636858fcb916a below:

Fixes handling of pre-normalization characters in urlsplit() · python/cpython@4655d57 · GitHub

File tree (Expand file tree / Collapse file tree): 3 files changed

+14 -4 lines changed

Filter options

Expand file tree / Collapse file tree: 3 files changed

+14 -4 lines changed

Original file line number | Diff line number | Diff line change

@@ -987,6 +987,12 @@ def test_urlsplit_normalization(self):

987 987

self.assertIn('\u2100', denorm_chars)

988 988

self.assertIn('\uFF03', denorm_chars)

989 989 990 +

# bpo-36742: Verify port separators are ignored when they

991 +

# existed prior to decomposition

992 +

urllib.parse.urlsplit('http://\u30d5\u309a:80')

993 +

with self.assertRaises(ValueError):

994 +

urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')

995 + 990 996

for scheme in ["http", "https", "ftp"]:

991 997

for c in denorm_chars:

992 998

url = "{}://netloc{}false.netloc/path".format(scheme, c)

Original file line number Diff line number Diff line change

@@ -333,13 +333,16 @@ def _checknetloc(netloc):

333 333

# looking for characters like \u2100 that expand to 'a/c'

334 334

# IDNA uses NFKC equivalence, so normalize for this check

335 335

import unicodedata

336 -

netloc2 = unicodedata.normalize('NFKC', netloc)

337 -

if netloc == netloc2:

336 +

n = netloc.rpartition('@')[2] # ignore anything to the left of '@'

337 +

n = n.replace(':', '') # ignore characters already included

338 +

n = n.replace('#', '') # but not the surrounding text

339 +

n = n.replace('?', '')

340 +

netloc2 = unicodedata.normalize('NFKC', n)

341 +

if n == netloc2:

338 342

return

339 -

_, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay

340 343

for c in '/?#@:':

341 344

if c in netloc2:

342 -

raise ValueError("netloc '" + netloc2 + "' contains invalid " +

345 +

raise ValueError("netloc '" + netloc + "' contains invalid " +

343 346

"characters under NFKC normalization")

344 347 345 348

def urlsplit(url, scheme='', allow_fragments=True):

Original file line number Diff line number Diff line change

@@ -0,0 +1 @@

1 +

Fixes mishandling of pre-normalization characters in urlsplit().

You can’t perform that action at this time.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4