9
9
10
10
from django.utils.deprecation import RemovedInDjango60Warning
11
11
from django.utils.encoding import punycode
12
-
from django.utils.functional import Promise, keep_lazy, keep_lazy_text
12
+
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
13
13
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
14
14
from django.utils.regex_helper import _lazy_re_compile
15
15
from django.utils.safestring import SafeData, SafeString, mark_safe
@@ -255,6 +255,16 @@ def unquote_quote(segment):
255
255
return urlunsplit((scheme, netloc, path, query, fragment))
256
256
257
257
258
+
class CountsDict(dict):
    """
    A dict that lazily counts occurrences of its keys in a fixed string.

    Looking up a key that is not stored yet computes ``word.count(key)``,
    stores the result under that key, and returns it. Stored values may be
    updated afterwards like in any other dict.
    """

    def __init__(self, *args, word, **kwargs):
        """
        Initialize the mapping like ``dict(*args, **kwargs)`` and remember
        the keyword-only `word` in which missing keys are counted.
        """
        # Keyword items must be forwarded with ** so they are stored as
        # initial entries rather than unpacked as positional arguments.
        super().__init__(*args, **kwargs)
        self.word = word

    def __missing__(self, key):
        """Count `key` in `self.word`, cache the count, and return it."""
        count = self[key] = self.word.count(key)
        return count
266
+
267
+
258
268
class Urlizer:
259
269
"""
260
270
Convert any URLs in text into clickable links.
@@ -360,40 +370,72 @@ def trim_url(self, x, *, limit):
360
370
return x
361
371
return "%s…" % x[: max(0, limit - 1)]
362
372
373
+
@cached_property
def wrapping_punctuation_openings(self):
    """
    Return a string made of the distinct opening characters found in
    `self.wrapping_punctuation`, in first-seen order.
    """
    # Building a dict from the (opening, closing) pairs de-duplicates the
    # openings; iterating a dict yields its keys.
    closing_by_opening = dict(self.wrapping_punctuation)
    return "".join(closing_by_opening)
376
+
377
+
@cached_property
def trailing_punctuation_chars_no_semicolon(self):
    """
    Return `self.trailing_punctuation_chars` with every ";" removed.
    """
    chars = self.trailing_punctuation_chars
    # Splitting on ";" and re-joining drops each semicolon and keeps the
    # remaining characters in their original order.
    return "".join(chars.split(";"))
380
+
381
+
@cached_property
def trailing_punctuation_chars_has_semicolon(self):
    """
    Return True if ";" is one of `self.trailing_punctuation_chars`.
    """
    chars = self.trailing_punctuation_chars
    return ";" in chars
384
+
363
385
def trim_punctuation(self, word):
    """
    Trim trailing and wrapping punctuation from `word`. Return the items of
    the new state.

    The result is a `(lead, middle, trail)` tuple of strings such that
    `lead + middle + trail == word`: `lead` holds the opening wrapping
    punctuation stripped from the front, `trail` holds the punctuation
    stripped from the end, and `middle` is what remains.
    """
    # Strip all opening wrapping punctuation.
    middle = word.lstrip(self.wrapping_punctuation_openings)
    lead = word[: len(word) - len(middle)]
    trail = ""

    # Continue trimming until middle remains unchanged.
    trimmed_something = True
    # Character counts are computed lazily, once per character, against
    # the initial middle and adjusted below as closers are trimmed.
    counts = CountsDict(word=middle)
    while trimmed_something and middle:
        trimmed_something = False
        # Trim wrapping punctuation: drop surplus closing characters from
        # the end so that the remaining ones are balanced by openings.
        for opening, closing in self.wrapping_punctuation:
            if counts[opening] < counts[closing]:
                rstripped = middle.rstrip(closing)
                if rstripped != middle:
                    # Never remove more than the run of closers actually
                    # present at the end; unbalanced closers further
                    # inside the word must stay in the middle.
                    strip = min(
                        counts[closing] - counts[opening],
                        len(middle) - len(rstripped),
                    )
                    # Prepend so punctuation trimmed on earlier passes
                    # is kept after the closers trimmed now.
                    trail = middle[-strip:] + trail
                    middle = middle[:-strip]
                    trimmed_something = True
                    counts[closing] -= strip

        # Trim trailing punctuation other than ";", which needs the
        # entity-aware handling below.
        rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)
        if rstripped != middle:
            trail = middle[len(rstripped) :] + trail
            middle = rstripped
            trimmed_something = True

        if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"):
            # Only strip if not part of an HTML entity.
            amp = middle.rfind("&")
            if amp == -1:
                can_strip = True
            else:
                potential_entity = middle[amp:]
                escaped = html.unescape(potential_entity)
                # Strippable when the text from the last "&" is not an
                # entity at all, or still ends with ";" once unescaped.
                can_strip = (escaped == potential_entity) or escaped.endswith(";")

            if can_strip:
                rstripped = middle.rstrip(";")
                amount_stripped = len(middle) - len(rstripped)
                if amp > -1 and amount_stripped > 1:
                    # Leave a trailing semicolon as might be an entity.
                    trail = middle[len(rstripped) + 1 :] + trail
                    middle = rstripped + ";"
                else:
                    trail = middle[len(rstripped) :] + trail
                    middle = rstripped
                trimmed_something = True

    return lead, middle, trail
398
440
399
441
@staticmethod
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4