+80
-11
lines changed Filter options
+80
-11
lines changed Original file line number Diff line number Diff line change
@@ -60,7 +60,14 @@ def _generator():
60
60
class Truncator(SimpleLazyObject):
61
61
"""
62
62
An object used to truncate text, either by characters or words.
63
+
64
+
When truncating HTML text (either chars or words), input will be limited to
65
+
at most `MAX_LENGTH_HTML` characters.
63
66
"""
67
+
68
+
# 5 million characters are approximately 4000 text pages or 3 web pages.
69
+
MAX_LENGTH_HTML = 5_000_000
70
+
64
71
def __init__(self, text):
65
72
super().__init__(lambda: str(text))
66
73
@@ -157,6 +164,11 @@ def _truncate_html(self, length, truncate, text, truncate_len, words):
157
164
if words and length <= 0:
158
165
return ''
159
166
167
+
size_limited = False
168
+
if len(text) > self.MAX_LENGTH_HTML:
169
+
text = text[: self.MAX_LENGTH_HTML]
170
+
size_limited = True
171
+
160
172
html4_singlets = (
161
173
'br', 'col', 'link', 'base', 'img',
162
174
'param', 'area', 'hr', 'input'
@@ -206,10 +218,14 @@ def _truncate_html(self, length, truncate, text, truncate_len, words):
206
218
# Add it to the start of the open tags list
207
219
open_tags.insert(0, tagname)
208
220
221
+
truncate_text = self.add_truncation_text("", truncate)
222
+
209
223
if current_len <= length:
224
+
if size_limited and truncate_text:
225
+
text += truncate_text
210
226
return text
227
+
211
228
out = text[:end_text_pos]
212
-
truncate_text = self.add_truncation_text('', truncate)
213
229
if truncate_text:
214
230
out += truncate_text
215
231
# Close any tags still open
Original file line number Diff line number Diff line change
@@ -2348,6 +2348,16 @@ If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be
2348
2348
2349
2349
Newlines in the HTML content will be preserved.
2350
2350
2351
+
.. admonition:: Size of input string
2352
+
2353
+
Processing large, potentially malformed HTML strings can be
2354
+
resource-intensive and impact service performance. ``truncatechars_html``
2355
+
limits input to the first five million characters.
2356
+
2357
+
.. versionchanged:: 3.2.22
2358
+
2359
+
In older versions, strings longer than five million characters were processed in full.
2360
+
2351
2361
.. templatefilter:: truncatewords
2352
2362
2353
2363
``truncatewords``
@@ -2386,6 +2396,16 @@ If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be
2386
2396
2387
2397
Newlines in the HTML content will be preserved.
2388
2398
2399
+
.. admonition:: Size of input string
2400
+
2401
+
Processing large, potentially malformed HTML strings can be
2402
+
resource-intensive and impact service performance. ``truncatewords_html``
2403
+
limits input to the first five million characters.
2404
+
2405
+
.. versionchanged:: 3.2.22
2406
+
2407
+
In older versions, strings longer than five million characters were processed in full.
2408
+
2389
2409
.. templatefilter:: unordered_list
2390
2410
2391
2411
``unordered_list``
Original file line number Diff line number Diff line change
@@ -6,4 +6,20 @@ Django 3.2.22 release notes
6
6
7
7
Django 3.2.22 fixes a security issue with severity "moderate" in 3.2.21.
8
8
9
-
...
9
+
CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator``
10
+
================================================================================
11
+
12
+
Following the fix for :cve:`2019-14232`, the regular expressions used in the
13
+
implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()``
14
+
methods (with ``html=True``) were revised and improved. However, these regular
15
+
expressions still exhibited linear backtracking complexity, so when given a
16
+
very long, potentially malformed HTML input, the evaluation would still be
17
+
slow, leading to a potential denial-of-service vulnerability.
18
+
19
+
The ``chars()`` and ``words()`` methods are used to implement the
20
+
:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
21
+
filters, which were thus also vulnerable.
22
+
23
+
The input processed by ``Truncator``, when operating in HTML mode, has been
24
+
limited to the first five million characters in order to avoid potential
25
+
performance and memory issues.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
1
1
import json
2
2
import sys
3
+
from unittest.mock import patch
3
4
4
5
from django.core.exceptions import SuspiciousFileOperation
5
6
from django.test import SimpleTestCase, ignore_warnings
@@ -90,11 +91,17 @@ def test_truncate_chars(self):
90
91
# lazy strings are handled correctly
91
92
self.assertEqual(text.Truncator(lazystr('The quick brown fox')).chars(10), 'The quick…')
92
93
93
-
def test_truncate_chars_html(self):
94
+
@patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
95
+
def test_truncate_chars_html_size_limit(self):
96
+
max_len = text.Truncator.MAX_LENGTH_HTML
97
+
bigger_len = text.Truncator.MAX_LENGTH_HTML + 1
98
+
valid_html = "<p>Joel is a slug</p>" # 14 chars
94
99
perf_test_values = [
95
-
(('</a' + '\t' * 50000) + '//>', None),
96
-
('&' * 50000, '&' * 9 + '…'),
97
-
('_X<<<<<<<<<<<>', None),
100
+
("</a" + "\t" * (max_len - 6) + "//>", None),
101
+
("</p" + "\t" * bigger_len + "//>", "</p" + "\t" * 6 + "…"),
102
+
("&" * bigger_len, "&" * 9 + "…"),
103
+
("_X<<<<<<<<<<<>", None),
104
+
(valid_html * bigger_len, "<p>Joel is a…</p>"), # 10 chars
98
105
]
99
106
for value, expected in perf_test_values:
100
107
with self.subTest(value=value):
@@ -152,15 +159,25 @@ def test_truncate_html_words(self):
152
159
truncator = text.Truncator('<p>I <3 python, what about you?</p>')
153
160
self.assertEqual('<p>I <3 python,…</p>', truncator.words(3, html=True))
154
161
162
+
@patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
163
+
def test_truncate_words_html_size_limit(self):
164
+
max_len = text.Truncator.MAX_LENGTH_HTML
165
+
bigger_len = text.Truncator.MAX_LENGTH_HTML + 1
166
+
valid_html = "<p>Joel is a slug</p>" # 4 words
155
167
perf_test_values = [
156
-
('</a' + '\t' * 50000) + '//>',
157
-
'&' * 50000,
158
-
'_X<<<<<<<<<<<>',
168
+
("</a" + "\t" * (max_len - 6) + "//>", None),
169
+
("</p" + "\t" * bigger_len + "//>", "</p" + "\t" * (max_len - 3) + "…"),
170
+
("&" * max_len, None), # no change
171
+
("&" * bigger_len, "&" * max_len + "…"),
172
+
("_X<<<<<<<<<<<>", None),
173
+
(valid_html * bigger_len, valid_html * 12 + "<p>Joel is…</p>"), # 50 words
159
174
]
160
-
for value in perf_test_values:
175
+
for value, expected in perf_test_values:
161
176
with self.subTest(value=value):
162
177
truncator = text.Truncator(value)
163
-
self.assertEqual(value, truncator.words(50, html=True))
178
+
self.assertEqual(
179
+
expected if expected else value, truncator.words(50, html=True)
180
+
)
164
181
165
182
def test_wrap(self):
166
183
digits = '1234 67 9'
You can’t perform that action at this time.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4