+95
-11
lines changed Filter options
+95
-11
lines changed Original file line number Diff line number Diff line change
@@ -65,8 +65,14 @@ def _generator():
65
65
class Truncator(SimpleLazyObject):
66
66
"""
67
67
An object used to truncate text, either by characters or words.
68
+
69
+
When truncating HTML text (either chars or words), input will be limited to
70
+
at most `MAX_LENGTH_HTML` characters.
68
71
"""
69
72
73
+
# 5 million characters are approximately 4000 text pages or 3 web pages.
74
+
MAX_LENGTH_HTML = 5_000_000
75
+
70
76
def __init__(self, text):
71
77
super().__init__(lambda: str(text))
72
78
@@ -162,6 +168,11 @@ def _truncate_html(self, length, truncate, text, truncate_len, words):
162
168
if words and length <= 0:
163
169
return ""
164
170
171
+
size_limited = False
172
+
if len(text) > self.MAX_LENGTH_HTML:
173
+
text = text[: self.MAX_LENGTH_HTML]
174
+
size_limited = True
175
+
165
176
html4_singlets = (
166
177
"br",
167
178
"col",
@@ -218,10 +229,14 @@ def _truncate_html(self, length, truncate, text, truncate_len, words):
218
229
# Add it to the start of the open tags list
219
230
open_tags.insert(0, tagname)
220
231
232
+
truncate_text = self.add_truncation_text("", truncate)
233
+
221
234
if current_len <= length:
235
+
if size_limited and truncate_text:
236
+
text += truncate_text
222
237
return text
238
+
223
239
out = text[:end_text_pos]
224
-
truncate_text = self.add_truncation_text("", truncate)
225
240
if truncate_text:
226
241
out += truncate_text
227
242
# Close any tags still open
Original file line number Diff line number Diff line change
@@ -2353,6 +2353,16 @@ If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be
2353
2353
2354
2354
Newlines in the HTML content will be preserved.
2355
2355
2356
+
.. admonition:: Size of input string
2357
+
2358
+
Processing large, potentially malformed HTML strings can be
2359
+
resource-intensive and impact service performance. ``truncatechars_html``
2360
+
limits input to the first five million characters.
2361
+
2362
+
.. versionchanged:: 3.2.22
2363
+
2364
+
In older versions, strings longer than five million characters were processed in full.
2365
+
2356
2366
.. templatefilter:: truncatewords
2357
2367
2358
2368
``truncatewords``
@@ -2391,6 +2401,16 @@ If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be
2391
2401
2392
2402
Newlines in the HTML content will be preserved.
2393
2403
2404
+
.. admonition:: Size of input string
2405
+
2406
+
Processing large, potentially malformed HTML strings can be
2407
+
resource-intensive and impact service performance. ``truncatewords_html``
2408
+
limits input to the first five million characters.
2409
+
2410
+
.. versionchanged:: 3.2.22
2411
+
2412
+
In older versions, strings longer than five million characters were processed in full.
2413
+
2394
2414
.. templatefilter:: unordered_list
2395
2415
2396
2416
``unordered_list``
Original file line number Diff line number Diff line change
@@ -6,4 +6,20 @@ Django 3.2.22 release notes
6
6
7
7
Django 3.2.22 fixes a security issue with severity "moderate" in 3.2.21.
8
8
9
-
...
9
+
CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator``
10
+
================================================================================
11
+
12
+
Following the fix for :cve:`2019-14232`, the regular expressions used in the
13
+
implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()``
14
+
methods (with ``html=True``) were revised and improved. However, these regular
15
+
expressions still exhibited linear backtracking complexity, so when given a
16
+
very long, potentially malformed HTML input, the evaluation would still be
17
+
slow, leading to a potential denial of service vulnerability.
18
+
19
+
The ``chars()`` and ``words()`` methods are used to implement the
20
+
:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
21
+
filters, which were thus also vulnerable.
22
+
23
+
The input processed by ``Truncator``, when operating in HTML mode, has been
24
+
limited to the first five million characters in order to avoid potential
25
+
performance and memory issues.
Original file line number Diff line number Diff line change
@@ -6,4 +6,20 @@ Django 4.1.12 release notes
6
6
7
7
Django 4.1.12 fixes a security issue with severity "moderate" in 4.1.11.
8
8
9
-
...
9
+
CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator``
10
+
================================================================================
11
+
12
+
Following the fix for :cve:`2019-14232`, the regular expressions used in the
13
+
implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()``
14
+
methods (with ``html=True``) were revised and improved. However, these regular
15
+
expressions still exhibited linear backtracking complexity, so when given a
16
+
very long, potentially malformed HTML input, the evaluation would still be
17
+
slow, leading to a potential denial of service vulnerability.
18
+
19
+
The ``chars()`` and ``words()`` methods are used to implement the
20
+
:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
21
+
filters, which were thus also vulnerable.
22
+
23
+
The input processed by ``Truncator``, when operating in HTML mode, has been
24
+
limited to the first five million characters in order to avoid potential
25
+
performance and memory issues.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
1
1
import json
2
2
import sys
3
+
from unittest.mock import patch
3
4
4
5
from django.core.exceptions import SuspiciousFileOperation
5
6
from django.test import SimpleTestCase
@@ -94,11 +95,17 @@ def test_truncate_chars(self):
94
95
text.Truncator(lazystr("The quick brown fox")).chars(10), "The quick…"
95
96
)
96
97
97
-
def test_truncate_chars_html(self):
98
+
@patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
99
+
def test_truncate_chars_html_size_limit(self):
100
+
max_len = text.Truncator.MAX_LENGTH_HTML
101
+
bigger_len = text.Truncator.MAX_LENGTH_HTML + 1
102
+
valid_html = "<p>Joel is a slug</p>" # 14 chars
98
103
perf_test_values = [
99
-
(("</a" + "\t" * 50000) + "//>", None),
100
-
("&" * 50000, "&" * 9 + "…"),
104
+
("</a" + "\t" * (max_len - 6) + "//>", None),
105
+
("</p" + "\t" * bigger_len + "//>", "</p" + "\t" * 6 + "…"),
106
+
("&" * bigger_len, "&" * 9 + "…"),
101
107
("_X<<<<<<<<<<<>", None),
108
+
(valid_html * bigger_len, "<p>Joel is a…</p>"), # 10 chars
102
109
]
103
110
for value, expected in perf_test_values:
104
111
with self.subTest(value=value):
@@ -176,15 +183,25 @@ def test_truncate_html_words(self):
176
183
truncator = text.Truncator("<p>I <3 python, what about you?</p>")
177
184
self.assertEqual("<p>I <3 python,…</p>", truncator.words(3, html=True))
178
185
186
+
@patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
187
+
def test_truncate_words_html_size_limit(self):
188
+
max_len = text.Truncator.MAX_LENGTH_HTML
189
+
bigger_len = text.Truncator.MAX_LENGTH_HTML + 1
190
+
valid_html = "<p>Joel is a slug</p>" # 4 words
179
191
perf_test_values = [
180
-
("</a" + "\t" * 50000) + "//>",
181
-
"&" * 50000,
182
-
"_X<<<<<<<<<<<>",
192
+
("</a" + "\t" * (max_len - 6) + "//>", None),
193
+
("</p" + "\t" * bigger_len + "//>", "</p" + "\t" * (max_len - 3) + "…"),
194
+
("&" * max_len, None), # no change
195
+
("&" * bigger_len, "&" * max_len + "…"),
196
+
("_X<<<<<<<<<<<>", None),
197
+
(valid_html * bigger_len, valid_html * 12 + "<p>Joel is…</p>"), # 50 words
183
198
]
184
-
for value in perf_test_values:
199
+
for value, expected in perf_test_values:
185
200
with self.subTest(value=value):
186
201
truncator = text.Truncator(value)
187
-
self.assertEqual(value, truncator.words(50, html=True))
202
+
self.assertEqual(
203
+
expected if expected else value, truncator.words(50, html=True)
204
+
)
188
205
189
206
def test_wrap(self):
190
207
digits = "1234 67 9"
You can’t perform that action at this time.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4