Skip to content

Commit 75ac969

Browse files
authored
Upgrade libbrotli to v1.2.0
1 parent ac7cb9e commit 75ac969

File tree

7 files changed

+168
-9
lines changed

7 files changed

+168
-9
lines changed

HISTORY.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
Changelog
22
=========
33

4+
1.2.0.0 (TBD)
5+
--------------------
6+
7+
- Upgraded libbrotli to v1.2.0.
8+
- Added ``output_buffer_limit`` parameter to ``Decompressor.decompress()`` and
9+
``Decompressor.process()`` methods to allow mitigation of unexpectedly large
10+
output. This addresses potential security concerns where maliciously crafted
11+
compressed data could result in excessive memory usage during decompression.
12+
13+
414
1.1.0.0 (2023-09-14)
515
--------------------
616

libbrotli

Submodule libbrotli updated 232 files

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,5 +122,7 @@ def finalize_options(self):
122122
"Programming Language :: Python :: 3.10",
123123
"Programming Language :: Python :: 3.11",
124124
"Programming Language :: Python :: 3.12",
125+
"Programming Language :: Python :: 3.13",
126+
"Programming Language :: Python :: 3.14",
125127
]
126128
)

src/brotlicffi/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
Compressor, MODE_GENERIC, MODE_TEXT, MODE_FONT, error, Error
66
)
77

8-
__version__ = "1.1.0.0"
8+
__version__ = "1.2.0.0"

src/brotlicffi/_api.py

Lines changed: 102 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -348,17 +348,23 @@ class Decompressor(object):
348348
.. versionchanged:: 0.5.0
349349
Added ``dictionary`` parameter.
350350
351+
.. versionchanged:: 1.2.0
352+
Added ``can_accept_more_data()`` method and optional
353+
``output_buffer_limit`` parameter to ``process()``/``decompress()``.
354+
351355
:param dictionary: A pre-set dictionary for LZ77. Please use this with
352356
caution: if a dictionary is used for compression, the same dictionary
353357
**must** be used for decompression!
354358
:type dictionary: ``bytes``
355359
"""
356360
_dictionary = None
357361
_dictionary_size = None
362+
_unconsumed_data = None
358363

359364
def __init__(self, dictionary=b''):
360365
dec = lib.BrotliDecoderCreateInstance(ffi.NULL, ffi.NULL, ffi.NULL)
361366
self._decoder = ffi.gc(dec, lib.BrotliDecoderDestroyInstance)
367+
self._unconsumed_data = b''
362368

363369
if dictionary:
364370
self._dictionary = ffi.new("uint8_t []", dictionary)
@@ -369,23 +375,73 @@ def __init__(self, dictionary=b''):
369375
self._dictionary
370376
)
371377

372-
def decompress(self, data):
378+
@staticmethod
379+
def _calculate_buffer_size(
380+
input_data_len, output_buffer_limit, chunks_len, chunks_num
381+
):
382+
if output_buffer_limit is not None:
383+
return output_buffer_limit - chunks_len
384+
# When `decompress(b'')` is called without `output_buffer_limit`.
385+
elif input_data_len == 0:
386+
# libbrotli would use 32 KB as a starting buffer size and double it
387+
# each time, capped at 16 MB.
388+
# https://github.com/google/brotli/blob/028fb5a23661f123017c060daa546b55cf4bde29/python/_brotli.c#L291-L292
389+
return 1 << min(chunks_num + 15, 24)
390+
else:
391+
# Allocate a buffer that's hopefully overlarge, but if it's not we
392+
# don't mind: we'll spin around again.
393+
return 5 * input_data_len
394+
395+
def decompress(self, data, output_buffer_limit=None):
373396
"""
374397
Decompress part of a complete Brotli-compressed string.
375398
399+
.. versionchanged:: 1.2.0
400+
Added ``output_buffer_limit`` parameter.
401+
376402
:param data: A bytestring containing Brotli-compressed data.
403+
:param output_buffer_limit: Optional maximum size for the output
404+
buffer. If set, the output buffer will not grow once its size
405+
equals or exceeds this value. If the limit is reached, further
406+
calls to process (potentially with empty input) will continue to
407+
yield more data. Following process() calls must only be called
408+
with empty input until can_accept_more_data() returns True.
409+
:type output_buffer_limit: ``int`` or ``None``
377410
:returns: A bytestring containing the decompressed data.
378411
"""
412+
if self._unconsumed_data and data:
413+
raise error(
414+
"brotli: decoder process called with data when "
415+
"'can_accept_more_data()' is False"
416+
)
417+
418+
# We should avoid operations on the `self._unconsumed_data` if no data
419+
# is to be processed.
420+
if output_buffer_limit is not None and output_buffer_limit <= 0:
421+
return b''
422+
423+
# Use unconsumed data if available, use new data otherwise.
424+
if self._unconsumed_data:
425+
input_data = self._unconsumed_data
426+
self._unconsumed_data = b''
427+
else:
428+
input_data = data
429+
379430
chunks = []
431+
chunks_len = 0
380432

381-
available_in = ffi.new("size_t *", len(data))
382-
in_buffer = ffi.new("uint8_t[]", data)
433+
available_in = ffi.new("size_t *", len(input_data))
434+
in_buffer = ffi.new("uint8_t[]", input_data)
383435
next_in = ffi.new("uint8_t **", in_buffer)
384436

385437
while True:
386-
# Allocate a buffer that's hopefully overlarge, but if it's not we
387-
# don't mind: we'll spin around again.
388-
buffer_size = 5 * len(data)
438+
buffer_size = self._calculate_buffer_size(
439+
input_data_len=len(input_data),
440+
output_buffer_limit=output_buffer_limit,
441+
chunks_len=chunks_len,
442+
chunks_num=len(chunks),
443+
)
444+
389445
available_out = ffi.new("size_t *", buffer_size)
390446
out_buffer = ffi.new("uint8_t[]", buffer_size)
391447
next_out = ffi.new("uint8_t **", out_buffer)
@@ -408,6 +464,19 @@ def decompress(self, data):
408464
# Next, copy the result out.
409465
chunk = ffi.buffer(out_buffer, buffer_size - available_out[0])[:]
410466
chunks.append(chunk)
467+
chunks_len += len(chunk)
468+
469+
# Save any unconsumed input for the next call.
470+
if available_in[0] > 0:
471+
remaining_input = ffi.buffer(next_in[0], available_in[0])[:]
472+
self._unconsumed_data = remaining_input
473+
474+
# Check if we've reached the output limit.
475+
if (
476+
output_buffer_limit is not None
477+
and chunks_len >= output_buffer_limit
478+
):
479+
break
411480

412481
if rc == lib.BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
413482
assert available_in[0] == 0
@@ -459,3 +528,30 @@ def is_finished(self):
459528
is complete, ``False`` otherwise
460529
"""
461530
return lib.BrotliDecoderIsFinished(self._decoder) == lib.BROTLI_TRUE
531+
532+
def can_accept_more_data(self):
533+
"""
534+
Checks if the decompressor can accept more compressed data.
535+
536+
If the ``output_buffer_limit`` parameter was used with
537+
``decompress()`` or ``process()``, this method should be checked to
538+
determine if the decompressor is ready to accept new input. When the
539+
output buffer limit is reached, the decompressor may still have
540+
unconsumed input data or internal buffered output, and calling
541+
``decompress(b'')`` repeatedly will continue producing output until
542+
this method returns ``True``.
543+
544+
.. versionadded:: 1.2.0
545+
546+
:returns: ``True`` if the decompressor is ready to accept more
547+
compressed data via ``decompress()`` or ``process()``, ``False``
548+
if the decompressor needs to output some data via
549+
``decompress(b'')``/``process(b'')`` before being provided any
550+
more compressed data.
551+
:rtype: ``bool``
552+
"""
553+
if len(self._unconsumed_data) > 0:
554+
return False
555+
if lib.BrotliDecoderHasMoreOutput(self._decoder) == lib.BROTLI_TRUE:
556+
return False
557+
return True

test/test_simple_decompression.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,57 @@ def test_decompressobj(simple_compressed_file):
3838
assert data == uncompressed_data
3939

4040

41+
# `more_data_limit` allows testing `decompress(b'')` with and without a limit.
@pytest.mark.parametrize('more_data_limit', [100, None])
def test_decompressobj_with_output_buffer_limit(
    simple_compressed_file, more_data_limit
):
    """
    Decompress with `output_buffer_limit` set, then drain the rest.

    The first call is capped at a small limit. The remaining output is
    pulled with empty-input calls, with or without a limit depending on
    `more_data_limit`, and the joined result must match the original file.
    """
    with open(simple_compressed_file[0], 'rb') as plain_file:
        expected = plain_file.read()

    with open(simple_compressed_file[1], 'rb') as brotli_file:
        payload = brotli_file.read()

    decoder = brotlicffi.Decompressor()
    assert decoder.can_accept_more_data()

    first_limit = 100
    first_chunk = decoder.decompress(payload, output_buffer_limit=first_limit)
    assert len(first_chunk) <= first_limit

    # Ensure `output_buffer_limit` of zero works.
    assert decoder.decompress(b'', output_buffer_limit=0) == b''

    # While input is still pending, feeding new data must be rejected.
    if decoder._unconsumed_data:
        expected_message = (
            r"brotli: decoder process called with data when "
            r"'can_accept_more_data\(\)' is False"
        )
        with pytest.raises(brotlicffi.error, match=expected_message):
            decoder.decompress(b'additional data')

    if not decoder.is_finished():
        assert not decoder.can_accept_more_data()

    # Continue decompressing with empty input.
    pieces = [first_chunk]
    while not (decoder.can_accept_more_data() or decoder.is_finished()):
        piece = decoder.decompress(b'', output_buffer_limit=more_data_limit)
        if more_data_limit is not None:
            assert len(piece) <= more_data_limit
        pieces.append(piece)
    assert decoder.can_accept_more_data() or decoder.is_finished()

    assert b''.join(pieces) == expected
90+
91+
4192
def test_drip_feed(simple_compressed_file):
4293
"""
4394
Sending in the data one byte at a time still works.

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[tox]
2-
envlist = py37, py38, py39, py310, py311, py312, pypy, lint
2+
envlist = py37, py38, py39, py310, py311, py312, py313, py314, pypy, lint
33

44
[testenv]
55
deps= -r{toxinidir}/test_requirements.txt

0 commit comments

Comments
 (0)