@@ -348,17 +348,23 @@ class Decompressor(object):
348348 .. versionchanged:: 0.5.0
349349 Added ``dictionary`` parameter.
350350
351+ .. versionchanged:: 1.2.0
352+ Added ``can_accept_more_data()`` method and optional
353+ ``output_buffer_limit`` parameter to ``process()``/``decompress()``.
354+
351355 :param dictionary: A pre-set dictionary for LZ77. Please use this with
352356 caution: if a dictionary is used for compression, the same dictionary
353357 **must** be used for decompression!
354358 :type dictionary: ``bytes``
355359 """
356360 _dictionary = None
357361 _dictionary_size = None
362+ _unconsumed_data = None
358363
359364 def __init__ (self , dictionary = b'' ):
360365 dec = lib .BrotliDecoderCreateInstance (ffi .NULL , ffi .NULL , ffi .NULL )
361366 self ._decoder = ffi .gc (dec , lib .BrotliDecoderDestroyInstance )
367+ self ._unconsumed_data = b''
362368
363369 if dictionary :
364370 self ._dictionary = ffi .new ("uint8_t []" , dictionary )
@@ -369,23 +375,73 @@ def __init__(self, dictionary=b''):
369375 self ._dictionary
370376 )
371377
372- def decompress (self , data ):
@staticmethod
def _calculate_buffer_size(
    input_data_len, output_buffer_limit, chunks_len, chunks_num
):
    """
    Pick the size of the next output buffer for a decompression pass.

    :param input_data_len: Length in bytes of the input handed to the
        current decompression call.
    :param output_buffer_limit: Maximum number of output bytes requested
        by the caller, or ``None`` when no limit was given.
    :param chunks_len: Number of output bytes already collected during
        the current call.
    :param chunks_num: Number of output chunks already collected during
        the current call.
    :returns: The size, in bytes, of the buffer to allocate next. When a
        limit is given this is the remaining budget
        (``output_buffer_limit - chunks_len``); the difference is returned
        as-is and is not clamped at zero.
    :rtype: ``int``
    """
    # With a limit, hand out exactly what is left of the caller's budget.
    if output_buffer_limit is not None:
        return output_buffer_limit - chunks_len

    # When `decompress(b'')` is called without `output_buffer_limit`.
    if input_data_len == 0:
        # libbrotli would use 32 KB as a starting buffer size and double it
        # each time, capped at 16 MB.
        # https://github.com/google/brotli/blob/028fb5a23661f123017c060daa546b55cf4bde29/python/_brotli.c#L291-L292
        return 1 << min(chunks_num + 15, 24)

    # Allocate a buffer that's hopefully overlarge, but if it's not we
    # don't mind: we'll spin around again.
    return 5 * input_data_len
394+
395+ def decompress (self , data , output_buffer_limit = None ):
373396 """
374397 Decompress part of a complete Brotli-compressed string.
375398
399+ .. versionchanged:: 1.2.0
400+ Added ``output_buffer_limit`` parameter.
401+
376402 :param data: A bytestring containing Brotli-compressed data.
403+ :param output_buffer_limit: Optional maximum size for the output
404+ buffer. If set, the output buffer will not grow once its size
405+ equals or exceeds this value. If the limit is reached, further
406+ calls to process (potentially with empty input) will continue to
407+ yield more data. Following process() calls must only be called
408+ with empty input until can_accept_more_data() returns True.
409+ :type output_buffer_limit: ``int`` or ``None``
377410 :returns: A bytestring containing the decompressed data.
378411 """
412+ if self ._unconsumed_data and data :
413+ raise error (
414+ "brotli: decoder process called with data when "
415+ "'can_accept_more_data()' is False"
416+ )
417+
418+ # We should avoid operations on the `self._unconsumed_data` if no data
419+ # is to be processed.
420+ if output_buffer_limit is not None and output_buffer_limit <= 0 :
421+ return b''
422+
423+ # Use unconsumed data if available, use new data otherwise.
424+ if self ._unconsumed_data :
425+ input_data = self ._unconsumed_data
426+ self ._unconsumed_data = b''
427+ else :
428+ input_data = data
429+
379430 chunks = []
431+ chunks_len = 0
380432
381- available_in = ffi .new ("size_t *" , len (data ))
382- in_buffer = ffi .new ("uint8_t[]" , data )
433+ available_in = ffi .new ("size_t *" , len (input_data ))
434+ in_buffer = ffi .new ("uint8_t[]" , input_data )
383435 next_in = ffi .new ("uint8_t **" , in_buffer )
384436
385437 while True :
386- # Allocate a buffer that's hopefully overlarge, but if it's not we
387- # don't mind: we'll spin around again.
388- buffer_size = 5 * len (data )
438+ buffer_size = self ._calculate_buffer_size (
439+ input_data_len = len (input_data ),
440+ output_buffer_limit = output_buffer_limit ,
441+ chunks_len = chunks_len ,
442+ chunks_num = len (chunks ),
443+ )
444+
389445 available_out = ffi .new ("size_t *" , buffer_size )
390446 out_buffer = ffi .new ("uint8_t[]" , buffer_size )
391447 next_out = ffi .new ("uint8_t **" , out_buffer )
@@ -408,6 +464,19 @@ def decompress(self, data):
408464 # Next, copy the result out.
409465 chunk = ffi .buffer (out_buffer , buffer_size - available_out [0 ])[:]
410466 chunks .append (chunk )
467+ chunks_len += len (chunk )
468+
469+ # Save any unconsumed input for the next call.
470+ if available_in [0 ] > 0 :
471+ remaining_input = ffi .buffer (next_in [0 ], available_in [0 ])[:]
472+ self ._unconsumed_data = remaining_input
473+
474+ # Check if we've reached the output limit.
475+ if (
476+ output_buffer_limit is not None
477+ and chunks_len >= output_buffer_limit
478+ ):
479+ break
411480
412481 if rc == lib .BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT :
413482 assert available_in [0 ] == 0
@@ -459,3 +528,30 @@ def is_finished(self):
459528 is complete, ``False`` otherwise
460529 """
461530 return lib .BrotliDecoderIsFinished (self ._decoder ) == lib .BROTLI_TRUE
531+
def can_accept_more_data(self):
    """
    Checks if the decompressor can accept more compressed data.

    If the ``output_buffer_limit`` parameter was used with
    ``decompress()`` or ``process()``, this method should be checked to
    determine if the decompressor is ready to accept new input. When the
    output buffer limit is reached, the decompressor may still have
    unconsumed input data or internal buffered output, and calling
    ``decompress(b'')`` repeatedly will continue producing output until
    this method returns ``True``.

    .. versionadded:: 1.2.0

    :returns: ``True`` if the decompressor is ready to accept more
        compressed data via ``decompress()`` or ``process()``, ``False``
        if the decompressor needs to output some data via
        ``decompress(b'')``/``process(b'')`` before being provided any
        more compressed data.
    :rtype: ``bool``
    """
    # Input saved from an earlier call must be drained before new input
    # is accepted. Truthiness is used (as in ``decompress()``) so an
    # unset/empty value simply counts as "nothing pending".
    if self._unconsumed_data:
        return False

    # Otherwise new input is acceptable only when the decoder reports no
    # further output waiting to be collected.
    return (
        lib.BrotliDecoderHasMoreOutput(self._decoder) != lib.BROTLI_TRUE
    )
0 commit comments