Skip to content

Commit

Permalink
problem_report: write CompressedValue in chunks as well
Browse files Browse the repository at this point in the history
In problem reports, binary data is compressed in chunks of 1 MB. Each
compressed chunk is base64-encoded and written as one line in the
problem report file. This is the rule for all binary data formats except
for `CompressedValue`, whose entire compressed content is written as a
single line.

Problem report files are text files, and text editors such as gedit
might choke on text files that contain huge lines. The problem report
should therefore wrap its lines at a reasonable length.

So write `CompressedValue` in chunks as well.
  • Loading branch information
bdrung authored and schopin-pro committed Feb 28, 2025
1 parent 7a312a5 commit 24f307b
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 10 deletions.
13 changes: 4 additions & 9 deletions problem_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ def write(self, file: typing.IO[bytes]) -> None:
# legacy zlib format
file.write(zlib.decompress(self.compressed_value))

def _iter_compressed_value(self, chunk_size: int) -> Iterator[bytes]:
def iter_compressed(self, chunk_size: int = CHUNK_SIZE) -> Iterator[bytes]:
"""Iterate over the compressed content in chunks."""
assert self.compressed_value
for i in range(0, self.get_compressed_size(), chunk_size):
yield self.compressed_value[i : i + chunk_size]
Expand All @@ -320,9 +321,7 @@ def __len__(self) -> int:

# legacy zlib format and zstandard
length = 0
for block in self.decode_compressed_stream(
self._iter_compressed_value(CHUNK_SIZE)
):
for block in self.decode_compressed_stream(self.iter_compressed()):
length += len(block)
return length

Expand Down Expand Up @@ -670,13 +669,9 @@ def _generate_compressed_chunks(self, key: str) -> Generator[bytes]:
# TODO: split into smaller subgenerators
# pylint: disable=too-many-branches
value = self.data[key]
if isinstance(value, CompressedFile):
if isinstance(value, (CompressedFile, CompressedValue)):
yield from value.iter_compressed()
return
if isinstance(value, CompressedValue):
assert value.compressed_value is not None
yield value.compressed_value
return
gzip_header = (
GZIP_HEADER_START
+ b"\010\000\000\000\000\002\377"
Expand Down
26 changes: 25 additions & 1 deletion tests/unit/test_problem_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import time
import unittest
import unittest.mock
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch

try:
import zstandard
Expand Down Expand Up @@ -411,6 +411,30 @@ def test_write_empty_fileobj(self) -> None:
out = io.BytesIO()
self.assertRaises(OSError, pr.write, out)

@patch("problem_report.CompressedValue.iter_compressed.__defaults__", (48,))
def test_write_compressed_value(self) -> None:
"""Write a report with a multi-line compressed value."""
report = problem_report.ProblemReport(date="now!")
report["BinValue"] = problem_report.CompressedValue(
compressed_value=b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\x0b\xc9HUH\xcc"
b"+.O-R(\xc9W\xc8\xc9LK\xd5Q(\x01\x8a\x95\xe6e\x96\xa5\x16\x15\x03y\x89y)"
b"\n\xa9@veIFf^:\x00\xa6\xbfr\x950\x00\x00\x00"
)
out = io.BytesIO()
report.write(out)
self.assertEqual(
out.getvalue().decode(),
textwrap.dedent(
"""\
ProblemType: Crash
Date: now!
BinValue: base64
H4sIAAAAAAAC/wvJSFVIzCsuTy1SKMlXyMlMS9VRKAGKleZllqUWFQN5iXkpCqlA
dmVJRmZeOgCmv3KVMAAAAA==
"""
),
)

def test_read_file(self) -> None:
"""Read a report with binary data."""
bin_report = textwrap.dedent(
Expand Down

0 comments on commit 24f307b

Please sign in to comment.