From e3fe30c37bb6b6334597a042387f652f0b9d7fc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kiss=20Gy=C3=B6rgy?=
Date: Mon, 6 Dec 2021 15:49:04 +0100
Subject: [PATCH 1/5] Refactor processing, get rid of strategies.py

We decided that we don't want to implement the strategy abstraction
right now. Moved the chunk search related functionality into finder.py
and deleted strategies.py. The functions are ordered in call order.
---
 tests/test_finder.py                          |  60 ++++++++
 ...{test_strategies.py => test_processing.py} |   2 +-
 unblob/extractor.py                           |   3 +
 unblob/finder.py                              |  86 +++++++++++-
 unblob/processing.py                          |  67 ++++++++-
 unblob/strategies.py                          | 128 ------------------
 6 files changed, 206 insertions(+), 140 deletions(-)
 create mode 100644 tests/test_finder.py
 rename tests/{test_strategies.py => test_processing.py} (97%)
 delete mode 100644 unblob/strategies.py

diff --git a/tests/test_finder.py b/tests/test_finder.py
new file mode 100644
index 0000000000..cec535ba49
--- /dev/null
+++ b/tests/test_finder.py
@@ -0,0 +1,60 @@
+from pathlib import Path
+
+from unblob.finder import make_yara_rules, search_yara_patterns
+from unblob.models import Handler
+
+
+class _BaseTestHandler(Handler):
+    def calculate_chunk(self, *args, **kwargs):
+        pass
+
+    @staticmethod
+    def make_extract_command(*args, **kwargs):
+        return []
+
+
+class TestHandler1(_BaseTestHandler):
+    NAME = "handler1"
+    YARA_RULE = r"""
+        strings:
+            $magic = { 21 3C }
+        condition:
+            $magic
+    """
+
+
+class TestHandler2(_BaseTestHandler):
+    NAME = "handler2"
+    YARA_RULE = r"""
+        strings:
+            $tar_magic = { 75 73 74 61 72 }
+        condition:
+            $tar_magic
+    """
+
+
+def test_make_yara_rules():
+    rules = make_yara_rules(tuple([TestHandler1, TestHandler2]))
+    matches = rules.match(data=b"!<        ustar")
+    assert len(matches) == 2
+    assert matches[0].strings == [(0, "$magic", b"!<")]
+    assert matches[1].strings == [(10, "$tar_magic", b"ustar")]
+
+
+def test_search_yara_patterns(tmp_path: Path):
+    handler1 = TestHandler1()
+    handler2 = TestHandler2()
+    rules = make_yara_rules(tuple([TestHandler1, TestHandler2]))
+    handler_map = {"handler1": handler1, "handler2": handler2}
+    test_file = tmp_path / "test_file"
+    test_file.write_bytes(b"!<        ustar")
+    results = search_yara_patterns(rules, handler_map, test_file)
+
+    assert len(results) == 2
+    result1, result2 = results
+
+    assert result1.handler is handler1
+    assert result1.match.strings == [(0, "$magic", b"!<")]
+
+    assert result2.handler is handler2
+    assert result2.match.strings == [(10, "$tar_magic", b"ustar")]
diff --git a/tests/test_strategies.py b/tests/test_processing.py
similarity index 97%
rename from tests/test_strategies.py
rename to tests/test_processing.py
index bcd689a474..f35aab4503 100644
--- a/tests/test_strategies.py
+++ b/tests/test_processing.py
@@ -3,7 +3,7 @@
 import pytest
 
 from unblob.models import UnknownChunk, ValidChunk
-from unblob.strategies import calculate_unknown_chunks, remove_inner_chunks
+from unblob.processing import calculate_unknown_chunks, remove_inner_chunks
 
 
 @pytest.mark.parametrize(
diff --git a/unblob/extractor.py b/unblob/extractor.py
index 45bde15d45..0a051a1209 100644
--- a/unblob/extractor.py
+++ b/unblob/extractor.py
@@ -1,3 +1,6 @@
+"""
+File extraction related functions.
+"""
 import io
 import shlex
 import subprocess
diff --git a/unblob/finder.py b/unblob/finder.py
index 8ac2e8cd21..c98623ba20 100644
--- a/unblob/finder.py
+++ b/unblob/finder.py
@@ -1,12 +1,21 @@
+"""
+Chunk search related functions.
+The main "entry point" is search_chunks_by_priority.
+""" +import io from functools import lru_cache +from operator import itemgetter from pathlib import Path from typing import Dict, List, Tuple import yara from structlog import get_logger -from .handlers import Handler -from .models import YaraMatchResult +from .file_utils import LimitedStartReader +from .handlers import _ALL_MODULES_BY_PRIORITY +from .logging import noformat +from .models import Handler, ValidChunk, YaraMatchResult +from .state import exit_code_var logger = get_logger() @@ -19,8 +28,70 @@ """ +def search_chunks_by_priority( # noqa: C901 + path: Path, file: io.BufferedReader, file_size: int +) -> List[ValidChunk]: + """Search all ValidChunks within the file. + Collect all the registered handlers by priority, search for YARA patterns and run + Handler.calculate_chunk() on the found matches. + We don't deal with offset within already found ValidChunks and invalid chunks are thrown away. + """ + all_chunks = [] + + for priority_level, handlers in enumerate(_ALL_MODULES_BY_PRIORITY, start=1): + logger.info("Starting priority level", priority_level=noformat(priority_level)) + yara_rules = make_yara_rules(tuple(handlers.values())) + yara_results = search_yara_patterns(yara_rules, handlers, path) + + for result in yara_results: + handler, match = result.handler, result.match + + by_offset = itemgetter(0) + sorted_match_strings = sorted(match.strings, key=by_offset) + for offset, identifier, string_data in sorted_match_strings: + real_offset = offset + handler.YARA_MATCH_OFFSET + + # Skip chunk calculation if this would start inside another one, + # similar to remove_inner_chunks, but before we even begin calculating. + if any(chunk.contains_offset(real_offset) for chunk in all_chunks): + continue + + logger.info( + "Calculating chunk for YARA match", + start_offset=offset, + real_offset=real_offset, + identifier=identifier, + ) + + limited_reader = LimitedStartReader(file, real_offset) + try: + chunk = handler.calculate_chunk(limited_reader, real_offset) + except Exception as exc: + exit_code_var.set(1) + logger.error( + "Unhandled Exception during chunk calculation", exc_info=exc + ) + continue + + # We found some random bytes this handler couldn't parse + if chunk is None: + continue + + if chunk.end_offset > file_size or chunk.start_offset < 0: + exit_code_var.set(1) + logger.error("Chunk overflows file", chunk=chunk) + continue + + chunk.handler = handler + logger.info("Found valid chunk", chunk=chunk, handler=handler.NAME) + all_chunks.append(chunk) + + return all_chunks + + @lru_cache -def _make_yara_rules(handlers: Tuple[Handler, ...]): +def make_yara_rules(handlers: Tuple[Handler, ...]): + """Make yara.Rule by concatenating all handlers yara rules and compiling them.""" all_yara_rules = "\n".join( _YARA_RULE_TEMPLATE.format(NAME=h.NAME, YARA_RULE=h.YARA_RULE.strip()) for h in handlers @@ -30,10 +101,10 @@ def _make_yara_rules(handlers: Tuple[Handler, ...]): return compiled_rules -def search_chunks( - handlers: Dict[str, Handler], full_path: Path +def search_yara_patterns( + yara_rules: yara.Rule, handlers: Dict[str, Handler], full_path: Path ) -> List[YaraMatchResult]: - yara_rules = _make_yara_rules(tuple(handlers.values())) + """Search with the compiled YARA rules and identify the handler which defined the rule.""" # YARA uses a memory mapped file internally when given a path yara_matches: List[yara.Match] = yara_rules.match(str(full_path), timeout=60) @@ -43,4 +114,7 @@ def search_chunks( yara_res = YaraMatchResult(handler=handler, match=match) 
         yara_results.append(yara_res)
 
+    if yara_results:
+        logger.info("Found YARA results", count=noformat(len(yara_results)))
+
     return yara_results
diff --git a/unblob/processing.py b/unblob/processing.py
index 4d0a408be2..5a195db471 100644
--- a/unblob/processing.py
+++ b/unblob/processing.py
@@ -1,14 +1,14 @@
 import stat
+from operator import attrgetter
 from pathlib import Path
+from typing import List
 
 from structlog import get_logger
 
 from .extractor import carve_unknown_chunks, extract_valid_chunks, make_extract_dir
-from .strategies import (
-    calculate_unknown_chunks,
-    remove_inner_chunks,
-    search_chunks_by_priority,
-)
+from .finder import search_chunks_by_priority
+from .iter_utils import pairwise
+from .models import UnknownChunk, ValidChunk
 
 logger = get_logger()
 
@@ -61,3 +61,60 @@ def process_file(
             process_file(
                 extract_root, new_path, extract_root, max_depth, current_depth + 1
             )
+
+
+def remove_inner_chunks(chunks: List[ValidChunk]) -> List[ValidChunk]:
+    """Remove all chunks from the list which are within another, bigger chunk."""
+    if not chunks:
+        return []
+
+    chunks_by_size = sorted(chunks, key=attrgetter("size"), reverse=True)
+    outer_chunks = [chunks_by_size[0]]
+    for chunk in chunks_by_size[1:]:
+        if not any(outer.contains(chunk) for outer in outer_chunks):
+            outer_chunks.append(chunk)
+
+    outer_count = len(outer_chunks)
+    removed_count = len(chunks) - outer_count
+    logger.info(
+        "Removed inner chunks",
+        outer_chunk_count=outer_count,
+        removed_inner_chunk_count=removed_count,
+    )
+    return outer_chunks
+
+
+def calculate_unknown_chunks(
+    chunks: List[ValidChunk], file_size: int
+) -> List[UnknownChunk]:
+    """Calculate the empty gaps between chunks."""
+    if not chunks or file_size == 0:
+        return []
+
+    sorted_by_offset = sorted(chunks, key=attrgetter("start_offset"))
+
+    unknown_chunks = []
+
+    first = sorted_by_offset[0]
+    if first.start_offset != 0:
+        unknown_chunk = UnknownChunk(0, first.start_offset)
+        unknown_chunks.append(unknown_chunk)
+
+    for chunk, next_chunk in pairwise(sorted_by_offset):
+        diff = next_chunk.start_offset - chunk.end_offset
+        if diff != 0:
+            unknown_chunk = UnknownChunk(
+                start_offset=chunk.end_offset,
+                end_offset=next_chunk.start_offset,
+            )
+            unknown_chunks.append(unknown_chunk)
+
+    last = sorted_by_offset[-1]
+    if last.end_offset < file_size:
+        unknown_chunk = UnknownChunk(
+            start_offset=last.end_offset,
+            end_offset=file_size,
+        )
+        unknown_chunks.append(unknown_chunk)
+
+    return unknown_chunks
diff --git a/unblob/strategies.py b/unblob/strategies.py
deleted file mode 100644
index 0ab3485898..0000000000
--- a/unblob/strategies.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import io
-from operator import attrgetter, itemgetter
-from pathlib import Path
-from typing import List
-
-from structlog import get_logger
-
-from .file_utils import LimitedStartReader
-from .finder import search_chunks
-from .handlers import _ALL_MODULES_BY_PRIORITY
-from .iter_utils import pairwise
-from .logging import noformat
-from .models import UnknownChunk, ValidChunk
-from .state import exit_code_var
-
-logger = get_logger()
-
-
-def search_chunks_by_priority(  # noqa: C901
-    path: Path, file: io.BufferedReader, file_size: int
-) -> List[ValidChunk]:
-    all_chunks = []
-
-    for priority_level, handlers in enumerate(_ALL_MODULES_BY_PRIORITY, start=1):
-        logger.info("Starting priority level", priority_level=noformat(priority_level))
-        yara_results = search_chunks(handlers, path)
-
-        if yara_results:
-            logger.info("Found YARA results", count=noformat(len(yara_results)))
-
-        for result in yara_results:
-            handler = result.handler
-            match = result.match
-            sorted_matches = sorted(match.strings, key=itemgetter(0))
-            for offset, identifier, string_data in sorted_matches:
-                real_offset = offset + handler.YARA_MATCH_OFFSET
-
-                if any(chunk.contains_offset(real_offset) for chunk in all_chunks):
-                    continue
-
-                logger.info(
-                    "Calculating chunk for YARA match",
-                    start_offset=offset,
-                    real_offset=real_offset,
-                    identifier=identifier,
-                )
-                limited_reader = LimitedStartReader(file, real_offset)
-
-                try:
-                    chunk = handler.calculate_chunk(limited_reader, real_offset)
-                except Exception as exc:
-                    exit_code_var.set(1)
-                    logger.error(
-                        "Unhandled Exception during chunk calculation", exc_info=exc
-                    )
-                    continue
-
-                # We found some random bytes this handler couldn't parse
-                if chunk is None:
-                    continue
-
-                if chunk.end_offset > file_size or chunk.start_offset < 0:
-                    exit_code_var.set(1)
-                    logger.error("Chunk overflows file", chunk=chunk)
-                    continue
-
-                chunk.handler = handler
-                logger.info("Found valid chunk", chunk=chunk, handler=handler.NAME)
-                all_chunks.append(chunk)
-
-    return all_chunks
-
-
-def remove_inner_chunks(chunks: List[ValidChunk]) -> List[ValidChunk]:
-    """Remove all chunks from the list which are within another bigger chunks."""
-    if not chunks:
-        return []
-
-    chunks_by_size = sorted(chunks, key=attrgetter("size"), reverse=True)
-    outer_chunks = [chunks_by_size[0]]
-    for chunk in chunks_by_size[1:]:
-        if not any(outer.contains(chunk) for outer in outer_chunks):
-            outer_chunks.append(chunk)
-
-    outer_count = len(outer_chunks)
-    removed_count = len(chunks) - outer_count
-    logger.info(
-        "Removed inner chunks",
-        outer_chunk_count=outer_count,
-        removed_inner_chunk_count=removed_count,
-    )
-    return outer_chunks
-
-
-def calculate_unknown_chunks(
-    chunks: List[ValidChunk], file_size: int
-) -> List[UnknownChunk]:
-    """Calculate the empty gaps between chunks."""
-    if not chunks or file_size == 0:
-        return []
-
-    sorted_by_offset = sorted(chunks, key=attrgetter("start_offset"))
-
-    unknown_chunks = []
-
-    first = sorted_by_offset[0]
-    if first.start_offset != 0:
-        unknown_chunk = UnknownChunk(0, first.start_offset)
-        unknown_chunks.append(unknown_chunk)
-
-    for chunk, next_chunk in pairwise(sorted_by_offset):
-        diff = next_chunk.start_offset - chunk.end_offset
-        if diff != 0:
-            unknown_chunk = UnknownChunk(
-                start_offset=chunk.end_offset,
-                end_offset=next_chunk.start_offset,
-            )
-            unknown_chunks.append(unknown_chunk)
-
-    last = sorted_by_offset[-1]
-    if last.end_offset < file_size:
-        unknown_chunk = UnknownChunk(
-            start_offset=last.end_offset,
-            end_offset=file_size,
-        )
-        unknown_chunks.append(unknown_chunk)
-
-    return unknown_chunks

From 29d6d17b3303941ac4482266217c91da0f073e06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kiss=20Gy=C3=B6rgy?=
Date: Mon, 6 Dec 2021 16:49:13 +0100
Subject: [PATCH 2/5] Refactored handler discovery and loading

Renamed the handler list to "ALL_HANDLERS_BY_PRIORITY" and made it
public, as we import and use it in multiple places. This is the
official place to register new handlers.

Load handlers lazily and cache the instantiation.
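The caching works because the per-priority handler tuples are hashable,
so lru_cache can key the instantiation on them. A minimal, self-contained
sketch of the pattern (the Handler class below is a stub standing in for
unblob.models.Handler, which is not shown in this diff):

    from functools import lru_cache
    from typing import Dict, Tuple, Type


    class Handler:
        # Stub standing in for unblob.models.Handler, for this sketch only.
        NAME = "stub"


    @lru_cache
    def make_handler_map(handler_classes: Tuple[Type[Handler], ...]) -> Dict[str, Handler]:
        # Tuples are hashable, so lru_cache can key on them: calling this
        # twice with the same tuple returns the very same dict of instances.
        return {h.NAME: h() for h in handler_classes}


    assert make_handler_map((Handler,)) is make_handler_map((Handler,))

This is also why test_make_handler_map_instances_are_cached below can
assert instance identity ("is") across two calls.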
---
 tests/test_finder.py        | 15 ++++++++++++++-
 tests/test_handlers.py      | 10 ++++++----
 unblob/finder.py            | 22 ++++++++++++++------
 unblob/handlers/__init__.py | 17 +++++------------
 4 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/tests/test_finder.py b/tests/test_finder.py
index cec535ba49..369f5c8135 100644
--- a/tests/test_finder.py
+++ b/tests/test_finder.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from unblob.finder import make_yara_rules, search_yara_patterns
+from unblob.finder import make_handler_map, make_yara_rules, search_yara_patterns
 from unblob.models import Handler
 
 
@@ -58,3 +58,16 @@ def test_search_yara_patterns(tmp_path: Path):
 
     assert result2.handler is handler2
     assert result2.match.strings == [(10, "$tar_magic", b"ustar")]
+
+
+def test_make_handler_map():
+    handler_map = make_handler_map(tuple([TestHandler1, TestHandler2]))
+    assert isinstance(handler_map["handler1"], TestHandler1)
+    assert isinstance(handler_map["handler2"], TestHandler2)
+
+
+def test_make_handler_map_instances_are_cached():
+    handler_map1 = make_handler_map(tuple([TestHandler1, TestHandler2]))
+    handler_map2 = make_handler_map(tuple([TestHandler1, TestHandler2]))
+    assert handler_map1["handler1"] is handler_map2["handler1"]
+    assert handler_map1["handler2"] is handler_map2["handler2"]
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index d7e7c562e0..bd3aa495bf 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -11,10 +11,12 @@
 import shlex
 import subprocess
 from pathlib import Path
+from typing import Type
 
 import pytest
 
 from unblob import handlers
+from unblob.models import Handler
 from unblob.processing import DEFAULT_DEPTH, process_file
 
 TEST_DATA_PATH = Path(__file__).parent / "integration"
@@ -68,12 +70,12 @@ def test_all_handlers(input_dir: Path, output_dir: Path, tmp_path: Path):
     "handler",
     (
         pytest.param(handler, id=handler.NAME)
-        for handler_map in handlers._ALL_MODULES_BY_PRIORITY
-        for handler in handler_map.values()
+        for handlers_in_priority in handlers.ALL_HANDLERS_BY_PRIORITY
+        for handler in handlers_in_priority
     ),
 )
-def test_missing_handlers_integrations_tests(handler):
-    handler_module_path = Path(inspect.getfile(handler.__class__))
+def test_missing_handlers_integrations_tests(handler: Type[Handler]):
+    handler_module_path = Path(inspect.getfile(handler))
     handler_test_path = handler_module_path.relative_to(
         HANDLERS_PACKAGE_PATH
     ).with_suffix("")
diff --git a/unblob/finder.py b/unblob/finder.py
index c98623ba20..009b91231d 100644
--- a/unblob/finder.py
+++ b/unblob/finder.py
@@ -6,13 +6,13 @@
 from functools import lru_cache
 from operator import itemgetter
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Dict, List, Tuple, Type
 
 import yara
 from structlog import get_logger
 
 from .file_utils import LimitedStartReader
-from .handlers import _ALL_MODULES_BY_PRIORITY
+from .handlers import ALL_HANDLERS_BY_PRIORITY
 from .logging import noformat
 from .models import Handler, ValidChunk, YaraMatchResult
 from .state import exit_code_var
@@ -38,10 +38,11 @@ def search_chunks_by_priority(  # noqa: C901
     """
     all_chunks = []
 
-    for priority_level, handlers in enumerate(_ALL_MODULES_BY_PRIORITY, start=1):
+    for priority_level, handler_classes in enumerate(ALL_HANDLERS_BY_PRIORITY, start=1):
         logger.info("Starting priority level", priority_level=noformat(priority_level))
-        yara_rules = make_yara_rules(tuple(handlers.values()))
-        yara_results = search_yara_patterns(yara_rules, handlers, path)
+        yara_rules = make_yara_rules(handler_classes)
+        handler_map = make_handler_map(handler_classes)
+        yara_results = search_yara_patterns(yara_rules, handler_map, path)
 
         for result in yara_results:
             handler, match = result.handler, result.match
@@ -90,7 +91,7 @@ def search_chunks_by_priority(  # noqa: C901
 
 
 @lru_cache
-def make_yara_rules(handlers: Tuple[Handler, ...]):
+def make_yara_rules(handlers: Tuple[Type[Handler], ...]):
     """Make yara.Rules by concatenating all handlers' YARA rules and compiling them."""
     all_yara_rules = "\n".join(
         _YARA_RULE_TEMPLATE.format(NAME=h.NAME, YARA_RULE=h.YARA_RULE.strip())
@@ -101,8 +102,13 @@ def make_yara_rules(handlers: Tuple[Handler, ...]):
     return compiled_rules
 
 
+@lru_cache
+def make_handler_map(handler_classes: Tuple[Type[Handler], ...]) -> Dict[str, Handler]:
+    return {h.NAME: h() for h in handler_classes}
+
+
 def search_yara_patterns(
-    yara_rules: yara.Rules, handlers: Dict[str, Handler], full_path: Path
+    yara_rules: yara.Rules, handler_map: Dict[str, Handler], full_path: Path
 ) -> List[YaraMatchResult]:
     """Search with the compiled YARA rules and identify the handler which defined the rule."""
     # YARA uses a memory mapped file internally when given a path
@@ -110,7 +116,7 @@ def search_yara_patterns(
 
     yara_results = []
     for match in yara_matches:
-        handler = handlers[match.rule]
+        handler = handler_map[match.rule]
         yara_res = YaraMatchResult(handler=handler, match=match)
         yara_results.append(yara_res)
 
diff --git a/unblob/handlers/__init__.py b/unblob/handlers/__init__.py
index d3054c0d43..7a52e4af49 100644
--- a/unblob/handlers/__init__.py
+++ b/unblob/handlers/__init__.py
@@ -1,17 +1,12 @@
-from typing import Dict, List, Type
+from typing import List, Tuple, Type
 
 from ..models import Handler
 from .archive import ar, arc, arj, cab, cpio, dmg, rar, sevenzip, tar, zip
 from .compression import lzo
 from .filesystem import cramfs, fat, iso9660, squashfs, ubi
 
-
-def _make_handler_map(*handlers: Type[Handler]) -> Dict[str, Handler]:
-    return {h.NAME: h() for h in handlers}
-
-
-_ALL_MODULES_BY_PRIORITY: List[Dict[str, Handler]] = [
-    _make_handler_map(
+ALL_HANDLERS_BY_PRIORITY: List[Tuple[Type[Handler], ...]] = [
+    (
         cramfs.CramFSHandler,
         fat.FATHandler,
         squashfs.SquashFSv3Handler,
@@ -19,7 +14,7 @@ def _make_handler_map(*handlers: Type[Handler]) -> Dict[str, Handler]:
         ubi.UBIHandler,
         ubi.UBIFSHandler,
     ),
-    _make_handler_map(
+    (
         ar.ARHandler,
         arc.ARCHandler,
         arj.ARJHandler,
@@ -35,7 +30,5 @@ def _make_handler_map(*handlers: Type[Handler]) -> Dict[str, Handler]:
         dmg.DMGHandler,
         iso9660.ISO9660FSHandler,
     ),
-    _make_handler_map(
-        lzo.LZOHandler,
-    ),
+    (lzo.LZOHandler,),
 ]

From 4eb1cb7d84ec3ea2bbe4dc36b849219c4d3b425c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gy=C3=B6rgy=20Kiss?=
Date: Mon, 6 Dec 2021 20:10:00 +0100
Subject: [PATCH 3/5] Don't format count numbers as hex

It's more natural to show those in decimal.
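Note: the noformat wrapper comes from unblob/logging.py, which this
series doesn't touch, so its implementation is not shown here. The idea
is roughly the following sketch; the renderer below is an assumption
for illustration only, not the actual unblob code:

    class noformat:
        # Hypothetical marker wrapper; the real one lives in unblob.logging.
        def __init__(self, value):
            self.value = value


    def render_value(value):
        # A renderer along these lines hex-formats plain ints (handy for
        # offsets) but leaves wrapped values alone, keeping counts decimal.
        if isinstance(value, noformat):
            return str(value.value)
        if isinstance(value, int):
            return hex(value)
        return str(value)


    assert render_value(42) == "0x2a"
    assert render_value(noformat(42)) == "42"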
---
 unblob/cli.py        | 4 ++--
 unblob/processing.py | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/unblob/cli.py b/unblob/cli.py
index c9e947113e..c3f3ffbbc9 100644
--- a/unblob/cli.py
+++ b/unblob/cli.py
@@ -6,7 +6,7 @@
 import click
 from structlog import get_logger
 
-from .logging import configure_logger
+from .logging import configure_logger, noformat
 from .processing import DEFAULT_DEPTH, process_file
 from .state import exit_code_var
 
@@ -37,7 +37,7 @@
 @click.option("-v", "--verbose", is_flag=True, help="Verbose mode, enable debug logs.")
 def cli(files: Tuple[Path], extract_root: Path, depth: int, verbose: bool):
     configure_logger(verbose, extract_root)
-    logger.info("Start processing files", count=len(files))
+    logger.info("Start processing files", count=noformat(len(files)))
     for path in files:
         root = path if path.is_dir() else path.parent
         process_file(root, path, extract_root, max_depth=depth)
diff --git a/unblob/processing.py b/unblob/processing.py
index 5a195db471..3f19865aa7 100644
--- a/unblob/processing.py
+++ b/unblob/processing.py
@@ -8,6 +8,7 @@
 from .extractor import carve_unknown_chunks, extract_valid_chunks, make_extract_dir
 from .finder import search_chunks_by_priority
 from .iter_utils import pairwise
+from .logging import noformat
 from .models import UnknownChunk, ValidChunk
 
 logger = get_logger()
 
@@ -78,8 +79,8 @@ def remove_inner_chunks(chunks: List[ValidChunk]) -> List[ValidChunk]:
     removed_count = len(chunks) - outer_count
     logger.info(
         "Removed inner chunks",
-        outer_chunk_count=outer_count,
-        removed_inner_chunk_count=removed_count,
+        outer_chunk_count=noformat(outer_count),
+        removed_inner_chunk_count=noformat(removed_count),
     )
     return outer_chunks

From bea4da286aa03d002b6a20d16653561ef8fb621b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kiss=20Gy=C3=B6rgy?=
Date: Mon, 6 Dec 2021 19:29:03 +0100
Subject: [PATCH 4/5] Increased coverage to 90%

Wrote new unit tests during the refactoring, so coverage can be
increased. Put the configuration in pyproject.toml, so everyone can run
it with the same configuration.
---
 .github/workflows/tests.yml | 3 +--
 pyproject.toml              | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 6432a4851b..be2d8ff69e 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -37,5 +37,4 @@ jobs:
         run: git lfs pull
 
       - name: Run pytest
-        # TODO: Dummy coverage (branch: 0%) is needed for the pass rate. Increase this
-        run: poetry run pytest --cov=. --cov-branch --cov-fail-under=0
+        run: poetry run pytest
diff --git a/pyproject.toml b/pyproject.toml
index 7050975fb0..13d6bc2197 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,9 @@ profile = "black"
 [tool.poetry.scripts]
 unblob = "unblob.cli:main"
 
+[tool.pytest.ini_options]
+addopts = "--cov=. --cov-branch --cov-fail-under=90"
+
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"

From 39004cd740e63cb29652bed19d9ab78240ce7e3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kiss=20Gy=C3=B6rgy?=
Date: Tue, 7 Dec 2021 11:50:20 +0100
Subject: [PATCH 5/5] Remove unused ARJ exception, reword error messages

"reason" reads better than "message".
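The handler below leans on one small pattern: each ARJError subclass
keeps its user-facing explanation in its docstring, and the except
block logs exc.__doc__ as the reason. In isolation (ARJDemoError is a
made-up subclass for illustration, not part of the patch):

    class ARJError(Exception):
        pass


    class ARJDemoError(ARJError):
        """Hypothetical subclass; its docstring is the logged reason."""


    try:
        raise ARJDemoError
    except ARJError as exc:
        # exc.__doc__ resolves to the subclass docstring, so every
        # subclass documents its failure reason exactly once.
        print("Invalid ARJ file", "reason:", exc.__doc__)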
--- unblob/handlers/archive/arj.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/unblob/handlers/archive/arj.py b/unblob/handlers/archive/arj.py index 1b54419f27..1d9fcdd670 100644 --- a/unblob/handlers/archive/arj.py +++ b/unblob/handlers/archive/arj.py @@ -13,17 +13,13 @@ class ARJError(Exception): class ARJNullFile(ARJError): - """Raised for zero-sized ARJ files.""" + """Zero-sized ARJ.""" class ARJExtendedHeader(ARJError): """Main ARJ header contains extended_header, which we don't handle.""" -class ARJFileLengthError(ARJError): - """This is a zero-sized ARJ.""" - - class ARJHandler(StructHandler): NAME = "arj" @@ -150,11 +146,11 @@ def calculate_chunk( self._read_arj_main_header(file, start_offset) end_of_arj = self._read_arj_files(file) except ARJError as exc: - logger.warning("Invalid ARJ file", message=exc.__doc__) + logger.warning("Invalid ARJ file", reason=exc.__doc__) return except EOFError: logger.warning( - "Invalid ARJ file", message="File ends before ARJ file resolves." + "Invalid ARJ file", reason="File ends before ARJ file resolves." ) return
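
With the series applied, finder.py and processing.py form the public
seam between chunk search and extraction. A minimal driver using only
the signatures shown in these patches ("firmware.bin" is a placeholder
input file, not from the diffs):

    from pathlib import Path

    from unblob.finder import search_chunks_by_priority
    from unblob.processing import calculate_unknown_chunks, remove_inner_chunks

    path = Path("firmware.bin")  # placeholder; any file to scan
    file_size = path.stat().st_size

    with path.open("rb") as file:
        all_chunks = search_chunks_by_priority(path, file, file_size)

    outer_chunks = remove_inner_chunks(all_chunks)
    unknown_chunks = calculate_unknown_chunks(outer_chunks, file_size)

This roughly mirrors what process_file does internally before carving
the unknown chunks and extracting the valid ones.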