From 5fb4f0d6d2a402234535f880d1eb9f227dbad4b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Kukovecz?= Date: Fri, 21 Jan 2022 11:52:24 +0100 Subject: [PATCH] Fix handling + showing the default max depth We designed max_depth to be optional in the CLI, having a default value hardcoded in unblob. It should work the same way when directly calling the `process_file` function. Also show the default value in the CLI help. In addition, add a `-p`/`--process-num` CLI option (defaulting to the CPU count) that is passed through to `process_file` as `process_num`. --- tests/test_cli.py | 58 ++++++++++++++++++++++++++++++++++-------- tests/test_handlers.py | 3 +-- unblob/cli.py | 13 +++++++++- unblob/processing.py | 13 +++++++--- 4 files changed, 71 insertions(+), 16 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index a78ef352c4..ff2121fdf7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,7 +7,7 @@ from conftest import TestHandler import unblob.cli -from unblob.processing import DEFAULT_DEPTH +from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM class ExistingCommandHandler(TestHandler): @@ -52,11 +52,31 @@ def test_show_external_dependencies_not_exists(monkeypatch): ( pytest.param(["--help"], id="alone"), pytest.param( - ["--verbose", "--extract-dir", "unblob", "--depth", "2", "--help", "tests"], + [ + "--verbose", + "--extract-dir", + "unblob", + "--depth", + "2", + "--process-num", + "2", + "--help", + "tests", + ], id="eager_1", ), pytest.param( - ["--verbose", "--extract-dir", "unblob", "--depth", "2", "tests", "--help"], + [ + "--verbose", + "--extract-dir", + "unblob", + "--depth", + "2", + "--process-num", + "2", + "tests", + "--help", + ], id="eager_2", ), ), @@ -78,9 +98,19 @@ def test_help(params): pytest.param(["--extract-dir", "unblob"], id="extract-dir"), pytest.param(["-d", "2"], id="d"), pytest.param(["--depth", "2"], id="depth"), + pytest.param(["-p", "2"], id="p"), + pytest.param(["--process-num", "2"], id="process-num"), pytest.param( - ["--verbose", "--extract-dir", "unblob", "--depth", "2"], - id="verbose+extract-dir+depth", + [ +
"--verbose", + "--extract-dir", + "unblob", + "--depth", + "2", + "--process-num", + "2", + ], + id="verbose+extract-dir+depth+process-num", ), ), ) @@ -113,18 +143,23 @@ def test_empty_dir_as_file(tmp_path: Path): @pytest.mark.parametrize( - "params, expected_depth, expected_entropy_depth, expected_verbosity", + "params, expected_depth, expected_entropy_depth, expected_process_num, expected_verbosity", ( - pytest.param([], DEFAULT_DEPTH, 1, False, id="empty"), - pytest.param(["--verbose"], DEFAULT_DEPTH, 1, True, id="verbose"), - pytest.param(["--depth", "2"], 2, 1, False, id="depth"), - pytest.param(["--verbose", "--depth", "2"], 2, 1, True, id="verbose+depth"), + pytest.param([], DEFAULT_DEPTH, 1, DEFAULT_PROCESS_NUM, False, id="empty"), + pytest.param( + ["--verbose"], DEFAULT_DEPTH, 1, DEFAULT_PROCESS_NUM, True, id="verbose" + ), + pytest.param(["--depth", "2"], 2, 1, DEFAULT_PROCESS_NUM, False, id="depth"), + pytest.param( + ["--process-num", "2"], DEFAULT_DEPTH, 1, 2, False, id="process-num" + ), ), ) def test_archive_success( params, expected_depth: int, expected_entropy_depth: int, + expected_process_num: int, expected_verbosity: bool, tmp_path: Path, ): @@ -153,6 +188,7 @@ def test_archive_success( max_depth=expected_depth, entropy_depth=expected_entropy_depth, verbose=expected_verbosity, + process_num=expected_process_num, ) logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path) @@ -190,6 +226,7 @@ def test_archive_multiple_files(tmp_path: Path): max_depth=DEFAULT_DEPTH, entropy_depth=1, verbose=False, + process_num=DEFAULT_PROCESS_NUM, ), mock.call( in_path_2, @@ -197,5 +234,6 @@ def test_archive_multiple_files(tmp_path: Path): max_depth=DEFAULT_DEPTH, entropy_depth=1, verbose=False, + process_num=DEFAULT_PROCESS_NUM, ), ] diff --git a/tests/test_handlers.py b/tests/test_handlers.py index cb8fc70e4e..7125df28b0 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -17,7 +17,7 @@ from unblob import handlers from 
unblob.models import Handler -from unblob.processing import DEFAULT_DEPTH, process_file +from unblob.processing import process_file TEST_DATA_PATH = Path(__file__).parent / "integration" TEST_INPUT_DIRS = list(TEST_DATA_PATH.glob("**/__input__")) @@ -42,7 +42,6 @@ def test_all_handlers(input_dir: Path, output_dir: Path, tmp_path: Path): process_file( path=input_dir, extract_root=tmp_path, - max_depth=DEFAULT_DEPTH, entropy_depth=0, ) diff --git a/unblob/cli.py b/unblob/cli.py index eb0f30fe50..4edf0d8318 100644 --- a/unblob/cli.py +++ b/unblob/cli.py @@ -9,7 +9,7 @@ from .dependencies import get_dependencies, pretty_format_dependencies from .handlers import ALL_HANDLERS from .logging import configure_logger, noformat -from .processing import DEFAULT_DEPTH, process_file +from .processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, process_file from .state import exit_code_var logger = get_logger() @@ -76,6 +76,15 @@ def get_help_text(): "1 means input files only, 0 turns it off." ), ) +@click.option( + "-p", + "--process-num", + "process_num", + type=click.IntRange(1), + default=DEFAULT_PROCESS_NUM, + help="Number of worker processes to process files parallelly.", + show_default=True, +) @click.option("-v", "--verbose", is_flag=True, help="Verbose mode, enable debug logs.") @click.option( "--show-external-dependencies", @@ -90,6 +99,7 @@ def cli( extract_root: Path, depth: int, entropy_depth: int, + process_num: int, verbose: bool, ): configure_logger(verbose, extract_root) @@ -101,6 +111,7 @@ def cli( max_depth=depth, entropy_depth=entropy_depth, verbose=verbose, + process_num=process_num, ) diff --git a/unblob/processing.py b/unblob/processing.py index 4f537f6b28..b16691ea09 100644 --- a/unblob/processing.py +++ b/unblob/processing.py @@ -3,7 +3,7 @@ import statistics from operator import attrgetter from pathlib import Path -from typing import List +from typing import List, Optional import plotext as plt from structlog import get_logger @@ -19,6 +19,7 @@ logger = 
get_logger() DEFAULT_DEPTH = 10 +DEFAULT_PROCESS_NUM = multiprocessing.cpu_count() # TODO: this function became too complex when adding entropy calculation, but @@ -26,9 +27,10 @@ def process_file( # noqa: C901 path: Path, extract_root: Path, - max_depth: int, entropy_depth: int, verbose: bool = False, + max_depth: Optional[int] = DEFAULT_DEPTH, + process_num: Optional[int] = DEFAULT_PROCESS_NUM, ): root = path if path.is_dir() else path.parent @@ -41,6 +43,12 @@ def process_file( # noqa: C901 ) ) + if max_depth is None: + max_depth = DEFAULT_DEPTH + + if process_num is None: + process_num = DEFAULT_PROCESS_NUM + config = ProcessingConfig( extract_root=extract_root, max_depth=max_depth, @@ -48,7 +56,6 @@ def process_file( # noqa: C901 verbose=verbose, ) - process_num = multiprocessing.cpu_count() worker_processes = [ multiprocessing.Process( target=_process_task_queue,