Skip to content

Commit

Permalink
Fix handling + showing the default max depth
Browse files Browse the repository at this point in the history
We designed `max_depth` to be optional in the CLI, with a default value
hardcoded in unblob. It should behave the same way when the `process_file`
function is called directly.

Also show the default value in the CLI help output.
  • Loading branch information
kukovecz committed Jan 27, 2022
1 parent 633731c commit 5fb4f0d
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 16 deletions.
58 changes: 48 additions & 10 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from conftest import TestHandler

import unblob.cli
from unblob.processing import DEFAULT_DEPTH
from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM


class ExistingCommandHandler(TestHandler):
Expand Down Expand Up @@ -52,11 +52,31 @@ def test_show_external_dependencies_not_exists(monkeypatch):
(
pytest.param(["--help"], id="alone"),
pytest.param(
["--verbose", "--extract-dir", "unblob", "--depth", "2", "--help", "tests"],
[
"--verbose",
"--extract-dir",
"unblob",
"--depth",
"2",
"--process-num",
"2",
"--help",
"tests",
],
id="eager_1",
),
pytest.param(
["--verbose", "--extract-dir", "unblob", "--depth", "2", "tests", "--help"],
[
"--verbose",
"--extract-dir",
"unblob",
"--depth",
"2",
"--process-num",
"2",
"tests",
"--help",
],
id="eager_2",
),
),
Expand All @@ -78,9 +98,19 @@ def test_help(params):
pytest.param(["--extract-dir", "unblob"], id="extract-dir"),
pytest.param(["-d", "2"], id="d"),
pytest.param(["--depth", "2"], id="depth"),
pytest.param(["-p", "2"], id="p"),
pytest.param(["--process-num", "2"], id="process-num"),
pytest.param(
["--verbose", "--extract-dir", "unblob", "--depth", "2"],
id="verbose+extract-dir+depth",
[
"--verbose",
"--extract-dir",
"unblob",
"--depth",
"2",
"--process-num",
"2",
],
id="verbose+extract-dir+depth+process-num",
),
),
)
Expand Down Expand Up @@ -113,18 +143,23 @@ def test_empty_dir_as_file(tmp_path: Path):


@pytest.mark.parametrize(
"params, expected_depth, expected_entropy_depth, expected_verbosity",
"params, expected_depth, expected_entropy_depth, expected_process_num, expected_verbosity",
(
pytest.param([], DEFAULT_DEPTH, 1, False, id="empty"),
pytest.param(["--verbose"], DEFAULT_DEPTH, 1, True, id="verbose"),
pytest.param(["--depth", "2"], 2, 1, False, id="depth"),
pytest.param(["--verbose", "--depth", "2"], 2, 1, True, id="verbose+depth"),
pytest.param([], DEFAULT_DEPTH, 1, DEFAULT_PROCESS_NUM, False, id="empty"),
pytest.param(
["--verbose"], DEFAULT_DEPTH, 1, DEFAULT_PROCESS_NUM, True, id="verbose"
),
pytest.param(["--depth", "2"], 2, 1, DEFAULT_PROCESS_NUM, False, id="depth"),
pytest.param(
["--process-num", "2"], DEFAULT_DEPTH, 1, 2, False, id="process-num"
),
),
)
def test_archive_success(
params,
expected_depth: int,
expected_entropy_depth: int,
expected_process_num: int,
expected_verbosity: bool,
tmp_path: Path,
):
Expand Down Expand Up @@ -153,6 +188,7 @@ def test_archive_success(
max_depth=expected_depth,
entropy_depth=expected_entropy_depth,
verbose=expected_verbosity,
process_num=expected_process_num,
)
logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path)

Expand Down Expand Up @@ -190,12 +226,14 @@ def test_archive_multiple_files(tmp_path: Path):
max_depth=DEFAULT_DEPTH,
entropy_depth=1,
verbose=False,
process_num=DEFAULT_PROCESS_NUM,
),
mock.call(
in_path_2,
tmp_path,
max_depth=DEFAULT_DEPTH,
entropy_depth=1,
verbose=False,
process_num=DEFAULT_PROCESS_NUM,
),
]
3 changes: 1 addition & 2 deletions tests/test_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from unblob import handlers
from unblob.models import Handler
from unblob.processing import DEFAULT_DEPTH, process_file
from unblob.processing import process_file

TEST_DATA_PATH = Path(__file__).parent / "integration"
TEST_INPUT_DIRS = list(TEST_DATA_PATH.glob("**/__input__"))
Expand All @@ -42,7 +42,6 @@ def test_all_handlers(input_dir: Path, output_dir: Path, tmp_path: Path):
process_file(
path=input_dir,
extract_root=tmp_path,
max_depth=DEFAULT_DEPTH,
entropy_depth=0,
)

Expand Down
13 changes: 12 additions & 1 deletion unblob/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .dependencies import get_dependencies, pretty_format_dependencies
from .handlers import ALL_HANDLERS
from .logging import configure_logger, noformat
from .processing import DEFAULT_DEPTH, process_file
from .processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, process_file
from .state import exit_code_var

logger = get_logger()
Expand Down Expand Up @@ -76,6 +76,15 @@ def get_help_text():
"1 means input files only, 0 turns it off."
),
)
@click.option(
"-p",
"--process-num",
"process_num",
type=click.IntRange(1),
default=DEFAULT_PROCESS_NUM,
help="Number of worker processes to process files parallelly.",
show_default=True,
)
@click.option("-v", "--verbose", is_flag=True, help="Verbose mode, enable debug logs.")
@click.option(
"--show-external-dependencies",
Expand All @@ -90,6 +99,7 @@ def cli(
extract_root: Path,
depth: int,
entropy_depth: int,
process_num: int,
verbose: bool,
):
configure_logger(verbose, extract_root)
Expand All @@ -101,6 +111,7 @@ def cli(
max_depth=depth,
entropy_depth=entropy_depth,
verbose=verbose,
process_num=process_num,
)


Expand Down
13 changes: 10 additions & 3 deletions unblob/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import statistics
from operator import attrgetter
from pathlib import Path
from typing import List
from typing import List, Optional

import plotext as plt
from structlog import get_logger
Expand All @@ -19,16 +19,18 @@
logger = get_logger()

DEFAULT_DEPTH = 10
DEFAULT_PROCESS_NUM = multiprocessing.cpu_count()


# TODO: this function became too complex when adding entropy calculation, but
# it will be simplified in a separate branch, because the refactor is very complex
def process_file( # noqa: C901
path: Path,
extract_root: Path,
max_depth: int,
entropy_depth: int,
verbose: bool = False,
max_depth: Optional[int] = DEFAULT_DEPTH,
process_num: Optional[int] = DEFAULT_PROCESS_NUM,
):

root = path if path.is_dir() else path.parent
Expand All @@ -41,14 +43,19 @@ def process_file( # noqa: C901
)
)

if max_depth is None:
max_depth = DEFAULT_DEPTH

if process_num is None:
process_num = DEFAULT_PROCESS_NUM

config = ProcessingConfig(
extract_root=extract_root,
max_depth=max_depth,
entropy_depth=entropy_depth,
verbose=verbose,
)

process_num = multiprocessing.cpu_count()
worker_processes = [
multiprocessing.Process(
target=_process_task_queue,
Expand Down

0 comments on commit 5fb4f0d

Please sign in to comment.