Merge pull request #191 from IoT-Inspector/181-remove-recursion
Rework recursive process_file core calls
kissgyorgy authored Jan 27, 2022
2 parents 725b94e + 5dd6e6a commit b22ac58
Showing 8 changed files with 201 additions and 54 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -46,6 +46,7 @@ addopts = "--cov=unblob --cov=tests --cov-branch --cov-fail-under=90"
[tool.vulture]
paths = ["unblob/"]
exclude = ["unblob/_py/"]
ignore_names = ["breakpointhook"]

[build-system]
requires = ["poetry-core>=1.0.0"]
4 changes: 4 additions & 0 deletions tests/conftest.py
@@ -1,6 +1,7 @@
from pathlib import Path

import pytest
from pytest_cov.embed import cleanup_on_sigterm

from unblob.handlers import Handler
from unblob.logging import configure_logger
@@ -10,6 +11,9 @@
def configure_logging():
configure_logger(verbose=True, extract_root=Path(""))

# https://pytest-cov.readthedocs.io/en/latest/subprocess-support.html#if-you-use-multiprocessing-process
cleanup_on_sigterm()


class TestHandler(Handler):
NAME = "test_handler"
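The cleanup_on_sigterm() call follows pytest-cov's documented subprocess-support pattern: without it, coverage collected in a forked worker process can be lost when the parent terminates the child. A minimal sketch of that pattern, not part of this change (the worker function is illustrative):

# Illustrative only: cleanup_on_sigterm() installs a SIGTERM handler in the
# parent; forked children inherit it and save their coverage data when killed.
import multiprocessing

from pytest_cov.embed import cleanup_on_sigterm

cleanup_on_sigterm()


def worker():
    # ... work that should show up in the coverage report ...
    pass


if __name__ == "__main__":
    proc = multiprocessing.Process(target=worker)
    proc.start()
    proc.terminate()  # the inherited handler flushes the child's coverage data
    proc.join()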
61 changes: 48 additions & 13 deletions tests/test_cli.py
@@ -7,7 +7,7 @@
from conftest import TestHandler

import unblob.cli
from unblob.processing import DEFAULT_DEPTH
from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM


class ExistingCommandHandler(TestHandler):
@@ -52,11 +52,31 @@ def test_show_external_dependencies_not_exists(monkeypatch):
(
pytest.param(["--help"], id="alone"),
pytest.param(
["--verbose", "--extract-dir", "unblob", "--depth", "2", "--help", "tests"],
[
"--verbose",
"--extract-dir",
"unblob",
"--depth",
"2",
"--process-num",
"2",
"--help",
"tests",
],
id="eager_1",
),
pytest.param(
["--verbose", "--extract-dir", "unblob", "--depth", "2", "tests", "--help"],
[
"--verbose",
"--extract-dir",
"unblob",
"--depth",
"2",
"--process-num",
"2",
"tests",
"--help",
],
id="eager_2",
),
),
@@ -78,9 +98,19 @@ def test_help(params):
pytest.param(["--extract-dir", "unblob"], id="extract-dir"),
pytest.param(["-d", "2"], id="d"),
pytest.param(["--depth", "2"], id="depth"),
pytest.param(["-p", "2"], id="p"),
pytest.param(["--process-num", "2"], id="process-num"),
pytest.param(
["--verbose", "--extract-dir", "unblob", "--depth", "2"],
id="verbose+extract-dir+depth",
[
"--verbose",
"--extract-dir",
"unblob",
"--depth",
"2",
"--process-num",
"2",
],
id="verbose+extract-dir+depth+process-num",
),
),
)
@@ -113,18 +143,23 @@ def test_empty_dir_as_file(tmp_path: Path):


@pytest.mark.parametrize(
"params, expected_depth, expected_entropy_depth, expected_verbosity",
"params, expected_depth, expected_entropy_depth, expected_process_num, expected_verbosity",
(
pytest.param([], DEFAULT_DEPTH, 1, False, id="empty"),
pytest.param(["--verbose"], DEFAULT_DEPTH, 1, True, id="verbose"),
pytest.param(["--depth", "2"], 2, 1, False, id="depth"),
pytest.param(["--verbose", "--depth", "2"], 2, 1, True, id="verbose+depth"),
pytest.param([], DEFAULT_DEPTH, 1, DEFAULT_PROCESS_NUM, False, id="empty"),
pytest.param(
["--verbose"], DEFAULT_DEPTH, 1, DEFAULT_PROCESS_NUM, True, id="verbose"
),
pytest.param(["--depth", "2"], 2, 1, DEFAULT_PROCESS_NUM, False, id="depth"),
pytest.param(
["--process-num", "2"], DEFAULT_DEPTH, 1, 2, False, id="process-num"
),
),
)
def test_archive_success(
params,
expected_depth: int,
expected_entropy_depth: int,
expected_process_num: int,
expected_verbosity: bool,
tmp_path: Path,
):
@@ -148,12 +183,12 @@ def test_archive_success(
assert "error" not in result.output
assert "warning" not in result.output
process_file_mock.assert_called_once_with(
in_path,
in_path,
tmp_path,
max_depth=expected_depth,
entropy_depth=expected_entropy_depth,
verbose=expected_verbosity,
process_num=expected_process_num,
)
logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path)

@@ -186,19 +221,19 @@ def test_archive_multiple_files(tmp_path: Path):
assert process_file_mock.call_count == 2
assert process_file_mock.call_args_list == [
mock.call(
in_path_1,
in_path_1,
tmp_path,
max_depth=DEFAULT_DEPTH,
entropy_depth=1,
verbose=False,
process_num=DEFAULT_PROCESS_NUM,
),
mock.call(
in_path_2,
in_path_2,
tmp_path,
max_depth=DEFAULT_DEPTH,
entropy_depth=1,
verbose=False,
process_num=DEFAULT_PROCESS_NUM,
),
]
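Read together, these mock assertions suggest a process_file signature along the following lines. This is an inference only; the actual definition lives in unblob/processing.py, whose diff is not loaded on this page, and the default values below are guesses.

# Inferred sketch only -- defaults are guesses based on the updated tests.
from pathlib import Path

from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM


def process_file(
    path: Path,
    extract_root: Path,
    max_depth: int = DEFAULT_DEPTH,
    entropy_depth: int = 1,
    verbose: bool = False,
    process_num: int = DEFAULT_PROCESS_NUM,
):
    ...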
4 changes: 1 addition & 3 deletions tests/test_handlers.py
@@ -17,7 +17,7 @@

from unblob import handlers
from unblob.models import Handler
from unblob.processing import DEFAULT_DEPTH, process_file
from unblob.processing import process_file

TEST_DATA_PATH = Path(__file__).parent / "integration"
TEST_INPUT_DIRS = list(TEST_DATA_PATH.glob("**/__input__"))
@@ -40,10 +40,8 @@ def test_all_handlers(input_dir: Path, output_dir: Path, tmp_path: Path):
), f"Integration test input dir should contain at least 1 file: {input_dir}"

process_file(
root=input_dir,
path=input_dir,
extract_root=tmp_path,
max_depth=DEFAULT_DEPTH,
entropy_depth=0,
)

15 changes: 12 additions & 3 deletions unblob/cli.py
@@ -9,7 +9,7 @@
from .dependencies import get_dependencies, pretty_format_dependencies
from .handlers import ALL_HANDLERS
from .logging import configure_logger, noformat
from .processing import DEFAULT_DEPTH, process_file
from .processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, process_file
from .state import exit_code_var

logger = get_logger()
@@ -76,6 +76,15 @@ def get_help_text():
"1 means input files only, 0 turns it off."
),
)
@click.option(
"-p",
"--process-num",
"process_num",
type=click.IntRange(1),
default=DEFAULT_PROCESS_NUM,
help="Number of worker processes to process files parallelly.",
show_default=True,
)
@click.option("-v", "--verbose", is_flag=True, help="Verbose mode, enable debug logs.")
@click.option(
"--show-external-dependencies",
@@ -90,19 +99,19 @@ def cli(
extract_root: Path,
depth: int,
entropy_depth: int,
process_num: int,
verbose: bool,
):
configure_logger(verbose, extract_root)
logger.info("Start processing files", count=noformat(len(files)))
for path in files:
root = path if path.is_dir() else path.parent
process_file(
root,
path,
extract_root,
max_depth=depth,
entropy_depth=entropy_depth,
verbose=verbose,
process_num=process_num,
)


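As a usage note, the new flag can be exercised the same way tests/test_cli.py drives the CLI, through click's test runner. The snippet below is illustrative only; "firmware.bin" and "extracted" are placeholder paths.

# Illustrative invocation of the new --process-num option.
from click.testing import CliRunner

from unblob.cli import cli

runner = CliRunner()
result = runner.invoke(
    cli, ["--process-num", "4", "--extract-dir", "extracted", "firmware.bin"]
)
print(result.exit_code, result.output)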
26 changes: 26 additions & 0 deletions unblob/logging.py
@@ -1,4 +1,7 @@
import logging
import pdb
import sys
from os import getpid
from pathlib import Path
from typing import Any

@@ -52,6 +55,13 @@ def convert_type(_logger, _method_name: str, event_dict: structlog.types.EventDi
return convert_type


def add_pid_to_log_message(
_logger, _method_name: str, event_dict: structlog.types.EventDict
):
event_dict["pid"] = getpid()
return event_dict


def configure_logger(verbose: bool, extract_root: Path):
log_level = logging.DEBUG if verbose else logging.INFO
processors = [
@@ -60,6 +70,7 @@ def configure_logger(verbose: bool, extract_root: Path):
key="timestamp", fmt="%Y-%m-%d %H:%M.%S", utc=True
),
pretty_print_types(extract_root),
add_pid_to_log_message,
structlog.processors.UnicodeDecoder(),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
@@ -70,3 +81,18 @@
wrapper_class=structlog.make_filtering_bound_logger(log_level),
processors=processors,
)


class _MultiprocessingPdb(pdb.Pdb):
def interaction(self, *args, **kwargs):
_stdin = sys.stdin
try:
sys.stdin = open("/dev/stdin")
pdb.Pdb.interaction(self, *args, **kwargs)
finally:
sys.stdin = _stdin


def multiprocessing_breakpoint():
"""Call this in Process forks instead of the builtin `breakpoint` function for debugging with PDB."""
return _MultiprocessingPdb().set_trace(frame=sys._getframe(1))
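The docstring states the intended use: in a child started with multiprocessing, stdin is replaced, so the builtin breakpoint() cannot read input; the helper reopens /dev/stdin before handing control to PDB. A minimal sketch, with a hypothetical worker function:

# Hypothetical worker: call the helper instead of breakpoint() inside forks.
import multiprocessing

from unblob.logging import multiprocessing_breakpoint


def extract_worker(path: str):
    multiprocessing_breakpoint()  # drops into PDB attached to the terminal
    # ... continue processing `path` ...


if __name__ == "__main__":
    proc = multiprocessing.Process(target=extract_worker, args=("firmware.bin",))
    proc.start()
    proc.join()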
16 changes: 16 additions & 0 deletions unblob/models.py
@@ -1,5 +1,6 @@
import abc
import io
from pathlib import Path
from typing import List, Optional

import attr
@@ -16,6 +17,21 @@
#


@attr.define
class Task:
root: Path
path: Path
depth: int


@attr.define
class ProcessingConfig:
extract_root: Path
max_depth: int
entropy_depth: int
verbose: bool


@attr.define
class YaraMatchResult:
"""Results of a YARA match grouped by file types (handlers).
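The new attrs models separate per-file work items (Task) from run-wide settings (ProcessingConfig). How the reworked processing loop consumes them is in unblob/processing.py, which is not part of the loaded diff; the construction example below is speculative and uses only the fields defined above.

# Speculative illustration: builds the new model objects with the fields shown.
from pathlib import Path

from unblob.models import ProcessingConfig, Task

config = ProcessingConfig(
    extract_root=Path("/tmp/unblob-extract"),
    max_depth=10,
    entropy_depth=1,
    verbose=False,
)
first_task = Task(root=Path("firmware"), path=Path("firmware/image.bin"), depth=0)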
(The diff of the remaining changed file did not load on this page.)
