diff --git a/src/verifier/core/reporting.py b/src/verifier/core/reporting.py index 02159de..89287cf 100644 --- a/src/verifier/core/reporting.py +++ b/src/verifier/core/reporting.py @@ -1,5 +1,8 @@ import json +import logging import os +from pathlib import Path +import re import tempfile import zipfile from collections import namedtuple @@ -8,6 +11,7 @@ from hashlib import sha256 import falcon +from keri import help from hio.base import doing from keri import kering from keri.core import coring, Siger, MtrDex @@ -15,12 +19,25 @@ from verifier.core.basing import ReportStats from verifier.core.utils import DigerBuilder +# help.ogler.level = logging.getLevelName("DEBUG") +# logger = help.ogler.getLogger() +logger = help.ogler.getLogger("ReportVerifier", level=logging.DEBUG) + # Report Statuses. Reportage = namedtuple("Reportage", "accepted verified failed") # Referencable report status enumeration ReportStatus = Reportage(accepted="accepted", verified="verified", failed="failed") +AID = "aid" +DIGEST = "digest" +DOC_INFO = "documentInfo" +FILE = "file" +META_INF_DIR = "META-INF" +REPORTS_JSON = "reports.json" +SIGNATURES = "signatures" +SIGS = "sigs" + def setup(app, hby, vdb): """ Set up module endpoints and dependencies @@ -70,6 +87,7 @@ def __init__(self, vdb): vdb (VerifierBaser): verification database environment """ self.vdb = vdb + logger.info("Report status filer initialized") def create(self, aid, dig, filename, typ, stream): """ Create a new file upload with initial Accepted status. 
@@ -94,7 +112,6 @@ def create(self, aid, dig, filename, typ, stream): ) idx = 0 - diger = DigerBuilder.sha256(dig) report = b'' while True: @@ -115,37 +132,27 @@ def create(self, aid, dig, filename, typ, stream): tf.seek(0) with tempfile.TemporaryDirectory() as tempdirname: z = zipfile.ZipFile(tf) - z.extractall(path=tempdirname) - manifest = None - for root, dirs, _ in os.walk(tempdirname): - if "META-INF" not in dirs or 'reports' not in dirs: - continue - metaDir = os.path.join(root, 'META-INF') - name = os.path.join(root, 'META-INF', 'reports.json') - if not os.path.exists(name): - continue - f = open(name, 'r') - manifest = json.load(f) - if "documentInfo" not in manifest: - raise kering.ValidationError("Invalid manifest file in report package, missing " - "'documentInfo") - if manifest is None: - raise kering.ValidationError("No manifest in file, invalid signed report package") - - docInfo = manifest["documentInfo"] - if "signatures" not in docInfo: - raise kering.ValidationError("No signatures found in manifest file") - - signatures = docInfo["signatures"] + signatures, metaDir = FileProcessor.getSignaturesFromZip(zipFile=z, extractDir=tempdirname) for signature in signatures: try: - fullpath = os.path.normpath(os.path.join(metaDir, signature["file"])) - f = open(fullpath, 'rb') + # Use the new function to find the file + fullPath = FileProcessor.find_file_in_dir(metaDir, signature[FILE]) + if(not fullPath): + fullPath = FileProcessor.find_file_in_zip_files(tempdirname, signature[FILE]) + + if not fullPath: + raise kering.ValidationError(f"Didn't find {signature[FILE]} above {metaDir} or in zips") + + f = open(fullPath, 'rb') file_object = f.read() f.close() - tmp_diger = DigerBuilder.sha256(signature["digest"]) + + dig = signature[DIGEST] + + tmp_diger = DigerBuilder.sha256(dig) if not tmp_diger.verify(file_object): - raise kering.ValidationError(f"Invalid digest for file {fullpath}") + raise kering.ValidationError(f"Invalid digest for file {fullPath}") + 
logger.info(f"File {fullPath} w/ digest {dig} has valid digest") except KeyError as e: raise kering.ValidationError(f"Invalid digest, manifest digest missing '{e.args[0]}'") except OSError: @@ -374,71 +381,59 @@ def recur(self, tyme): for diger in self.filer.getAcceptedIter(): try: stats = self.vdb.stats.get(keys=(diger.qb64,)) - print(f"Processing {stats.filename}:\n " + logger.info(f"Processing {stats.filename}:\n " f"\tType={stats.contentType}\n" f"\tSize={stats.size}") + with tempfile.TemporaryFile("w+b") as tf: - for chunk in self.filer.getData(diger.qb64): tf.write(chunk) tf.seek(0) - with tempfile.TemporaryDirectory() as tempdirname: z = zipfile.ZipFile(tf) - z.extractall(path=tempdirname) + signatures, metaDir = FileProcessor.getSignaturesFromZip(zipFile=z, extractDir=tempdirname) + files = [] - manifest = None - for root, dirs, _ in os.walk(tempdirname): - if "META-INF" not in dirs or 'reports' not in dirs: - continue - - metaDir = os.path.join(root, 'META-INF') - name = os.path.join(root, 'META-INF', 'reports.json') - if not os.path.exists(name): - continue - - f = open(name, 'r') - manifest = json.load(f) - if "documentInfo" not in manifest: - raise kering.ValidationError("Invalid manifest file in report package, missing " - "'documentInfo") - reportsDir = os.path.join(root, 'reports') - files = os.listdir(reportsDir) - - if manifest is None: - raise kering.ValidationError("No manifest in file, invalid signed report package") - - docInfo = manifest["documentInfo"] - - if "signatures" not in docInfo: - raise kering.ValidationError("No signatures found in manifest file") - - signatures = docInfo["signatures"] + reports_dir = FileProcessor.find_reports_directory(tempdirname) + if reports_dir: + files = FileProcessor.list_files_in_directory(reports_dir) + logger.info(f"Files in reports directory: {files}") + else: + logger.info("No reports directory found.") + raise kering.ValidationError("No reports directory found during signature processing") + signed = 
[] verfed = [] + for signature in signatures: + logger.info(f"processing signature {signature}") try: - aid = signature["aid"] + aid = signature[AID] # First check to ensure signature is from submitter, otherwise skip if aid != stats.submitter: - print(f"signature from {aid} does not match submitter {stats.submitter}") + logger.info(f"signature from {aid} does not match submitter {stats.submitter}") # Now ensure we know who this AID is and that we have their key state if aid not in self.hby.kevers: raise kering.ValidationError(f"signature from unknown AID {aid}") - dig = signature["digest"] + dig = signature[DIGEST] non_prefixed_dig = DigerBuilder.get_non_prefixed_digest(dig) - file_name = signature["file"] - fullpath = os.path.normpath(os.path.join(metaDir, file_name)) - signed.append(os.path.basename(fullpath)) + file_name = signature[FILE] + fullPath = FileProcessor.find_file_in_dir(metaDir, file_name) + if not fullPath: + fullPath = FileProcessor.find_file_in_zip_files(tempdirname, signature[FILE]) + if not fullPath: + raise kering.ValidationError(f"Didn't find {signature[FILE]} above {metaDir} or in zips") + + signed.append(os.path.basename(fullPath)) kever = self.hby.kevers[aid] - sigers = [Siger(qb64=sig) for sig in signature["sigs"]] + sigers = [Siger(qb64=sig) for sig in signature[SIGS]] if len(sigers) == 0: raise kering.ValidationError(f"missing signatures on {file_name}") @@ -447,7 +442,7 @@ def recur(self, tyme): if not siger.verfer.verify(siger.raw, bytes(non_prefixed_dig, "utf-8")): # verify each sig raise kering.ValidationError(f"signature {siger.index} invalid for {file_name}") - verfed.append(os.path.basename(fullpath)) + verfed.append(os.path.basename(fullPath)) except KeyError as e: raise kering.ValidationError(f"Invalid signature in manifest missing '{e.args[0]}'") @@ -461,15 +456,201 @@ def recur(self, tyme): diff = set(files) - set(verfed) if len(diff) == 0: msg = f"All {len(files)} files in report package have been signed by " \ - f"submitter 
class FileProcessor:
    """
    Stateless helpers for locating the manifest, the signed files and the
    'reports' directory of an extracted signed report package.

    Every method is a static method; the class only serves as a namespace.
    """

    @staticmethod
    def find_reports_directory(start_dir):
        """
        Recursively find the 'reports' directory starting from start_dir.

        Directories already on disk take priority. If none is named 'reports',
        every '*.zip' file below start_dir is inspected; the first archive that
        contains a 'reports' directory is extracted next to itself and the
        search is repeated from there.

        Parameters:
            start_dir (str): The directory to start the search from.

        Returns:
            str: The path to the 'reports' directory if found, else None.
        """
        for root, dirs, _ in os.walk(start_dir):
            if 'reports' in dirs:
                return os.path.join(root, 'reports')

        # If not found, search within zip files in start_dir
        for root, _, files in os.walk(start_dir):
            for file in files:
                if not file.endswith('.zip'):
                    continue
                zip_path = os.path.join(root, file)
                with zipfile.ZipFile(zip_path, 'r') as zip_file:
                    for zip_info in zip_file.infolist():
                        parts = Path(zip_info.filename).parts
                        # Archives written without explicit directory entries
                        # only list files, so also look at the parent
                        # components of file members.
                        dir_parts = parts if zip_info.is_dir() else parts[:-1]
                        if 'reports' in dir_parts:
                            zip_file.extractall(root)
                            return FileProcessor.find_reports_directory(root)
        return None

    @staticmethod
    def find_file_in_zip_files(zipsDir, file_name):
        """
        Look for file_name inside the zip files located directly in zipsDir.
        The first matching archive member is extracted below zipsDir.

        A member matches when its path ends with '<parent dir>/<base name>' of
        file_name on a path-component boundary (only the base name is used when
        file_name has no usable parent directory).

        Parameters:
            zipsDir (str): The directory holding the zip files to search.
            file_name (str): The (possibly relative) name of the file to search for.

        Returns:
            str: The full path to the extracted file.

        Raises:
            kering.ValidationError: If the file is not found in any zip file.
        """
        logger.info(f"Finding file {file_name} in zip files...")

        # Extract the base file name and directory from the file_name
        base_file_name = os.path.basename(file_name)
        file_dir = Path(file_name).parent.name

        # '', '.' and '..' carry no information about the member's directory
        if file_dir in ("", ".", ".."):
            suffix = base_file_name
        else:
            suffix = f"{file_dir}/{base_file_name}"

        # Anchored on both ends so 'report.json.bak' or 'xreports/report.json'
        # cannot be mistaken for the target.
        target_pattern = re.compile(rf'(?:.*/)?{re.escape(suffix)}')

        zip_files = [f for f in os.listdir(zipsDir) if f.endswith('.zip')]
        for zip_file in zip_files:
            with zipfile.ZipFile(os.path.join(zipsDir, zip_file), 'r') as z:
                for zip_content in z.namelist():
                    if not target_pattern.fullmatch(zip_content):
                        continue
                    # extract() returns the sanitized path it actually wrote
                    repPath = z.extract(zip_content, zipsDir)
                    if os.path.exists(repPath):
                        logger.info(f"File {file_name} found in zip, extracted to {repPath}")
                        return repPath

        raise kering.ValidationError(f"File {file_name} not found in any zip files")

    @staticmethod
    def find_file_in_dir(dir, file_name):
        """
        Check if file_name exists relative to dir.

        Parameters:
            dir (str): The directory file_name is resolved against. (The name
                shadows the builtin but is kept for keyword-argument callers.)
            file_name (str): The name of the file, may contain relative
                components such as '../reports/x'.

        Returns:
            str: The normalized full path to the file if it exists, else None.
        """
        fullPath = os.path.normpath(os.path.join(dir, file_name))

        if os.path.exists(fullPath):
            logger.info(f"File {fullPath} found in {dir}")
            return fullPath

        logger.info(f"File {fullPath} not found in {dir}")
        return None

    @staticmethod
    def list_files_in_zip_excluding_report_json(zip_file_path):
        """
        List all members of a zip file excluding 'report.json' files.

        Parameters:
            zip_file_path (str): The path to the zip file.

        Returns:
            list: Member names of the zip file whose base name is not 'report.json'.

        Raises:
            FileNotFoundError: If zip_file_path does not exist.
        """
        if not os.path.exists(zip_file_path):
            raise FileNotFoundError(f"The zip file {zip_file_path} does not exist.")

        with zipfile.ZipFile(zip_file_path, 'r') as zip_file:
            all_files = zip_file.namelist()

        return [file for file in all_files if os.path.basename(file) != 'report.json']

    @staticmethod
    def list_files_excluding_report_json(directory_path):
        """
        List all entries of a directory excluding 'report.json'.

        Parameters:
            directory_path (str): The path to the directory.

        Returns:
            list: Entry names (files and sub-directories, not recursive) in the
                directory other than 'report.json'.

        Raises:
            NotADirectoryError: If directory_path is not a directory.
        """
        if not os.path.isdir(directory_path):
            raise NotADirectoryError(f"The path {directory_path} is not a directory.")

        return [file for file in os.listdir(directory_path) if file != 'report.json']

    @staticmethod
    def list_files_in_directory(directory_path):
        """
        List all entries of a directory excluding 'report.json'.

        Kept as a separate name for existing callers; it has the same contract
        as list_files_excluding_report_json and delegates to it.

        Parameters:
            directory_path (str): The path to the directory.

        Returns:
            list: Entry names in the directory other than 'report.json'.

        Raises:
            NotADirectoryError: If directory_path is not a directory.
        """
        return FileProcessor.list_files_excluding_report_json(directory_path)

    @staticmethod
    def getSignaturesFromZip(zipFile: zipfile.ZipFile, extractDir):
        """
        Extract a report package and read the signatures from its manifest.

        The archive is extracted into extractDir, which is then walked for a
        META-INF directory containing reports.json. If several manifests exist
        the last one visited by the walk is used.

        Parameters:
            zipFile (zipfile.ZipFile): The opened report package.
            extractDir (str): The directory to extract the package into.

        Returns:
            tuple: (signatures, metaDir) where signatures is the value of
                documentInfo.signatures in the manifest and metaDir is the
                META-INF directory that manifest was read from.

        Raises:
            kering.ValidationError: If no manifest is found, the manifest is not
                valid JSON, or it lacks 'documentInfo' or 'signatures'.
        """
        zipFile.extractall(path=extractDir)
        manifest = None
        metaDir = None
        for root, dirs, _ in os.walk(extractDir):
            if META_INF_DIR not in dirs:
                continue
            candidateDir = os.path.join(root, META_INF_DIR)
            name = os.path.join(candidateDir, REPORTS_JSON)
            if not os.path.exists(name):
                continue
            try:
                with open(name, 'r', encoding="utf-8") as f:
                    manifest = json.load(f)
            except ValueError as e:
                # Covers json.JSONDecodeError and UnicodeDecodeError so callers
                # see the ValidationError they already handle.
                raise kering.ValidationError(f"Invalid manifest file in report package, {e}") from e
            if not isinstance(manifest, dict) or DOC_INFO not in manifest:
                raise kering.ValidationError("Invalid manifest file in report package, missing "
                                             f"{DOC_INFO}")
            # Only remember the directory the manifest actually came from
            metaDir = candidateDir

        if manifest is None:
            raise kering.ValidationError("No manifest in file, invalid signed report package")

        docInfo = manifest[DOC_INFO]
        if not isinstance(docInfo, dict) or SIGNATURES not in docInfo:
            raise kering.ValidationError("No signatures found in manifest file")

        signatures = docInfo[SIGNATURES]

        return signatures, metaDir
directory for the test + self.temp_dir = tempfile.TemporaryDirectory() + self.zip1_name = os.path.join(self.temp_dir.name, "test_reports.zip") + self.create_test_zip(self.zip1_name) + + def tearDown(self): + # Clean up the temporary directory + self.temp_dir.cleanup() + + def create_test_zip(self, zip1_name): + # Create a temporary directory structure + with tempfile.TemporaryDirectory() as temp_dir: + # Create META-INF directory + meta_inf_dir = os.path.join(temp_dir, "META-INF") + os.makedirs(meta_inf_dir) + + # Create reports.json file with some JSON content + meta_reports_json_path = os.path.join(meta_inf_dir, "reports.json") + with open(meta_reports_json_path, "w") as f: + json.dump({DOC_INFO: {"signatures": []}}, f) + + # Create reports directory + reports_dir = os.path.join(temp_dir, "reports") + os.makedirs(reports_dir) + + reports_json_path = os.path.join(reports_dir, "report.json") + with open(reports_json_path, "w") as f: + json.dump({"key": "value"}, f) + + # Create several files in the reports directory + for i in range(3): + file_path = os.path.join(reports_dir, f"report_{i}.txt") + with open(file_path, "w") as f: + f.write(f"This is report {i}") + + # Create a zip file and add the directories and files + with zipfile.ZipFile(zip1_name, "w") as zipf: + for root, dirs, files in os.walk(temp_dir): + for file in files: + file_path = os.path.join(root, file) + arcname = os.path.relpath(file_path, temp_dir) + zipf.write(file_path, arcname) + + def test_file_processor(self): + with zipfile.ZipFile(self.zip1_name, "r") as z: + # Use the zip file in your tests + with tempfile.TemporaryDirectory() as temp_dir: + z.extractall(temp_dir) + + metaDir = os.path.join(temp_dir, "META-INF") + repFromMeta = FileProcessor.find_file_in_dir(metaDir, "reports.json") + assert repFromMeta == os.path.join(metaDir, "reports.json") + repFromZip = FileProcessor.find_file_in_zip_files(os.path.dirname(self.zip1_name), "../reports/report.json") + assert repFromZip == 
os.path.join(os.path.dirname(self.zip1_name), "reports", "report.json") + repDir = FileProcessor.find_reports_directory(temp_dir) + assert repDir == os.path.join(temp_dir, "reports") + signatures, metaDir = FileProcessor.getSignaturesFromZip(zipFile=z, extractDir=temp_dir) + assert len(signatures) == 0 + filesNotReportJson = FileProcessor.list_files_excluding_report_json(os.path.join(temp_dir, "reports")) + assert set(filesNotReportJson) == {'report_2.txt', 'report_1.txt', 'report_0.txt'} + filesInDir = FileProcessor.list_files_in_directory(temp_dir) + assert set(filesInDir) == {os.path.basename(metaDir),os.path.basename(repDir)} + fileInZip = FileProcessor.list_files_in_zip_excluding_report_json(self.zip1_name) + assert set(fileInZip) == {'META-INF/reports.json', 'reports/report_2.txt', 'reports/report_1.txt', 'reports/report_0.txt'} + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/core/test_reporting.py b/tests/core/test_reporting.py index bb2c6cc..8ac4bae 100644 --- a/tests/core/test_reporting.py +++ b/tests/core/test_reporting.py @@ -1,15 +1,20 @@ +import json +import os +import tempfile +import zipfile import pytest from hashlib import sha256 from keri import kering +from src.verifier.core.reporting import FileProcessor from src.verifier.core.utils import DigerBuilder def test_diger_builder(): BASE_STR = "fefUBIUhdo9032bfHf0UNONF0kubni9HnF22L0KD2".encode() dig = sha256(BASE_STR).hexdigest() - dig = f"sha256_{dig}" + dig = f"sha256-{dig}" diger = DigerBuilder.sha256(dig) assert diger.verify(BASE_STR) is True @@ -18,7 +23,7 @@ def test_diger_builder_fail(): BASE_STR = "fefUBIUhdo9032bfHf0UNONF0kubni9HnF22L0KD2".encode() WRONG_BASE_STR = "fefUBIUhdo9032bfHf0UNONF0kubni9HnF22L0KDT".encode() dig = sha256(BASE_STR).hexdigest() - dig = f"sha256_{dig}" + dig = f"sha256-{dig}" diger = DigerBuilder.sha256(dig) assert diger.verify(WRONG_BASE_STR) is False @@ -28,5 +33,4 @@ def test_diger_builder_wrong_dig(): dig = 
sha256(BASE_STR).hexdigest() # Here the dig is not prefixed with pytest.raises(kering.ValidationError) as exc_info: - diger = DigerBuilder.sha256(dig) - + diger = DigerBuilder.sha256(dig) \ No newline at end of file