diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 08cbd81..475c3bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, macos-latest, windows-latest] - python-version: [3.7, 3.8, 3.9, '3.10'] + python-version: [3.8, 3.9, '3.10', '3.11'] steps: - name: Set git crlf/eol run: | diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 31c77a4..bb281db 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -150,11 +150,11 @@ jobs: - name: Setup old python for test uses: actions/setup-python@v4 with: - python-version: 3.7 + python-version: 3.8 - name: Generate coverage run: | - tox -e coverage,py37,py311 + tox -e coverage,py38,py311 - name: Code Coverage Summary Report (data) uses: irongut/CodeCoverageSummary@v1.3.0 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 51a882a..f759ff6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, macos-latest, windows-latest] - python-version: [3.7, 3.9, '3.11'] + python-version: [3.8, 3.9, '3.11'] steps: - name: Set git crlf/eol diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 2cd43b9..4461336 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -19,7 +19,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, macos-latest, windows-latest] - python-version: [3.7, 3.8, 3.9, '3.10'] + python-version: [3.8, 3.9, '3.10', '3.11'] steps: - name: Set git crlf/eol diff --git a/.gitignore b/.gitignore index b6d64b9..9f8af8a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,12 @@ __pycache__/ src/ymltoxml/_version.py .ymltoxml.y* .yasort.y* +.yagrep.y* in.* out.* sorted-out/ munch/ +nested_lookup/ # C extensions *.so diff --git a/README.rst b/README.rst index 526e511..76af84a 100644 --- a/README.rst +++ b/README.rst @@ -10,9 +10,9 @@ Python command line tools to convert between XML_ files and YAML_ files, preserving attributes and comments (with minor corrections). The default -file encoding for both types is UTF-8 without a BOM. Includes another -console entry point to sort large YAML lists (eg, lists of rules found -in the `SCAP Security Guide`_). +file encoding for both types is UTF-8 without a BOM. Now includes more +console entry points to grep or sort interesting YAML files (eg, lists +of rules found in the `SCAP Security Guide`_). .. _SCAP Security Guide: https://github.com/ComplianceAsCode/content @@ -47,8 +47,8 @@ idiom to install it on your system in a virtual env after cloning:: The alternative to python venv is the ``tox`` test driver. If you have it installed already, see the example tox commands below. -Usage ------ +ymltoxml +-------- The current version supports minimal command options; if no options are provided, the only required arguments are one or more files of a single @@ -102,20 +102,75 @@ configuration file, do:: $ ymltoxml --dump-config > .ymltoxml.yaml $ $EDITOR .ymltoxml.yaml -An additional helper script is now provided for sorting large (YAML) lists. -The new ``yasort`` script uses its own configuration file, creatively named -``yasort.yaml``. The above applies equally to this new config file. +yagrep +------ + +A new helper script is now included for searching keys or values in +YAML files. The ``yagrep`` script also has its own built-in config +file, which can be copied and edited as shown above. In this case the +script is intended to feel more-or-less like ``grep`` so the default +config should Just Work. That said, the script uses the ``dpath`` +python library, so you may need to change the default "path" separator +if your data has keys containing forward slashes (see the `upstream +docs`_ for details). + +General usage guidelines: + +* use the ``-f`` (filter) arg to search for a value string +* follow the (json) output from above to find the key name +* then use the ``-l`` (lookup) arg to extract the values for the above key + +Useful yagrep config file settings: + +:default_separator: change the path separator to something like ``;`` if data + has forward slashes +:output_format: set the output format to ``raw`` for unformmated output :: - $ yasort + $ yagrep -h + usage: yagrep [-h] [--version] [-v] [-d] [-s] [-f | -l] TEXT FILE [FILE ...] + + Search in YAML files for keys and values. + + positional arguments: + TEXT Text string to look for (one-only, required) (default: + None) + FILE Look in file(s) for text string (at least one, required) + (default: None) + + options: + -h, --help show this help message and exit + --version show program's version number and exit + -v, --verbose Display more processing info (default: False) + -d, --dump-config Dump default configuration file to stdout (default: + False) + -s, --save-config save active config to default filename (.yagrep.yml) and + exit (default: False) + -f, --filter Filter out data not matching input string (no paths) + (default: False) + -l, --lookup Lookup by key and return list of values for any matches + (default: False) + + +.. _upstream docs: https://github.com/dpath-maintainers/dpath-python + +yasort +------ + +Another helper script is included for sorting large (YAML) lists. +The ``yasort`` script also uses its own configuration file, creatively named +``.yasort.yaml``. The above applies equally to this config file. + +:: + + $ yasort -h usage: yasort [-h] [--version] [-v] [-d] [-s] [FILE ...] Sort YAML lists and write new files. positional arguments: - FILE Process input file(s) to target directory (default: - None) + FILE Process input file(s) to target directory (default: None) options: -h, --help show this help message and exit @@ -126,18 +181,42 @@ The new ``yasort`` script uses its own configuration file, creatively named -s, --save-config save active config to default filename (.yasort.yml) and exit (default: False) +All of the optional arguments for ``yasort`` are essentially orthogonal to +sorting, thus the only required argument for normal usage is one or more +input files. All of the user settings are in the default configuration file +shown below; use the ``--save-config`` option to create your own config file. + +Default yasort.yaml: + +.. code-block:: yaml + + --- + # comments should be preserved + file_encoding: 'utf-8' + default_yml_ext: '.yaml' + output_dirname: 'sorted-out' + default_parent_key: 'controls' + default_sort_key: 'rules' + has_parent_key: true + preserve_quotes: true + process_comments: false + mapping: 4 + sequence: 6 + offset: 4 + Features and limitations ------------------------ -We only test on mavlink XML message definitions, so it probably *will not* -work at all on arbitrarily complex XML files with namespaces, etc. The -current round-trip is not exact, due to the following: +We mainly test on mavlink XML message definitions and NIST/SSG YAML files, +so round-trip conversion *may not* work at all on arbitrarily complex XML +files with namespaces, etc. The current round-trip is not exact, due to +the following: * missing encoding is added to version tag * leading/trailing whitespace in text elements and comments is not preserved -* elements with self-closing tags are converted to full closing tags -* empty elements on more than one line are not preserved +* XML - elements with self-closing tags are converted to full closing tags +* XML - empty elements on more than one line are not preserved For the files tested (eg, mavlink) the end result is cleaner/shinier XML. @@ -172,13 +251,13 @@ only Git, Python, and Tox. SCAP support ------------ -The yasort tool is also intended to be part of a larger workflow, mainly +The yasort/yagrep tools are intended to be part of a larger workflow, mainly working with SCAP content, ie, the scap-security-guide source files (or just content_). It is currently used to sort profiles with large numbers of rules to make it easier to visually diff and spot duplicates, etc. -The configuration file defaults are based on existing yaml structure, but -you are free to change them for another use case. To adjust how the sorting +The yasort configuration file defaults are based on existing yaml structure, +but feel free to change them for another use case. To adjust how the sorting works, make a local config file (see above) and edit as needed the following options: @@ -189,7 +268,7 @@ options: :default_yml_ext: change the output file extension The rest of the options are for YAML formatting/flow style (see the ruamel_ -documetation for formatting details) +documentation for formatting details) .. _content: https://complianceascode.readthedocs.io/en/latest/ .. _ruamel: https://yaml.readthedocs.io/en/latest/ diff --git a/requirements.txt b/requirements.txt index 00b8c84..c7f6b11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ importlib-metadata; python_version < '3.8' importlib-resources; python_version < '3.10' +dpath munch +nested-lookup PyYAML ruamel.yaml xmltodict diff --git a/scripts/analyze_control_ids.py b/scripts/analyze_control_ids.py new file mode 100644 index 0000000..c88b677 --- /dev/null +++ b/scripts/analyze_control_ids.py @@ -0,0 +1,53 @@ +""" +Simple ID string counter. +""" + +import os +import sys +import typing +from collections import Counter +from pathlib import Path + +from ymltoxml.utils import get_profile_sets + +id_count: typing.Counter[str] = Counter() +FILE = os.getenv('ID_FILE', default='tests/data/PRIVACY-ids.txt') +DEBUG = os.getenv('DEBUG', default=None) +SELFTEST = os.getenv('SELFTEST', default=None) + +if not Path(FILE).exists(): + print(f'Input file {FILE} not found!') + sys.exit(1) + +input_ids = list(Path(FILE).read_text(encoding='utf-8').splitlines()) +in_set = set(input_ids) + +print(f"Input control IDs -> {len(in_set)}") +if DEBUG: + print(sorted(in_set)) +if SELFTEST: + id_sets, id_names = get_profile_sets('tests/data') +else: + id_sets, id_names = get_profile_sets('800-53-control-ids/nist') + +for id_set, ptype in zip(id_sets, id_names): + print(f"\n{ptype} profile control IDs -> {len(id_set)}") + + print(f"Input set is in {ptype} set: {id_set > in_set}") + common_set = sorted(id_set & in_set) + print(f"Num input controls in {ptype} set -> {len(common_set)}") + not_in_set = sorted(in_set - id_set) + print(f"Num input controls not in {ptype} set -> {len(not_in_set)}") + if DEBUG: + print(f"Input controls not in {ptype} set: {not_in_set}") + +print(f"\n{id_names[2]} set is in {id_names[0]} set: {id_sets[0] > id_sets[2]}") +print(f"{id_names[2]} set is in {id_names[1]} set: {id_sets[1] > id_sets[2]}") +print(f"{id_names[1]} set is in {id_names[0]} set: {id_sets[0] > id_sets[1]}") +print(f"{id_names[3]} set is in {id_names[0]} set: {id_sets[0] > id_sets[3]}") + +if DEBUG: + not_in_high = sorted(in_set - id_sets[0]) + print("\nInput controls not in HIGH set\n") + for ctl_id in not_in_high: + print(ctl_id) diff --git a/setup.cfg b/setup.cfg index 57d1216..a0ef04f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [metadata] name = ymltoxml version = attr: setuptools_scm.get_version -description = attr: ymltoxml.__description__ +description = Console tools for YAML and XML processing with config files in YAML. url = https://github.com/sarnold/ymltoxml author = Stephen Arnold author_email = nerdboy@gentoo.org @@ -9,6 +9,7 @@ long_description = file: README.rst long_description_content_type = text/rst; charset=UTF-8 license_expression = LGPL-2.1-or-later license_files = LICENSE +license = LGPLv2+ classifiers = Development Status :: 4 - Beta Intended Audience :: Developers @@ -16,20 +17,22 @@ classifiers = Environment :: Console Topic :: Software Development Topic :: Software Development :: Testing + License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+) [options] -python_requires = >= 3.7 +python_requires = >= 3.8 setup_requires = setuptools_scm[toml] install_requires = - importlib-metadata; python_version < '3.8' importlib-resources; python_version < '3.10' + nested-lookup xmltodict munch ruamel.yaml PyYAML + dpath packages = find_namespace: package_dir = @@ -46,6 +49,7 @@ ymltoxml.data = console_scripts = ymltoxml = ymltoxml.ymltoxml:main yasort = ymltoxml.yasort:main + yagrep = ymltoxml.yagrep:main # extra deps are included here mainly for local/venv installs using pip # otherwise deps are handled via tox, ci config files or pkg managers diff --git a/src/ymltoxml/__init__.py b/src/ymltoxml/__init__.py index 4de47dc..23266dd 100644 --- a/src/ymltoxml/__init__.py +++ b/src/ymltoxml/__init__.py @@ -1,5 +1 @@ """Console tools for YAML/XML processing with config files in YAML.""" - -__description__ = "Console tools for YAML/XML conversion and sorting." - -__all__ = ["__description__"] diff --git a/src/ymltoxml/data/yagrep.yaml b/src/ymltoxml/data/yagrep.yaml new file mode 100644 index 0000000..a115a93 --- /dev/null +++ b/src/ymltoxml/data/yagrep.yaml @@ -0,0 +1,11 @@ +--- +# comments should be preserved +file_encoding: 'utf-8' +default_yml_ext: '.yaml' +default_separator: '/' +output_format: 'json' +preserve_quotes: true +process_comments: false +mapping: 4 +sequence: 6 +offset: 4 diff --git a/src/ymltoxml/utils.py b/src/ymltoxml/utils.py index 475a398..38ea8e5 100644 --- a/src/ymltoxml/utils.py +++ b/src/ymltoxml/utils.py @@ -1,6 +1,7 @@ """ Shared utility code. """ + import re import sys from pathlib import Path @@ -21,6 +22,8 @@ VERSION = version('ymltoxml') +PROFILE_TYPES = ['HIGH', 'MODERATE', 'LOW', 'PRIVACY'] + class FileTypeError(Exception): """Raise when the file extension is not '.xml', '.yml', or '.yaml'""" @@ -40,7 +43,82 @@ def dump(self, data, stream=None, **kw): return stream.getvalue() -def load_config(file_encoding='utf-8', yasort=False, debug=False): +def get_filelist(dirpath, filepattern='*.txt', debug=False): + """ + Get path objects matching ``filepattern`` starting at ``dirpath`` and + return a list of matching paths for any files found. + + :param dirpath: directory name to start file search + :param filepattern: str of the form ``*.`` + :param debug: increase output verbosity + :return: list of path strings + """ + file_list = [] + filenames = Path(dirpath).rglob(filepattern) + for pfile in list(filenames): + file_list.append(str(pfile)) + if debug: + print(f'Found file list: {file_list}') + return file_list + + +def get_profile_sets(dirpath='tests/data', filepattern='*.txt', debug=False): + """ + Get the 800-53 oscal ID files and parse them into ID sets, return + a list of sets. There should not be more than one controls file for + each profile type. + + :Note: The oscal ID files are simply text files with a single "column" + of ID strings extracted from the NIST oscal-content files or a + CSV dump, etc. Samples are contained in the ``tests/data`` folder. + + :param dirpath: directory name to start file search + :param filepattern: str of the form ``*.`` + :param debug: increase output verbosity + :return: tuple of lists: (profile_sets, profile_types) + """ + h_set = set() + m_set = set() + l_set = set() + p_set = set() + + nist_files = sorted(get_filelist(dirpath, filepattern, debug)) + + for _, pfile in enumerate(nist_files): + ptype = get_profile_type(pfile, debug) + ptype_ids = list(Path(pfile).read_text(encoding='utf-8').splitlines()) + t_set = set(sorted(ptype_ids)) + if ptype == 'HIGH': + h_set.update(t_set) + if ptype == 'MODERATE': + m_set.update(t_set) + if ptype == 'LOW': + l_set.update(t_set) + if ptype == 'PRIVACY': + p_set.update(t_set) + if ptype is None: + if debug: + print(f"{ptype} not found! Skipping...") + break + + return [h_set, m_set, l_set, p_set], PROFILE_TYPES + + +def get_profile_type(filename, debug=False): + """ + Get oscal profile type from filename, where profile type is one of the + exported profile names, ie, HIGH, MODERATE, LOW, or PRIVACY. + """ + match = None + + if any((match := substring) in filename for substring in PROFILE_TYPES): + if debug: + print(f'Found profile type: {match}') + + return match + + +def load_config(file_encoding='utf-8', yasort=False, yagrep=False, debug=False): """ Load yaml configuration file and munchify the data. If local file is not found in current directory, the default will be loaded. @@ -56,6 +134,8 @@ def load_config(file_encoding='utf-8', yasort=False, debug=False): prog_name = 'ymltoxml' if yasort: prog_name = 'yasort' + if yagrep: + prog_name = 'yagrep' defconfig = Path(f'.{prog_name}.yml') cfgfile = defconfig if defconfig.exists() else Path(f'.{prog_name}.yaml') diff --git a/src/ymltoxml/yagrep.py b/src/ymltoxml/yagrep.py new file mode 100644 index 0000000..d8c7945 --- /dev/null +++ b/src/ymltoxml/yagrep.py @@ -0,0 +1,166 @@ +"""Console script for searching YAML or XML files.""" + +import argparse +import json +import sys +from pathlib import Path + +import dpath +from munch import Munch +from nested_lookup import nested_lookup + +from .utils import VERSION as __version__ +from .utils import FileTypeError, load_config +from .ymltoxml import get_input_type + +# pylint: disable=R0801 + + +def process_inputs(filepath, grep_args, prog_opts, debug=False): + """ + Handle file arguments and process them. Return any input data for use + with ``dpath`` search. + + :param filepath: filename as Path obj + :param prog_opts: configuration options + :type prog_opts: dict + :param debug: enable extra processing info + :return: data and source type boolean or None + :handles FileTypeError: if input file is not yaml or xml + """ + + def glob_filter(x): + """ + Basic search glob to use with dpath.* functions. + """ + if grep_args.text in str(x): + return True + return False + + fpath = Path(filepath) + path_sep = prog_opts['default_separator'] + + if not fpath.exists(): + print(f'Input file {fpath} not found! Skipping...') + else: + if debug: + print(f'Searching in {fpath}...') + + try: + _, indata = get_input_type(fpath, prog_opts) + except FileTypeError as exc: + print(f'{exc} => {fpath}') + return None + if debug: + print(indata) + + if grep_args.filter: + result = dpath.search(indata, '**', afilter=glob_filter, separator=path_sep) + elif grep_args.lookup: + result = nested_lookup(grep_args.text, indata) + else: + result = dpath.values(indata, grep_args.text, separator=path_sep) + + out = repr(result) + if prog_opts['output_format'] == 'json': + out = json.dumps(result, indent=4, sort_keys=True) + sys.stdout.write(str(filepath) + ': ' + out + '\n') + + +def main(argv=None): # pragma: no cover + """ + Process args and execute search. + """ + debug = False + if argv is None: + argv = sys.argv + + cfg, pfile = load_config(yagrep=True) + popts = Munch.toDict(cfg) + + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='''Search in YAML files for keys and values. + The default search with no options is path-based, thus it + may return empty results without a path or wildcard. Use + the filter argument to find the path(s) to a substring.''', + usage='%(prog)s [-h] [--version] [-v] [-d] [-s] [-f | -l] TEXT FILE [FILE ...]', + ) + parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}") + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Display more processing info", + ) + parser.add_argument( + '-d', + '--dump-config', + action='store_true', + dest="dump", + help='Dump default configuration file to stdout', + ) + parser.add_argument( + '-s', + '--save-config', + action='store_true', + dest="save", + help='save active config to default filename (.yagrep.yml) and exit', + ) + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-f", + "--filter", + action='store_true', + help="Filter out data not matching input string (no paths)", + ) + group.add_argument( + "-l", + "--lookup", + action='store_true', + help="Lookup by key and return list of values for any matches", + ) + parser.add_argument( + 'text', + nargs='?', + metavar="TEXT", + type=str, + help="Text string to look for (one-only, required)", + ) + parser.add_argument( + 'file', + nargs='*', + metavar="FILE", + type=str, + help="Look in file(s) for text string (at least one, required)", + ) + + args = parser.parse_args() + + if args.save: + cfg_data = pfile.read_bytes() + def_config = Path('.yagrep.yml') + def_config.write_bytes(cfg_data) + sys.exit(0) + if args.dump: + sys.stdout.write(pfile.read_text(encoding=popts['file_encoding'])) + sys.exit(0) + if args.verbose: + debug = True + # we need to help argparse here, since it has trouble parsing the 2 + # postional args as required when both are missing, even with help from + # nargs behavior (we also need customized usage msg above to replace the + # default error text with the following print() statement) + if not args.file or not args.text: + parser.print_usage() + print( + "yagrep: error: both of the following arguments are required: TEXT *and* FILE" + ) + sys.exit(1) + + for filearg in args.file: + process_inputs(filearg, args, popts, debug) + + +if __name__ == '__main__': + main() diff --git a/src/ymltoxml/yasort.py b/src/ymltoxml/yasort.py index ce3b09b..092b4aa 100644 --- a/src/ymltoxml/yasort.py +++ b/src/ymltoxml/yasort.py @@ -154,6 +154,7 @@ def main(argv=None): # pragma: no cover cfg, pfile = load_config(yasort=True) popts = Munch.toDict(cfg) + outdir = popts['output_dirname'] if args.save: cfg_data = pfile.read_bytes() @@ -165,18 +166,14 @@ def main(argv=None): # pragma: no cover sys.exit(0) if args.verbose: debug = True - - output_dir = Path(popts['output_dirname']) - if debug: - print(f'Creating output directory {output_dir}') - output_dir.mkdir(exist_ok=True) - - if args.file: - for filearg in args.file: - process_inputs(filearg, popts, debug=debug) - else: + if not args.file: parser.print_help() sys.exit(1) + if debug: + print(f'Creating output directory {outdir}') + Path(outdir).mkdir(exist_ok=True) + for filearg in args.file: + process_inputs(filearg, popts, debug=debug) if __name__ == '__main__': diff --git a/tests/data/HIGH-ids.txt b/tests/data/HIGH-ids.txt new file mode 100644 index 0000000..ca8ef05 --- /dev/null +++ b/tests/data/HIGH-ids.txt @@ -0,0 +1,370 @@ +ac-1 +ac-2 +ac-2.1 +ac-2.2 +ac-2.3 +ac-2.4 +ac-2.5 +ac-2.11 +ac-2.12 +ac-2.13 +ac-3 +ac-4 +ac-4.4 +ac-5 +ac-6 +ac-6.1 +ac-6.2 +ac-6.3 +ac-6.5 +ac-6.7 +ac-6.9 +ac-6.10 +ac-7 +ac-8 +ac-10 +ac-11 +ac-11.1 +ac-12 +ac-14 +ac-17 +ac-17.1 +ac-17.2 +ac-17.3 +ac-17.4 +ac-18 +ac-18.1 +ac-18.3 +ac-18.4 +ac-18.5 +ac-19 +ac-19.5 +ac-20 +ac-20.1 +ac-20.2 +ac-21 +ac-22 +at-1 +at-2 +at-2.2 +at-2.3 +at-3 +at-4 +au-1 +au-2 +au-3 +au-3.1 +au-4 +au-5 +au-5.1 +au-5.2 +au-6 +au-6.1 +au-6.3 +au-6.5 +au-6.6 +au-7 +au-7.1 +au-8 +au-9 +au-9.2 +au-9.3 +au-9.4 +au-10 +au-11 +au-12 +au-12.1 +au-12.3 +ca-1 +ca-2 +ca-2.1 +ca-2.2 +ca-3 +ca-3.6 +ca-5 +ca-6 +ca-7 +ca-7.1 +ca-7.4 +ca-8 +ca-8.1 +ca-9 +cm-1 +cm-2 +cm-2.2 +cm-2.3 +cm-2.7 +cm-3 +cm-3.1 +cm-3.2 +cm-3.4 +cm-3.6 +cm-4 +cm-4.1 +cm-4.2 +cm-5 +cm-5.1 +cm-6 +cm-6.1 +cm-6.2 +cm-7 +cm-7.1 +cm-7.2 +cm-7.5 +cm-8 +cm-8.1 +cm-8.2 +cm-8.3 +cm-8.4 +cm-9 +cm-10 +cm-11 +cm-12 +cm-12.1 +cp-1 +cp-2 +cp-2.1 +cp-2.2 +cp-2.3 +cp-2.5 +cp-2.8 +cp-3 +cp-3.1 +cp-4 +cp-4.1 +cp-4.2 +cp-6 +cp-6.1 +cp-6.2 +cp-6.3 +cp-7 +cp-7.1 +cp-7.2 +cp-7.3 +cp-7.4 +cp-8 +cp-8.1 +cp-8.2 +cp-8.3 +cp-8.4 +cp-9 +cp-9.1 +cp-9.2 +cp-9.3 +cp-9.5 +cp-9.8 +cp-10 +cp-10.2 +cp-10.4 +ia-1 +ia-2 +ia-2.1 +ia-2.2 +ia-2.5 +ia-2.8 +ia-2.12 +ia-3 +ia-4 +ia-4.4 +ia-5 +ia-5.1 +ia-5.2 +ia-5.6 +ia-6 +ia-7 +ia-8 +ia-8.1 +ia-8.2 +ia-8.4 +ia-11 +ia-12 +ia-12.2 +ia-12.3 +ia-12.4 +ia-12.5 +ir-1 +ir-2 +ir-2.1 +ir-2.2 +ir-3 +ir-3.2 +ir-4 +ir-4.1 +ir-4.4 +ir-4.11 +ir-5 +ir-5.1 +ir-6 +ir-6.1 +ir-6.3 +ir-7 +ir-7.1 +ir-8 +ma-1 +ma-2 +ma-2.2 +ma-3 +ma-3.1 +ma-3.2 +ma-3.3 +ma-4 +ma-4.3 +ma-5 +ma-5.1 +ma-6 +mp-1 +mp-2 +mp-3 +mp-4 +mp-5 +mp-6 +mp-6.1 +mp-6.2 +mp-6.3 +mp-7 +pe-1 +pe-2 +pe-3 +pe-3.1 +pe-4 +pe-5 +pe-6 +pe-6.1 +pe-6.4 +pe-8 +pe-8.1 +pe-9 +pe-10 +pe-11 +pe-11.1 +pe-12 +pe-13 +pe-13.1 +pe-13.2 +pe-14 +pe-15 +pe-15.1 +pe-16 +pe-17 +pe-18 +pl-1 +pl-2 +pl-4 +pl-4.1 +pl-8 +pl-10 +pl-11 +ps-1 +ps-2 +ps-3 +ps-4 +ps-4.2 +ps-5 +ps-6 +ps-7 +ps-8 +ps-9 +ra-1 +ra-2 +ra-3 +ra-3.1 +ra-5 +ra-5.2 +ra-5.4 +ra-5.5 +ra-5.11 +ra-7 +ra-9 +sa-1 +sa-2 +sa-3 +sa-4 +sa-4.1 +sa-4.2 +sa-4.5 +sa-4.9 +sa-4.10 +sa-5 +sa-8 +sa-9 +sa-9.2 +sa-10 +sa-11 +sa-15 +sa-15.3 +sa-16 +sa-17 +sa-21 +sa-22 +sc-1 +sc-2 +sc-3 +sc-4 +sc-5 +sc-7 +sc-7.3 +sc-7.4 +sc-7.5 +sc-7.7 +sc-7.8 +sc-7.18 +sc-7.21 +sc-8 +sc-8.1 +sc-10 +sc-12 +sc-12.1 +sc-13 +sc-15 +sc-17 +sc-18 +sc-20 +sc-21 +sc-22 +sc-23 +sc-24 +sc-28 +sc-28.1 +sc-39 +si-1 +si-2 +si-2.2 +si-3 +si-4 +si-4.2 +si-4.4 +si-4.5 +si-4.10 +si-4.12 +si-4.14 +si-4.20 +si-4.22 +si-5 +si-5.1 +si-6 +si-7 +si-7.1 +si-7.2 +si-7.5 +si-7.7 +si-7.15 +si-8 +si-8.2 +si-10 +si-11 +si-12 +si-16 +sr-1 +sr-2 +sr-2.1 +sr-3 +sr-5 +sr-6 +sr-8 +sr-9 +sr-9.1 +sr-10 +sr-11 +sr-11.1 +sr-11.2 +sr-12 diff --git a/tests/data/LOW-ids.txt b/tests/data/LOW-ids.txt new file mode 100644 index 0000000..35aec12 --- /dev/null +++ b/tests/data/LOW-ids.txt @@ -0,0 +1,149 @@ +ac-1 +ac-2 +ac-3 +ac-7 +ac-8 +ac-14 +ac-17 +ac-18 +ac-19 +ac-20 +ac-22 +at-1 +at-2 +at-2.2 +at-3 +at-4 +au-1 +au-2 +au-3 +au-4 +au-5 +au-6 +au-8 +au-9 +au-11 +au-12 +ca-1 +ca-2 +ca-3 +ca-5 +ca-6 +ca-7 +ca-7.4 +ca-9 +cm-1 +cm-2 +cm-4 +cm-5 +cm-6 +cm-7 +cm-8 +cm-10 +cm-11 +cp-1 +cp-2 +cp-3 +cp-4 +cp-9 +cp-10 +ia-1 +ia-2 +ia-2.1 +ia-2.2 +ia-2.8 +ia-2.12 +ia-4 +ia-5 +ia-5.1 +ia-6 +ia-7 +ia-8 +ia-8.1 +ia-8.2 +ia-8.4 +ia-11 +ir-1 +ir-2 +ir-4 +ir-5 +ir-6 +ir-7 +ir-8 +ma-1 +ma-2 +ma-4 +ma-5 +mp-1 +mp-2 +mp-6 +mp-7 +pe-1 +pe-2 +pe-3 +pe-6 +pe-8 +pe-12 +pe-13 +pe-14 +pe-15 +pe-16 +pl-1 +pl-2 +pl-4 +pl-4.1 +pl-10 +pl-11 +ps-1 +ps-2 +ps-3 +ps-4 +ps-5 +ps-6 +ps-7 +ps-8 +ps-9 +ra-1 +ra-2 +ra-3 +ra-3.1 +ra-5 +ra-5.2 +ra-5.11 +ra-7 +sa-1 +sa-2 +sa-3 +sa-4 +sa-4.10 +sa-5 +sa-8 +sa-9 +sa-22 +sc-1 +sc-5 +sc-7 +sc-12 +sc-13 +sc-15 +sc-20 +sc-21 +sc-22 +sc-39 +si-1 +si-2 +si-3 +si-4 +si-5 +si-12 +sr-1 +sr-2 +sr-2.1 +sr-3 +sr-5 +sr-8 +sr-10 +sr-11 +sr-11.1 +sr-11.2 +sr-12 diff --git a/tests/data/MODERATE-ids.txt b/tests/data/MODERATE-ids.txt new file mode 100644 index 0000000..a774174 --- /dev/null +++ b/tests/data/MODERATE-ids.txt @@ -0,0 +1,287 @@ +ac-1 +ac-2 +ac-2.1 +ac-2.2 +ac-2.3 +ac-2.4 +ac-2.5 +ac-2.13 +ac-3 +ac-4 +ac-5 +ac-6 +ac-6.1 +ac-6.2 +ac-6.5 +ac-6.7 +ac-6.9 +ac-6.10 +ac-7 +ac-8 +ac-11 +ac-11.1 +ac-12 +ac-14 +ac-17 +ac-17.1 +ac-17.2 +ac-17.3 +ac-17.4 +ac-18 +ac-18.1 +ac-18.3 +ac-19 +ac-19.5 +ac-20 +ac-20.1 +ac-20.2 +ac-21 +ac-22 +at-1 +at-2 +at-2.2 +at-2.3 +at-3 +at-4 +au-1 +au-2 +au-3 +au-3.1 +au-4 +au-5 +au-6 +au-6.1 +au-6.3 +au-7 +au-7.1 +au-8 +au-9 +au-9.4 +au-11 +au-12 +ca-1 +ca-2 +ca-2.1 +ca-3 +ca-5 +ca-6 +ca-7 +ca-7.1 +ca-7.4 +ca-9 +cm-1 +cm-2 +cm-2.2 +cm-2.3 +cm-2.7 +cm-3 +cm-3.2 +cm-3.4 +cm-4 +cm-4.2 +cm-5 +cm-6 +cm-7 +cm-7.1 +cm-7.2 +cm-7.5 +cm-8 +cm-8.1 +cm-8.3 +cm-9 +cm-10 +cm-11 +cm-12 +cm-12.1 +cp-1 +cp-2 +cp-2.1 +cp-2.3 +cp-2.8 +cp-3 +cp-4 +cp-4.1 +cp-6 +cp-6.1 +cp-6.3 +cp-7 +cp-7.1 +cp-7.2 +cp-7.3 +cp-8 +cp-8.1 +cp-8.2 +cp-9 +cp-9.1 +cp-9.8 +cp-10 +cp-10.2 +ia-1 +ia-2 +ia-2.1 +ia-2.2 +ia-2.8 +ia-2.12 +ia-3 +ia-4 +ia-4.4 +ia-5 +ia-5.1 +ia-5.2 +ia-5.6 +ia-6 +ia-7 +ia-8 +ia-8.1 +ia-8.2 +ia-8.4 +ia-11 +ia-12 +ia-12.2 +ia-12.3 +ia-12.5 +ir-1 +ir-2 +ir-3 +ir-3.2 +ir-4 +ir-4.1 +ir-5 +ir-6 +ir-6.1 +ir-6.3 +ir-7 +ir-7.1 +ir-8 +ma-1 +ma-2 +ma-3 +ma-3.1 +ma-3.2 +ma-3.3 +ma-4 +ma-5 +ma-6 +mp-1 +mp-2 +mp-3 +mp-4 +mp-5 +mp-6 +mp-7 +pe-1 +pe-2 +pe-3 +pe-4 +pe-5 +pe-6 +pe-6.1 +pe-8 +pe-9 +pe-10 +pe-11 +pe-12 +pe-13 +pe-13.1 +pe-14 +pe-15 +pe-16 +pe-17 +pl-1 +pl-2 +pl-4 +pl-4.1 +pl-8 +pl-10 +pl-11 +ps-1 +ps-2 +ps-3 +ps-4 +ps-5 +ps-6 +ps-7 +ps-8 +ps-9 +ra-1 +ra-2 +ra-3 +ra-3.1 +ra-5 +ra-5.2 +ra-5.5 +ra-5.11 +ra-7 +ra-9 +sa-1 +sa-2 +sa-3 +sa-4 +sa-4.1 +sa-4.2 +sa-4.9 +sa-4.10 +sa-5 +sa-8 +sa-9 +sa-9.2 +sa-10 +sa-11 +sa-15 +sa-15.3 +sa-22 +sc-1 +sc-2 +sc-4 +sc-5 +sc-7 +sc-7.3 +sc-7.4 +sc-7.5 +sc-7.7 +sc-7.8 +sc-8 +sc-8.1 +sc-10 +sc-12 +sc-13 +sc-15 +sc-17 +sc-18 +sc-20 +sc-21 +sc-22 +sc-23 +sc-28 +sc-28.1 +sc-39 +si-1 +si-2 +si-2.2 +si-3 +si-4 +si-4.2 +si-4.4 +si-4.5 +si-5 +si-7 +si-7.1 +si-7.7 +si-8 +si-8.2 +si-10 +si-11 +si-12 +si-16 +sr-1 +sr-2 +sr-2.1 +sr-3 +sr-5 +sr-6 +sr-8 +sr-10 +sr-11 +sr-11.1 +sr-11.2 +sr-12 diff --git a/tests/data/PRIVACY-ids.txt b/tests/data/PRIVACY-ids.txt new file mode 100644 index 0000000..86fdb0a --- /dev/null +++ b/tests/data/PRIVACY-ids.txt @@ -0,0 +1,96 @@ +ac-1 +ac-3.14 +at-1 +at-2 +at-3 +at-3.5 +at-4 +au-1 +au-2 +au-3.3 +au-11 +ca-1 +ca-2 +ca-5 +ca-6 +ca-7 +ca-7.4 +cm-1 +cm-4 +ir-1 +ir-2 +ir-2.3 +ir-3 +ir-4 +ir-5 +ir-6 +ir-7 +ir-8 +ir-8.1 +mp-1 +mp-6 +pe-8.3 +pl-1 +pl-2 +pl-4 +pl-4.1 +pl-8 +pl-9 +pm-3 +pm-4 +pm-5.1 +pm-6 +pm-7 +pm-8 +pm-9 +pm-10 +pm-11 +pm-13 +pm-14 +pm-17 +pm-18 +pm-19 +pm-20 +pm-20.1 +pm-21 +pm-22 +pm-24 +pm-25 +pm-26 +pm-27 +pm-28 +pm-31 +ps-6 +pt-1 +pt-2 +pt-3 +pt-4 +pt-5 +pt-5.2 +pt-6 +pt-6.1 +pt-6.2 +pt-7 +pt-7.1 +pt-7.2 +pt-8 +ra-1 +ra-3 +ra-7 +ra-8 +sa-1 +sa-2 +sa-3 +sa-4 +sa-8.33 +sa-9 +sa-11 +sc-7.24 +si-1 +si-12 +si-12.1 +si-12.2 +si-12.3 +si-18 +si-18.4 +si-19 diff --git a/tests/test_misc.py b/tests/test_misc.py index c6c13cd..542e610 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -3,7 +3,54 @@ import pytest from munch import Munch -from ymltoxml.utils import StrYAML, load_config +from ymltoxml.utils import ( + StrYAML, + get_filelist, + get_profile_sets, + get_profile_type, + load_config, +) + +def test_get_filelist(): + test_path = Path('docs') / 'source' / 'index.rst' + files = get_filelist('docs/source', '*.rst') + assert isinstance(files, list) + assert len(files) == 6 + assert str(test_path) in files + + +def test_get_filelist_debug(): + test_path = Path('docs') / 'source' / 'index.rst' + files = get_filelist('docs/source', '*.rst', debug=True) + assert isinstance(files, list) + assert len(files) == 6 + assert str(test_path) in files + + +def test_get_profile_sets(): + dirpath = 'tests/data/' + id_sets = get_profile_sets(dirpath) + assert len(id_sets) == 2 + assert isinstance(id_sets, tuple) + for item in id_sets: + assert len(item) == 4 + for item in id_sets[0]: + assert isinstance(item, set) + assert id_sets[0][0] > id_sets[0][1] + + +def test_get_profile_type(): + filename = 'PRIVACY-ids.txt' + profile_type = get_profile_type(filename) + assert isinstance(profile_type, str) + assert profile_type == 'PRIVACY' + + +def test_get_profile_type_debug(): + filename = 'PRIVACY-ids.txt' + profile_type = get_profile_type(filename, debug=True) + assert isinstance(profile_type, str) + assert profile_type == 'PRIVACY' def test_str_dumper(): @@ -12,6 +59,15 @@ def test_str_dumper(): assert hasattr(my_yaml, 'dump') +def test_load_debug_config(): + popts, pfile = load_config(debug=True) + + assert isinstance(pfile, Path) + assert isinstance(popts, Munch) + assert hasattr(popts, 'default_xml_ext') + assert pfile.stem == 'ymltoxml' or '.ymltoxml' + + def test_load_ymltoxml_config(): popts, pfile = load_config() diff --git a/tests/test_yagrep.py b/tests/test_yagrep.py new file mode 100644 index 0000000..e539687 --- /dev/null +++ b/tests/test_yagrep.py @@ -0,0 +1,113 @@ +import pytest + +from munch import Munch +from ymltoxml.utils import FileTypeError, StrYAML +from ymltoxml.yagrep import process_inputs + +defconfig_str = """\ +# comments should be preserved +file_encoding: 'utf-8' +default_yml_ext: '.yaml' +default_separator: '/' +output_format: null +preserve_quotes: true +process_comments: false +mapping: 4 +sequence: 6 +offset: 4 +""" + +file_type_err = "FileTypeError: unknown input file extension" + +yaml_str = """\ +policy: Security Requirements Guide - General Purpose Operating System +title: Security Requirements Guide - General Purpose Operating System +id: srg_gpos +version: 'v2r3' +source: https://public.cyber.mil/stigs/downloads/ +controls_dir: srg_gpos +levels: +- id: high +- id: medium +- id: low +controls: # sequences can have nodes that are mappings + - id: Variables + levels: + - high + - medium + - low + title: Variables + rules: + - var_sshd_disable_compression=no # this should be last + - var_password_hashing_algorithm=SHA512 + - var_password_pam_dictcheck=1 + - sshd_approved_macs=stig + - sshd_approved_ciphers=stig + - sshd_idle_timeout_value=10_minutes + - var_accounts_authorized_local_users_regex=rhel8 + - var_account_disable_post_pw_expiration=35 + - var_auditd_action_mail_acct=root + - var_auditd_space_left_percentage=25pc + - var_auditd_space_left_action=email + - login_banner_text=dod_banners # this should be first + - var_authselect_profile=sssd +""" + +args_obj = Munch.fromDict( + { + "get": False, + "filter": False, + "text": "foo", + } +) + +testdata = [ + ("rules", True, False, "disable_compression",), + ("rules", False, True, "disable_compression"), + ("rules", False, False, "[]",), +] + +@pytest.mark.parametrize("a,b,c,expected", testdata) +def test_process_inputs(a, b, c, expected, capfd, tmp_path): + args_obj.text = a + args_obj.filter = b + args_obj.lookup = c + debug = False + yaml = StrYAML() + inp = tmp_path / "in.yml" + inp.write_text(yaml_str, encoding="utf-8") + + popts = yaml.load(defconfig_str) + process_inputs(inp, args_obj, popts, debug) + out, err = capfd.readouterr() + assert expected in out + assert "policy" not in out + + +def test_process_inputs_filter_debug(capfd, tmp_path): + args_obj.text = "low" + args_obj.filter = True + args_obj.lookup = False + debug = True + yaml = StrYAML() + inp = tmp_path / "in.yml" + inp.write_text(yaml_str, encoding="utf-8") + + popts = yaml.load(defconfig_str) + popts['output_format'] = 'json' + process_inputs(inp, args_obj, popts, debug) + out, err = capfd.readouterr() + assert "disable_compression" in out + assert "Searching in" in out + + +def test_bad_file(capfd, tmp_path): + args_obj.text = "low" + yaml = StrYAML() + popts = yaml.load(defconfig_str) + inp2 = tmp_path / "in.ymml" + inp2.write_text(yaml_str, encoding="utf-8") + + process_inputs(inp2, args_obj, popts) + out, err = capfd.readouterr() + assert file_type_err in out diff --git a/tests/test_yasort.py b/tests/test_yasort.py index 7123c28..b71b01b 100644 --- a/tests/test_yasort.py +++ b/tests/test_yasort.py @@ -69,6 +69,19 @@ def test_process_inputs(tmp_path): # for child in d.iterdir(): print(child) +def test_process_inputs_debug(tmp_path): + yaml = StrYAML() + d = tmp_path / "out" + d.mkdir() + inp = tmp_path / "in.yml" + inp.write_text(yaml_str, encoding="utf-8") + + popts = yaml.load(defconfig_str) + popts['output_dirname'] = d + process_inputs(inp, popts, True) + assert len(list(d.iterdir())) == 1 + + def test_bad_file(capfd, tmp_path): yaml = StrYAML() popts = yaml.load(defconfig_str) diff --git a/tests/test_ymltoxml.py b/tests/test_ymltoxml.py index 5bc0456..c5ab789 100644 --- a/tests/test_ymltoxml.py +++ b/tests/test_ymltoxml.py @@ -1,7 +1,7 @@ import pytest -from ymltoxml.utils import StrYAML -from ymltoxml.ymltoxml import process_inputs +from ymltoxml.utils import FileTypeError, StrYAML +from ymltoxml.ymltoxml import get_input_type, process_inputs defconfig_str = """\ file_encoding: 'utf-8' @@ -17,6 +17,8 @@ indent: ' ' """ +file_type_err = "FileTypeError: unknown input file extension" + def test_process_inputs(xml_file, yml_file): yaml = StrYAML() @@ -26,6 +28,14 @@ def test_process_inputs(xml_file, yml_file): process_inputs(yml_file, popts) +def test_process_inputs_debug(xml_file, yml_file): + yaml = StrYAML() + popts = yaml.load(defconfig_str) + + process_inputs(xml_file, popts, debug=True) + process_inputs(yml_file, popts, debug=True) + + def test_process_no_comments(xml_file, yml_file): yaml = StrYAML() popts = yaml.load(defconfig_str) @@ -33,3 +43,17 @@ def test_process_no_comments(xml_file, yml_file): process_inputs(xml_file, popts) process_inputs(yml_file, popts) + + +def test_bad_file(capfd, tmp_path): + yaml = StrYAML() + popts = yaml.load(defconfig_str) + inp2 = tmp_path / "in.ymml" + inp2.write_text("name: null", encoding="utf-8") + + process_inputs(inp2, popts) + out, err = capfd.readouterr() + assert file_type_err in out + + with pytest.raises(FileTypeError): + get_input_type(inp2, popts) diff --git a/tox.ini b/tox.ini index 4b9f3e0..2e87301 100644 --- a/tox.ini +++ b/tox.ini @@ -1,12 +1,11 @@ [tox] -envlist = py3{7,8,9,10,11}-{linux,macos,windows},coverage +envlist = py3{8,9,10,11}-{linux,macos,windows},coverage skip_missing_interpreters = true isolated_build = true skipsdist = true [gh-actions] python = - 3.7: py37 3.8: py38 3.9: py39 3.10: py310 @@ -35,6 +34,7 @@ skip_install = true setenv = COVERAGE_FILE = .coverage.{envname} PYTHONPATH = {toxinidir}/src + SELFTEST = True passenv = SETUPTOOLS_SCM_DEBUG @@ -54,8 +54,6 @@ allowlist_externals = deps = {[base]deps} - #-r requirements.txt - pymavlink .[test,cov] commands = @@ -71,9 +69,6 @@ skip_install = allowlist_externals = bash -#setenv = -# COVERAGE_FILE = .coverage - deps = coverage @@ -82,7 +77,6 @@ commands = coverage xml depends = - py37 py38 py39 py310 @@ -115,12 +109,13 @@ allowlist_externals = deps = {[base]deps} pymavlink + fuzzyset2 -e . commands = - yasort -h - yasort --version - yasort --dump-config + yagrep -h + yagrep --version + yagrep --dump-config [testenv:conv] skip_install = true @@ -174,29 +169,28 @@ deps = commands = bash -c 'gitchangelog {posargs} > CHANGELOG.rst' -[testenv:clean] +[testenv:docs] skip_install = true allowlist_externals = bash + make deps = - pip>=21.1 + {[base]deps} + .[doc] -commands = - bash -c 'make -C docs/ clean' - bash -c 'rm -rf in.* out.* dist/ build/ paparazzi.xml docs/source/api/' +commands = make -C docs html -[testenv:docs] +[testenv:docs-clean] skip_install = true allowlist_externals = - bash - make + {[testenv:docs]allowlist_externals} deps = {[base]deps} .[doc] -commands = make -C docs html +commands = make -C docs/ clean [testenv:docs-lint] skip_install = true @@ -244,8 +238,6 @@ passenv = OS PYTHONIOENCODING -setenv = PYTHONPATH = {toxinidir} - deps = {[base]deps} pylint @@ -271,7 +263,7 @@ commands = [testenv:mypy] skip_install = true -setenv = PYTHONPATH = {toxinidir} +setenv = PYTHONPATH = {toxinidir}/src deps = {[base]deps} @@ -279,17 +271,22 @@ deps = munch-stubs -r requirements.txt -commands_pre = - {[testenv:docs]commands_pre} - commands = - #stubgen -m munch --export-less -o {toxinidir} + stubgen -p nested_lookup --export-less -o {toxinidir} python -m mypy --follow-imports=normal --install-types --non-interactive src/ -[testenv:isort] +[testenv:black] skip_install = true -setenv = PYTHONPATH = {toxinidir} +deps = + {[base]deps} + black + +commands = + black -v -S src/ {posargs} + +[testenv:isort] +skip_install = true deps = {[base]deps} @@ -314,3 +311,15 @@ deps = commands = bandit -c pyproject.toml -r src/ + +[testenv:clean] +skip_install = true +allowlist_externals = + bash + +deps = + pip>=21.1 + +commands = + bash -c 'rm -rf in.* out.* dist/ build/ docs/source/api/' + bash -c 'rm -rf paparazzi.xml nested_lookup/ sorted-out/'