diff --git a/.gitignore b/.gitignore index 894a44c..b81fba6 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,6 @@ venv.bak/ # mypy .mypy_cache/ + +# pycharm +.idea diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..0369925 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,20 @@ +language: python + +python: + - "3.7" + +cache: pip + +install: + - "pip install pytest" + - "pip install pylint" + +script: + - "git clone https://github.com/AgPipeline/Organization-info.git" + - mv base-image/test-files/*.py base-image + - cd base-image + - rm -r test-files + - cd .. + - pylint --rcfile=Organization-info/pylint.rc base-image/*.py + - "python -m pytest -v" + \ No newline at end of file diff --git a/README.md b/README.md index dc73946..da76adb 100644 --- a/README.md +++ b/README.md @@ -22,3 +22,6 @@ Be sure to read the [organization documentation](https://github.com/AgPipeline/O Every folder in this repo must have a README.md clearly explaining the interface for derived images, how to create a derived image, and other information on how to use the images created. Providing a quick start guide with links to more detailed information a good approach for some situations. The goal is to provide documentation for users of these base images that makes it easy for them to be used. + +## Testing +The testing modules and readme may be found in the [testing folder](https://github.com/AgPipeline/base-docker-support/tree/test-development/base-image/test-files). \ No newline at end of file diff --git a/base-image/README.md b/base-image/README.md index 1ea7219..19b2b04 100644 --- a/base-image/README.md +++ b/base-image/README.md @@ -87,10 +87,17 @@ If the transformer.py file has a function named `check_continue` it will be call The return from the check_continue() function is used to determine if processing should continue. If the function is not defined, processing will continue automatically. -5. Processsing: -The `perform_process` function in transformer.py is called getting passed the transformer_class.Transformer instance and any parameters previously defined. +5. Retrieve Files: +If the transformer_class.Transformer instance has a method named `retrieve_files` it will be called getting passed the dictionary returned by `transformer_class.Transformer.get_transformer_params()` (see step 3.) and the loaded metadata. +This allows the downloading of data when the transformer has determined it can proceed (see step 4.). +If this function is not defined, processing will continue automatically. -6. Result Handling: +6. Processsing: +The `perform_process` function in transformer.py is called getting passed the transformer_class.Transformer instance and any parameters previously defined (see step 3.). +This performs the processing of the data. +It's important to note that the dictionary returned in step 3 is used to populate the parameter list of the `perform_process` call. + +7. Result Handling: The result of the above steps may produce warnings, errors, or successful results. These results can be stored in a file, printed to standard output, and/or returned to the caller of `do_work`. In the default case that we're exploring here, the return value from do_work is ignored. @@ -102,7 +109,7 @@ The following command line parameters are defined for all transformers. * -h: (optional parameter) display help message (automatically defined by argparse) * --info, -i: (optional parameter) enable info level logging messages * --result: (optional parameter) how to handle the result of processing; one or more comma-separated strings of: all, file, print -* --metadata: mandatory path to file containing JSON metadata +* --metadata: mandatory path to file containing JSON metadata; can be specified multiple times * --working_space: path to folder to use as a working space and file store * the "file_list" argument contains all additional parameters (which are assumed to be file names but may not be) diff --git a/base-image/entrypoint.py b/base-image/entrypoint.py index cf45234..d5b43e6 100755 --- a/base-image/entrypoint.py +++ b/base-image/entrypoint.py @@ -6,6 +6,7 @@ import os import json import logging +from typing import Optional import transformer_class @@ -22,7 +23,7 @@ def __init__(self): @staticmethod def handle_error(code: int, message: str) -> dict: - """Handles logging and return values when an error ocurrs. Implies processing + """Handles logging and return values when an error occurs. Implies processing will stop. Arguments: code: return code related to the error @@ -35,14 +36,12 @@ def handle_error(code: int, message: str) -> dict: code = -1 if not message: logging.warning("An error has occurred without a message, setting default message") - message = "An error has ocurred with error code (%s)" % str(code) + message = "An error has occurred with error code (%s)" % str(code) logging.error(message) logging.error("Stopping processing") - result = {} - result['error'] = message - result['code'] = code + result = {'error': message, 'code': code} return result @@ -52,13 +51,13 @@ def load_metadata(metadata_path: str) -> dict: Arguments: metadata_path: path to the metadata file Return: - Returns a dict containing the loaded metadata. If an error ocurrs, the dict + Returns a dict containing the loaded metadata. If an error occurs, the dict won't contain metadata but will contain an error message under an 'error' key. """ try: with open(metadata_path, 'r') as in_file: md_load = json.load(in_file) - if not md_load is None: + if md_load is not None: md_return = {'metadata': md_load} else: msg = 'Invalid JSON specified in metadata file "%s"' % metadata_path @@ -72,6 +71,83 @@ def load_metadata(metadata_path: str) -> dict: return md_return + @staticmethod + def check_params_result_error(transformer_params: dict) -> Optional[dict]: + """Checks the transformer parameter results for an error + Arguments: + transformer_params: the dictionary to check for errors + Return: + An error dict if errors were found and None if not + Notes: + See handle_error() function + """ + if 'code' in transformer_params: + if 'error' in transformer_params: + error = transformer_params['error'] + else: + error = "Error returned from get_transformer_params with code: %s" % transformer_params['code'] + return __internal__.handle_error(-104, error) + + return None + + @staticmethod + def check_retrieve_results_error(transformer_retrieve: tuple) -> Optional[dict]: + """Checks the results of the transformer_class retrieving files + Arguments: + transformer_retrieve: the results of the retrieve + Return: + An error dict if errors were found and None if not + Notes: + See handle_error() function + """ + if not transformer_retrieve: + return None + + code = 0 + message = None + retrieve_len = len(transformer_retrieve) + if retrieve_len > 0: + code = transformer_retrieve[0] + if retrieve_len > 1: + message = transformer_retrieve[1] + else: + message = "Retrieving files returned a code of %s" % str(code) + + # Check for an error code + if code < 0: + return __internal__.handle_error(code, message) + + # Log the message if we get one returned to us + if retrieve_len > 1: + logging.info(transformer_retrieve[1]) + + return None + + @staticmethod + def load_metadata_files(metadata_files: list) -> dict: + """Loads the specified metadata files + Arguments: + metadata_files: list of metadata files to load + Returns: + Returns a dict containing the loaded metadata as an array. If an error occurs, the dict + won't contain metadata but will contain an error message under an 'error' key. + """ + metadata = [] + result = {'metadata': metadata} + for metadata_file in metadata_files: + if not os.path.exists(metadata_file): + result = __internal__.handle_error(-2, "Unable to access metadata file '%s'" % metadata_file) + break + logging.info("Loading metadata from file: '%s'", metadata_file) + md_loaded = __internal__.load_metadata(metadata_file) + if 'metadata' in md_loaded: + metadata.append(md_loaded['metadata']) + else: + result = __internal__.handle_error(-3, md_loaded['error']) + break + + return result + @staticmethod def parse_continue_result(result) -> tuple: """Parses the result of calling transformer.check_continue and returns @@ -129,7 +205,31 @@ def handle_check_continue(transformer_instance: transformer_class.Transformer, t return result @staticmethod - def perform_processing(transformer_instance, args: argparse.ArgumentParser, metadata: dict) -> dict: + def handle_retrieve_files(transformer_instance: transformer_class.Transformer, args: argparse.Namespace, metadata: dict) ->\ + Optional[dict]: + """Handles calling the transformer class to retrieve files + Arguments: + transformer_instance: the current transformer environment + args: the command line arguments + metadata: the loaded metadata + Return: + A dict containing error information if a problem occurs and None if no problems are found. + Note: + A side effect of this function is a information message logged if the transformer class instance does not have + a 'retrieve_files' function declared. + """ + if hasattr(transformer_instance, 'retrieve_files'): + transformer_retrieve = transformer_instance.retrieve_files(args, metadata) + retrieve_results = __internal__.check_retrieve_results_error(transformer_retrieve) + if retrieve_results: + return retrieve_results + else: + logging.info("Transformer class doesn't have function named 'retrieve_files'") + + return None + + @staticmethod + def perform_processing(transformer_instance: transformer_class.Transformer, args: argparse.Namespace, metadata: dict) -> dict: """Makes the calls to perform the processing Arguments: transformer_instance: instance of transformer class @@ -144,16 +244,14 @@ def perform_processing(transformer_instance, args: argparse.ArgumentParser, meta if hasattr(transformer_instance, 'get_transformer_params'): transformer_params = transformer_instance.get_transformer_params(args, metadata) if not isinstance(transformer_params, dict): - return __internal__.handle_error(-101, \ - "Invalid return from getting transformer parameters from transformer class instance") - if 'code' in transformer_params: - if 'error' in transformer_params: - error = transformer_params['error'] - else: - error = "Error returned from get_transformer_params with code: %s" % transformer_params['code'] - return __internal__.handle_error(-101, error) + return __internal__.handle_error(-101, + "Invalid return from getting transformer parameters from transformer class instance") + + params_result = __internal__.check_params_result_error(transformer_params) + if params_result: + return params_result else: - logging.debug("Transformer class instance does not have get_transformer_params method") + logging.info("Transformer class instance does not have get_transformer_params method") transformer_params = {} # First check if the transformer thinks everything is in place @@ -162,31 +260,30 @@ def perform_processing(transformer_instance, args: argparse.ArgumentParser, meta if 'code' in result and result['code'] < 0 and 'error' not in result: result['error'] = "Unknown error returned from check_continue call" else: - logging.debug("transformer module doesn't have a function named 'check_continue'") + logging.info("Transformer module doesn't have a function named 'check_continue'") # Retrieve additional files if indicated by return code from the check - if not 'error' in result and 'code' in result and result['code'] == 0: - # TODO: Fetch additional data for processing - pass + if 'error' not in result and 'code' in result and result['code'] == 0: + result = __internal__.handle_retrieve_files(transformer_instance, args, metadata) # Next make the call to perform the processing - if not 'error' in result: + if 'error' not in result: if hasattr(transformer, 'perform_process'): result = transformer.perform_process(transformer_instance, **transformer_params) else: - logging.debug("transformer module is missing function named 'perform_process'") + logging.debug("Transformer module is missing function named 'perform_process'") return __internal__.handle_error(-102, "Transformer perform_process interface is not available " + "for processing data") return result @staticmethod - def handle_result(result_types: str, result_file_path: str, result: dict) -> None: + def handle_result(result: dict, result_types: str = None, result_file_path: str = None) -> dict: """Handles the results of processing as dictated by the arguments passed in. Arguments: - result_types: comma separated string containing one or more of: all, file, print - result_file_path: location to place result file result: the dictionary of result information + result_types: optional, comma separated string containing one or more of: all, file, print + result_file_path: optional, location to place result file Return: Returns the result parameter Notes: @@ -224,7 +321,7 @@ def add_parameters(parser: argparse.ArgumentParser, transformer_instance) -> Non parser.add_argument('--result', nargs='?', default='all', help='Direct the result of a run to one or more of (all is default): "all,file,print"') - parser.add_argument('--metadata', type=str, help='The path to the source metadata') + parser.add_argument('--metadata', type=str, action='append', help='The path to the source metadata') parser.add_argument('--working_space', type=str, help='the folder to use use as a workspace and ' + 'for storing results') @@ -240,7 +337,7 @@ def add_parameters(parser: argparse.ArgumentParser, transformer_instance) -> Non # Assume the rest of the arguments are the files parser.add_argument('file_list', nargs=argparse.REMAINDER, help='additional files for transformer') -def do_work(parser: argparse.ArgumentParser, **kwargs) -> None: +def do_work(parser: argparse.ArgumentParser, **kwargs) -> dict: """Function to prepare and execute work unit Arguments: parser: an instance of argparse.ArgumentParser @@ -252,7 +349,7 @@ def do_work(parser: argparse.ArgumentParser, **kwargs) -> None: transformer_instance = transformer_class.Transformer(**kwargs) if not transformer_instance: result = __internal__.handle_error(-100, "Unable to create transformer class instance for processing") - return __internal__.handle_result(None, None, result) + return __internal__.handle_result(result, None, None) add_parameters(parser, transformer_instance) args = parser.parse_args() @@ -262,22 +359,20 @@ def do_work(parser: argparse.ArgumentParser, **kwargs) -> None: # Check that we have mandatory metadata if not args.metadata: - result = __internal__.handle_error(-1, "No metadata path was specified.") - elif not os.path.exists(args.metadata): - result = __internal__.handle_error(-2, "Unable to access metadata file '%s'" % args.metadata) + result = __internal__.handle_error(-1, "No metadata paths were specified.") else: - md_result = __internal__.load_metadata(args.metadata) - if 'metadata' in md_result: - result = __internal__.perform_processing(transformer_instance, args, md_result['metadata']) + md_results = __internal__.load_metadata_files(args.metadata) + if 'metadata' in md_results: + result = __internal__.perform_processing(transformer_instance, args, md_results['metadata']) else: - result = __internal__.handle_error(-3, md_result['error']) + result = __internal__.handle_error(-3, md_results['error']) if args.working_space: result_path = os.path.join(args.working_space, 'result.json') else: result_path = None - __internal__.handle_result(args.result, result_path, result) + __internal__.handle_result(result, args.result, result_path) return result if __name__ == "__main__": diff --git a/base-image/test-files/README.md b/base-image/test-files/README.md new file mode 100644 index 0000000..4dda402 --- /dev/null +++ b/base-image/test-files/README.md @@ -0,0 +1,38 @@ +# Transformer Unit Tests +This folder holds the unit tests [TravisCI](https://travis-ci.org/) will run in it's build. +These are basic functional unit tests, checks for output formatting and typing. All tests are written in and utilizing [pytest](https://docs.pytest.org/en/latest/). +In addition to this, [pylint](https://www.pylint.org/) will also be deployed. +Look into our organization's repository for our [pylint protocols](https://github.com/AgPipeline/Organization-info) + +### Running the tests +Upon submitting a pull request Travis will build and run the testing modules automatically and will return a report with all passing or failing code. + +### Running the tests before submitting a pull request +Should you wish to test your code before submitting a pull request follow these steps: +1) Clone, pull, copy or otherwise aquire the pylintrc file located at this [repo](https://github.com/AgPipeline/Organization-info) + +2) From the command line run the following commands (from base-docker-support as current directory) + ```sh + pylint --rcfile= base-image/*py + pylint --rcfile= base-image/**/*py + ``` +3) Once the previous commands have executed there will be a list of changes that should be made to bring any code up to standard +4) From the command line run the following command while the current working directory is base-image + ```sh + python -m pytest -v + ``` + or + ```sh + python3 -m pytest -v + ``` + + +### Requirements + +There are no additional requirements or dependancies if not running these tests locally, if however these are to be run before deploying travis the following are required. + +python 3 \ +pylint \ +pytest + +All three may be installed using pip, conda, or another preferred method. diff --git a/base-image/test-files/test_entrypoint.py b/base-image/test-files/test_entrypoint.py new file mode 100644 index 0000000..3c06b18 --- /dev/null +++ b/base-image/test-files/test_entrypoint.py @@ -0,0 +1,121 @@ +"""Tests for 'entrypoint.py' +""" + +#Import entrypoint.py and embedded modules +import argparse +import entrypoint +import transformer +from transformer_class import Transformer + +#Set up initial testing values +TEST_TRANSFORMER = Transformer() +PARSE = argparse.ArgumentParser(description="test") +TEST_INTERNAL = entrypoint.__internal__() + +# pylint: disable=assignment-from-no-return +def test_handle_error(): + """Test for handle error + """ + #Some testing arguments + test_code = 117 + test_message = "Test message" + + #Initial test using "ideal" args + ideal_example = TEST_INTERNAL.handle_error(test_code, test_message) + #Should return dict type + assert isinstance(ideal_example, dict) + + #A secondary test + test_code = None + test_message = False + + secondary_example = TEST_INTERNAL.handle_error(test_code, test_message) + assert isinstance(secondary_example, dict) + +def test_load_metadata(): + """Test load_metadata method + """ + + #Set some testing arguments + test_path = "https://example-metadata-path.com" + + #Save method call to variable + testee = TEST_INTERNAL.load_metadata(test_path) + #Should return dict type + assert isinstance(testee, dict) + +def test_parse_continue_result(): + """Test of the parse_continue method + """ + + #Saving check_continue result to a variable + check_result = transformer.check_continue(TEST_TRANSFORMER) + #Assigning method call to variable + continue_result = TEST_INTERNAL.parse_continue_result(check_result) + #Should return list type + assert isinstance(continue_result, tuple) + +def test_handle_check_continue(): + """Test for handle_check_continue method + """ + + #Creating a testing dictionary + test_dict = {} + + #Saving method call to variable + testee = TEST_INTERNAL.handle_check_continue(TEST_TRANSFORMER, test_dict) + + #Should return dict type + assert isinstance(testee, dict) + +def test_perform_processing(): + """Test for perform_processing method + """ + + #Call the load_metadata method and store result to a variable + test_path = "https://example-metadata-path.com" + test_metadata = TEST_INTERNAL.load_metadata(test_path) + + #Save method call to variable + test_process = TEST_INTERNAL.perform_processing(TEST_TRANSFORMER, PARSE, test_metadata) + + #Should return dict + assert isinstance(test_process, dict) + + +def test_handle_result(): + """Test for handle_result method + """ + + #Set up testing variables + test_result_str = "all,file,print" + test_path = "~/Documents" + test_dict = {} + + #Store function call to variable + test_handl_res = TEST_INTERNAL.handle_result(test_result_str, test_path, test_dict) + + #Should return the test_dict + assert test_handl_res == test_dict + + + +def test_add_parameters(): + """Test add_parameters function + """ + + #Save function call to variable + test_params = entrypoint.add_parameters(PARSE, TEST_TRANSFORMER) + + #Should return None + assert test_params is None + +def test_do_work(): + """Test for do_work function + """ + + #Save function call to variable + test_work = entrypoint.do_work(PARSE) + + #Should return None + assert test_work is None diff --git a/base-image/test-files/test_transformer.py b/base-image/test-files/test_transformer.py new file mode 100644 index 0000000..afc6d15 --- /dev/null +++ b/base-image/test-files/test_transformer.py @@ -0,0 +1,37 @@ +"""Tests for transformer.py +""" + +#Import transformer.py module and imbedded modules +import argparse +import transformer +from transformer_class import Transformer + + +#Initial testing values +TEST_TRANSFORMER = Transformer() +PARSE = argparse.ArgumentParser() + +# pylint: disable=assignment-from-no-return +def test_add_parameters(): + """Testing add_parameters function + """ + #Saving function call to variable + testee = transformer.add_parameters(TEST_TRANSFORMER) + #Should return None + assert testee is None + +def test_check_continue(): + """Testing check_continue function + """ + #Saving function call to variable + testee = transformer.check_continue(TEST_TRANSFORMER) + #Should return dict type + assert isinstance(testee, dict) + +def test_perform_process(): + """Test of the perform_process function + """ + #Storing function call to a variable + testee = transformer.perform_process(TEST_TRANSFORMER) + #Should return dict type + assert isinstance(testee, dict) diff --git a/base-image/test-files/test_transformer_class.py b/base-image/test-files/test_transformer_class.py new file mode 100644 index 0000000..70255ef --- /dev/null +++ b/base-image/test-files/test_transformer_class.py @@ -0,0 +1,28 @@ +"""Tests for 'transformer_class.py" +""" + +#Import transformer_class.py module and inmedded modules +import argparse +from transformer_class import Transformer + +#Initial testing values +TEST_TRANSFORMER = Transformer() +PARSE = argparse.ArgumentParser(description='Test') +TEST_METADATA = {} + +# pylint: disable=assignment-from-no-return +def test_add_parameters(): + """Test for add_parameters function + """ + #Saving method call to variable + testee = TEST_TRANSFORMER.add_parameters(PARSE) + #Should return None + assert testee is None + +def test_transformer_params(): + """Test for transformer params + """ + #Saving method call to variable + testee = TEST_TRANSFORMER.get_transformer_params(PARSE, TEST_METADATA) + #Should return dict type + assert isinstance(testee, dict) diff --git a/base-image/transformer.py b/base-image/transformer.py index 19a8cae..caaf07c 100644 --- a/base-image/transformer.py +++ b/base-image/transformer.py @@ -4,7 +4,7 @@ import argparse import transformer_class -#pylint: disable=unused-argument +# pylint: disable=unused-argument def add_parameters(parser: argparse.ArgumentParser) -> None: """Adds parameters Arguments: diff --git a/base-image/transformer_class.py b/base-image/transformer_class.py index 2459aed..56417e2 100644 --- a/base-image/transformer_class.py +++ b/base-image/transformer_class.py @@ -3,7 +3,7 @@ import argparse -#pylint: disable=unused-argument +# pylint: disable=unused-argument class Transformer(): """Generic class for supporting transformers """ @@ -20,14 +20,30 @@ def add_parameters(self, parser: argparse.ArgumentParser) -> None: parser: instance of argparse """ - #pylint: disable=no-self-use - def get_transformer_params(self, args: argparse.Namespace, metadata: dict) -> dict: + # pylint: disable=no-self-use + def get_transformer_params(self, args: argparse.Namespace, metadata: list) -> dict: """Returns a parameter list for processing data Arguments: args: result of calling argparse.parse_args - metadata: the loaded metadata + metadata: the list of loaded metadata + Return: + A dictionary of parameter names and value to pass to transformer """ self.args = args params = {} return params + + # pylint: disable=no-self-use + def retrieve_files(self, transformer_params: dict, metadata: list) -> tuple: + """Retrieves files as needed to make them available for processing + Arguments: + transformer_params: the values returned from get_transformer_params() call + metadata: the loaded metadata + Return: + A tuple consisting of the return code and an optional error message. + Notes: + A negative return code is considered an error and an associated message, if specified, + will be treated as such. + """ + return 0, "everything's in order"