From 0a7268357a7d9448d0f63e8bba487fe8e820b8c8 Mon Sep 17 00:00:00 2001 From: claxn Date: Mon, 29 Jun 2020 16:36:56 +0200 Subject: [PATCH] changed process download to overview endpoint and introduced single collection download to speed up validation --- src/openeo_pg_parser/utils.py | 41 +++++++++++++++++++------------- src/openeo_pg_parser/validate.py | 10 ++++---- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/openeo_pg_parser/utils.py b/src/openeo_pg_parser/utils.py index bd33fe7..0b4d3f7 100644 --- a/src/openeo_pg_parser/utils.py +++ b/src/openeo_pg_parser/utils.py @@ -63,6 +63,10 @@ def load_processes(src): dict : Dictionary linking process IDs with the respective process definitions. + Notes + ----- + When a URL is given, this function downloads the process specifications from the overview endpoint. + """ if isinstance(src, dict): @@ -74,12 +78,7 @@ def load_processes(src): elif isinstance(src, str) and url_is_valid(src): r = requests.get(url=src) data = r.json() - process_overview_list = data['processes'] - process_list = [] - for process in process_overview_list: - process_url = src + "/" + process['id'] - r = requests.get(url=process_url) - process_list.append(r.json()) + process_list = data['processes'] elif isinstance(src, list): process_list = src else: @@ -93,7 +92,7 @@ def load_processes(src): return processes -def load_collections(src): +def load_collections(src, collection_ids=None): """ Collects collection definitions from a local collections directory, from a URL or a list of collection definitions. 
@@ -101,17 +100,24 @@ def load_collections(src): Parameters ---------- src : dict or str or list, optional - It can be: - - dictionary of loaded collection definitions (keys are the collection ID's) - - directory path to collections (.json) - - URL of the remote collection endpoint (e.g., "https://earthengine.openeo.org/v1.0/collections") - - list of loaded collection definitions + It can be: + - dictionary of loaded collection definitions (keys are the collection ID's) + - directory path to collections (.json) + - URL of the remote collection endpoint (e.g., "https://earthengine.openeo.org/v1.0/collections") + - list of loaded collection definitions + collection_ids : list of str, optional + List of collection ID's used when a URL is given as a source. Returns ------- dict : Dictionary linking collection IDs with the respective collection definitions. + Notes + ----- + When a URL is given, this function downloads the collections from each exact collection endpoint. + Note that downloading all collections can take quite some time. 
+ """ if isinstance(src, dict): @@ -121,12 +127,13 @@ def load_collections(src): filepaths = glob.glob(os.path.join(src, "*.json")) collection_list = [load_json_file(filepath) for filepath in filepaths] elif isinstance(src, str) and url_is_valid(src): - r = requests.get(url=src) - data = r.json() - collection_overview_list = data['collections'] + if not collection_ids: + r = requests.get(url=src) + data = r.json() + collection_ids = [collection['id'] for collection in data['collections']] collection_list = [] - for collection in collection_overview_list: - collection_url = src + "/" + collection['id'] + for collection_id in collection_ids: + collection_url = src + "/" + collection_id r = requests.get(url=collection_url) collection_list.append(r.json()) elif isinstance(src, list): diff --git a/src/openeo_pg_parser/validate.py b/src/openeo_pg_parser/validate.py index 090b7b9..8572c9e 100644 --- a/src/openeo_pg_parser/validate.py +++ b/src/openeo_pg_parser/validate.py @@ -67,16 +67,16 @@ def validate_collections(process_graph, collections_src): If True, the given process graph is valid with respect to the given process definitions. 
""" - collection_defs = load_collections(collections_src) - err_msgs = [] for node in process_graph.nodes: if node.process_id == 'load_collection': + collection_id = node.arguments['id'] + collection_defs = load_collections(collections_src, collection_ids=[collection_id]) if node.arguments['id'] not in collection_defs.keys(): - err_msg = "'{}' is not in the current set of collections.".format(node.arguments['id']) + err_msg = "'{}' is not in the current set of collections.".format(collection_id) err_msgs.append(err_msg) else: - collection = collection_defs[node.arguments['id']] + collection = collection_defs[collection_id] collection_dims = collection['cube:dimensions'] available_bands = [] for _, collection_dim in collection_dims.items(): @@ -92,7 +92,7 @@ def validate_collections(process_graph, collections_src): for available_band in available_bands]) err_msg = "'{}' is not a valid band name for collection '{}' " \ "with the following bands: {}.".format(node_band, - collection['id'], + collection_id, available_bands_str) err_msgs.append(err_msg)