Skip to content

Commit

Permalink
changed process download to overview endpoint and introduced single c…
Browse files Browse the repository at this point in the history
…ollection download to speed up validation
  • Loading branch information
claxn committed Jun 29, 2020
1 parent eb6ad11 commit 0a72683
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 22 deletions.
41 changes: 24 additions & 17 deletions src/openeo_pg_parser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def load_processes(src):
dict :
Dictionary linking process IDs with the respective process definitions.
Notes
-----
When a URL is given, this function downloads all process specifications in a single request from the overview endpoint.
"""

if isinstance(src, dict):
Expand All @@ -74,12 +78,7 @@ def load_processes(src):
elif isinstance(src, str) and url_is_valid(src):
r = requests.get(url=src)
data = r.json()
process_overview_list = data['processes']
process_list = []
for process in process_overview_list:
process_url = src + "/" + process['id']
r = requests.get(url=process_url)
process_list.append(r.json())
process_list = data['processes']
elif isinstance(src, list):
process_list = src
else:
Expand All @@ -93,25 +92,32 @@ def load_processes(src):
return processes


def load_collections(src):
def load_collections(src, collection_ids=None):
"""
Collects collection definitions from a local collections directory, from a URL or a list of collection
definitions.
Parameters
----------
src : dict or str or list, optional
It can be:
- dictionary of loaded collection definitions (keys are the collection ID's)
- directory path to collections (.json)
- URL of the remote collection endpoint (e.g., "https://earthengine.openeo.org/v1.0/collections")
- list of loaded collection definitions
It can be:
- dictionary of loaded collection definitions (keys are the collection IDs)
- directory path to collections (.json)
- URL of the remote collection endpoint (e.g., "https://earthengine.openeo.org/v1.0/collections")
- list of loaded collection definitions
collection_ids : list of str, optional
List of collection IDs to download when a URL is given as the source. If omitted or empty, every collection listed by the endpoint is downloaded.
Returns
-------
dict :
Dictionary linking collection IDs with the respective collection definitions.
Notes
-----
When a URL is given, this function downloads each collection from its individual endpoint (the source URL followed by "/" and the collection ID).
Note that downloading all collections can take quite some time; pass `collection_ids` to restrict the download.
"""

if isinstance(src, dict):
Expand All @@ -121,12 +127,13 @@ def load_collections(src):
filepaths = glob.glob(os.path.join(src, "*.json"))
collection_list = [load_json_file(filepath) for filepath in filepaths]
elif isinstance(src, str) and url_is_valid(src):
r = requests.get(url=src)
data = r.json()
collection_overview_list = data['collections']
if not collection_ids:
r = requests.get(url=src)
data = r.json()
collection_ids = [collection['id'] for collection in data['collections']]
collection_list = []
for collection in collection_overview_list:
collection_url = src + "/" + collection['id']
for collection_id in collection_ids:
collection_url = src + "/" + collection_id
r = requests.get(url=collection_url)
collection_list.append(r.json())
elif isinstance(src, list):
Expand Down
10 changes: 5 additions & 5 deletions src/openeo_pg_parser/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,16 @@ def validate_collections(process_graph, collections_src):
If True, the given process graph is valid with respect to the given collection definitions.
"""

collection_defs = load_collections(collections_src)

err_msgs = []
for node in process_graph.nodes:
if node.process_id == 'load_collection':
collection_id = node.arguments['id']
collection_defs = load_collections(collections_src, collection_ids=[collection_id])
if node.arguments['id'] not in collection_defs.keys():
err_msg = "'{}' is not in the current set of collections.".format(node.arguments['id'])
err_msg = "'{}' is not in the current set of collections.".format(collection_id)
err_msgs.append(err_msg)
else:
collection = collection_defs[node.arguments['id']]
collection = collection_defs[collection_id]
collection_dims = collection['cube:dimensions']
available_bands = []
for _, collection_dim in collection_dims.items():
Expand All @@ -92,7 +92,7 @@ def validate_collections(process_graph, collections_src):
for available_band in available_bands])
err_msg = "'{}' is not a valid band name for collection '{}' " \
"with the following bands: {}.".format(node_band,
collection['id'],
collection_id,
available_bands_str)
err_msgs.append(err_msg)

Expand Down

0 comments on commit 0a72683

Please sign in to comment.