ENH: Move the private function in nhdplus_attrs to module level to make the function more readable. [skip ci]
Taher Chegini committed May 16, 2024
1 parent 96f364a commit ad174df
Showing 1 changed file with 19 additions and 18 deletions.
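The commit applies a common refactor: a helper that was nested inside a long public function is hoisted to module level as a private, underscore-prefixed function. A minimal sketch of that pattern, using hypothetical names (`fetch`, `load_data_*`, `_parse`), not the actual pynhd code:

```python
# Minimal sketch of the "hoist a nested helper" refactor (hypothetical names).
from __future__ import annotations


def fetch(item: str) -> dict[str, str | None]:
    """Stand-in for a network call."""
    return {"id": item, "name": "example", "obsolete": None}


# Before: the helper is re-created on every call and cannot be tested alone.
def load_data_before(item: str) -> dict[str, str]:
    def parse(raw: dict[str, str | None]) -> dict[str, str]:
        return {k: v for k, v in raw.items() if v is not None}

    return parse(fetch(item))


# After: the helper lives at module level as a private function, keeping the
# public function short and the helper reusable and independently testable.
def _parse(raw: dict[str, str | None]) -> dict[str, str]:
    return {k: v for k, v in raw.items() if v is not None}


def load_data_after(item: str) -> dict[str, str]:
    return _parse(fetch(item))


print(load_data_before("x") == load_data_after("x"))  # True
```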
37 changes: 19 additions & 18 deletions pynhd/nhdplus_derived.py
@@ -124,7 +124,7 @@ def enhd_attrs(
"The download was interrupted. Please try again.",
"If the problem persists, please download the file manually",
"from the following link and place it in the cache directory:",
f"{url}",
url,
)
)
raise ServiceError(msg)
@@ -177,7 +177,7 @@ def nhdplus_vaa(
"The download was interrupted. Please try again.",
"If the problem persists, please download the file manually",
"from the following link and place it in the cache directory:",
f"{url}",
url,
)
)
raise ServiceError(msg)
@@ -191,6 +191,17 @@ def nhdplus_vaa(
return vaa


+def _get_files(item: str) -> dict[str, tuple[str, str]]:
+    """Get all the available zip files in an item."""
+    url = "https://www.sciencebase.gov/catalog/item"
+    payload = {"fields": "files,downloadUri", "format": "json"}
+    resp = ar.retrieve_json([f"{url}/{item}"], [{"params": payload}])
+    resp = cast("list[dict[str, Any]]", resp)
+    files_url = zip(tlz.pluck("name", resp[0]["files"]), tlz.pluck("url", resp[0]["files"]))
+    meta = list(tlz.pluck("metadataHtmlViewUri", resp[0]["files"], default=""))[-1]
+    return {f.replace("_CONUS.zip", ""): (u, meta) for f, u in files_url if ".zip" in f}
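For reference, a small offline sketch of the mapping this helper builds. The ScienceBase-style payload below is invented (real responses come from ar.retrieve_json against the catalog item URL), and only the parsing steps are reproduced:

```python
from toolz import pluck  # mirrors the tlz.pluck helper used in the module

# Hypothetical ScienceBase "files" payload for one item (names/URLs invented).
resp = [
    {
        "files": [
            {"name": "readme.txt", "url": "https://example.com/readme.txt"},
            {
                "name": "BANKFULL_CONUS.zip",
                "url": "https://example.com/BANKFULL_CONUS.zip",
                "metadataHtmlViewUri": "https://example.com/BANKFULL_meta.html",
            },
        ]
    }
]

# Same parsing steps as _get_files: pair names with URLs, take the last
# file's metadata link, and keep only the zip archives.
files_url = zip(pluck("name", resp[0]["files"]), pluck("url", resp[0]["files"]))
meta = list(pluck("metadataHtmlViewUri", resp[0]["files"], default=""))[-1]
print({f.replace("_CONUS.zip", ""): (u, meta) for f, u in files_url if ".zip" in f})
# {'BANKFULL': ('https://example.com/BANKFULL_CONUS.zip',
#               'https://example.com/BANKFULL_meta.html')}
```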


def nhdplus_attrs(attr_name: str | None = None) -> pd.DataFrame:
"""Stage the NHDPlus Attributes database and save to nhdplus_attrs.parquet.
@@ -219,16 +230,6 @@ def nhdplus_attrs(attr_name: str | None = None) -> pd.DataFrame:

main_items = dict(zip(titles, tlz.pluck("id", r["items"])))

-def get_files(item: str) -> dict[str, tuple[str, str]]:
-    """Get all the available zip files in an item."""
-    url = "https://www.sciencebase.gov/catalog/item"
-    payload = {"fields": "files,downloadUri", "format": "json"}
-    resp = ar.retrieve_json([f"{url}/{item}"], [{"params": payload}])
-    resp = cast("list[dict[str, Any]]", resp)
-    files_url = zip(tlz.pluck("name", resp[0]["files"]), tlz.pluck("url", resp[0]["files"]))
-    meta = list(tlz.pluck("metadataHtmlViewUri", resp[0]["files"], default=""))[-1]
-    return {f.replace("_CONUS.zip", ""): (u, meta) for f, u in files_url if ".zip" in f}

files = {}
soil = main_items.pop("Soil")
for i, item in main_items.items():
@@ -238,7 +239,7 @@ def get_files(item: str) -> dict[str, tuple[str, str]]:
titles = tlz.map(lambda s: s.split(":")[1].strip() if ":" in s else s, titles)

child_items = dict(zip(titles, tlz.pluck("id", r["items"])))
-files[i] = {t: get_files(c) for t, c in child_items.items()}
+files[i] = {t: _get_files(c) for t, c in child_items.items()}

r = sb.get_children(soil)
titles = tlz.pluck("title", r["items"])
@@ -247,19 +248,19 @@ def get_files(item: str) -> dict[str, tuple[str, str]]:
child_items = dict(zip(titles, tlz.pluck("id", r["items"])))
stat = child_items.pop("STATSGO Soil Characteristics")
ssur = child_items.pop("SSURGO Soil Characteristics")
files["Soil"] = {t: get_files(c) for t, c in child_items.items()}
files["Soil"] = {t: _get_files(c) for t, c in child_items.items()}

r = sb.get_children(stat)
titles = tlz.pluck("title", r["items"])
titles = tlz.map(lambda s: s.split(":")[1].split(",")[1].strip(), titles)
child_items = dict(zip(titles, tlz.pluck("id", r["items"])))
files["STATSGO"] = {t: get_files(c) for t, c in child_items.items()}
files["STATSGO"] = {t: _get_files(c) for t, c in child_items.items()}

r = sb.get_children(ssur)
titles = tlz.pluck("title", r["items"])
titles = tlz.map(lambda s: s.split(":")[1].strip(), titles)
child_items = dict(zip(titles, tlz.pluck("id", r["items"])))
files["SSURGO"] = {t: get_files(c) for t, c in child_items.items()}
files["SSURGO"] = {t: _get_files(c) for t, c in child_items.items()}

chars = []
types = {"CAT": "local", "TOT": "upstream_acc", "ACC": "div_routing"}
@@ -292,7 +293,7 @@ def get_files(item: str) -> dict[str, tuple[str, str]]:
"The download was interrupted. Please try again.",
"If the problem persists, please download the file manually",
"from the following link and place it in the cache directory:",
f"{url}",
url,
)
)
raise ServiceError(msg)
@@ -383,7 +384,7 @@ def nhdplus_h12pp(gpkg_path: Path | str | None = None) -> pd.DataFrame:
"The download was interrupted. Please try again.",
"If the problem persists, please download the file manually",
"from the following link and place it in the cache directory:",
f"{url}",
url,
)
)
raise ServiceError(msg)
