-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathcheck_metrics.py
102 lines (91 loc) · 3.25 KB
/
check_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import click
import numpy as np
import pandas as pd
from pandas.testing import assert_series_equal
from pathlib import Path as P
from stream_cat_config import OUT_DIR, FINAL_DIR
# Sub-paths joined onto ODV to locate the reference tables:
# ACC for allocation/accumulation output, FTP for the final tables.
ACC = "Allocation_and_Accumulation"
FTP = "FTP_Staging/HydroRegions"

# Root of the reference StreamCat data on the O: drive.
ODV = P(
    "O:/PRIV/CPHEA/PESD/COR/CORFiles/Geospatial_Library_Projects/StreamCat"
)

# Message templates; each takes two positional `str.format` arguments:
# a name (config key or file name) and the directory involved.
config_error = (
    "\n\tImproperly Configured!!!"
    "\n\n\tYour `stream_cat_config.py` file's `{}` value"
    "\n\n\twill need to point to a directory other than:"
    "\n\n\t> {}"
)
check_file_error = (
    "\n\tFile Missing!!!"
    "\n\n\tMetric -- `{}` does not exist in:"
    "\n\n\t> {}"
)
@click.command()
@click.option(
    "--debug",
    "-d",
    show_default=True,
    is_flag=True,
    help="enter pdb debugger on fail",
    default=False,
)
@click.option(
    "--precision", "-p",
    type=float,
    show_default=True,
    help="value threshold on comparison",
    default=0.00001,
)
@click.option(
    "--final", "-f",
    default=False,
    show_default=True,
    is_flag=True,
    help="check against final tables, default is alloc/accum",
)
@click.argument("metrics", nargs=-1)
def compare(debug, metrics, final, precision):
    """Assert that values from another run of StreamCat align with
    the base run that we have on the O: drive. It is imperative that
    the `OUT_DIR` or `FINAL_DIR` paths in `stream_cat_config.py`
    point to a place other than where our data is on the O: drive.

    \b
    assertions:
        * column names match
        * table lengths match
        * COMIDs are equivalent
        * equality across columns w/in `precision`

    \b
    example:
        `$ python check_metrics.py nlcd2006 Clay Dams`
    """
    # The docstring above doubles as the click `--help` text, so parameter
    # notes live here instead. The `\b` lines are intentionally the backspace
    # character (non-raw string): click uses it to keep a paragraph unwrapped.
    #
    # debug:     on a failed comparison, print the error and drop into pdb
    #            instead of re-raising.
    # metrics:   tuple of metric name prefixes; one CSV per zone is compared.
    # final:     compare FINAL_DIR against the FTP tables rather than
    #            OUT_DIR against the allocation/accumulation tables.
    # precision: used as both `atol` and `rtol` for the per-column comparison.
    #
    # Exits with status 1 on misconfiguration or a missing file; raises
    # AssertionError on the first mismatch unless `debug` is set.
    check_dir = P(FINAL_DIR) if final else P(OUT_DIR)
    alloc_dir = ODV.joinpath(FTP if final else ACC)
    # NOTE(review): Path.match with a relative pattern matches from the right,
    # so this only inspects the final path component of `check_dir` -- confirm
    # whether ancestor directories should be rejected as well.
    if (check_dir == alloc_dir or
            check_dir.match("*Geospatial_Library_Projects*")):
        click.echo(
            config_error.format(
                "FINAL_DIR" if final else "OUT_DIR", check_dir))
        # Non-zero status so callers can detect the failure; `exit()` is a
        # `site` convenience and previously returned status 0 here.
        raise SystemExit(1)
    # allow_pickle=True unpickles the file's contents on load, so this file
    # must be trusted. INPUTS is iterated below for the zone names.
    INPUTS = np.load("accum_npy/vpu_inputs.npy", allow_pickle=True).item()
    # File names are `<metric>_<zone>.csv` for alloc/accum output and
    # `<metric><zone>.csv` for final tables.
    und = "" if final else "_"
    for metric in metrics:
        print("Checking --", metric)
        for zone in INPUTS:
            fn = f"{metric}{und}{zone}.csv"
            if not (check_dir / fn).exists():
                # Report the directory actually searched (FINAL_DIR when
                # --final is given), not unconditionally OUT_DIR.
                click.echo(check_file_error.format(fn, check_dir))
                raise SystemExit(1)
            print(zone, end="..", flush=True)
            t1 = pd.read_csv(alloc_dir / fn).set_index("COMID").sort_index()
            t2 = pd.read_csv(check_dir / fn).set_index("COMID").sort_index()
            # Pre-bind so the debug handler can report a failure that happens
            # before the per-column loop starts.
            col = None
            try:
                # Explicit raises instead of `assert` so the checks survive
                # `python -O`.
                if not t1.index.equals(t2.index):
                    raise AssertionError(
                        f"{fn}: COMID index differs from the base table")
                if not t1.columns.equals(t2.columns):
                    raise AssertionError(
                        f"{fn}: column names differ from the base table")
                for col in t1.columns:
                    if "StorM3" in col:  # N/A values won't compare in Dams summary
                        continue
                    assert_series_equal(t1[col], t2[col], check_dtype=False,
                                        atol=precision, rtol=precision)
            except AssertionError as e:
                if not debug:
                    raise
                where = col if col is not None else "<index/columns check>"
                print(e, f"\n\ncolumn: {where}\n")
                import pdb
                pdb.set_trace()
        print("good!")
# Script entry point: run the click command, which parses the command line
# itself.
if __name__ == "__main__":
    compare()