Commit

misc. upgrades
mheberger committed Sep 28, 2023
1 parent 58df7fa commit 850da7a
Showing 4 changed files with 82 additions and 70 deletions.
15 changes: 1 addition & 14 deletions README.md
@@ -95,7 +95,7 @@ Unzip these files and save them to a folder on your hard drive. Then, in `config

## <a name="step3">3. Download simplified MERIT-Basins data</a>

In "low-resolution" mode, the script will look for shapefiles with *simplified* unit catchment boundaries. I have created these files and you can download them here:
In "low-resolution" mode, the script will look for shapefiles with *simplified* unit catchment boundaries. I have created these files, and you can download them here:

[https://mghydro.com/watersheds/share/catchments\_simplified.zip](https://mghydro.com/watersheds/share/catchments_simplified.zip)

@@ -258,16 +258,3 @@ they are just much faster to load.
# Contributing

If you have any comments or suggestions, please get in touch. If you find a bug, you can report an issue here on GitHub. Finally, this code is open source, so if you are motivated to make any modifications, additions, or bug fixes, you can fork the repository and then do a pull request on GitHub.

# Version History

## v1.2, 2023-09-28
- Updated the code so it can handle input shapefiles with missing .prj files; the latest version of MERIT-Basins does not include them.
- Added the ability to save GeoDataFrames as Python pickle files. This should speed up one of the slow steps (reading shapefiles and constructing the spatial index), especially if you run the script multiple times.
- Considered upgrading various Python libraries (namely pysheds and shapely), but it looks to be a major effort.

## v1.1, 2023-09-21
- Bug fix for rare issue where delineated watershed contains MultiPolygons instead of Polygons
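The pickle caching added in v1.2 boils down to a load-or-build pattern. Here is a minimal stand-alone sketch using Python's standard `pickle` module; the names `load_or_build` and `cache.pkl` are hypothetical stand-ins, not the script's actual API, which caches GeoDataFrames read from shapefiles:

```python
import os
import pickle

def load_or_build(pkl_path, build_func):
    """Return the object cached at pkl_path if it exists; otherwise call
    build_func() (standing in for the slow shapefile read), cache the
    result to pkl_path, and return it."""
    if pkl_path and os.path.isfile(pkl_path):
        with open(pkl_path, "rb") as f:
            return pickle.load(f)
    obj = build_func()
    if pkl_path:
        with open(pkl_path, "wb") as f:
            pickle.dump(obj, f)
    return obj

# Start from a clean slate so the first call actually builds the object
if os.path.exists("cache.pkl"):
    os.remove("cache.pkl")

table = load_or_build("cache.pkl", lambda: {"COMID": [1, 2, 3]})  # builds and caches
cached = load_or_build("cache.pkl", lambda: {"stale": True})      # loads from cache
```

The second call never runs its build function because the pickle file already exists; that skipped work is the whole speed-up when the script is run repeatedly.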


28 changes: 15 additions & 13 deletions config.py
@@ -4,7 +4,8 @@
Edit this file carefully before running delineator.py
See README for more information about the options and for instructions
on how to download the input data.
on how to download the input data for delineating watersheds in areas
outside of the sample data provided for Iceland.
"""

@@ -24,7 +25,7 @@
MERIT_ACCUM_DIR = "data/raster/accum_basins"

# Set to True if you want the script to write status messages to the console
VERBOSE = False
VERBOSE = True

# Set to True to make a bunch of plots of each watershed.
# (Just for debugging. Slows down the script a lot.)
@@ -45,11 +46,11 @@
# Folder where the script will write the output GeoJSON files or shapefiles
OUTPUT_DIR = "output"

# The file extension will determine the types of files the script creates.
# The file extension will determine the types of geodata files the script creates.
# "gpkg" for GeoPackage (recommended)
# "geojson" for GeoJSON files
# "shp" for shapefile
# Use a blank string "" if you DO NOT want any output (for example,
# Use a blank string "" if you DO NOT want any geodata files (for example,
# you are only making the interactive map and don't need geodata).
# Other file formats are available;
# see: https://geopandas.org/en/stable/docs/user_guide/io.html#writing-spatial-data
@@ -61,7 +62,8 @@
# Directory to store Python pickle files. Because it can be slow for Python to
# read shapefiles and create a GeoDataFrame. Once you have done this once, you
# can save time in the future by storing the GeoDataFrame as a .pkl file.
# The script will not search for pickle files if you leave this as a blank string, ''
# Enter a blank string, '' if you do NOT want the script to create .pkl files.
# Please note that these files can be large! (Up to around 1 GB for large basins.)
PICKLE_DIR = 'pkl'

# Threshold for watershed size in km² above which the script will revert to
@@ -85,12 +87,11 @@
# from the watershed boundary and output smaller files.
SIMPLIFY = False

# If SIMPLIFY is True, set SIMPLIFY_TOLERANCE to a value in decimal degrees.
# Note that the vector polygons
# If SIMPLIFY is True, set SIMPLIFY_TOLERANCE to a value in decimal degrees.
SIMPLIFY_TOLERANCE = 0.0008

# Set to TRUE if you want the script to create a local web page where you
# can review the results
# can review the results.
MAKE_MAP = True

# Folder where the script should put the map files. (MAKE sure it exists!)
@@ -100,19 +101,20 @@
# On the map, do you also want to include the rivers?
MAP_RIVERS = True

# If you mapped the rivers, how many stream orders to include?
# I recommend 4 or 5. More than this and the browser may not display all the rivers in a large watershed.
# On the web page map, if MAP_RIVERS is True, how many stream orders shall we display?
# I recommend 4 or fewer. More than this, and the browser may not display all the rivers in a large watershed.
NUM_STREAM_ORDERS = 3

# Set to True to use the experimental match areas feature.
# You must include watershed areas in your outlets CSV file to use this feature.
# You must include the field `area` (in km²) in your outlets CSV file to use this feature.
MATCH_AREAS = False

# If you set MATCH_AREAS = True, how close of a match should the script look for?
# Enter 0.25 for 25%. If you have not entered areas in your CSV file, you can ignore this parameter.
# Enter 0.25 for 25%. If you set MATCH_AREAS to False you can ignore this parameter.
AREA_MATCHING_THRESHOLD = 0.25

# If you set MATCH_AREAS = True, how far away from the original outlet point should the script look
# for a river reach that is a better match in terms of upstream area?
# Units are decimal degrees (not a proper distance measurement!)
# Units are decimal degrees (sorry, not a proper distance measurement, this feature could be improved!)
# 0.1° is about 11 km near the equator, and about 8 km at a latitude of 45°
MAX_DIST = 0.075
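As a rough sketch of how the area-matching settings above interact (plain Python with hypothetical numbers; the real script compares the `area` column of the outlets CSV against the upstream area reported by MERIT-Basins):

```python
AREA_MATCHING_THRESHOLD = 0.25  # accept up to a 25% difference
MAX_DIST = 0.075                # search radius in decimal degrees (~8 km at 45° latitude)

def area_is_close(area_reported, up_area, threshold=AREA_MATCHING_THRESHOLD):
    """True if the percent difference between the user-reported watershed
    area and the unit catchment's upstream area is within the threshold."""
    pct_diff = abs(area_reported - up_area) / area_reported
    return pct_diff <= threshold

# 1,000 km² reported vs. 1,200 km² upstream is a 20% difference: accepted.
# 1,000 km² reported vs. 1,300 km² upstream is a 30% difference: the script
# would then search up to MAX_DIST away for a better-matching river reach.
ok = area_is_close(1000.0, 1200.0)
needs_search = not area_is_close(1000.0, 1300.0)
```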
76 changes: 38 additions & 38 deletions delineate.py
@@ -40,41 +40,21 @@
import pyproj
from functools import partial
from config import *
from py.mapper import make_map, create_folder_if_not_exists

if PLOTS:
import matplotlib.pyplot as plt

if HIGH_RES:
import py.merit_detailed

if MAKE_MAP:
from py.mapper import make_map

gpd.options.use_pygeos = True

# The WGS84 projection string, used in a few places
PROJ_WGS84 = 'EPSG:4326'


def create_folder_if_not_exists(folder_path: str) -> bool:
"""
checks for the existence of a folder and tries to create it if it doesn't exist.
returns:
True if the folder already exists or could be created successfully
False if it couldn't create the folder
"""
try:
if not os.path.exists(folder_path):
os.makedirs(folder_path)
return True
except Exception as e:
# Handle the specific exception that might occur during folder creation.
# You can customize this error handling based on your needs.
print(f"An error occurred: {e}")
return False


def validate(gages_df: pd.DataFrame) -> bool:
"""
After we have read in the gages CSV, check whether it appears to be OK
@@ -122,6 +102,7 @@ def validate(gages_df: pd.DataFrame) -> bool:

return True


def validate_search_distance():
"""
For the pour point relocation routine. SEARCH_DIST governs how far away we'll look for
@@ -274,8 +255,8 @@ def addnode(B, node):
def find_close_catchment():
"""
Part of my simple pour point relocation method. If the outlet falls in a unit catchment
whose area is a mismatch of our a priori estimate,
look around the neighborhood for a unit catchment whose area that matches more closely
whose area is a mismatch of our a priori estimate of the upstream area,
look around the neighborhood for a unit catchment whose area matches more closely
"""
# Find the river segment that is within a bounding box around the point
dist = 0.01
@@ -342,14 +323,30 @@ def plot_basins():
plt.savefig(f"plots/{wid}_vector_unit_catch.png")
plt.close(fig)

# Check that the OUTPUT directories are there. If not, try to create them.
folder_exists = create_folder_if_not_exists(OUTPUT_DIR)
if not folder_exists:
raise Exception(f"No folder for output. Stopping")

# Check for the folder to put Python PICKLE files
if PICKLE_DIR != "":
folder_exists = create_folder_if_not_exists(PICKLE_DIR)
if not folder_exists:
raise Exception(f"No folder for pickle files. Stopping")

# Check if the MAP_FOLDER is there
if MAKE_MAP:
folder_exists = create_folder_if_not_exists(MAP_FOLDER)
if not folder_exists:
raise Exception(f"No folder for the map files. Stopping")

# Check that the CSV file is there
if not os.path.isfile(OUTLETS_CSV):
raise Exception(f"Could not find your outlets file at: {OUTLETS_CSV}")

if VERBOSE: print(f"Reading your outlets data in: {OUTLETS_CSV}")

# Create a Pandas DataFrame for the outlet points
# Read the outlet points CSV file and put into a Pandas DataFrame
# (I call the outlet points gages, because I usually delineate watersheds at streamflow gages)
if VERBOSE: print(f"Reading your outlets data in: {OUTLETS_CSV}")
gages_df = pd.read_csv(OUTLETS_CSV, header=0, dtype={'id': 'str', 'lat': 'float', 'lng': 'float'})

# Check that the CSV file includes at a minimum: id, lat, lng and that all values are appropriate
@@ -439,7 +436,7 @@ def plot_basins():
# Create a dataframe of the gages_basins_join in that basin
gages_in_basin = gages_basins_join[gages_basins_join["BASIN"] == basin]
num_gages_in_basin = len(gages_in_basin)
print("\nBeginning delineation for %s outlet point(s) in Level 2 Basin #%s." % (num_gages_in_basin, basin))
if VERBOSE: print("\nBeginning delineation for %s outlet point(s) in Level 2 Basin #%s." % (num_gages_in_basin, basin))

if HIGH_RES:
catchments_gdf = load_gdf("catchments", basin, True)
@@ -481,19 +478,26 @@ def plot_basins():
# Iterate over the gages and assemble the watershed
for i in range(0, num_gages_in_basin):
gages_counter += 1
# Reset the local boolean flag for high-res mode. If the watershed is too big, the script will switch to low-res.
bool_high_res = HIGH_RES

# Let wid be the watershed ID
# Let wid be the watershed ID. Get the lat, lng coords of the gage.
wid = gages_joined['id'].iloc[i]
lat = gages_joined['lat'].iloc[i]
lng = gages_joined['lng'].iloc[i]

if VERBOSE: print(f"\n* Delineating watershed {gages_counter} of {n_gages}, with outlet id = {wid}")

# The terminal comid is the unit catchment that contains (overlaps) the outlet point
terminal_comid = gages_joined['COMID'].iloc[i]

# If the user provided the area, check whether it is a good match.
# If it is not, look around for a better match in the neighborhood.
# Get the upstream area of the unit catchment we found, according to MERIT-Basins
up_area = rivers_gdf.loc[terminal_comid].uparea

# If MATCH_AREAS is True and the user provided the area in the outlets CSV file,
# the script will check whether the upstream area of the unit catchment is a good match.
# If the areas do not match well, look around the neighborhood for another unit catchment
# whose area is a closer match to what we think it is.
if bAreas and MATCH_AREAS:
area_reported = gages_df.loc[wid].area_reported
PD_area = abs((area_reported - up_area) / area_reported)
@@ -504,20 +508,17 @@ def plot_basins():
candidate_comid, up_area = find_close_catchment()
if candidate_comid is None:
failed[wid] = "Could not find a nearby river reach whose upstream area is " \
"within {}% of reported area of {:,.0f} km²"\
.format(AREA_MATCHING_THRESHOLD * 100, area_reported)
"within {}% of reported area of {:,.0f} km²" \
.format(AREA_MATCHING_THRESHOLD * 100, area_reported)
continue
else:
terminal_comid = candidate_comid

# Let B be the list of unit catchments_gdf that are in the basin
# Let B be the list of unit catchments (and river reaches) that are in the basin
B = []

# add the first node, and the rest will be added, as the function is recursive.
print(f"\n*** DELINEATING watershed {gages_counter} of {n_gages}, with outlet id = {wid}")
# Add the first node, and the rest will be added recursively
addnode(B, terminal_comid)

# Now we have a list B containing the COMIDs of rivers and unit watersheds.
if VERBOSE: print(f" found {len(B)} unit catchments in the watershed")

# If the watershed is too big, revert to low-precision mode.
@@ -637,7 +638,6 @@ def plot_basins():
# because we need it in a .js file assigned to a variable, to avoid cross-origin restrictions
# of modern web browsers.
if MAKE_MAP:
create_folder_if_not_exists(MAP_FOLDER)
watershed_js = f"{MAP_FOLDER}/{wid}.js"
with open(watershed_js, 'w') as f:
s = f"gage_coords = [{lat}, {lng}];\n"
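The recursive `addnode` call in the diff above is what assembles each watershed: starting from the terminal unit catchment, it collects every upstream COMID into the list `B`. A minimal sketch of that idea, using a hypothetical dict as the river-network topology (the real script derives the topology from the MERIT-Basins river attributes):

```python
# Hypothetical topology: each COMID maps to its immediately upstream COMIDs
UPSTREAM = {
    10: [11, 12],   # 10 is the terminal (most downstream) unit catchment
    11: [13],
    12: [],
    13: [],
}

def addnode(B, node):
    """Append node to B, then recurse into everything upstream of it."""
    B.append(node)
    for up in UPSTREAM.get(node, []):
        addnode(B, up)

B = []
addnode(B, 10)
# B now holds the COMIDs of every unit catchment in the watershed,
# which the script then dissolves into a single watershed polygon.
```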
33 changes: 28 additions & 5 deletions py/mapper.py
@@ -2,20 +2,40 @@
Makes a nice little map in an .html file for viewing in a web browser
using a bit of javascript and Leaflet.
"""

import pandas as pd
from jinja2 import Template
from config import MAP_FOLDER, HIGH_RES
from config import MAP_FOLDER, VERBOSE
import os

def make_map(df):
def create_folder_if_not_exists(folder_path: str) -> bool:
"""
Check if a folder exists at the specified path. If not, create it.
:param folder_path: The path of the folder to check/create.
:return: True if the folder exists or is created, False otherwise.
"""
try:
if not os.path.exists(folder_path):
os.makedirs(folder_path)
if VERBOSE: print(f"Created folder: {folder_path}")
else:
pass # Folder is already there

return True

except Exception as e:
print(f"Error creating folder: {e}")
return False

def make_map(df: pd.DataFrame) -> bool:
"""
input:
df: a Pandas dataframe with the cols [id, lat, lng, name, area_reported, area_calc, result]
returns:
True if it successfully wrote the file viewer.html.
Otherwise, will raise some kind of error message
Otherwise, the function will raise some kind of error message
"""
# Reorganize the columns a bit so it looks good.
@@ -83,7 +103,10 @@ def make_map(df):
cols=columns
)

viewer_fname = "{}/_viewer.html".format(MAP_FOLDER)
# Make sure the folder is there. If not, try to create it.
create_folder_if_not_exists(MAP_FOLDER)

viewer_fname = f"{MAP_FOLDER}/_viewer.html"
f = open(viewer_fname, 'w')
f.write(html)
f.close()
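The `create_folder_if_not_exists` helper added in this commit can be written a little more compactly with `os.makedirs(..., exist_ok=True)`, which also avoids the gap between checking for the folder and creating it. A sketch with the same return convention (not the committed code):

```python
import os

def create_folder_if_not_exists(folder_path: str) -> bool:
    """Create folder_path (including any parent folders) if needed.
    Returns True if the folder exists afterwards, False if creation failed."""
    try:
        os.makedirs(folder_path, exist_ok=True)
        return True
    except OSError as e:
        print(f"Error creating folder: {e}")
        return False
```

Because `exist_ok=True` makes repeated calls harmless, the script can safely call this once per output folder (OUTPUT_DIR, PICKLE_DIR, MAP_FOLDER) at startup.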
