Merge pull request #3 from Hitchwiki/tillwenke/replace-print-with-logger
Replace print statements with logger.info
tillwenke authored Jan 12, 2025
2 parents f4068c5 + eff08f7 commit 0453e72
Showing 9 changed files with 67 additions and 41 deletions.
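Every touched module follows the same two-step pattern: add a module-level logger, then swap each print call for logger.info. A minimal sketch of the pattern (the function name and message are illustrative, not taken from this commit):

import logging

logging.basicConfig(level=logging.INFO)  # added once per module in this commit
logger = logging.getLogger(__name__)

def load_shapes():  # hypothetical function, for illustration only
    # before: print("Loading country shapes...")
    logger.info("Loading country shapes...")  # after: same message, routed through logging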
32 changes: 18 additions & 14 deletions heatchmap/map_based_model.py
@@ -1,6 +1,7 @@
"""Map-based model for hitchhiking waiting times."""
import os
import time
import logging

import geopandas as gpd
import matplotlib.colors as colors
@@ -42,6 +43,9 @@

HERE = os.path.dirname(os.path.abspath(__file__))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class MapBasedModel(BaseEstimator, RegressorMixin):
def __init__(
self,
@@ -273,7 +277,7 @@ def build_map(
# from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/

if self.verbose:
print("Loading country shapes...")
logger.info("Loading country shapes...")
countries = gpd.read_file(
"countries/ne_110m_admin_0_countries.shp"
)
@@ -289,15 +293,15 @@ def build_map(
facecolor="none",
edgecolor="black",
)
print(f"Time elapsed to load countries: {time.time() - start}")
logger.info(f"Time elapsed to load countries: {time.time() - start}")

# use a pre-compiled list of important roads
# download https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_roads.zip
# from https://www.naturalearthdata.com/downloads/10m-cultural-vectors/
if show_roads:
start = time.time()
if self.verbose:
print("Loading roads...")
logger.info("Loading roads...")
roads = gpd.read_file("map_features/roads/ne_10m_roads.shp")
roads = roads.to_crs(epsg=3857)
roads = roads[roads.geometry.within(self.map_to_polygon())]
@@ -308,7 +312,7 @@ def build_map(
color="gray",
zorder=2,
)
print(f"Time elapsed to load roads: {time.time() - start}")
logger.info(f"Time elapsed to load roads: {time.time() - start}")

# takes a lot of time
# use a pre-compiled list of important cities
@@ -317,39 +321,39 @@ def build_map(
if show_cities:
start = time.time()
if self.verbose:
print("Loading cities...")
logger.info("Loading cities...")
cities = gpd.read_file("map_features/cities/ne_10m_populated_places.shp")
cities = cities.to_crs(epsg=3857)
cities = cities[cities.geometry.within(self.map_to_polygon())]
cities = cities[cities.geometry.within(unary_union(country_shapes))]
cities.plot(
ax=ax, markersize=1.0 * figsize, color="navy", marker="o", zorder=10
)
print(f"Time elapsed to load cities: {time.time() - start}")
logger.info(f"Time elapsed to load cities: {time.time() - start}")

if show_points:
start = time.time()
points.plot(ax=ax, markersize=10, color="red")
print(f"Time elapsed to load points: {time.time() - start}")
logger.info(f"Time elapsed to load points: {time.time() - start}")

# limit heatmap to landmass by assigning the nodata value to the sea
if self.verbose:
print("Transforming heatmap...")
logger.info("Transforming heatmap...")
nodata = np.nan
with rasterio.open(self.rasterio_path) as heatmap:
start = time.time()
max_map_wait = heatmap.read().max()
min_map_wait = heatmap.read().min()
if self.verbose:
print("max map waiting time:", max_map_wait)
logger.info("max map waiting time:", max_map_wait)
if self.verbose:
print("min map waiting time:", min_map_wait)
logger.info("min map waiting time:", min_map_wait)

out_image, out_transform = rasterio.mask.mask(
heatmap, country_shapes, nodata=nodata
)
out_meta = heatmap.meta
print(f"Time elapsed to transform heatmap: {time.time() - start}")
logger.info(f"Time elapsed to transform heatmap: {time.time() - start}")

out_meta.update(
{
@@ -365,7 +369,7 @@ def build_map(
destination.write(out_image)

# plot the heatmap
print("Plotting heatmap...") if self.verbose else None
logger.info("Plotting heatmap...") if self.verbose else None
raster = rasterio.open(new_map_path)

# TODO smoother spectrum instead of buckets
@@ -418,7 +422,7 @@ def build_map(
np.float64
) # matplotlib cannot handle float128
self.uncertainties = uncertainties
print(f"Time elapsed to load uncertainties: {time.time() - start}")
logger.info(f"Time elapsed to load uncertainties: {time.time() - start}")
else:
uncertainties = 1.0

@@ -545,4 +549,4 @@ def build_map(
else:
file_name = f"maps/{self.method}_{self.region}_{self.resolution}.png"
plt.savefig(file_name, bbox_inches="tight")
plt.show()
plt.show()
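Note on the two waiting-time log calls above: print joins multiple positional arguments, but the logging API treats arguments after the message as %-format parameters, so a mechanical swap of print("max map waiting time:", max_map_wait) fails at formatting time and logging reports an internal error instead of the value; hence the f-string form used in the hunk above. A minimal sketch of the pitfall and two working variants (the value is illustrative):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
max_map_wait = 42.0  # illustrative value

logger.info("max map waiting time:", max_map_wait)      # broken: logging evaluates "msg % args" and fails
logger.info("max map waiting time: %s", max_map_wait)   # lazy %-formatting, deferred until the record is emitted
logger.info(f"max map waiting time: {max_map_wait}")    # f-string, matching the rest of this commit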
7 changes: 5 additions & 2 deletions heatchmap/models.py
@@ -3,6 +3,7 @@
from shapely.geometry import Point
from sklearn.base import BaseEstimator, RegressorMixin
from tqdm.auto import tqdm
import logging

from .map_based_model import MapBasedModel

@@ -13,6 +14,8 @@
# 180 degree meridian in epsg 3857
MERIDIAN = 20037508

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class Average(BaseEstimator, RegressorMixin):
def __init__(self):
@@ -175,7 +178,7 @@ def fit(
# create a raster map - resolution is defined above
# https://stackoverflow.com/questions/56677267/tqdm-extract-time-passed-time-remaining
if self.verbose:
print("Weighting gaussians for all points...")
logger.info("Weighting gaussians for all points...")
with tqdm(
zip(X[:, 0], X[:, 1], y), total=X.shape[0], disable=not self.verbose
) as t:
@@ -237,4 +240,4 @@ def predict(self, X):
# read the raster at the given coordinates
predictions.append(self.rasterio_raster.read(1)[x, y])

return np.array(predictions)
return np.array(predictions)
5 changes: 4 additions & 1 deletion heatchmap/utils/numeric_transformers.py
@@ -1,12 +1,15 @@
# from https://github.com/scikit-learn/scikit-learn/issues/24638

from enum import Enum
import logging

import numpy as np
import pandas as pd
from scipy.stats import skew
from sklearn.base import BaseEstimator, TransformerMixin

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# see https://en.wikipedia.org/wiki/Log-normal_distribution#Arithmetic_moments
# needed for Target transformer when we have std
@@ -197,7 +200,7 @@ def get_transformer_with_least_skew(

transformed_y = transformer.func(y_array)
transformed_skew = abs(skew(transformed_y))
print(f"Skew for {t}: {transformed_skew}")
logger.info(f"Skew for {t}: {transformed_skew}")

if transformed_skew < min_skew:
min_skew = transformed_skew
10 changes: 7 additions & 3 deletions heatchmap/utils/plotting.py
@@ -3,12 +3,16 @@
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm
import logging

from .numeric_transformers import exp_minus_tiny, log_plus_tiny
from .utils_data import get_points
from .utils_map import *
from .utils_models import TargetTransformer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def plot_distribution_of_data_points():
points = get_points("../data/points_train.csv")
@@ -46,11 +50,11 @@ def plot_distribution_of_data_points():

plt.show()

print(f"Germany: {round(len(germany_data) / len(points) * 100, 2)} %")
print(
logger.info(f"Germany: {round(len(germany_data) / len(points) * 100, 2)} %")
logger.info(
f"Europe without Germany: {round(len(europe_without_germany_data) / len(points) * 100, 2)} %"
)
print(f"Rest of the world: {round(len(world_data) / len(points) * 100, 2)} %")
logger.info(f"Rest of the world: {round(len(world_data) / len(points) * 100, 2)} %")


def plot_1d_model_comparison(
8 changes: 4 additions & 4 deletions heatchmap/utils/transformed_target_regressor_with_uncertainty.py
@@ -1,11 +1,11 @@
# from https://github.com/scikit-learn/scikit-learn/issues/24638


import logging
from sklearn.base import BaseEstimator
from sklearn.compose import TransformedTargetRegressor

from .numeric_transformers import Transformer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TransformedTargetRegressorWithUncertainty(TransformedTargetRegressor):
"""Thin wrapper over sklearn.compose.TransformedTargetRegressor.
@@ -40,7 +40,7 @@ def fit(self, X, y, **fit_params):
def predict(self, X, return_std=False, transform_predictions=True, verbose=True, **predict_params):
"""Predict using the underlying regressor and transform the result back.
"""
print(f"Model called for prediction with X of shape {X.shape}")
logger.info(f"Model called for prediction with X of shape {X.shape}")
# always return the standard deviation as it is required for the proper inverse_transform
# regressor_ is the fitted regressor
model: BaseEstimator = self.regressor_
6 changes: 5 additions & 1 deletion heatchmap/utils/utils_data.py
@@ -1,4 +1,5 @@
import sqlite3
import logging

import geopandas as gpd
import pandas as pd
@@ -7,6 +8,9 @@

from .utils_map import *

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

DAY = 24 * 60
WAIT_MAX = DAY

@@ -42,7 +46,7 @@ def get_points(path, wait_max=WAIT_MAX, begin:pd.Timestamp=pd.Timestamp.min, unt
points.crs = CRS.from_epsg(4326)
points = points.to_crs(epsg=3857)

print(f"Got {len(points)} points from {begin.date()} to {until.date()}.")
logger.info(f"Got {len(points)} points from {begin.date()} to {until.date()}.")

return points

9 changes: 6 additions & 3 deletions heatchmap/utils/utils_map.py
@@ -1,4 +1,5 @@
import time
import logging

import geopandas as gpd
import numpy as np
@@ -15,6 +16,8 @@

RESOLUTION = 2

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def save_numpy_map(
map,
@@ -109,7 +112,7 @@ def raster_from_model(

# transposing the grid enables us to iterate over it vertically
# and single elements become lon-lat pairs that can be fed into the model
print("Compute rows of pixels...")
logger.info("Compute rows of pixels...")
start = time.time()
for vertical_line in tqdm(grid.transpose(), disable=not verbose):
if show_uncertainties:
@@ -119,8 +122,8 @@
pred = model.predict(vertical_line)
map = np.vstack((map, pred))

print(f"Time elapsed to compute full map: {time.time() - start}")
print(
logger.info(f"Time elapsed to compute full map: {time.time() - start}")
logger.info(
f"For map of shape: {map.shape} that is {map.shape[0] * map.shape[1]} pixels and a time per pixel of {(time.time() - start) / (map.shape[0] * map.shape[1])} seconds"
)

23 changes: 12 additions & 11 deletions heatchmap/utils/utils_models.py
@@ -5,6 +5,7 @@
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from sklearn.model_selection import cross_validate
from sklearn.utils._testing import ignore_warnings
import logging

from .numeric_transformers import MyLogTransformer
from .transformed_target_regressor_with_uncertainty import TransformedTargetRegressorWithUncertainty
@@ -37,15 +38,15 @@ def fit_transform(self, y):
def evaluate(model, train, validation, features=["lon", "lat"]):
train["pred"] = model.predict(train[features].values)

print(f"Training RMSE: {root_mean_squared_error(train['wait'], train['pred'])}")
print(f"Training MAE {mean_absolute_error(train['wait'], train['pred'])}")
logger.info(f"Training RMSE: {root_mean_squared_error(train['wait'], train['pred'])}")
logger.info(f"Training MAE {mean_absolute_error(train['wait'], train['pred'])}")

validation["pred"] = model.predict(validation[features].values)

print(
logger.info(
f"Validation RMSE: {root_mean_squared_error(validation['wait'], validation['pred'])}"
)
print(
logger.info(
f"Validation MAE {mean_absolute_error(validation['wait'], validation['pred'])}\n"
)

@@ -62,15 +63,15 @@ def evaluate_cv(estimator, X, y, folds=5):
return_estimator=True,
)

print("Cross-validated averaged metrics...")
print(
logger.info("Cross-validated averaged metrics...")
logger.info(
f"Training RMSE: {cv_result['train_neg_root_mean_squared_error'].mean() * -1}"
)
print(f"Training MAE: {cv_result['train_neg_mean_absolute_error'].mean() * -1}")
print(
logger.info(f"Training MAE: {cv_result['train_neg_mean_absolute_error'].mean() * -1}")
logger.info(
f"Validation RMSE: {cv_result['test_neg_root_mean_squared_error'].mean() * -1}"
)
print(f"Validation MAE: {cv_result['test_neg_mean_absolute_error'].mean() * -1}\n")
logger.info(f"Validation MAE: {cv_result['test_neg_mean_absolute_error'].mean() * -1}\n")

# returning one estimator trained on all samples for visualization purposes
return estimator.fit(X, y)
@@ -102,9 +103,9 @@ def fit_gpr(gpr, X, y):

@ignore_warnings(category=ConvergenceWarning)
def fit_gpr_silent(gpr, X, y):
print(f"Fitting Gaussian Process Regressor with X of shape {X.shape} and y of shape {y.shape}...")
logger.info(f"Fitting Gaussian Process Regressor with X of shape {X.shape} and y of shape {y.shape}...")
gpr.fit(X, y)
print("Fitting done.")
logger.info("Fitting done.")

return gpr

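Aside on the * -1 factors in evaluate_cv above: sklearn scorers follow a greater-is-better convention, so error metrics are exposed negated (neg_root_mean_squared_error, neg_mean_absolute_error) and must be flipped back for reporting. A small self-contained sketch (the model and data are illustrative, not from this repository):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate

X = np.arange(20, dtype=float).reshape(-1, 1)
y = 3 * X.ravel() + np.random.default_rng(0).normal(size=20)

cv = cross_validate(LinearRegression(), X, y, cv=5,
                    scoring=["neg_root_mean_squared_error"])
rmse = -cv["test_neg_root_mean_squared_error"].mean()  # flip the sign to recover the usual positive RMSE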
8 changes: 6 additions & 2 deletions heatchmap/weighted_average_gaussian.py
@@ -2,6 +2,7 @@
import pandas as pd
from shapely.geometry import Point
from tqdm.auto import tqdm
import logging

from .map_based_model import MapBasedModel

@@ -12,6 +13,9 @@
# 180 degree meridian in epsg 3857
MERIDIAN = 20037508

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class WeightedAveragedGaussian(MapBasedModel):
def __init__(
self, region="world", method="ordinary", resolution=RESOLUTION, verbose=False
@@ -92,7 +96,7 @@ def fit(
# create a raster map - resolution is defined above
# https://stackoverflow.com/questions/56677267/tqdm-extract-time-passed-time-remaining
if self.verbose:
print("Weighting gaussians for all points...")
logger.info("Weighting gaussians for all points...")
with tqdm(
zip(X[:, 0], X[:, 1], y), total=X.shape[0], disable=not self.verbose
) as t:
Expand Down Expand Up @@ -154,4 +158,4 @@ def predict(self, X):
# read the raster at the given coordinates
predictions.append(self.rasterio_raster.read(1)[x, y])

return np.array(predictions)
return np.array(predictions)
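A closing design note, separate from the diff itself: calling logging.basicConfig(level=logging.INFO) at import time in every module configures the root logger as a side effect of importing the package. The usual Python logging convention is for library modules to create a named logger only and leave configuration to the application, which then controls verbosity centrally. A hedged sketch of that alternative (not what this commit does):

# library module, e.g. heatchmap/utils/utils_map.py: no basicConfig here
import logging

logger = logging.getLogger(__name__)  # handlers and level are inherited from the application

# application entry point: configure once, tune the package's verbosity as needed
logging.basicConfig(level=logging.INFO)
logging.getLogger("heatchmap").setLevel(logging.WARNING)  # e.g. silence heatchmap INFO messages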
