Merge pull request #3 from Hitchwiki/tillwenke/replace-print-with-logger
Replace print statements with logger.info
tillwenke authored Jan 12, 2025
2 parents f4068c5 + eff08f7 commit 0453e72
Showing 9 changed files with 67 additions and 41 deletions.
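Every touched module follows the same two-step pattern: add a module-level logger, then swap each print call for logger.info. A minimal sketch of the pattern (the function name and message are illustrative, not taken from this commit):

import logging

logging.basicConfig(level=logging.INFO)  # added once per module in this commit
logger = logging.getLogger(__name__)

def load_shapes():  # hypothetical function, for illustration only
    # before: print("Loading country shapes...")
    logger.info("Loading country shapes...")  # after: same message, routed through logging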
32 changes: 18 additions & 14 deletions heatchmap/map_based_model.py
@@ -1,6 +1,7 @@
"""Map-based model for hitchhiking waiting times."""
import os
import time
import logging

import geopandas as gpd
import matplotlib.colors as colors
@@ -42,6 +43,9 @@

HERE = os.path.dirname(os.path.abspath(__file__))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class MapBasedModel(BaseEstimator, RegressorMixin):
def __init__(
self,
@@ -273,7 +277,7 @@ def build_map(
# from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/

if self.verbose:
print("Loading country shapes...")
logger.info("Loading country shapes...")
countries = gpd.read_file(
"countries/ne_110m_admin_0_countries.shp"
)
@@ -289,15 +293,15 @@ def build_map(
facecolor="none",
edgecolor="black",
)
print(f"Time elapsed to load countries: {time.time() - start}")
logger.info(f"Time elapsed to load countries: {time.time() - start}")

# use a pre-compiled list of important roads
# download https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_roads.zip
# from https://www.naturalearthdata.com/downloads/10m-cultural-vectors/
if show_roads:
start = time.time()
if self.verbose:
print("Loading roads...")
logger.info("Loading roads...")
roads = gpd.read_file("map_features/roads/ne_10m_roads.shp")
roads = roads.to_crs(epsg=3857)
roads = roads[roads.geometry.within(self.map_to_polygon())]
@@ -308,7 +312,7 @@ def build_map(
color="gray",
zorder=2,
)
print(f"Time elapsed to load roads: {time.time() - start}")
logger.info(f"Time elapsed to load roads: {time.time() - start}")

# takes a lot of time
# use a pre-compiled list of important cities
@@ -317,39 +321,39 @@ def build_map(
if show_cities:
start = time.time()
if self.verbose:
print("Loading cities...")
logger.info("Loading cities...")
cities = gpd.read_file("map_features/cities/ne_10m_populated_places.shp")
cities = cities.to_crs(epsg=3857)
cities = cities[cities.geometry.within(self.map_to_polygon())]
cities = cities[cities.geometry.within(unary_union(country_shapes))]
cities.plot(
ax=ax, markersize=1.0 * figsize, color="navy", marker="o", zorder=10
)
print(f"Time elapsed to load cities: {time.time() - start}")
logger.info(f"Time elapsed to load cities: {time.time() - start}")

if show_points:
start = time.time()
points.plot(ax=ax, markersize=10, color="red")
print(f"Time elapsed to load points: {time.time() - start}")
logger.info(f"Time elapsed to load points: {time.time() - start}")

# limit heatmap to landmass by assigning the nodata value to the sea
if self.verbose:
print("Transforming heatmap...")
logger.info("Transforming heatmap...")
nodata = np.nan
with rasterio.open(self.rasterio_path) as heatmap:
start = time.time()
max_map_wait = heatmap.read().max()
min_map_wait = heatmap.read().min()
if self.verbose:
print("max map waiting time:", max_map_wait)
logger.info("max map waiting time:", max_map_wait)
if self.verbose:
print("min map waiting time:", min_map_wait)
logger.info("min map waiting time:", min_map_wait)

out_image, out_transform = rasterio.mask.mask(
heatmap, country_shapes, nodata=nodata
)
out_meta = heatmap.meta
print(f"Time elapsed to transform heatmap: {time.time() - start}")
logger.info(f"Time elapsed to transform heatmap: {time.time() - start}")

out_meta.update(
{
@@ -365,7 +369,7 @@ def build_map(
destination.write(out_image)

# plot the heatmap
print("Plotting heatmap...") if self.verbose else None
logger.info("Plotting heatmap...") if self.verbose else None
raster = rasterio.open(new_map_path)

# TODO smoother spectrum instead of buckets
@@ -418,7 +422,7 @@ def build_map(
np.float64
) # matplotlib cannot handle float128
self.uncertainties = uncertainties
print(f"Time elapsed to load uncertainties: {time.time() - start}")
logger.info(f"Time elapsed to load uncertainties: {time.time() - start}")
else:
uncertainties = 1.0

@@ -545,4 +549,4 @@ def build_map(
else:
file_name = f"maps/{self.method}_{self.region}_{self.resolution}.png"
plt.savefig(file_name, bbox_inches="tight")
plt.show()
plt.show()
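Note on the two waiting-time log calls above: print joins multiple positional arguments, but the logging API treats arguments after the message as %-format parameters, so a mechanical swap of print("max map waiting time:", max_map_wait) fails at formatting time and logging reports an internal error instead of the value; hence the f-string form used in the hunk above. A minimal sketch of the pitfall and two working variants (the value is illustrative):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
max_map_wait = 42.0  # illustrative value

logger.info("max map waiting time:", max_map_wait)      # broken: logging evaluates "msg % args" and fails
logger.info("max map waiting time: %s", max_map_wait)   # lazy %-formatting, deferred until the record is emitted
logger.info(f"max map waiting time: {max_map_wait}")    # f-string, matching the rest of this commit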
7 changes: 5 additions & 2 deletions heatchmap/models.py
@@ -3,6 +3,7 @@
from shapely.geometry import Point
from sklearn.base import BaseEstimator, RegressorMixin
from tqdm.auto import tqdm
import logging

from .map_based_model import MapBasedModel

@@ -13,6 +14,8 @@
# 180 degree meridian in epsg 3857
MERIDIAN = 20037508

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class Average(BaseEstimator, RegressorMixin):
def __init__(self):
@@ -175,7 +178,7 @@ def fit(
# create a raster map - resolution is defined above
# https://stackoverflow.com/questions/56677267/tqdm-extract-time-passed-time-remaining
if self.verbose:
print("Weighting gaussians for all points...")
logger.info("Weighting gaussians for all points...")
with tqdm(
zip(X[:, 0], X[:, 1], y), total=X.shape[0], disable=not self.verbose
) as t:
@@ -237,4 +240,4 @@ def predict(self, X):
# read the raster at the given coordinates
predictions.append(self.rasterio_raster.read(1)[x, y])

return np.array(predictions)
return np.array(predictions)
5 changes: 4 additions & 1 deletion heatchmap/utils/numeric_transformers.py
@@ -1,12 +1,15 @@
# from https://github.com/scikit-learn/scikit-learn/issues/24638

from enum import Enum
import logging

import numpy as np
import pandas as pd
from scipy.stats import skew
from sklearn.base import BaseEstimator, TransformerMixin

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# see https://en.wikipedia.org/wiki/Log-normal_distribution#Arithmetic_moments
# needed for Target transformer when we have std
@@ -197,7 +200,7 @@ def get_transformer_with_least_skew(

transformed_y = transformer.func(y_array)
transformed_skew = abs(skew(transformed_y))
print(f"Skew for {t}: {transformed_skew}")
logger.info(f"Skew for {t}: {transformed_skew}")

if transformed_skew < min_skew:
min_skew = transformed_skew
10 changes: 7 additions & 3 deletions heatchmap/utils/plotting.py
@@ -3,12 +3,16 @@
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm
import logging

from .numeric_transformers import exp_minus_tiny, log_plus_tiny
from .utils_data import get_points
from .utils_map import *
from .utils_models import TargetTransformer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def plot_distribution_of_data_points():
points = get_points("../data/points_train.csv")
@@ -46,11 +50,11 @@ def plot_distribution_of_data_points():

plt.show()

print(f"Germany: {round(len(germany_data) / len(points) * 100, 2)} %")
print(
logger.info(f"Germany: {round(len(germany_data) / len(points) * 100, 2)} %")
logger.info(
f"Europe without Germany: {round(len(europe_without_germany_data) / len(points) * 100, 2)} %"
)
print(f"Rest of the world: {round(len(world_data) / len(points) * 100, 2)} %")
logger.info(f"Rest of the world: {round(len(world_data) / len(points) * 100, 2)} %")


def plot_1d_model_comparison(
8 changes: 4 additions & 4 deletions heatchmap/utils/transformed_target_regressor_with_uncertainty.py
@@ -1,11 +1,11 @@
# from https://github.com/scikit-learn/scikit-learn/issues/24638


import logging
from sklearn.base import BaseEstimator
from sklearn.compose import TransformedTargetRegressor

from .numeric_transformers import Transformer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TransformedTargetRegressorWithUncertainty(TransformedTargetRegressor):
"""Thin wrapper over sklearn.compose.TransformedTargetRegressor.
@@ -40,7 +40,7 @@ def fit(self, X, y, **fit_params):
def predict(self, X, return_std=False, transform_predictions=True, verbose=True, **predict_params):
"""Predict using the underlying regressor and transform the result back.
"""
print(f"Model called for prediction with X of shape {X.shape}")
logger.info(f"Model called for prediction with X of shape {X.shape}")
# always return the standard deviation as it is required for the proper inverse_transform
# regressor_ is the fitted regressor
model: BaseEstimator = self.regressor_
6 changes: 5 additions & 1 deletion heatchmap/utils/utils_data.py
@@ -1,4 +1,5 @@
import sqlite3
import logging

import geopandas as gpd
import pandas as pd
@@ -7,6 +8,9 @@

from .utils_map import *

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

DAY = 24 * 60
WAIT_MAX = DAY

@@ -42,7 +46,7 @@ def get_points(path, wait_max=WAIT_MAX, begin:pd.Timestamp=pd.Timestamp.min, unt
points.crs = CRS.from_epsg(4326)
points = points.to_crs(epsg=3857)

print(f"Got {len(points)} points from {begin.date()} to {until.date()}.")
logger.info(f"Got {len(points)} points from {begin.date()} to {until.date()}.")

return points

9 changes: 6 additions & 3 deletions heatchmap/utils/utils_map.py
@@ -1,4 +1,5 @@
import time
import logging

import geopandas as gpd
import numpy as np
@@ -15,6 +16,8 @@

RESOLUTION = 2

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def save_numpy_map(
map,
@@ -109,7 +112,7 @@ def raster_from_model(

# transposing the grid enables us to iterate over it vertically
# and single elements become lon-lat pairs that can be fed into the model
print("Compute rows of pixels...")
logger.info("Compute rows of pixels...")
start = time.time()
for vertical_line in tqdm(grid.transpose(), disable=not verbose):
if show_uncertainties:
@@ -119,8 +122,8 @@
pred = model.predict(vertical_line)
map = np.vstack((map, pred))

print(f"Time elapsed to compute full map: {time.time() - start}")
print(
logger.info(f"Time elapsed to compute full map: {time.time() - start}")
logger.info(
f"For map of shape: {map.shape} that is {map.shape[0] * map.shape[1]} pixels and a time per pixel of {(time.time() - start) / (map.shape[0] * map.shape[1])} seconds"
)

23 changes: 12 additions & 11 deletions heatchmap/utils/utils_models.py
@@ -5,6 +5,7 @@
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from sklearn.model_selection import cross_validate
from sklearn.utils._testing import ignore_warnings
import logging

from .numeric_transformers import MyLogTransformer
from .transformed_target_regressor_with_uncertainty import TransformedTargetRegressorWithUncertainty
@@ -37,15 +38,15 @@ def fit_transform(self, y):
def evaluate(model, train, validation, features=["lon", "lat"]):
train["pred"] = model.predict(train[features].values)

print(f"Training RMSE: {root_mean_squared_error(train['wait'], train['pred'])}")
print(f"Training MAE {mean_absolute_error(train['wait'], train['pred'])}")
logger.info(f"Training RMSE: {root_mean_squared_error(train['wait'], train['pred'])}")
logger.info(f"Training MAE {mean_absolute_error(train['wait'], train['pred'])}")

validation["pred"] = model.predict(validation[features].values)

print(
logger.info(
f"Validation RMSE: {root_mean_squared_error(validation['wait'], validation['pred'])}"
)
print(
logger.info(
f"Validation MAE {mean_absolute_error(validation['wait'], validation['pred'])}\n"
)

@@ -62,15 +63,15 @@ def evaluate_cv(estimator, X, y, folds=5):
return_estimator=True,
)

print("Cross-validated averaged metrics...")
print(
logger.info("Cross-validated averaged metrics...")
logger.info(
f"Training RMSE: {cv_result['train_neg_root_mean_squared_error'].mean() * -1}"
)
print(f"Training MAE: {cv_result['train_neg_mean_absolute_error'].mean() * -1}")
print(
logger.info(f"Training MAE: {cv_result['train_neg_mean_absolute_error'].mean() * -1}")
logger.info(
f"Validation RMSE: {cv_result['test_neg_root_mean_squared_error'].mean() * -1}"
)
print(f"Validation MAE: {cv_result['test_neg_mean_absolute_error'].mean() * -1}\n")
logger.info(f"Validation MAE: {cv_result['test_neg_mean_absolute_error'].mean() * -1}\n")

# returning one estimator trained on all samples for visualization purposes
return estimator.fit(X, y)
@@ -102,9 +103,9 @@ def fit_gpr(gpr, X, y):

@ignore_warnings(category=ConvergenceWarning)
def fit_gpr_silent(gpr, X, y):
print(f"Fitting Gaussian Process Regressor with X of shape {X.shape} and y of shape {y.shape}...")
logger.info(f"Fitting Gaussian Process Regressor with X of shape {X.shape} and y of shape {y.shape}...")
gpr.fit(X, y)
print("Fitting done.")
logger.info("Fitting done.")

return gpr

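Aside on the * -1 factors in evaluate_cv above: sklearn scorers follow a greater-is-better convention, so error metrics are exposed negated (neg_root_mean_squared_error, neg_mean_absolute_error) and must be flipped back for reporting. A small self-contained sketch (the model and data are illustrative, not from this repository):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate

X = np.arange(20, dtype=float).reshape(-1, 1)
y = 3 * X.ravel() + np.random.default_rng(0).normal(size=20)

cv = cross_validate(LinearRegression(), X, y, cv=5,
                    scoring=["neg_root_mean_squared_error"])
rmse = -cv["test_neg_root_mean_squared_error"].mean()  # flip the sign to recover the usual positive RMSE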
8 changes: 6 additions & 2 deletions heatchmap/weighted_average_gaussian.py
@@ -2,6 +2,7 @@
import pandas as pd
from shapely.geometry import Point
from tqdm.auto import tqdm
import logging

from .map_based_model import MapBasedModel

@@ -12,6 +13,9 @@
# 180 degree meridian in epsg 3857
MERIDIAN = 20037508

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class WeightedAveragedGaussian(MapBasedModel):
def __init__(
self, region="world", method="ordinary", resolution=RESOLUTION, verbose=False
@@ -92,7 +96,7 @@ def fit(
# create a raster map - resolution is defined above
# https://stackoverflow.com/questions/56677267/tqdm-extract-time-passed-time-remaining
if self.verbose:
print("Weighting gaussians for all points...")
logger.info("Weighting gaussians for all points...")
with tqdm(
zip(X[:, 0], X[:, 1], y), total=X.shape[0], disable=not self.verbose
) as t:
Expand Down Expand Up @@ -154,4 +158,4 @@ def predict(self, X):
# read the raster at the given coordinates
predictions.append(self.rasterio_raster.read(1)[x, y])

return np.array(predictions)
return np.array(predictions)
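A closing design note, separate from the diff itself: calling logging.basicConfig(level=logging.INFO) at import time in every module configures the root logger as a side effect of importing the package. The usual Python logging convention is for library modules to create a named logger only and leave configuration to the application, which then controls verbosity centrally. A hedged sketch of that alternative (not what this commit does):

# library module, e.g. heatchmap/utils/utils_map.py: no basicConfig here
import logging

logger = logging.getLogger(__name__)  # handlers and level are inherited from the application

# application entry point: configure once, tune the package's verbosity as needed
logging.basicConfig(level=logging.INFO)
logging.getLogger("heatchmap").setLevel(logging.WARNING)  # e.g. silence heatchmap INFO messages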
