diff --git a/heatchmap/map_based_model.py b/heatchmap/map_based_model.py
index a2738e8..fa233df 100644
--- a/heatchmap/map_based_model.py
+++ b/heatchmap/map_based_model.py
@@ -1,6 +1,7 @@
 """Map-based model for hitchhiking waiting times."""
 import os
 import time
+import logging
 
 import geopandas as gpd
 import matplotlib.colors as colors
@@ -42,6 +43,9 @@
 
 HERE = os.path.dirname(os.path.abspath(__file__))
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 class MapBasedModel(BaseEstimator, RegressorMixin):
     def __init__(
         self,
@@ -273,7 +277,7 @@ def build_map(
 
         # from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/
         if self.verbose:
-            print("Loading country shapes...")
+            logger.info("Loading country shapes...")
         countries = gpd.read_file(
             "countries/ne_110m_admin_0_countries.shp"
         )
@@ -289,7 +293,7 @@ def build_map(
             facecolor="none",
             edgecolor="black",
         )
-        print(f"Time elapsed to load countries: {time.time() - start}")
+        logger.info(f"Time elapsed to load countries: {time.time() - start}")
 
         # use a pre-compiles list of important roads
         # download https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_roads.zip
@@ -297,7 +301,7 @@ def build_map(
         if show_roads:
             start = time.time()
             if self.verbose:
-                print("Loading roads...")
+                logger.info("Loading roads...")
             roads = gpd.read_file("map_features/roads/ne_10m_roads.shp")
             roads = roads.to_crs(epsg=3857)
             roads = roads[roads.geometry.within(self.map_to_polygon())]
@@ -308,7 +312,7 @@ def build_map(
                 color="gray",
                 zorder=2,
             )
-            print(f"Time elapsed to load roads: {time.time() - start}")
+            logger.info(f"Time elapsed to load roads: {time.time() - start}")
 
         # takes a lot of time
         # use a pre-compiled list of important cities
@@ -317,7 +321,7 @@ def build_map(
         if show_cities:
             start = time.time()
             if self.verbose:
-                print("Loading cities...")
+                logger.info("Loading cities...")
             cities = gpd.read_file("map_features/cities/ne_10m_populated_places.shp")
             cities = cities.to_crs(epsg=3857)
             cities = cities[cities.geometry.within(self.map_to_polygon())]
@@ -325,31 +329,31 @@ def build_map(
             cities.plot(
                 ax=ax, markersize=1.0 * figsize, color="navy", marker="o", zorder=10
             )
-            print(f"Time elapsed to load cities: {time.time() - start}")
+            logger.info(f"Time elapsed to load cities: {time.time() - start}")
 
         if show_points:
             start = time.time()
             points.plot(ax=ax, markersize=10, color="red")
-            print(f"Time elapsed to load points: {time.time() - start}")
+            logger.info(f"Time elapsed to load points: {time.time() - start}")
 
         # limit heatmap to landmass by asigning no data value to sea
         if self.verbose:
-            print("Transforming heatmap...")
+            logger.info("Transforming heatmap...")
         nodata = np.nan
         with rasterio.open(self.rasterio_path) as heatmap:
            start = time.time()
             max_map_wait = heatmap.read().max()
             min_map_wait = heatmap.read().min()
             if self.verbose:
-                print("max map waiting time:", max_map_wait)
+                logger.info(f"max map waiting time: {max_map_wait}")
             if self.verbose:
-                print("min map waiting time:", min_map_wait)
+                logger.info(f"min map waiting time: {min_map_wait}")
             out_image, out_transform = rasterio.mask.mask(
                 heatmap, country_shapes, nodata=nodata
             )
             out_meta = heatmap.meta
-            print(f"Time elapsed to transform heatmap: {time.time() - start}")
+            logger.info(f"Time elapsed to transform heatmap: {time.time() - start}")
 
         out_meta.update(
             {
@@ -365,7 +369,7 @@ def build_map(
             destination.write(out_image)
 
         # plot the heatmap
-        print("Plotting heatmap...") if self.verbose else None
+        logger.info("Plotting heatmap...") if self.verbose else None
         raster = rasterio.open(new_map_path)
 
         # TODO smoother spectrum instead of buckets
@@ -418,7 +422,7 @@ def build_map(
                 np.float64
             )  # matplotlib cannot handle float128
             self.uncertainties = uncertainties
-            print(f"Time elapsed to load uncertainties: {time.time() - start}")
+            logger.info(f"Time elapsed to load uncertainties: {time.time() - start}")
         else:
             uncertainties = 1.0
 
@@ -545,4 +549,4 @@ def build_map(
         else:
             file_name = f"maps/{self.method}_{self.region}_{self.resolution}.png"
         plt.savefig(file_name, bbox_inches="tight")
-        plt.show()
\ No newline at end of file
+        plt.show()
diff --git a/heatchmap/models.py b/heatchmap/models.py
index 21738d2..cfe45e2 100644
--- a/heatchmap/models.py
+++ b/heatchmap/models.py
@@ -3,6 +3,7 @@
 from shapely.geometry import Point
 from sklearn.base import BaseEstimator, RegressorMixin
 from tqdm.auto import tqdm
+import logging
 
 from .map_based_model import MapBasedModel
 
@@ -13,6 +14,8 @@
 # 180 degree meridian in epsg 3857
 MERIDIAN = 20037508
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 class Average(BaseEstimator, RegressorMixin):
     def __init__(self):
@@ -175,7 +178,7 @@ def fit(
         # create a raster map - resulution is defined above
         # https://stackoverflow.com/questions/56677267/tqdm-extract-time-passed-time-remaining
         if self.verbose:
-            print("Weighting gaussians for all points...")
+            logger.info("Weighting gaussians for all points...")
         with tqdm(
             zip(X[:, 0], X[:, 1], y), total=X.shape[0], disable=not self.verbose
         ) as t:
@@ -237,4 +240,4 @@ def predict(self, X):
 
             # read the raster at the given coordinates
             predictions.append(self.rasterio_raster.read(1)[x, y])
-        return np.array(predictions)
\ No newline at end of file
+        return np.array(predictions)
diff --git a/heatchmap/utils/numeric_transformers.py b/heatchmap/utils/numeric_transformers.py
index f93a7c6..f5ec30a 100644
--- a/heatchmap/utils/numeric_transformers.py
+++ b/heatchmap/utils/numeric_transformers.py
@@ -1,12 +1,15 @@
 # from https://github.com/scikit-learn/scikit-learn/issues/24638
 
 from enum import Enum
+import logging
 
 import numpy as np
 import pandas as pd
 from scipy.stats import skew
 from sklearn.base import BaseEstimator, TransformerMixin
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 # see https://en.wikipedia.org/wiki/Log-normal_distribution#Arithmetic_moments
 # needed for Target transformer when we have std
@@ -197,7 +200,7 @@ def get_transformer_with_least_skew(
 
         transformed_y = transformer.func(y_array)
         transformed_skew = abs(skew(transformed_y))
-        print(f"Skew for {t}: {transformed_skew}")
+        logger.info(f"Skew for {t}: {transformed_skew}")
 
         if transformed_skew < min_skew:
             min_skew = transformed_skew
diff --git a/heatchmap/utils/plotting.py b/heatchmap/utils/plotting.py
index 7d65fc5..673f9b0 100644
--- a/heatchmap/utils/plotting.py
+++ b/heatchmap/utils/plotting.py
@@ -3,12 +3,16 @@
 import pandas as pd
 from matplotlib import pyplot as plt
 from matplotlib.colors import LogNorm
+import logging
 
 from .numeric_transformers import exp_minus_tiny, log_plus_tiny
 from .utils_data import get_points
 from .utils_map import *
 from .utils_models import TargetTransformer
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 
 def plot_distribution_of_data_points():
     points = get_points("../data/points_train.csv")
@@ -46,11 +50,11 @@ def plot_distribution_of_data_points():
 
     plt.show()
 
-    print(f"Germany: {round(len(germany_data) / len(points) * 100, 2)} %")
-    print(
+    logger.info(f"Germany: {round(len(germany_data) / len(points) * 100, 2)} %")
+    logger.info(
         f"Europe without Germany: {round(len(europe_without_germany_data) / len(points) * 100, 2)} %"
     )
-    print(f"Rest of the world: {round(len(world_data) / len(points) * 100, 2)} %")
+    logger.info(f"Rest of the world: {round(len(world_data) / len(points) * 100, 2)} %")
 
 
 def plot_1d_model_comparison(
diff --git a/heatchmap/utils/transformed_target_regressor_with_uncertainty.py b/heatchmap/utils/transformed_target_regressor_with_uncertainty.py
index a00ba3f..35e3c08 100644
--- a/heatchmap/utils/transformed_target_regressor_with_uncertainty.py
+++ b/heatchmap/utils/transformed_target_regressor_with_uncertainty.py
@@ -1,11 +1,11 @@
-# from https://github.com/scikit-learn/scikit-learn/issues/24638
-
-
+import logging
 from sklearn.base import BaseEstimator
 from sklearn.compose import TransformedTargetRegressor
 
 from .numeric_transformers import Transformer
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 class TransformedTargetRegressorWithUncertainty(TransformedTargetRegressor):
     """Thin wrapper over sklearn.compose.TransformedTargetRegressor.
@@ -40,7 +40,7 @@ def fit(self, X, y, **fit_params):
     def predict(self, X, return_std=False, transform_predictions=True, verbose=True, **predict_params):
         """Predict using the underlying regressor and transform the result back.
         """
-        print(f"Model called for prediction with X of shape {X.shape}")
+        logger.info(f"Model called for prediction with X of shape {X.shape}")
         # always return the standard deviation as it is required for the proper inverse_transform
         # regressor_ is the fitted regressor
         model: BaseEstimator = self.regressor_
diff --git a/heatchmap/utils/utils_data.py b/heatchmap/utils/utils_data.py
index 0ed5355..3044268 100644
--- a/heatchmap/utils/utils_data.py
+++ b/heatchmap/utils/utils_data.py
@@ -1,4 +1,5 @@
 import sqlite3
+import logging
 
 import geopandas as gpd
 import pandas as pd
@@ -7,6 +8,9 @@
 
 from .utils_map import *
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 DAY = 24 * 60
 WAIT_MAX = DAY
 
@@ -42,7 +46,7 @@ def get_points(path, wait_max=WAIT_MAX, begin:pd.Timestamp=pd.Timestamp.min, unt
     points.crs = CRS.from_epsg(4326)
     points = points.to_crs(epsg=3857)
 
-    print(f"Got {len(points)} points from {begin.date()} to {until.date()}.")
+    logger.info(f"Got {len(points)} points from {begin.date()} to {until.date()}.")
 
     return points
 
diff --git a/heatchmap/utils/utils_map.py b/heatchmap/utils/utils_map.py
index 2f9c8f7..bd80620 100644
--- a/heatchmap/utils/utils_map.py
+++ b/heatchmap/utils/utils_map.py
@@ -1,4 +1,5 @@
 import time
+import logging
 
 import geopandas as gpd
 import numpy as np
@@ -15,6 +16,8 @@
 
 RESOLUTION = 2
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 def save_numpy_map(
     map,
@@ -109,7 +112,7 @@ def raster_from_model(
 
     # transposing the grid enables us to iterate over it vertically
     # and single elements become lon-lat pairs that can be fed into the model
-    print("Compute rows of pixels...")
+    logger.info("Compute rows of pixels...")
     start = time.time()
     for vertical_line in tqdm(grid.transpose(), disable=not verbose):
         if show_uncertainties:
@@ -119,8 +122,8 @@ def raster_from_model(
             pred = model.predict(vertical_line)
         map = np.vstack((map, pred))
 
-    print(f"Time elapsed to compute full map: {time.time() - start}")
-    print(
+    logger.info(f"Time elapsed to compute full map: {time.time() - start}")
+    logger.info(
         f"For map of shape: {map.shape} that is {map.shape[0] * map.shape[1]} pixels and a time per pixel of {(time.time() - start) / (map.shape[0] * map.shape[1])} seconds"
     )
 
diff --git a/heatchmap/utils/utils_models.py b/heatchmap/utils/utils_models.py
index 1cdc116..53fa4b6 100644
--- a/heatchmap/utils/utils_models.py
+++ b/heatchmap/utils/utils_models.py
@@ -5,6 +5,10 @@
 from sklearn.metrics import mean_absolute_error, root_mean_squared_error
 from sklearn.model_selection import cross_validate
 from sklearn.utils._testing import ignore_warnings
+import logging
 
 from .numeric_transformers import MyLogTransformer
 from .transformed_target_regressor_with_uncertainty import TransformedTargetRegressorWithUncertainty
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
@@ -37,15 +41,15 @@ def fit_transform(self, y):
 
 def evaluate(model, train, validation, features=["lon", "lat"]):
     train["pred"] = model.predict(train[features].values)
 
-    print(f"Training RMSE: {root_mean_squared_error(train['wait'], train['pred'])}")
-    print(f"Training MAE {mean_absolute_error(train['wait'], train['pred'])}")
+    logger.info(f"Training RMSE: {root_mean_squared_error(train['wait'], train['pred'])}")
+    logger.info(f"Training MAE {mean_absolute_error(train['wait'], train['pred'])}")
 
     validation["pred"] = model.predict(validation[features].values)
 
-    print(
+    logger.info(
         f"Validation RMSE: {root_mean_squared_error(validation['wait'], validation['pred'])}"
     )
-    print(
+    logger.info(
         f"Validation MAE {mean_absolute_error(validation['wait'], validation['pred'])}\n"
     )
@@ -62,15 +66,15 @@ def evaluate_cv(estimator, X, y, folds=5):
         return_estimator=True,
     )
 
-    print("Cross-validated averaged metrics...")
-    print(
+    logger.info("Cross-validated averaged metrics...")
+    logger.info(
         f"Training RMSE: {cv_result['train_neg_root_mean_squared_error'].mean() * -1}"
     )
-    print(f"Training MAE: {cv_result['train_neg_mean_absolute_error'].mean() * -1}")
-    print(
+    logger.info(f"Training MAE: {cv_result['train_neg_mean_absolute_error'].mean() * -1}")
+    logger.info(
         f"Validation RMSE: {cv_result['test_neg_root_mean_squared_error'].mean() * -1}"
     )
-    print(f"Validation MAE: {cv_result['test_neg_mean_absolute_error'].mean() * -1}\n")
+    logger.info(f"Validation MAE: {cv_result['test_neg_mean_absolute_error'].mean() * -1}\n")
 
     # returning one estimators trained on all samples for visualization purposes
     return estimator.fit(X, y)
@@ -102,9 +106,9 @@ def fit_gpr(gpr, X, y):
 
 @ignore_warnings(category=ConvergenceWarning)
 def fit_gpr_silent(gpr, X, y):
-    print(f"Fitting Gaussian Process Regressor with X of shape {X.shape} and y of shape {y.shape}...")
+    logger.info(f"Fitting Gaussian Process Regressor with X of shape {X.shape} and y of shape {y.shape}...")
     gpr.fit(X, y)
-    print("Fitting done.")
+    logger.info("Fitting done.")
     return gpr
 
diff --git a/heatchmap/weighted_average_gaussian.py b/heatchmap/weighted_average_gaussian.py
index 3376a5a..910b89a 100644
--- a/heatchmap/weighted_average_gaussian.py
+++ b/heatchmap/weighted_average_gaussian.py
@@ -2,6 +2,7 @@
 import pandas as pd
 from shapely.geometry import Point
 from tqdm.auto import tqdm
+import logging
 
 from .map_based_model import MapBasedModel
 
@@ -12,6 +13,9 @@
 # 180 degree meridian in epsg 3857
 MERIDIAN = 20037508
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 class WeightedAveragedGaussian(MapBasedModel):
     def __init__(
         self, region="world", method="ordinary", resolution=RESOLUTION, verbose=False
@@ -92,7 +96,7 @@ def fit(
         # create a raster map - resulution is defined above
         # https://stackoverflow.com/questions/56677267/tqdm-extract-time-passed-time-remaining
         if self.verbose:
-            print("Weighting gaussians for all points...")
+            logger.info("Weighting gaussians for all points...")
         with tqdm(
             zip(X[:, 0], X[:, 1], y), total=X.shape[0], disable=not self.verbose
         ) as t:
@@ -154,4 +158,4 @@ def predict(self, X):
 
             # read the raster at the given coordinates
             predictions.append(self.rasterio_raster.read(1)[x, y])
-        return np.array(predictions)
\ No newline at end of file
+        return np.array(predictions)