From 84d14cf9d9dccc28dd116c7a78d380633f082aa1 Mon Sep 17 00:00:00 2001
From: Tomasini Luca
Date: Wed, 12 Feb 2025 11:51:55 +0100
Subject: [PATCH] Add numpy_function

---
 docs/modules.rst        |  3 +-
 docs/numpy_function.rst |  7 +++++
 pyproject.toml          |  1 +
 src/numpy_function.py   | 39 +++++++++++++++++++++++
 src/polars_function.py  | 69 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 118 insertions(+), 1 deletion(-)
 create mode 100644 docs/numpy_function.rst
 create mode 100644 src/numpy_function.py

diff --git a/docs/modules.rst b/docs/modules.rst
index 072fc23..fddb96f 100644
--- a/docs/modules.rst
+++ b/docs/modules.rst
@@ -6,6 +6,7 @@ Utility Functions documentation
 
    general_function
    polars_function
-   polars_shapely_function
    shapely_function
+   polars_shapely_function
+   numpy_function
    networkx_function
diff --git a/docs/numpy_function.rst b/docs/numpy_function.rst
new file mode 100644
index 0000000..f8f6010
--- /dev/null
+++ b/docs/numpy_function.rst
@@ -0,0 +1,7 @@
+numpy\_function
+=======================
+
+.. automodule:: numpy_function
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/pyproject.toml b/pyproject.toml
index 89241df..8b52986 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ packages = [
     {include = "networkx_function.py", from="src"},
     {include = "shapely_function.py", from="src"},
     {include = "polars_shapely_function.py", from="src"},
+    {include = "numpy_function.py", from="src"},
 ]
 
 
diff --git a/src/numpy_function.py b/src/numpy_function.py
new file mode 100644
index 0000000..a0f7dda
--- /dev/null
+++ b/src/numpy_function.py
@@ -0,0 +1,39 @@
+import numpy as np
+
+
+def relative_error_within_boundaries(x: np.ndarray, low: np.ndarray, high: np.ndarray) -> np.ndarray:
+    """
+    Calculate the relative error of values within specified boundaries.
+
+    The distance to the nearest boundary is divided by the magnitude of ``x``, so the
+    result is non-negative whatever the sign of ``x``. Zero entries in ``x`` follow
+    NumPy division semantics (``nan`` or ``inf``).
+
+    Args:
+        x (np.ndarray): The array of values.
+        low (np.ndarray): The lower boundary array.
+        high (np.ndarray): The upper boundary array.
+
+    Returns:
+        np.ndarray: The array of relative errors.
+    """
+    return np.abs(error_within_boundaries(x, low, high)) / np.abs(x)
+
+
+def error_within_boundaries(x: np.ndarray, low: np.ndarray, high: np.ndarray) -> np.ndarray:
+    """
+    Calculate the error of values within specified boundaries.
+
+    The error is zero inside ``[low, high]``, negative below ``low`` and positive
+    above ``high``.
+
+    Args:
+        x (np.ndarray): The array of values.
+        low (np.ndarray): The lower boundary array.
+        high (np.ndarray): The upper boundary array.
+
+    Returns:
+        np.ndarray: The array of errors.
+    """
+    nearest_boundary = np.where(x < low, low, np.where(x > high, high, x))
+    return x - nearest_boundary
diff --git a/src/polars_function.py b/src/polars_function.py
index 2e1ce66..2cab817 100644
--- a/src/polars_function.py
+++ b/src/polars_function.py
@@ -360,3 +360,72 @@ def concat_list_of_list(col_list: pl.Expr) -> pl.Expr:
     return pl.concat_list(
         col_list.map_elements(lambda x: [x], return_dtype=pl.List(pl.List(pl.Float64)))
     )
+
+
+def linear_interpolation_for_bound(x_col: pl.Expr, y_col: pl.Expr) -> pl.Expr:
+    """
+    Perform linear interpolation for boundary values in a column.
+
+    Leading and trailing null/NaN values of ``y_col`` are extrapolated along the slope
+    of the nearest pair of known points. Rows are assumed to be ordered by ``x_col``.
+
+    Args:
+        x_col (pl.Expr): The x-axis column.
+        y_col (pl.Expr): The y-axis column to interpolate.
+
+    Returns:
+        pl.Expr: The interpolated y-axis column.
+    """
+    is_missing: pl.Expr = y_col.is_null().or_(y_col.is_nan())
+    a_diff: pl.Expr = y_col.diff()/x_col.diff()
+    # Trailing gaps accumulate the step from the previous row, front to back.
+    x_diff: pl.Expr = x_col.diff().backward_fill()
+    # Leading gaps accumulate back to front, so they need the step to the next row;
+    # reusing x_diff there is off by one row whenever x is not evenly spaced.
+    x_diff_next: pl.Expr = x_col.shift(-1) - x_col
+    y_diff: pl.Expr = pl.coalesce(
+        pl.when(is_missing)
+        .then(a_diff.forward_fill()*x_diff)
+        .otherwise(pl.lit(0)).cum_sum(),
+        pl.when(is_missing)
+        .then(-a_diff.backward_fill()*x_diff_next)
+        .otherwise(pl.lit(0)).cum_sum(reverse=True)
+    )
+
+    return y_col.backward_fill().forward_fill() + y_diff
+
+
+def linear_interpolation_using_cols(
+    df: pl.DataFrame, x_col: str, y_col: Union[list[str], str]
+    ) -> pl.DataFrame:
+    """
+    Perform linear interpolation on specified columns of a DataFrame.
+
+    The frame is sorted by ``x_col``. Interior gaps are filled with ``np.interp`` and the
+    leading/trailing gaps are extrapolated by ``linear_interpolation_for_bound``.
+    Columns that contain no known value at all are left as they are.
+
+    Args:
+        df (pl.DataFrame): The DataFrame containing the data.
+        x_col (str): The name of the x-axis column.
+        y_col (Union[list[str], str]): The name(s) of the y-axis column(s) to interpolate.
+
+    Returns:
+        pl.DataFrame: The DataFrame with interpolated y-axis columns.
+    """
+    df = df.sort(x_col)
+    x = df[x_col].to_numpy()
+    if isinstance(y_col, str):
+        y_col = [y_col]
+    for col in y_col:
+        y = df[col].to_numpy()
+        mask = ~np.isnan(y)
+        if not mask.any():
+            # np.interp raises ValueError when it is given no sample points.
+            continue
+        df = df.with_columns(
+            pl.Series(np.interp(x, x[mask], y[mask], left=np.nan, right=np.nan)).fill_nan(None).alias(col)
+        ).with_columns(
+            linear_interpolation_for_bound(x_col=c(x_col), y_col=c(col)).alias(col)
+        )
+    return df