From 8a1e0ad5a248077a2654a881db8b5bc7442688b3 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Thu, 4 Jul 2024 18:46:50 +1000 Subject: [PATCH] update docs --- janitor/functions/complete.py | 14 ++++++++------ janitor/functions/expand_grid.py | 25 +++++++++++++------------ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/janitor/functions/complete.py b/janitor/functions/complete.py index 9de05127c..5a53f8d38 100644 --- a/janitor/functions/complete.py +++ b/janitor/functions/complete.py @@ -21,19 +21,19 @@ def complete( """ Complete a data frame with missing combinations of data. - It is modeled after tidyr's `complete` function, and is a wrapper around - [`expand_grid`][janitor.functions.expand_grid.expand_grid], `pd.merge` - and `pd.fillna`. In a way, it is the inverse of `pd.dropna`, as it exposes + It is modeled after tidyr's `complete` function. + In a way, it is the inverse of `pd.dropna`, as it exposes implicitly missing rows. - The variable `columns` parameter can be a combination - of column names or a list/tuple of column names, + The variable `columns` parameter can be a column name, + a list of column names, or a pandas Index, Series, or DataFrame. If a pandas Index, Series, or DataFrame is passed, it should have a name or names that exist in `df`. A callable can also be passed - the callable should evaluate - to a pandas Index, Series, or DataFrame. + to a pandas Index, Series, or DataFrame, + and the names of the pandas object should exist in `df`. A dictionary can also be passed - the values of the dictionary should be @@ -215,8 +215,10 @@ def complete( It could be a column name, a list of column names, or a pandas Index, Series, or DataFrame. + It can also be a callable that gets evaluated to a pandas Index, Series, or DataFrame. 
+ It can also be a dictionary, where the values are either a 1D array or a callable that evaluates to a diff --git a/janitor/functions/expand_grid.py b/janitor/functions/expand_grid.py index 0256f3efb..3e64b9d63 100644 --- a/janitor/functions/expand_grid.py +++ b/janitor/functions/expand_grid.py @@ -28,12 +28,13 @@ def expand_grid( *, others: Optional[dict] = None, ) -> Union[pd.DataFrame, None]: - """Creates a DataFrame from a cartesian combination of all inputs. + """ + Creates a DataFrame from a cartesian combination of all inputs. - !!!note + !!!note - This function will be deprecated in a 1.x release. - Please use [`cartesian_product`][janitor.functions.expand_grid.cartesian_product] + This function will be deprecated in a 1.x release; + use [`cartesian_product`][janitor.functions.expand_grid.cartesian_product] instead. It is not restricted to a pandas DataFrame; @@ -59,9 +60,8 @@ def expand_grid( `droplevel` method. Examples: - >>> import pandas as pd - >>> from janitor.functions.expand_grid import expand_grid + >>> import janitor as jn >>> df = pd.DataFrame({"x": [1, 2], "y": [2, 1]}) >>> data = {"z": [1, 2, 3]} >>> df.expand_grid(df_key="df", others=data) @@ -77,7 +77,7 @@ def expand_grid( `expand_grid` works with non-pandas objects: >>> data = {"x": [1, 2, 3], "y": [1, 2]} - >>> expand_grid(others=data) + >>> jn.expand_grid(others=data) x y 0 0 0 1 1 @@ -144,7 +144,9 @@ def expand( Inspiration is from tidyr's expand() function. - expand() is often useful with `pd.merge` to convert implicit + expand() is often useful with + [pd.merge](https://pandas.pydata.org/docs/reference/api/pandas.merge.html) + to convert implicit missing values to explicit missing values - similar to [`complete`][janitor.functions.complete.complete]. @@ -324,7 +326,7 @@ def expand( Returns: A pandas DataFrame. 
- """ + """ # noqa: E501 if by is None: contents = _build_pandas_objects_for_expand(df=df, columns=columns) return cartesian_product(*contents) @@ -401,12 +403,11 @@ def cartesian_product(*inputs: tuple) -> pd.DataFrame: a 1D array. Examples: - >>> import pandas as pd - >>> from janitor import cartesian_product + >>> import janitor as jn >>> df = pd.DataFrame({"x": [1, 2], "y": [2, 1]}) >>> data = pd.Series([1, 2, 3], name='z') - >>> cartesian_product(df, data) + >>> jn.cartesian_product(df, data) x y z 0 1 2 1 1 1 2 2