Skip to content

Commit

Permalink
Merge dev into samukweku/refactor_expand_grid
Browse files Browse the repository at this point in the history
  • Loading branch information
ericmjl authored Aug 1, 2024
2 parents e3722ae + a58ebd9 commit 71c82ed
Show file tree
Hide file tree
Showing 21 changed files with 583 additions and 624 deletions.
36 changes: 18 additions & 18 deletions janitor/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
# General Functions
pyjanitor's general-purpose data cleaning functions.
"""

NOTE: Instructions for future contributors:
# NOTE: Instructions for future contributors:

1. Place the source code of the functions in a file named after the function.
2. Place utility functions in the same file.
3. If you use a utility function from another source file,
please refactor it out to `janitor.functions.utils`.
4. Import the function into this file so that it shows up in the top-level API.
5. Sort the imports in alphabetical order.
6. Try to group related functions together (e.g. see `convert_date.py`)
7. Never import utils.
"""
# 1. Place the source code of the functions in a file named after the function.
# 2. Place utility functions in the same file.
# 3. If you use a utility function from another source file,
# please refactor it out to `janitor.functions.utils`.
# 4. Import the function into this file so that it shows up in the top-level API.
# 5. Sort the imports in alphabetical order.
# 6. Try to group related functions together (e.g. see `convert_date.py`)
# 7. Never import utils.

from .add_columns import add_columns
from .also import also
Expand Down Expand Up @@ -65,7 +65,14 @@
from .reorder_columns import reorder_columns
from .round_to_fraction import round_to_fraction
from .row_to_names import row_to_names
from .select import select, select_columns, select_rows
from .select import (
DropLabel,
get_columns,
get_index_labels,
select,
select_columns,
select_rows,
)
from .shuffle import shuffle
from .sort_column_value_order import sort_column_value_order
from .sort_naturally import sort_naturally
Expand All @@ -77,11 +84,6 @@
from .truncate_datetime import truncate_datetime_dataframe
from .update_where import update_where
from .utils import (
DropLabel,
col,
get_columns,
get_index_labels,
patterns,
unionize_dataframe_categories,
)

Expand Down Expand Up @@ -158,10 +160,8 @@
"transform_columns",
"truncate_datetime_dataframe",
"update_where",
"patterns",
"unionize_dataframe_categories",
"DropLabel",
"get_index_labels",
"col",
"get_columns",
]
3 changes: 2 additions & 1 deletion janitor/functions/clean_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from pandas.api.types import is_scalar

from janitor.errors import JanitorError
from janitor.functions.utils import _is_str_or_cat, get_index_labels
from janitor.functions.select import get_index_labels
from janitor.functions.utils import _is_str_or_cat
from janitor.utils import deprecated_alias


Expand Down
2 changes: 1 addition & 1 deletion janitor/functions/coalesce.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
import pandas_flavor as pf

from janitor.functions.utils import _select_index
from janitor.functions.select import _select_index
from janitor.utils import check, deprecated_alias


Expand Down
8 changes: 3 additions & 5 deletions janitor/functions/collapse_levels.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Implementation of the `collapse_levels` function."""

from typing import Union

import pandas as pd
import pandas_flavor as pf
from pandas.api.types import is_string_dtype
Expand All @@ -12,9 +10,9 @@
@pf.register_dataframe_method
def collapse_levels(
df: pd.DataFrame,
sep: Union[str, None] = None,
glue: Union[str, None] = None,
axis="columns",
sep: str = None,
glue: str = None,
axis: str = "columns",
) -> pd.DataFrame:
"""Flatten multi-level index/column dataframe to a single level.
Expand Down
38 changes: 10 additions & 28 deletions janitor/functions/conditional_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
_generic_func_cond_join,
_JoinOperator,
_keep_output,
col,
greater_than_join_types,
less_than_join_types,
)
Expand Down Expand Up @@ -72,8 +71,6 @@ def conditional_join(
label from `df`, `right_on` is the column label from `right`,
while `op` is the operator.
The `col` class is also supported in the `conditional_join` syntax.
For multiple conditions, the and (`&`)
operator is used to combine the results of the individual conditions.
Expand Down Expand Up @@ -133,24 +130,11 @@ def conditional_join(
3 4 3 5
4 4 3 6
Use the `col` class:
>>> df1.conditional_join(
... df2,
... col("value_1") > col("value_2A"),
... col("value_1") < col("value_2B")
... )
value_1 value_2A value_2B
0 2 1 3
1 5 3 6
2 3 2 4
3 4 3 5
4 4 3 6
Select specific columns, after the join:
>>> df1.conditional_join(
... df2,
... col("value_1") > col("value_2A"),
... col("value_1") < col("value_2B"),
... ("value_1", "value_2A", ">"),
... ("value_1", "value_2B", "<"),
... right_columns='value_2B',
... how='left'
... )
Expand All @@ -168,8 +152,8 @@ def conditional_join(
... .rename(columns={'value_1':'left_column'})
... .conditional_join(
... df2,
... ("left_column", "value_2A", ">"),
... ("left_column", "value_2B", "<"),
... ("left_column", "value_2A", ">"),
... ("left_column", "value_2B", "<"),
... right_columns='value_2B',
... how='outer')
... )
Expand All @@ -189,8 +173,8 @@ def conditional_join(
Get the first match:
>>> df1.conditional_join(
... df2,
... col("value_1") > col("value_2A"),
... col("value_1") < col("value_2B"),
... ("value_1", "value_2A", ">"),
... ("value_1", "value_2B", "<"),
... keep='first'
... )
value_1 value_2A value_2B
Expand All @@ -202,8 +186,8 @@ def conditional_join(
Get the last match:
>>> df1.conditional_join(
... df2,
... col("value_1") > col("value_2A"),
... col("value_1") < col("value_2B"),
... ("value_1", "value_2A", ">"),
... ("value_1", "value_2B", "<"),
... keep='last'
... )
value_1 value_2A value_2B
Expand Down Expand Up @@ -245,6 +229,8 @@ def conditional_join(
- Numba support for equi join
- 0.27.0
- Added support for timedelta dtype.
- 0.28.0
- Support for the `col` class removed; pass each condition as a `(left_on, right_on, op)` tuple.
Args:
df: A pandas DataFrame.
Expand Down Expand Up @@ -355,10 +341,6 @@ def _conditional_join_preliminary_checks(
if not conditions:
raise ValueError("Kindly provide at least one join condition.")

conditions = [
cond.join_args if isinstance(cond, col) else cond
for cond in conditions
]
for condition in conditions:
check("condition", condition, [tuple])
len_condition = len(condition)
Expand Down
2 changes: 1 addition & 1 deletion janitor/functions/encode_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas_flavor as pf
from pandas.api.types import is_list_like

from janitor.functions.utils import get_index_labels
from janitor.functions.select import get_index_labels
from janitor.utils import check_column, deprecated_alias, find_stack_level


Expand Down
2 changes: 1 addition & 1 deletion janitor/functions/impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas as pd
import pandas_flavor as pf

from janitor.functions.utils import get_index_labels
from janitor.functions.select import get_index_labels
from janitor.utils import deprecated_alias


Expand Down
2 changes: 1 addition & 1 deletion janitor/functions/move.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas as pd
import pandas_flavor as pf

from janitor.functions.utils import _index_converter, _select_index
from janitor.functions.select import _index_converter, _select_index


@pf.register_dataframe_method
Expand Down
2 changes: 1 addition & 1 deletion janitor/functions/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pandas.api.types import is_extension_array_dtype, is_list_like
from pandas.core.dtypes.concat import concat_compat

from janitor.functions.utils import (
from janitor.functions.select import (
_index_converter,
_select_index,
get_index_labels,
Expand Down
Loading

0 comments on commit 71c82ed

Please sign in to comment.