From 239ee294ab88a79e3a1d7f8f71c6e5e67f42a606 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 7 Sep 2024 23:34:47 +1000 Subject: [PATCH] fix doctest --- janitor/functions/conditional_join.py | 142 ++++++++++++-------------- 1 file changed, 68 insertions(+), 74 deletions(-) diff --git a/janitor/functions/conditional_join.py b/janitor/functions/conditional_join.py index 5cc22f770..2933daf17 100644 --- a/janitor/functions/conditional_join.py +++ b/janitor/functions/conditional_join.py @@ -146,11 +146,11 @@ def conditional_join( value_1 value_2B 0 2 3.0 1 5 6.0 - 2 7 NaN - 3 1 NaN - 4 3 4.0 - 5 4 5.0 - 6 4 6.0 + 2 3 4.0 + 3 4 5.0 + 4 4 6.0 + 5 7 NaN + 6 1 NaN Rename columns, before the join: >>> (df1 @@ -163,13 +163,13 @@ def conditional_join( ... how='outer') ... ) left_column value_2B - 0 7.0 NaN - 1 1.0 NaN - 2 2.0 3.0 - 3 5.0 6.0 - 4 3.0 4.0 - 5 4.0 5.0 - 6 4.0 6.0 + 0 2.0 3.0 + 1 5.0 6.0 + 2 3.0 4.0 + 3 4.0 5.0 + 4 4.0 6.0 + 5 7.0 NaN + 6 1.0 NaN 7 NaN 1.0 8 NaN 9.0 9 NaN 15.0 @@ -209,18 +209,18 @@ def conditional_join( ... how='outer', ... indicator=True ... ) - value_1 _merge value_2A value_2B - 0 7.0 left_only NaN NaN - 1 1.0 left_only NaN NaN - 2 2.0 both 1.0 3.0 - 3 5.0 both 3.0 6.0 - 4 3.0 both 2.0 4.0 - 5 4.0 both 3.0 5.0 - 6 4.0 both 3.0 6.0 - 7 NaN right_only 0.0 1.0 - 8 NaN right_only 7.0 9.0 - 9 NaN right_only 12.0 15.0 - 10 NaN right_only 0.0 1.0 + value_1 value_2A value_2B _merge + 0 2.0 1.0 3.0 both + 1 5.0 3.0 6.0 both + 2 3.0 2.0 4.0 both + 3 4.0 3.0 5.0 both + 4 4.0 3.0 6.0 both + 5 7.0 NaN NaN left_only + 6 1.0 NaN NaN left_only + 7 NaN 0.0 1.0 right_only + 8 NaN 7.0 9.0 right_only + 9 NaN 12.0 15.0 right_only + 10 NaN 0.0 1.0 right_only !!! abstract "Version Changed" @@ -1226,11 +1226,11 @@ def _inner( Returns: An inner joined DataFrame. """ - frame = {key: value._values[left_index] for key, value in df.items()} - r_frame = { - key: value._values[right_index] for key, value in right.items() - } - frame.update(r_frame) + dictionary = {} + for key, value in df.items(): + dictionary[key] = value._values[left_index] + for key, value in right.items(): + dictionary[key] = value._values[right_index] if indicator: indicator, arr = _add_indicator( indicator=indicator, @@ -1238,8 +1238,8 @@ def _inner( column_length=left_index.size, columns=df.columns.union(right.columns), ) - frame[indicator] = arr - return pd.DataFrame(frame, copy=False) + dictionary[indicator] = arr + return pd.DataFrame(dictionary, copy=False) if how == "inner": return _inner( @@ -1262,8 +1262,13 @@ def _inner( right_index=right_index, indicator=indicator, ) - - right_dict = {} + dictionary = {} + for key, value in df.items(): + array = value._values + top = array[left_index] + bottom = array[indexer] + value = concat_compat([top, bottom]) + dictionary[key] = value for key, value in right.items(): array = value._values value = array[right_index] @@ -1271,7 +1276,7 @@ def _inner( value=array[:1], length=length ) value = concat_compat([value, other]) - right_dict[key] = value + dictionary[key] = value if indicator: columns = df.columns.union(right.columns) name, arr1 = _add_indicator( @@ -1287,16 +1292,8 @@ def _inner( columns=columns, ) value = concat_compat([arr1, arr2]) - right_dict[name] = value - left_dict = {} - for key, value in df.items(): - array = value._values - top = array[left_index] - bottom = array[indexer] - value = concat_compat([top, bottom]) - left_dict[key] = value - left_dict.update(right_dict) - return pd.DataFrame(left_dict, copy=False) + dictionary[name] = value + return pd.DataFrame(dictionary, copy=False) if how == "right": indexer = pd.unique(right_index) @@ -1311,7 +1308,7 @@ def _inner( right_index=right_index, indicator=indicator, ) - left_dict = {} + dictionary = {} for key, value in df.items(): array = value._values value = array[left_index] @@ -1319,14 +1316,13 @@ def _inner( value=array[:1], length=length ) value = concat_compat([value, other]) - left_dict[key] = value - right_dict = {} + dictionary[key] = value for key, value in right.items(): array = value._values top = array[right_index] bottom = array[indexer] value = concat_compat([top, bottom]) - right_dict[key] = value + dictionary[key] = value if indicator: columns = df.columns.union(right.columns) name, arr1 = _add_indicator( @@ -1342,9 +1338,8 @@ def _inner( columns=columns, ) value = concat_compat([arr1, arr2]) - right_dict[name] = value - left_dict.update(right_dict) - return pd.DataFrame(left_dict, copy=False) + dictionary[name] = value + return pd.DataFrame(dictionary, copy=False) # how == 'outer' left_indexer = pd.unique(left_index) left_indexer = pd.Index(left_indexer).get_indexer(range(len(df))) @@ -1355,7 +1350,24 @@ def _inner( df_nulls_length = left_indexer.size right_nulls_length = right_indexer.size - right_dict = {} + dictionary = {} + for key, value in df.items(): + array = value._values + top = array[left_index] + top = [top] + if df_nulls_length: + middle = array[left_indexer] + top.append(middle) + if right_nulls_length: + bottom = construct_1d_array_from_inferred_fill_value( + value=array[:1], length=right_nulls_length + ) + top.append(bottom) + if len(top) == 1: + top = top[0] + else: + top = concat_compat(top) + dictionary[key] = top for key, value in right.items(): array = value._values top = array[right_index] @@ -1372,7 +1384,7 @@ def _inner( top = top[0] else: top = concat_compat(top) - right_dict[key] = top + dictionary[key] = top if indicator: columns = df.columns.union(right.columns) name, arr1 = _add_indicator( @@ -1402,27 +1414,9 @@ def _inner( arr1 = arr1[0] else: arr1 = concat_compat(arr1) - right_dict[name] = arr1 - left_dict = {} - for key, value in df.items(): - array = value._values - top = array[left_index] - top = [top] - if df_nulls_length: - middle = array[left_indexer] - top.append(middle) - if right_nulls_length: - bottom = construct_1d_array_from_inferred_fill_value( - value=array[:1], length=right_nulls_length - ) - top.append(bottom) - if len(top) == 1: - top = top[0] - else: - top = concat_compat(top) - right_dict[key] = top - left_dict.update(right_dict) - return pd.DataFrame(left_dict, copy=False) + dictionary[name] = arr1 + + return pd.DataFrame(dictionary, copy=False) def get_join_indices(