pyjanitor-devs · ericmjl · Jun 20, 2024 · Jun 14, 2024 · Jun 14, 2024 · Jun 14, 2024
diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py
@@ -1,12 +1,13 @@
-from .dataframe import PolarsDataFrame
-from .expressions import PolarsExpr
-from .lazyframe import PolarsLazyFrame
-from .pivot_longer import pivot_longer_spec
+from .clean_names import clean_names, make_clean_names
+from .complete import complete
+from .pivot_longer import pivot_longer, pivot_longer_spec
+from .row_to_names import row_to_names
 
 __all__ = [
     "pivot_longer_spec",
+    "pivot_longer",
     "clean_names",
-    "PolarsDataFrame",
-    "PolarsLazyFrame",
-    "PolarsExpr",
+    "make_clean_names",
+    "row_to_names",
+    "complete",
 ]
diff --git a/janitor/polars/clean_names.py b/janitor/polars/clean_names.py
@@ -15,6 +15,12 @@
 )
 from janitor.utils import import_message
 
+from .polars_flavor import (
+    register_dataframe_method,
+    register_expr_method,
+    register_lazyframe_method,
+)
+
 try:
     import polars as pl
 except ImportError:
@@ -26,6 +32,162 @@
     )
 
 
+@register_lazyframe_method
+@register_dataframe_method
+def clean_names(
+    df: pl.DataFrame | pl.LazyFrame,
+    strip_underscores: str | bool | None = None,
+    case_type: str = "lower",
+    remove_special: bool = False,
+    strip_accents: bool = False,
+    truncate_limit: int | None = None,
+) -> pl.DataFrame | pl.LazyFrame:
+    """
+    Clean the column names of a polars DataFrame or LazyFrame.
+
+    Usable as a method on both frame types after importing `janitor.polars`.
+
+    Examples:
+        >>> import polars as pl
+        >>> import janitor.polars
+        >>> df = pl.DataFrame(
+        ...     {
+        ...         "Aloha": range(3),
+        ...         "Bell Chart": range(3),
+        ...         "Animals@#$%^": range(3)
+        ...     }
+        ... )
+        >>> df
+        shape: (3, 3)
+        ┌───────┬────────────┬──────────────┐
+        │ Aloha ┆ Bell Chart ┆ Animals@#$%^ │
+        │ ---   ┆ ---        ┆ ---          │
+        │ i64   ┆ i64        ┆ i64          │
+        ╞═══════╪════════════╪══════════════╡
+        │ 0     ┆ 0          ┆ 0            │
+        │ 1     ┆ 1          ┆ 1            │
+        │ 2     ┆ 2          ┆ 2            │
+        └───────┴────────────┴──────────────┘
+        >>> df.clean_names(remove_special=True)
+        shape: (3, 3)
+        ┌───────┬────────────┬─────────┐
+        │ aloha ┆ bell_chart ┆ animals │
+        │ ---   ┆ ---        ┆ ---     │
+        │ i64   ┆ i64        ┆ i64     │
+        ╞═══════╪════════════╪═════════╡
+        │ 0     ┆ 0          ┆ 0       │
+        │ 1     ┆ 1          ┆ 1       │
+        │ 2     ┆ 2          ┆ 2       │
+        └───────┴────────────┴─────────┘
+
+    !!! info "New in version 0.28.0"
+
+    Args:
+        strip_underscores: Removes the outer underscores from all
+            column names. Default None keeps outer underscores. Values can be
+            either 'left', 'right' or 'both' or the respective shorthand 'l',
+            'r' and True.
+        case_type: Whether to make the column names lower or uppercase.
+            Current case may be preserved with 'preserve',
+            while snake case conversion (from CamelCase or camelCase only)
+            can be turned on using "snake".
+            Default 'lower' makes all characters lowercase.
+        remove_special: Remove special characters from the column names.
+            Only letters, numbers and underscores are preserved.
+        strip_accents: Whether or not to remove accents from the column names.
+        truncate_limit: Truncates formatted column names to
+            the specified length. Default None does not truncate.
+
+    Returns:
+        A polars DataFrame/LazyFrame with the cleaned column names.
+    """  # noqa: E501
+    # Passing a callable to `rename` cleans each column name individually.
+    return df.rename(
+        lambda col: _clean_column_names(
+            obj=col,
+            strip_accents=strip_accents,
+            strip_underscores=strip_underscores,
+            case_type=case_type,
+            remove_special=remove_special,
+            truncate_limit=truncate_limit,
+        )
+    )
+
+
+@register_expr_method
+def make_clean_names(
+    expression: pl.Expr,
+    strip_underscores: str | bool | None = None,
+    case_type: str = "lower",
+    remove_special: bool = False,
+    strip_accents: bool = False,
+    enforce_string: bool = False,
+    truncate_limit: int | None = None,
+) -> pl.Expr:
+    """
+    Clean the labels (values) in a polars Expression.
+
+    Usable as an expression method, e.g. `pl.col("raw").make_clean_names()`.
+
+    Examples:
+        >>> import polars as pl
+        >>> import janitor.polars
+        >>> df = pl.DataFrame({"raw": ["Abçdê fgí j"]})
+        >>> df
+        shape: (1, 1)
+        ┌─────────────┐
+        │ raw         │
+        │ ---         │
+        │ str         │
+        ╞═════════════╡
+        │ Abçdê fgí j │
+        └─────────────┘
+
+        Clean the column values:
+        >>> df.with_columns(pl.col("raw").make_clean_names(strip_accents=True))
+        shape: (1, 1)
+        ┌─────────────┐
+        │ raw         │
+        │ ---         │
+        │ str         │
+        ╞═════════════╡
+        │ abcde_fgi_j │
+        └─────────────┘
+
+    !!! info "New in version 0.28.0"
+
+    Args:
+        strip_underscores: Removes the outer underscores
+            from all labels in the expression.
+            Default None keeps outer underscores.
+            Values can be either 'left', 'right' or 'both'
+            or the respective shorthand 'l', 'r' and True.
+        case_type: Whether to make the labels in the expression lower or uppercase.
+            Current case may be preserved with 'preserve',
+            while snake case conversion (from CamelCase or camelCase only)
+            can be turned on using "snake".
+            Default 'lower' makes all characters lowercase.
+        remove_special: Remove special characters from the values in the expression.
+            Only letters, numbers and underscores are preserved.
+        strip_accents: Whether or not to remove accents from the expression.
+        enforce_string: Whether or not to cast the expression to a string type.
+        truncate_limit: Truncates formatted labels in the expression to
+            the specified length. Default None does not truncate.
+
+    Returns:
+        A polars Expression.
+    """  # noqa: E501
+    return _clean_expr_names(
+        obj=expression,
+        strip_accents=strip_accents,
+        strip_underscores=strip_underscores,
+        case_type=case_type,
+        remove_special=remove_special,
+        enforce_string=enforce_string,
+        truncate_limit=truncate_limit,
+    )
+
+
 def _change_case_expr(
     obj: pl.Expr,
     case_type: str,