diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c438d85b7..90e766b87 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -35,7 +35,7 @@ repos:
- "--config=pyproject.toml"
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
- rev: v0.5.6
+ rev: v0.6.1
hooks:
- id: ruff
args: [--fix]
diff --git a/examples/notebooks/Pivot Data from Long to Wide Form.ipynb b/examples/notebooks/Pivot Data from Long to Wide Form.ipynb
index 4ec840901..d19d93189 100644
--- a/examples/notebooks/Pivot Data from Long to Wide Form.ipynb
+++ b/examples/notebooks/Pivot Data from Long to Wide Form.ipynb
@@ -13,8 +13,7 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd \n",
- "import janitor as jn "
+ "import pandas as pd\n"
]
},
{
diff --git a/examples/notebooks/Pivoting Data from Wide to Long.ipynb b/examples/notebooks/Pivoting Data from Wide to Long.ipynb
index 7203100a8..1d202e078 100644
--- a/examples/notebooks/Pivoting Data from Wide to Long.ipynb
+++ b/examples/notebooks/Pivoting Data from Wide to Long.ipynb
@@ -13,10 +13,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import janitor\n",
- "import pandas as pd\n",
+ "import re\n",
+ "\n",
"import numpy as np\n",
- "import re"
+ "import pandas as pd"
]
},
{
diff --git a/examples/notebooks/Row_to_Names.ipynb b/examples/notebooks/Row_to_Names.ipynb
index 2852ffdbd..9562fb2fa 100644
--- a/examples/notebooks/Row_to_Names.ipynb
+++ b/examples/notebooks/Row_to_Names.ipynb
@@ -27,9 +27,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd\n",
- "import janitor\n",
- "from io import StringIO"
+ "from io import StringIO\n",
+ "\n",
+ "import pandas as pd\n"
]
},
{
diff --git a/examples/notebooks/anime.ipynb b/examples/notebooks/anime.ipynb
index 7f4eaff6a..fae98321d 100644
--- a/examples/notebooks/anime.ipynb
+++ b/examples/notebooks/anime.ipynb
@@ -44,7 +44,6 @@
"outputs": [],
"source": [
"# Import pyjanitor and pandas\n",
- "import janitor\n",
"import pandas as pd\n",
"import pandas_flavor as pf"
]
@@ -57,6 +56,7 @@
"source": [
"# Suppress user warnings when we try overwriting our custom pandas flavor functions\n",
"import warnings\n",
+ "\n",
"warnings.filterwarnings('ignore')"
]
},
diff --git a/examples/notebooks/bad_values.ipynb b/examples/notebooks/bad_values.ipynb
index 0018c1aa8..cfcb9820a 100644
--- a/examples/notebooks/bad_values.ipynb
+++ b/examples/notebooks/bad_values.ipynb
@@ -19,9 +19,8 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd\n",
- "import janitor\n",
- "import numpy as np"
+ "import numpy as np\n",
+ "import pandas as pd\n"
]
},
{
diff --git a/examples/notebooks/bird_call.ipynb b/examples/notebooks/bird_call.ipynb
index 06b32b128..ef3c765cb 100644
--- a/examples/notebooks/bird_call.ipynb
+++ b/examples/notebooks/bird_call.ipynb
@@ -38,8 +38,7 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd\n",
- "import janitor"
+ "import pandas as pd\n"
]
},
{
diff --git a/examples/notebooks/board_games.ipynb b/examples/notebooks/board_games.ipynb
index 43a0e65c2..7f44eaa0d 100644
--- a/examples/notebooks/board_games.ipynb
+++ b/examples/notebooks/board_games.ipynb
@@ -30,9 +30,8 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd\n",
- "import janitor\n",
- "import os"
+ "\n",
+ "import pandas as pd\n"
]
},
{
diff --git a/examples/notebooks/case_when.ipynb b/examples/notebooks/case_when.ipynb
index 0c729e808..b1ae20492 100644
--- a/examples/notebooks/case_when.ipynb
+++ b/examples/notebooks/case_when.ipynb
@@ -2,41 +2,27 @@
"cells": [
{
"cell_type": "markdown",
+ "metadata": {},
"source": [
"# Multiple Conditions with case_when"
- ],
- "metadata": {}
+ ]
},
{
"cell_type": "code",
"execution_count": 1,
- "source": [
- "import pandas as pd\n",
- "import janitor"
- ],
+ "metadata": {},
"outputs": [],
- "metadata": {}
+ "source": [
+ "import pandas as pd\n"
+ ]
},
{
"cell_type": "code",
"execution_count": 2,
- "source": [
- "# https://stackoverflow.com/q/19913659/7175713\n",
- "df = pd.DataFrame({'col1': list('ABBC'), 'col2': list('ZZXY')})\n",
- "\n",
- "df"
- ],
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- " col1 col2\n",
- "0 A Z\n",
- "1 B Z\n",
- "2 B X\n",
- "3 C Y"
- ],
"text/html": [
"
\n",
"\n
\n \n \n | \n First Name | \n Last Name | \n Employee Status | \n Subject | \n Hire Date | \n % Allocated | \n Full time? | \n do not edit! ---> | \n Certification | \n Certification.1 | \n Certification.2 | \n Unnamed: 11 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n NaN | \n Instr. music | \n Vocal music | \n NaN | \n NaN | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n Computers | \n NaN | \n NaN | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 7 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 8 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n NaN | \n English 6-12 | \n NaN | \n NaN | \n
\n \n 9 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 10 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n NaN | \n Physical ed | \n NaN | \n NaN | \n NaN | \n
\n \n 11 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n NaN | \n Political sci. | \n NaN | \n NaN | \n
\n \n 12 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n NaN | \n Vocal music | \n English | \n NaN | \n NaN | \n
\n \n 13 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 14 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 15 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 16 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n
\n
",
"text/plain": [
" First Name Last Name Employee Status Subject Hire Date \\\n",
"0 Jason Bourne Teacher PE 39690.0 \n",
@@ -224,12 +218,16 @@
"14 NaN NaN \n",
"15 NaN NaN \n",
"16 NaN NaN "
- ],
- "text/html": "\n\n
\n \n \n | \n First Name | \n Last Name | \n Employee Status | \n Subject | \n Hire Date | \n % Allocated | \n Full time? | \n do not edit! ---> | \n Certification | \n Certification.1 | \n Certification.2 | \n Unnamed: 11 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n NaN | \n Instr. music | \n Vocal music | \n NaN | \n NaN | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n Computers | \n NaN | \n NaN | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 7 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 8 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n NaN | \n English 6-12 | \n NaN | \n NaN | \n
\n \n 9 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 10 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n NaN | \n Physical ed | \n NaN | \n NaN | \n NaN | \n
\n \n 11 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n NaN | \n Political sci. | \n NaN | \n NaN | \n
\n \n 12 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n NaN | \n Vocal music | \n English | \n NaN | \n NaN | \n
\n \n 13 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 14 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 15 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 16 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 2,
"metadata": {},
- "execution_count": 2
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df = pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n",
+ "df"
]
},
{
@@ -253,28 +251,13 @@
]
},
{
- "source": [
- "cleaned_df = (\n",
- " pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n",
- " .clean_names()\n",
- " .remove_empty()\n",
- " .rename_column(\"%_allocated\", \"percent_allocated\")\n",
- " .rename_column(\"full_time_\", \"full_time\")\n",
- " .coalesce([\"certification\", \"certification_1\"], \"certification\")\n",
- " .encode_categorical([\"subject\", \"employee_status\", \"full_time\"])\n",
- " .convert_excel_date(\"hire_date\")\n",
- " .reset_index(drop=True)\n",
- ")\n",
- "\n",
- "cleaned_df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 3,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 2008-08-30 | \n 0.75 | \n Yes | \n Physical ed | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 2008-08-30 | \n 0.25 | \n Yes | \n Physical ed | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 2001-08-15 | \n 1.00 | \n Yes | \n Instr. music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 1975-05-01 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 2013-06-06 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 1990-05-01 | \n 0.50 | \n No | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 1976-06-08 | \n 0.50 | \n No | \n PENDING | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 2015-08-05 | \n NaN | \n No | \n Physical ed | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 1995-01-01 | \n NaN | \n No | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 2009-09-15 | \n 0.80 | \n No | \n Vocal music | \n
\n \n
\n
",
"text/plain": [
" first_name last_name employee_status subject hire_date \\\n",
"0 Jason Bourne Teacher PE 2008-08-30 \n",
@@ -303,12 +286,27 @@
"9 NaN No Physical ed \n",
"10 NaN No Political sci. \n",
"11 0.80 No Vocal music "
- ],
- "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 2008-08-30 | \n 0.75 | \n Yes | \n Physical ed | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 2008-08-30 | \n 0.25 | \n Yes | \n Physical ed | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 2001-08-15 | \n 1.00 | \n Yes | \n Instr. music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 1975-05-01 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 2013-06-06 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 1990-05-01 | \n 0.50 | \n No | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 1976-06-08 | \n 0.50 | \n No | \n PENDING | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 2015-08-05 | \n NaN | \n No | \n Physical ed | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 1995-01-01 | \n NaN | \n No | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 2009-09-15 | \n 0.80 | \n No | \n Vocal music | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 3,
"metadata": {},
- "execution_count": 3
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "cleaned_df = (\n",
+ " pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n",
+ " .clean_names()\n",
+ " .remove_empty()\n",
+ " .rename_column(\"%_allocated\", \"percent_allocated\")\n",
+ " .rename_column(\"full_time_\", \"full_time\")\n",
+ " .coalesce([\"certification\", \"certification_1\"], \"certification\")\n",
+ " .encode_categorical([\"subject\", \"employee_status\", \"full_time\"])\n",
+ " .convert_excel_date(\"hire_date\")\n",
+ " .reset_index(drop=True)\n",
+ ")\n",
+ "\n",
+ "cleaned_df"
]
},
{
@@ -330,17 +328,13 @@
]
},
{
- "source": [
- "df = pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n",
- "df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 4,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n First Name | \n Last Name | \n Employee Status | \n Subject | \n Hire Date | \n % Allocated | \n Full time? | \n do not edit! ---> | \n Certification | \n Certification.1 | \n Certification.2 | \n Unnamed: 11 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n NaN | \n Instr. music | \n Vocal music | \n NaN | \n NaN | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n Computers | \n NaN | \n NaN | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 7 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 8 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n NaN | \n English 6-12 | \n NaN | \n NaN | \n
\n \n 9 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 10 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n NaN | \n Physical ed | \n NaN | \n NaN | \n NaN | \n
\n \n 11 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n NaN | \n Political sci. | \n NaN | \n NaN | \n
\n \n 12 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n NaN | \n Vocal music | \n English | \n NaN | \n NaN | \n
\n \n 13 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 14 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 15 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 16 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n
\n
",
"text/plain": [
" First Name Last Name Employee Status Subject Hire Date \\\n",
"0 Jason Bourne Teacher PE 39690.0 \n",
@@ -398,12 +392,16 @@
"14 NaN NaN \n",
"15 NaN NaN \n",
"16 NaN NaN "
- ],
- "text/html": "\n\n
\n \n \n | \n First Name | \n Last Name | \n Employee Status | \n Subject | \n Hire Date | \n % Allocated | \n Full time? | \n do not edit! ---> | \n Certification | \n Certification.1 | \n Certification.2 | \n Unnamed: 11 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n NaN | \n Instr. music | \n Vocal music | \n NaN | \n NaN | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n Computers | \n NaN | \n NaN | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 7 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 8 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n NaN | \n English 6-12 | \n NaN | \n NaN | \n
\n \n 9 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 10 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n NaN | \n Physical ed | \n NaN | \n NaN | \n NaN | \n
\n \n 11 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n NaN | \n Political sci. | \n NaN | \n NaN | \n
\n \n 12 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n NaN | \n Vocal music | \n English | \n NaN | \n NaN | \n
\n \n 13 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 14 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 15 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 16 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 4,
"metadata": {},
- "execution_count": 4
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df = pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n",
+ "df"
]
},
{
@@ -414,17 +412,13 @@
]
},
{
- "source": [
- "df = df.clean_names()\n",
- "df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 5,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n %_allocated | \n full_time_ | \n do_not_edit!_> | \n certification | \n certification_1 | \n certification_2 | \n unnamed_11 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n NaN | \n Instr. music | \n Vocal music | \n NaN | \n NaN | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n Computers | \n NaN | \n NaN | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 7 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 8 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n NaN | \n English 6-12 | \n NaN | \n NaN | \n
\n \n 9 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 10 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n NaN | \n Physical ed | \n NaN | \n NaN | \n NaN | \n
\n \n 11 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n NaN | \n Political sci. | \n NaN | \n NaN | \n
\n \n 12 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n NaN | \n Vocal music | \n English | \n NaN | \n NaN | \n
\n \n 13 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 14 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 15 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 16 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n
\n
",
"text/plain": [
" first_name last_name employee_status subject hire_date \\\n",
"0 Jason Bourne Teacher PE 39690.0 \n",
@@ -482,12 +476,16 @@
"14 NaN NaN \n",
"15 NaN NaN \n",
"16 NaN NaN "
- ],
- "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n %_allocated | \n full_time_ | \n do_not_edit!_> | \n certification | \n certification_1 | \n certification_2 | \n unnamed_11 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n NaN | \n Physical ed | \n Theater | \n NaN | \n NaN | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n NaN | \n Instr. music | \n Vocal music | \n NaN | \n NaN | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n Computers | \n NaN | \n NaN | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n NaN | \n Science 6-12 | \n Physics | \n NaN | \n NaN | \n
\n \n 7 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 8 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n NaN | \n English 6-12 | \n NaN | \n NaN | \n
\n \n 9 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n NaN | \n PENDING | \n NaN | \n NaN | \n NaN | \n
\n \n 10 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n NaN | \n Physical ed | \n NaN | \n NaN | \n NaN | \n
\n \n 11 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n NaN | \n Political sci. | \n NaN | \n NaN | \n
\n \n 12 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n NaN | \n Vocal music | \n English | \n NaN | \n NaN | \n
\n \n 13 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 14 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 15 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n 16 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 5,
"metadata": {},
- "execution_count": 5
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df = df.clean_names()\n",
+ "df"
]
},
{
@@ -498,17 +496,13 @@
]
},
{
- "source": [
- "df = df.remove_empty()\n",
- "df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 6,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n %_allocated | \n full_time_ | \n certification | \n certification_1 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n Physical ed | \n Theater | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n Physical ed | \n Theater | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n Instr. music | \n Vocal music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n PENDING | \n Computers | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n PENDING | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n Physics | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n Physics | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n PENDING | \n NaN | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n Physical ed | \n NaN | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n Vocal music | \n English | \n
\n \n
\n
",
"text/plain": [
" first_name last_name employee_status subject hire_date \\\n",
"0 Jason Bourne Teacher PE 39690.0 \n",
@@ -537,12 +531,16 @@
"9 NaN No Physical ed NaN \n",
"10 NaN No NaN Political sci. \n",
"11 0.80 No Vocal music English "
- ],
- "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n %_allocated | \n full_time_ | \n certification | \n certification_1 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n Physical ed | \n Theater | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n Physical ed | \n Theater | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n Instr. music | \n Vocal music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n PENDING | \n Computers | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n PENDING | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n Physics | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n Physics | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n PENDING | \n NaN | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n Physical ed | \n NaN | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n Vocal music | \n English | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 6,
"metadata": {},
- "execution_count": 6
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df = df.remove_empty()\n",
+ "df"
]
},
{
@@ -553,20 +551,13 @@
]
},
{
- "source": [
- "df = (\n",
- " df.rename_column(\"%_allocated\", \"percent_allocated\")\n",
- " .rename_column(\"full_time_\", \"full_time\")\n",
- ")\n",
- "df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 7,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n certification_1 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n Physical ed | \n Theater | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n Physical ed | \n Theater | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n Instr. music | \n Vocal music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n PENDING | \n Computers | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n PENDING | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n Physics | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n Physics | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n PENDING | \n NaN | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n Physical ed | \n NaN | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n Vocal music | \n English | \n
\n \n
\n
",
"text/plain": [
" first_name last_name employee_status subject hire_date \\\n",
"0 Jason Bourne Teacher PE 39690.0 \n",
@@ -595,12 +586,19 @@
"9 NaN No Physical ed NaN \n",
"10 NaN No NaN Political sci. \n",
"11 0.80 No Vocal music English "
- ],
- "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n certification_1 | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n Physical ed | \n Theater | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n Physical ed | \n Theater | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n Instr. music | \n Vocal music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n PENDING | \n Computers | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n PENDING | \n NaN | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n Physics | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n Physics | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n NaN | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n PENDING | \n NaN | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n Physical ed | \n NaN | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n NaN | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n Vocal music | \n English | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 7,
"metadata": {},
- "execution_count": 7
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df = (\n",
+ " df.rename_column(\"%_allocated\", \"percent_allocated\")\n",
+ " .rename_column(\"full_time_\", \"full_time\")\n",
+ ")\n",
+ "df"
]
},
{
@@ -611,17 +609,13 @@
]
},
{
- "source": [
- "df = df.coalesce([\"certification\", \"certification_1\"], \"certification\")\n",
- "df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 8,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n Physical ed | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n Physical ed | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n Instr. music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n PENDING | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n Physical ed | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n Vocal music | \n
\n \n
\n
",
"text/plain": [
" first_name last_name employee_status subject hire_date \\\n",
"0 Jason Bourne Teacher PE 39690.0 \n",
@@ -650,12 +644,16 @@
"9 NaN No Physical ed \n",
"10 NaN No Political sci. \n",
"11 0.80 No Vocal music "
- ],
- "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 39690.0 | \n 0.75 | \n Yes | \n Physical ed | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 39690.0 | \n 0.25 | \n Yes | \n Physical ed | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 37118.0 | \n 1.00 | \n Yes | \n Instr. music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 27515.0 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 41431.0 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 11037.0 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 32994.0 | \n 0.50 | \n No | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 27919.0 | \n 0.50 | \n No | \n PENDING | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 42221.0 | \n NaN | \n No | \n Physical ed | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 34700.0 | \n NaN | \n No | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 40071.0 | \n 0.80 | \n No | \n Vocal music | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 8,
"metadata": {},
- "execution_count": 8
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df = df.coalesce([\"certification\", \"certification_1\"], \"certification\")\n",
+ "df"
]
},
{
@@ -666,15 +664,11 @@
]
},
{
- "source": [
- "df.dtypes"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 9,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"first_name object\n",
@@ -688,22 +682,21 @@
"dtype: object"
]
},
+ "execution_count": 9,
"metadata": {},
- "execution_count": 9
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df.dtypes"
]
},
{
- "source": [
- "df.encode_categorical([\"subject\", \"employee_status\", \"full_time\"])\n",
- "df.dtypes"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 10,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"first_name object\n",
@@ -717,9 +710,14 @@
"dtype: object"
]
},
+ "execution_count": 10,
"metadata": {},
- "execution_count": 10
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df.encode_categorical([\"subject\", \"employee_status\", \"full_time\"])\n",
+ "df.dtypes"
]
},
{
@@ -730,17 +728,13 @@
]
},
{
- "source": [
- "df.convert_excel_date(\"hire_date\")\n",
- "df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 11,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 2008-08-30 | \n 0.75 | \n Yes | \n Physical ed | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 2008-08-30 | \n 0.25 | \n Yes | \n Physical ed | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 2001-08-15 | \n 1.00 | \n Yes | \n Instr. music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 1975-05-01 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 2013-06-06 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 1990-05-01 | \n 0.50 | \n No | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 1976-06-08 | \n 0.50 | \n No | \n PENDING | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 2015-08-05 | \n NaN | \n No | \n Physical ed | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 1995-01-01 | \n NaN | \n No | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 2009-09-15 | \n 0.80 | \n No | \n Vocal music | \n
\n \n
\n
",
"text/plain": [
" first_name last_name employee_status subject hire_date \\\n",
"0 Jason Bourne Teacher PE 2008-08-30 \n",
@@ -769,12 +763,16 @@
"9 NaN No Physical ed \n",
"10 NaN No Political sci. \n",
"11 0.80 No Vocal music "
- ],
- "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 2008-08-30 | \n 0.75 | \n Yes | \n Physical ed | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 2008-08-30 | \n 0.25 | \n Yes | \n Physical ed | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 2001-08-15 | \n 1.00 | \n Yes | \n Instr. music | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 1975-05-01 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 2013-06-06 | \n 1.00 | \n Yes | \n PENDING | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 1990-05-01 | \n 0.50 | \n No | \n English 6-12 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 1976-06-08 | \n 0.50 | \n No | \n PENDING | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 2015-08-05 | \n NaN | \n No | \n Physical ed | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 1995-01-01 | \n NaN | \n No | \n Political sci. | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 2009-09-15 | \n 0.80 | \n No | \n Vocal music | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 11,
"metadata": {},
- "execution_count": 11
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "df.convert_excel_date(\"hire_date\")\n",
+ "df"
]
},
{
@@ -792,26 +790,13 @@
]
},
{
- "source": [
- "data_df = (\n",
- " cleaned_df\n",
- " .copy()\n",
- " .add_columns(\n",
- " lucky_number=np.random.randint(0, 10, len(cleaned_df)),\n",
- " age=np.random.randint(10, 100, len(cleaned_df)),\n",
- " employee_of_month_count=np.random.randint(0, 5, len(cleaned_df))\n",
- " )\n",
- ")\n",
- "\n",
- "data_df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 12,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n lucky_number | \n age | \n employee_of_month_count | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 2008-08-30 | \n 0.75 | \n Yes | \n Physical ed | \n 6 | \n 74 | \n 0 | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 2008-08-30 | \n 0.25 | \n Yes | \n Physical ed | \n 6 | \n 90 | \n 4 | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 2001-08-15 | \n 1.00 | \n Yes | \n Instr. music | \n 2 | \n 47 | \n 0 | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 1975-05-01 | \n 1.00 | \n Yes | \n PENDING | \n 9 | \n 76 | \n 0 | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 2013-06-06 | \n 1.00 | \n Yes | \n PENDING | \n 4 | \n 69 | \n 2 | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n 7 | \n 61 | \n 3 | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n 4 | \n 47 | \n 0 | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 1990-05-01 | \n 0.50 | \n No | \n English 6-12 | \n 9 | \n 34 | \n 3 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 1976-06-08 | \n 0.50 | \n No | \n PENDING | \n 6 | \n 23 | \n 0 | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 2015-08-05 | \n NaN | \n No | \n Physical ed | \n 1 | \n 21 | \n 3 | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 1995-01-01 | \n NaN | \n No | \n Political sci. | \n 1 | \n 83 | \n 1 | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 2009-09-15 | \n 0.80 | \n No | \n Vocal music | \n 5 | \n 81 | \n 2 | \n
\n \n
\n
",
"text/plain": [
" first_name last_name employee_status subject hire_date \\\n",
"0 Jason Bourne Teacher PE 2008-08-30 \n",
@@ -854,12 +839,25 @@
"9 3 \n",
"10 1 \n",
"11 2 "
- ],
- "text/html": "\n\n
\n \n \n | \n first_name | \n last_name | \n employee_status | \n subject | \n hire_date | \n percent_allocated | \n full_time | \n certification | \n lucky_number | \n age | \n employee_of_month_count | \n
\n \n \n \n 0 | \n Jason | \n Bourne | \n Teacher | \n PE | \n 2008-08-30 | \n 0.75 | \n Yes | \n Physical ed | \n 6 | \n 74 | \n 0 | \n
\n \n 1 | \n Jason | \n Bourne | \n Teacher | \n Drafting | \n 2008-08-30 | \n 0.25 | \n Yes | \n Physical ed | \n 6 | \n 90 | \n 4 | \n
\n \n 2 | \n Alicia | \n Keys | \n Teacher | \n Music | \n 2001-08-15 | \n 1.00 | \n Yes | \n Instr. music | \n 2 | \n 47 | \n 0 | \n
\n \n 3 | \n Ada | \n Lovelace | \n Teacher | \n NaN | \n 1975-05-01 | \n 1.00 | \n Yes | \n PENDING | \n 9 | \n 76 | \n 0 | \n
\n \n 4 | \n Desus | \n Nice | \n Administration | \n Dean | \n 2013-06-06 | \n 1.00 | \n Yes | \n PENDING | \n 4 | \n 69 | \n 2 | \n
\n \n 5 | \n Chien-Shiung | \n Wu | \n Teacher | \n Physics | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n 7 | \n 61 | \n 3 | \n
\n \n 6 | \n Chien-Shiung | \n Wu | \n Teacher | \n Chemistry | \n 1930-03-20 | \n 0.50 | \n Yes | \n Science 6-12 | \n 4 | \n 47 | \n 0 | \n
\n \n 7 | \n James | \n Joyce | \n Teacher | \n English | \n 1990-05-01 | \n 0.50 | \n No | \n English 6-12 | \n 9 | \n 34 | \n 3 | \n
\n \n 8 | \n Hedy | \n Lamarr | \n Teacher | \n Science | \n 1976-06-08 | \n 0.50 | \n No | \n PENDING | \n 6 | \n 23 | \n 0 | \n
\n \n 9 | \n Carlos | \n Boozer | \n Coach | \n Basketball | \n 2015-08-05 | \n NaN | \n No | \n Physical ed | \n 1 | \n 21 | \n 3 | \n
\n \n 10 | \n Young | \n Boozer | \n Coach | \n NaN | \n 1995-01-01 | \n NaN | \n No | \n Political sci. | \n 1 | \n 83 | \n 1 | \n
\n \n 11 | \n Micheal | \n Larsen | \n Teacher | \n English | \n 2009-09-15 | \n 0.80 | \n No | \n Vocal music | \n 5 | \n 81 | \n 2 | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 12,
"metadata": {},
- "execution_count": 12
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "data_df = (\n",
+ " cleaned_df\n",
+ " .copy()\n",
+ " .add_columns(\n",
+ " lucky_number=np.random.randint(0, 10, len(cleaned_df)),\n",
+ " age=np.random.randint(10, 100, len(cleaned_df)),\n",
+ " employee_of_month_count=np.random.randint(0, 5, len(cleaned_df))\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "data_df"
]
},
{
@@ -870,23 +868,13 @@
]
},
{
- "source": [
- "stats_df = (\n",
- " data_df.groupby('employee_status')\n",
- " .agg(['mean', 'median'])\n",
- " .collapse_levels()\n",
- " .reset_index()\n",
- ")\n",
- "\n",
- "stats_df"
- ],
"cell_type": "code",
- "metadata": {},
"execution_count": 13,
+ "metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
+ "text/html": "\n\n
\n \n \n | \n employee_status | \n percent_allocated_mean | \n percent_allocated_median | \n lucky_number_mean | \n lucky_number_median | \n age_mean | \n age_median | \n employee_of_month_count_mean | \n employee_of_month_count_median | \n
\n \n \n \n 0 | \n Administration | \n 1.000000 | \n 1.0 | \n 4 | \n 4 | \n 69.000000 | \n 69 | \n 2.000000 | \n 2 | \n
\n \n 1 | \n Coach | \n NaN | \n NaN | \n 1 | \n 1 | \n 52.000000 | \n 52 | \n 2.000000 | \n 2 | \n
\n \n 2 | \n Teacher | \n 0.644444 | \n 0.5 | \n 6 | \n 6 | \n 59.222222 | \n 61 | \n 1.333333 | \n 0 | \n
\n \n
\n
",
"text/plain": [
" employee_status percent_allocated_mean percent_allocated_median \\\n",
"0 Administration 1.000000 1.0 \n",
@@ -902,12 +890,22 @@
"0 2.000000 2 \n",
"1 2.000000 2 \n",
"2 1.333333 0 "
- ],
- "text/html": "\n\n
\n \n \n | \n employee_status | \n percent_allocated_mean | \n percent_allocated_median | \n lucky_number_mean | \n lucky_number_median | \n age_mean | \n age_median | \n employee_of_month_count_mean | \n employee_of_month_count_median | \n
\n \n \n \n 0 | \n Administration | \n 1.000000 | \n 1.0 | \n 4 | \n 4 | \n 69.000000 | \n 69 | \n 2.000000 | \n 2 | \n
\n \n 1 | \n Coach | \n NaN | \n NaN | \n 1 | \n 1 | \n 52.000000 | \n 52 | \n 2.000000 | \n 2 | \n
\n \n 2 | \n Teacher | \n 0.644444 | \n 0.5 | \n 6 | \n 6 | \n 59.222222 | \n 61 | \n 1.333333 | \n 0 | \n
\n \n
\n
"
+ ]
},
+ "execution_count": 13,
"metadata": {},
- "execution_count": 13
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "stats_df = (\n",
+ " data_df.groupby('employee_status')\n",
+ " .agg(['mean', 'median'])\n",
+ " .collapse_levels()\n",
+ " .reset_index()\n",
+ ")\n",
+ "\n",
+ "stats_df"
]
}
],
diff --git a/examples/notebooks/select_columns.ipynb b/examples/notebooks/select_columns.ipynb
index 88a9e1a37..09d410833 100644
--- a/examples/notebooks/select_columns.ipynb
+++ b/examples/notebooks/select_columns.ipynb
@@ -13,13 +13,14 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd\n",
- "import janitor\n",
- "import numpy as np\n",
"import datetime\n",
"import re\n",
- "from janitor import patterns\n",
- "from pandas.api.types import is_datetime64_dtype"
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from pandas.api.types import is_datetime64_dtype\n",
+ "\n",
+ "from janitor import patterns"
]
},
{
diff --git a/examples/notebooks/sort_columns.ipynb b/examples/notebooks/sort_columns.ipynb
index efbb2bc30..c143e2f24 100644
--- a/examples/notebooks/sort_columns.ipynb
+++ b/examples/notebooks/sort_columns.ipynb
@@ -13,9 +13,8 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd \n",
- "import janitor\n",
- "from numpy import nan"
+ "import pandas as pd\n",
+ "from numpy import nan\n"
]
},
{
diff --git a/examples/notebooks/sort_naturally.ipynb b/examples/notebooks/sort_naturally.ipynb
index 672683650..806392c6c 100644
--- a/examples/notebooks/sort_naturally.ipynb
+++ b/examples/notebooks/sort_naturally.ipynb
@@ -13,9 +13,7 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas_flavor as pf\n",
- "import pandas as pd\n",
- "import janitor"
+ "import pandas as pd"
]
},
{
diff --git a/examples/notebooks/teacher_pupil.ipynb b/examples/notebooks/teacher_pupil.ipynb
index f2a38206e..3c62df006 100644
--- a/examples/notebooks/teacher_pupil.ipynb
+++ b/examples/notebooks/teacher_pupil.ipynb
@@ -151,7 +151,6 @@
}
],
"source": [
- "import janitor\n",
"import pandas as pd\n",
"import pandas_flavor as pf\n",
"\n",
diff --git a/examples/notebooks/transform_column.ipynb b/examples/notebooks/transform_column.ipynb
index fe53a2550..5a2b112f0 100644
--- a/examples/notebooks/transform_column.ipynb
+++ b/examples/notebooks/transform_column.ipynb
@@ -36,9 +36,8 @@
"metadata": {},
"outputs": [],
"source": [
- "import janitor\n",
- "import pandas as pd\n",
- "import numpy as np"
+ "import numpy as np\n",
+ "import pandas as pd"
]
},
{
@@ -148,6 +147,7 @@
"source": [
"from random import choice\n",
"\n",
+ "\n",
"def make_strings(length: int):\n",
" return \"\".join(choice(\"ABCDEFGHIJKLMNOPQRSTUVWXYZ\") for _ in range(length))\n",
"\n",