diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c438d85b7..90e766b87 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - "--config=pyproject.toml" - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.6 + rev: v0.6.1 hooks: - id: ruff args: [--fix] diff --git a/examples/notebooks/Pivot Data from Long to Wide Form.ipynb b/examples/notebooks/Pivot Data from Long to Wide Form.ipynb index 4ec840901..d19d93189 100644 --- a/examples/notebooks/Pivot Data from Long to Wide Form.ipynb +++ b/examples/notebooks/Pivot Data from Long to Wide Form.ipynb @@ -13,8 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd \n", - "import janitor as jn " + "import pandas as pd\n" ] }, { diff --git a/examples/notebooks/Pivoting Data from Wide to Long.ipynb b/examples/notebooks/Pivoting Data from Wide to Long.ipynb index 7203100a8..1d202e078 100644 --- a/examples/notebooks/Pivoting Data from Wide to Long.ipynb +++ b/examples/notebooks/Pivoting Data from Wide to Long.ipynb @@ -13,10 +13,10 @@ "metadata": {}, "outputs": [], "source": [ - "import janitor\n", - "import pandas as pd\n", + "import re\n", + "\n", "import numpy as np\n", - "import re" + "import pandas as pd" ] }, { diff --git a/examples/notebooks/Row_to_Names.ipynb b/examples/notebooks/Row_to_Names.ipynb index 2852ffdbd..9562fb2fa 100644 --- a/examples/notebooks/Row_to_Names.ipynb +++ b/examples/notebooks/Row_to_Names.ipynb @@ -27,9 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import janitor\n", - "from io import StringIO" + "from io import StringIO\n", + "\n", + "import pandas as pd\n" ] }, { diff --git a/examples/notebooks/anime.ipynb b/examples/notebooks/anime.ipynb index 7f4eaff6a..fae98321d 100644 --- a/examples/notebooks/anime.ipynb +++ b/examples/notebooks/anime.ipynb @@ -44,7 +44,6 @@ "outputs": [], "source": [ "# Import pyjanitor and pandas\n", - "import janitor\n", "import pandas as pd\n", "import pandas_flavor as pf" ] @@ -57,6 +56,7 @@ "source": [ "# Suppress user warnings when we try overwriting our custom pandas flavor functions\n", "import warnings\n", + "\n", "warnings.filterwarnings('ignore')" ] }, diff --git a/examples/notebooks/bad_values.ipynb b/examples/notebooks/bad_values.ipynb index 0018c1aa8..cfcb9820a 100644 --- a/examples/notebooks/bad_values.ipynb +++ b/examples/notebooks/bad_values.ipynb @@ -19,9 +19,8 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import janitor\n", - "import numpy as np" + "import numpy as np\n", + "import pandas as pd\n" ] }, { diff --git a/examples/notebooks/bird_call.ipynb b/examples/notebooks/bird_call.ipynb index 06b32b128..ef3c765cb 100644 --- a/examples/notebooks/bird_call.ipynb +++ b/examples/notebooks/bird_call.ipynb @@ -38,8 +38,7 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import janitor" + "import pandas as pd\n" ] }, { diff --git a/examples/notebooks/board_games.ipynb b/examples/notebooks/board_games.ipynb index 43a0e65c2..7f44eaa0d 100644 --- a/examples/notebooks/board_games.ipynb +++ b/examples/notebooks/board_games.ipynb @@ -30,9 +30,8 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import janitor\n", - "import os" + "\n", + "import pandas as pd\n" ] }, { diff --git a/examples/notebooks/case_when.ipynb b/examples/notebooks/case_when.ipynb index 0c729e808..b1ae20492 100644 --- a/examples/notebooks/case_when.ipynb +++ b/examples/notebooks/case_when.ipynb @@ -2,41 +2,27 @@ "cells": [ { "cell_type": "markdown", + "metadata": {}, "source": [ "# Multiple Conditions with case_when" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 1, - "source": [ - "import pandas as pd\n", - "import janitor" - ], + "metadata": {}, "outputs": [], - "metadata": {} + "source": [ + "import pandas as pd\n" + ] }, { "cell_type": "code", "execution_count": 2, - "source": [ - "# https://stackoverflow.com/q/19913659/7175713\n", - "df = pd.DataFrame({'col1': list('ABBC'), 'col2': list('ZZXY')})\n", - "\n", - "df" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " col1 col2\n", - "0 A Z\n", - "1 B Z\n", - "2 B X\n", - "3 C Y" - ], "text/html": [ "
\n", "\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
First NameLast NameEmployee StatusSubjectHire Date% AllocatedFull time?do not edit! --->CertificationCertification.1Certification.2Unnamed: 11
0JasonBourneTeacherPE39690.00.75YesNaNPhysical edTheaterNaNNaN
1JasonBourneTeacherDrafting39690.00.25YesNaNPhysical edTheaterNaNNaN
2AliciaKeysTeacherMusic37118.01.00YesNaNInstr. musicVocal musicNaNNaN
3AdaLovelaceTeacherNaN27515.01.00YesNaNPENDINGComputersNaNNaN
4DesusNiceAdministrationDean41431.01.00YesNaNPENDINGNaNNaNNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesNaNScience 6-12PhysicsNaNNaN
6Chien-ShiungWuTeacherChemistry11037.00.50YesNaNScience 6-12PhysicsNaNNaN
7NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
8JamesJoyceTeacherEnglish32994.00.50NoNaNNaNEnglish 6-12NaNNaN
9HedyLamarrTeacherScience27919.00.50NoNaNPENDINGNaNNaNNaN
10CarlosBoozerCoachBasketball42221.0NaNNoNaNPhysical edNaNNaNNaN
11YoungBoozerCoachNaN34700.0NaNNoNaNNaNPolitical sci.NaNNaN
12MichealLarsenTeacherEnglish40071.00.80NoNaNVocal musicEnglishNaNNaN
13NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
15NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
16NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n
", "text/plain": [ " First Name Last Name Employee Status Subject Hire Date \\\n", "0 Jason Bourne Teacher PE 39690.0 \n", @@ -224,12 +218,16 @@ "14 NaN NaN \n", "15 NaN NaN \n", "16 NaN NaN " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
First NameLast NameEmployee StatusSubjectHire Date% AllocatedFull time?do not edit! --->CertificationCertification.1Certification.2Unnamed: 11
0JasonBourneTeacherPE39690.00.75YesNaNPhysical edTheaterNaNNaN
1JasonBourneTeacherDrafting39690.00.25YesNaNPhysical edTheaterNaNNaN
2AliciaKeysTeacherMusic37118.01.00YesNaNInstr. musicVocal musicNaNNaN
3AdaLovelaceTeacherNaN27515.01.00YesNaNPENDINGComputersNaNNaN
4DesusNiceAdministrationDean41431.01.00YesNaNPENDINGNaNNaNNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesNaNScience 6-12PhysicsNaNNaN
6Chien-ShiungWuTeacherChemistry11037.00.50YesNaNScience 6-12PhysicsNaNNaN
7NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
8JamesJoyceTeacherEnglish32994.00.50NoNaNNaNEnglish 6-12NaNNaN
9HedyLamarrTeacherScience27919.00.50NoNaNPENDINGNaNNaNNaN
10CarlosBoozerCoachBasketball42221.0NaNNoNaNPhysical edNaNNaNNaN
11YoungBoozerCoachNaN34700.0NaNNoNaNNaNPolitical sci.NaNNaN
12MichealLarsenTeacherEnglish40071.00.80NoNaNVocal musicEnglishNaNNaN
13NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
15NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
16NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n
" + ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 2 + "output_type": "execute_result" } + ], + "source": [ + "df = pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n", + "df" ] }, { @@ -253,28 +251,13 @@ ] }, { - "source": [ - "cleaned_df = (\n", - " pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n", - " .clean_names()\n", - " .remove_empty()\n", - " .rename_column(\"%_allocated\", \"percent_allocated\")\n", - " .rename_column(\"full_time_\", \"full_time\")\n", - " .coalesce([\"certification\", \"certification_1\"], \"certification\")\n", - " .encode_categorical([\"subject\", \"employee_status\", \"full_time\"])\n", - " .convert_excel_date(\"hire_date\")\n", - " .reset_index(drop=True)\n", - ")\n", - "\n", - "cleaned_df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 3, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertification
0JasonBourneTeacherPE2008-08-300.75YesPhysical ed
1JasonBourneTeacherDrafting2008-08-300.25YesPhysical ed
2AliciaKeysTeacherMusic2001-08-151.00YesInstr. music
3AdaLovelaceTeacherNaN1975-05-011.00YesPENDING
4DesusNiceAdministrationDean2013-06-061.00YesPENDING
5Chien-ShiungWuTeacherPhysics1930-03-200.50YesScience 6-12
6Chien-ShiungWuTeacherChemistry1930-03-200.50YesScience 6-12
7JamesJoyceTeacherEnglish1990-05-010.50NoEnglish 6-12
8HedyLamarrTeacherScience1976-06-080.50NoPENDING
9CarlosBoozerCoachBasketball2015-08-05NaNNoPhysical ed
10YoungBoozerCoachNaN1995-01-01NaNNoPolitical sci.
11MichealLarsenTeacherEnglish2009-09-150.80NoVocal music
\n
", "text/plain": [ " first_name last_name employee_status subject hire_date \\\n", "0 Jason Bourne Teacher PE 2008-08-30 \n", @@ -303,12 +286,27 @@ "9 NaN No Physical ed \n", "10 NaN No Political sci. \n", "11 0.80 No Vocal music " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertification
0JasonBourneTeacherPE2008-08-300.75YesPhysical ed
1JasonBourneTeacherDrafting2008-08-300.25YesPhysical ed
2AliciaKeysTeacherMusic2001-08-151.00YesInstr. music
3AdaLovelaceTeacherNaN1975-05-011.00YesPENDING
4DesusNiceAdministrationDean2013-06-061.00YesPENDING
5Chien-ShiungWuTeacherPhysics1930-03-200.50YesScience 6-12
6Chien-ShiungWuTeacherChemistry1930-03-200.50YesScience 6-12
7JamesJoyceTeacherEnglish1990-05-010.50NoEnglish 6-12
8HedyLamarrTeacherScience1976-06-080.50NoPENDING
9CarlosBoozerCoachBasketball2015-08-05NaNNoPhysical ed
10YoungBoozerCoachNaN1995-01-01NaNNoPolitical sci.
11MichealLarsenTeacherEnglish2009-09-150.80NoVocal music
\n
" + ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } + ], + "source": [ + "cleaned_df = (\n", + " pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n", + " .clean_names()\n", + " .remove_empty()\n", + " .rename_column(\"%_allocated\", \"percent_allocated\")\n", + " .rename_column(\"full_time_\", \"full_time\")\n", + " .coalesce([\"certification\", \"certification_1\"], \"certification\")\n", + " .encode_categorical([\"subject\", \"employee_status\", \"full_time\"])\n", + " .convert_excel_date(\"hire_date\")\n", + " .reset_index(drop=True)\n", + ")\n", + "\n", + "cleaned_df" ] }, { @@ -330,17 +328,13 @@ ] }, { - "source": [ - "df = pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n", - "df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 4, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
First NameLast NameEmployee StatusSubjectHire Date% AllocatedFull time?do not edit! --->CertificationCertification.1Certification.2Unnamed: 11
0JasonBourneTeacherPE39690.00.75YesNaNPhysical edTheaterNaNNaN
1JasonBourneTeacherDrafting39690.00.25YesNaNPhysical edTheaterNaNNaN
2AliciaKeysTeacherMusic37118.01.00YesNaNInstr. musicVocal musicNaNNaN
3AdaLovelaceTeacherNaN27515.01.00YesNaNPENDINGComputersNaNNaN
4DesusNiceAdministrationDean41431.01.00YesNaNPENDINGNaNNaNNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesNaNScience 6-12PhysicsNaNNaN
6Chien-ShiungWuTeacherChemistry11037.00.50YesNaNScience 6-12PhysicsNaNNaN
7NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
8JamesJoyceTeacherEnglish32994.00.50NoNaNNaNEnglish 6-12NaNNaN
9HedyLamarrTeacherScience27919.00.50NoNaNPENDINGNaNNaNNaN
10CarlosBoozerCoachBasketball42221.0NaNNoNaNPhysical edNaNNaNNaN
11YoungBoozerCoachNaN34700.0NaNNoNaNNaNPolitical sci.NaNNaN
12MichealLarsenTeacherEnglish40071.00.80NoNaNVocal musicEnglishNaNNaN
13NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
15NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
16NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n
", "text/plain": [ " First Name Last Name Employee Status Subject Hire Date \\\n", "0 Jason Bourne Teacher PE 39690.0 \n", @@ -398,12 +392,16 @@ "14 NaN NaN \n", "15 NaN NaN \n", "16 NaN NaN " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
First NameLast NameEmployee StatusSubjectHire Date% AllocatedFull time?do not edit! --->CertificationCertification.1Certification.2Unnamed: 11
0JasonBourneTeacherPE39690.00.75YesNaNPhysical edTheaterNaNNaN
1JasonBourneTeacherDrafting39690.00.25YesNaNPhysical edTheaterNaNNaN
2AliciaKeysTeacherMusic37118.01.00YesNaNInstr. musicVocal musicNaNNaN
3AdaLovelaceTeacherNaN27515.01.00YesNaNPENDINGComputersNaNNaN
4DesusNiceAdministrationDean41431.01.00YesNaNPENDINGNaNNaNNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesNaNScience 6-12PhysicsNaNNaN
6Chien-ShiungWuTeacherChemistry11037.00.50YesNaNScience 6-12PhysicsNaNNaN
7NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
8JamesJoyceTeacherEnglish32994.00.50NoNaNNaNEnglish 6-12NaNNaN
9HedyLamarrTeacherScience27919.00.50NoNaNPENDINGNaNNaNNaN
10CarlosBoozerCoachBasketball42221.0NaNNoNaNPhysical edNaNNaNNaN
11YoungBoozerCoachNaN34700.0NaNNoNaNNaNPolitical sci.NaNNaN
12MichealLarsenTeacherEnglish40071.00.80NoNaNVocal musicEnglishNaNNaN
13NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
15NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
16NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n
" + ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } + ], + "source": [ + "df = pd.read_excel('dirty_data.xlsx', engine='openpyxl')\n", + "df" ] }, { @@ -414,17 +412,13 @@ ] }, { - "source": [ - "df = df.clean_names()\n", - "df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 5, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_date%_allocatedfull_time_do_not_edit!_>certificationcertification_1certification_2unnamed_11
0JasonBourneTeacherPE39690.00.75YesNaNPhysical edTheaterNaNNaN
1JasonBourneTeacherDrafting39690.00.25YesNaNPhysical edTheaterNaNNaN
2AliciaKeysTeacherMusic37118.01.00YesNaNInstr. musicVocal musicNaNNaN
3AdaLovelaceTeacherNaN27515.01.00YesNaNPENDINGComputersNaNNaN
4DesusNiceAdministrationDean41431.01.00YesNaNPENDINGNaNNaNNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesNaNScience 6-12PhysicsNaNNaN
6Chien-ShiungWuTeacherChemistry11037.00.50YesNaNScience 6-12PhysicsNaNNaN
7NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
8JamesJoyceTeacherEnglish32994.00.50NoNaNNaNEnglish 6-12NaNNaN
9HedyLamarrTeacherScience27919.00.50NoNaNPENDINGNaNNaNNaN
10CarlosBoozerCoachBasketball42221.0NaNNoNaNPhysical edNaNNaNNaN
11YoungBoozerCoachNaN34700.0NaNNoNaNNaNPolitical sci.NaNNaN
12MichealLarsenTeacherEnglish40071.00.80NoNaNVocal musicEnglishNaNNaN
13NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
15NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
16NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n
", "text/plain": [ " first_name last_name employee_status subject hire_date \\\n", "0 Jason Bourne Teacher PE 39690.0 \n", @@ -482,12 +476,16 @@ "14 NaN NaN \n", "15 NaN NaN \n", "16 NaN NaN " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_date%_allocatedfull_time_do_not_edit!_>certificationcertification_1certification_2unnamed_11
0JasonBourneTeacherPE39690.00.75YesNaNPhysical edTheaterNaNNaN
1JasonBourneTeacherDrafting39690.00.25YesNaNPhysical edTheaterNaNNaN
2AliciaKeysTeacherMusic37118.01.00YesNaNInstr. musicVocal musicNaNNaN
3AdaLovelaceTeacherNaN27515.01.00YesNaNPENDINGComputersNaNNaN
4DesusNiceAdministrationDean41431.01.00YesNaNPENDINGNaNNaNNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesNaNScience 6-12PhysicsNaNNaN
6Chien-ShiungWuTeacherChemistry11037.00.50YesNaNScience 6-12PhysicsNaNNaN
7NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
8JamesJoyceTeacherEnglish32994.00.50NoNaNNaNEnglish 6-12NaNNaN
9HedyLamarrTeacherScience27919.00.50NoNaNPENDINGNaNNaNNaN
10CarlosBoozerCoachBasketball42221.0NaNNoNaNPhysical edNaNNaNNaN
11YoungBoozerCoachNaN34700.0NaNNoNaNNaNPolitical sci.NaNNaN
12MichealLarsenTeacherEnglish40071.00.80NoNaNVocal musicEnglishNaNNaN
13NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
15NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
16NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n
" + ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } + ], + "source": [ + "df = df.clean_names()\n", + "df" ] }, { @@ -498,17 +496,13 @@ ] }, { - "source": [ - "df = df.remove_empty()\n", - "df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 6, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_date%_allocatedfull_time_certificationcertification_1
0JasonBourneTeacherPE39690.00.75YesPhysical edTheater
1JasonBourneTeacherDrafting39690.00.25YesPhysical edTheater
2AliciaKeysTeacherMusic37118.01.00YesInstr. musicVocal music
3AdaLovelaceTeacherNaN27515.01.00YesPENDINGComputers
4DesusNiceAdministrationDean41431.01.00YesPENDINGNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesScience 6-12Physics
6Chien-ShiungWuTeacherChemistry11037.00.50YesScience 6-12Physics
7JamesJoyceTeacherEnglish32994.00.50NoNaNEnglish 6-12
8HedyLamarrTeacherScience27919.00.50NoPENDINGNaN
9CarlosBoozerCoachBasketball42221.0NaNNoPhysical edNaN
10YoungBoozerCoachNaN34700.0NaNNoNaNPolitical sci.
11MichealLarsenTeacherEnglish40071.00.80NoVocal musicEnglish
\n
", "text/plain": [ " first_name last_name employee_status subject hire_date \\\n", "0 Jason Bourne Teacher PE 39690.0 \n", @@ -537,12 +531,16 @@ "9 NaN No Physical ed NaN \n", "10 NaN No NaN Political sci. \n", "11 0.80 No Vocal music English " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_date%_allocatedfull_time_certificationcertification_1
0JasonBourneTeacherPE39690.00.75YesPhysical edTheater
1JasonBourneTeacherDrafting39690.00.25YesPhysical edTheater
2AliciaKeysTeacherMusic37118.01.00YesInstr. musicVocal music
3AdaLovelaceTeacherNaN27515.01.00YesPENDINGComputers
4DesusNiceAdministrationDean41431.01.00YesPENDINGNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesScience 6-12Physics
6Chien-ShiungWuTeacherChemistry11037.00.50YesScience 6-12Physics
7JamesJoyceTeacherEnglish32994.00.50NoNaNEnglish 6-12
8HedyLamarrTeacherScience27919.00.50NoPENDINGNaN
9CarlosBoozerCoachBasketball42221.0NaNNoPhysical edNaN
10YoungBoozerCoachNaN34700.0NaNNoNaNPolitical sci.
11MichealLarsenTeacherEnglish40071.00.80NoVocal musicEnglish
\n
" + ] }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } + ], + "source": [ + "df = df.remove_empty()\n", + "df" ] }, { @@ -553,20 +551,13 @@ ] }, { - "source": [ - "df = (\n", - " df.rename_column(\"%_allocated\", \"percent_allocated\")\n", - " .rename_column(\"full_time_\", \"full_time\")\n", - ")\n", - "df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 7, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertificationcertification_1
0JasonBourneTeacherPE39690.00.75YesPhysical edTheater
1JasonBourneTeacherDrafting39690.00.25YesPhysical edTheater
2AliciaKeysTeacherMusic37118.01.00YesInstr. musicVocal music
3AdaLovelaceTeacherNaN27515.01.00YesPENDINGComputers
4DesusNiceAdministrationDean41431.01.00YesPENDINGNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesScience 6-12Physics
6Chien-ShiungWuTeacherChemistry11037.00.50YesScience 6-12Physics
7JamesJoyceTeacherEnglish32994.00.50NoNaNEnglish 6-12
8HedyLamarrTeacherScience27919.00.50NoPENDINGNaN
9CarlosBoozerCoachBasketball42221.0NaNNoPhysical edNaN
10YoungBoozerCoachNaN34700.0NaNNoNaNPolitical sci.
11MichealLarsenTeacherEnglish40071.00.80NoVocal musicEnglish
\n
", "text/plain": [ " first_name last_name employee_status subject hire_date \\\n", "0 Jason Bourne Teacher PE 39690.0 \n", @@ -595,12 +586,19 @@ "9 NaN No Physical ed NaN \n", "10 NaN No NaN Political sci. \n", "11 0.80 No Vocal music English " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertificationcertification_1
0JasonBourneTeacherPE39690.00.75YesPhysical edTheater
1JasonBourneTeacherDrafting39690.00.25YesPhysical edTheater
2AliciaKeysTeacherMusic37118.01.00YesInstr. musicVocal music
3AdaLovelaceTeacherNaN27515.01.00YesPENDINGComputers
4DesusNiceAdministrationDean41431.01.00YesPENDINGNaN
5Chien-ShiungWuTeacherPhysics11037.00.50YesScience 6-12Physics
6Chien-ShiungWuTeacherChemistry11037.00.50YesScience 6-12Physics
7JamesJoyceTeacherEnglish32994.00.50NoNaNEnglish 6-12
8HedyLamarrTeacherScience27919.00.50NoPENDINGNaN
9CarlosBoozerCoachBasketball42221.0NaNNoPhysical edNaN
10YoungBoozerCoachNaN34700.0NaNNoNaNPolitical sci.
11MichealLarsenTeacherEnglish40071.00.80NoVocal musicEnglish
\n
" + ] }, + "execution_count": 7, "metadata": {}, - "execution_count": 7 + "output_type": "execute_result" } + ], + "source": [ + "df = (\n", + " df.rename_column(\"%_allocated\", \"percent_allocated\")\n", + " .rename_column(\"full_time_\", \"full_time\")\n", + ")\n", + "df" ] }, { @@ -611,17 +609,13 @@ ] }, { - "source": [ - "df = df.coalesce([\"certification\", \"certification_1\"], \"certification\")\n", - "df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 8, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertification
0JasonBourneTeacherPE39690.00.75YesPhysical ed
1JasonBourneTeacherDrafting39690.00.25YesPhysical ed
2AliciaKeysTeacherMusic37118.01.00YesInstr. music
3AdaLovelaceTeacherNaN27515.01.00YesPENDING
4DesusNiceAdministrationDean41431.01.00YesPENDING
5Chien-ShiungWuTeacherPhysics11037.00.50YesScience 6-12
6Chien-ShiungWuTeacherChemistry11037.00.50YesScience 6-12
7JamesJoyceTeacherEnglish32994.00.50NoEnglish 6-12
8HedyLamarrTeacherScience27919.00.50NoPENDING
9CarlosBoozerCoachBasketball42221.0NaNNoPhysical ed
10YoungBoozerCoachNaN34700.0NaNNoPolitical sci.
11MichealLarsenTeacherEnglish40071.00.80NoVocal music
\n
", "text/plain": [ " first_name last_name employee_status subject hire_date \\\n", "0 Jason Bourne Teacher PE 39690.0 \n", @@ -650,12 +644,16 @@ "9 NaN No Physical ed \n", "10 NaN No Political sci. \n", "11 0.80 No Vocal music " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertification
0JasonBourneTeacherPE39690.00.75YesPhysical ed
1JasonBourneTeacherDrafting39690.00.25YesPhysical ed
2AliciaKeysTeacherMusic37118.01.00YesInstr. music
3AdaLovelaceTeacherNaN27515.01.00YesPENDING
4DesusNiceAdministrationDean41431.01.00YesPENDING
5Chien-ShiungWuTeacherPhysics11037.00.50YesScience 6-12
6Chien-ShiungWuTeacherChemistry11037.00.50YesScience 6-12
7JamesJoyceTeacherEnglish32994.00.50NoEnglish 6-12
8HedyLamarrTeacherScience27919.00.50NoPENDING
9CarlosBoozerCoachBasketball42221.0NaNNoPhysical ed
10YoungBoozerCoachNaN34700.0NaNNoPolitical sci.
11MichealLarsenTeacherEnglish40071.00.80NoVocal music
\n
" + ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 8 + "output_type": "execute_result" } + ], + "source": [ + "df = df.coalesce([\"certification\", \"certification_1\"], \"certification\")\n", + "df" ] }, { @@ -666,15 +664,11 @@ ] }, { - "source": [ - "df.dtypes" - ], "cell_type": "code", - "metadata": {}, "execution_count": 9, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "first_name object\n", @@ -688,22 +682,21 @@ "dtype: object" ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } + ], + "source": [ + "df.dtypes" ] }, { - "source": [ - "df.encode_categorical([\"subject\", \"employee_status\", \"full_time\"])\n", - "df.dtypes" - ], "cell_type": "code", - "metadata": {}, "execution_count": 10, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "first_name object\n", @@ -717,9 +710,14 @@ "dtype: object" ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } + ], + "source": [ + "df.encode_categorical([\"subject\", \"employee_status\", \"full_time\"])\n", + "df.dtypes" ] }, { @@ -730,17 +728,13 @@ ] }, { - "source": [ - "df.convert_excel_date(\"hire_date\")\n", - "df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 11, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertification
0JasonBourneTeacherPE2008-08-300.75YesPhysical ed
1JasonBourneTeacherDrafting2008-08-300.25YesPhysical ed
2AliciaKeysTeacherMusic2001-08-151.00YesInstr. music
3AdaLovelaceTeacherNaN1975-05-011.00YesPENDING
4DesusNiceAdministrationDean2013-06-061.00YesPENDING
5Chien-ShiungWuTeacherPhysics1930-03-200.50YesScience 6-12
6Chien-ShiungWuTeacherChemistry1930-03-200.50YesScience 6-12
7JamesJoyceTeacherEnglish1990-05-010.50NoEnglish 6-12
8HedyLamarrTeacherScience1976-06-080.50NoPENDING
9CarlosBoozerCoachBasketball2015-08-05NaNNoPhysical ed
10YoungBoozerCoachNaN1995-01-01NaNNoPolitical sci.
11MichealLarsenTeacherEnglish2009-09-150.80NoVocal music
\n
", "text/plain": [ " first_name last_name employee_status subject hire_date \\\n", "0 Jason Bourne Teacher PE 2008-08-30 \n", @@ -769,12 +763,16 @@ "9 NaN No Physical ed \n", "10 NaN No Political sci. \n", "11 0.80 No Vocal music " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertification
0JasonBourneTeacherPE2008-08-300.75YesPhysical ed
1JasonBourneTeacherDrafting2008-08-300.25YesPhysical ed
2AliciaKeysTeacherMusic2001-08-151.00YesInstr. music
3AdaLovelaceTeacherNaN1975-05-011.00YesPENDING
4DesusNiceAdministrationDean2013-06-061.00YesPENDING
5Chien-ShiungWuTeacherPhysics1930-03-200.50YesScience 6-12
6Chien-ShiungWuTeacherChemistry1930-03-200.50YesScience 6-12
7JamesJoyceTeacherEnglish1990-05-010.50NoEnglish 6-12
8HedyLamarrTeacherScience1976-06-080.50NoPENDING
9CarlosBoozerCoachBasketball2015-08-05NaNNoPhysical ed
10YoungBoozerCoachNaN1995-01-01NaNNoPolitical sci.
11MichealLarsenTeacherEnglish2009-09-150.80NoVocal music
\n
" + ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } + ], + "source": [ + "df.convert_excel_date(\"hire_date\")\n", + "df" ] }, { @@ -792,26 +790,13 @@ ] }, { - "source": [ - "data_df = (\n", - " cleaned_df\n", - " .copy()\n", - " .add_columns(\n", - " lucky_number=np.random.randint(0, 10, len(cleaned_df)),\n", - " age=np.random.randint(10, 100, len(cleaned_df)),\n", - " employee_of_month_count=np.random.randint(0, 5, len(cleaned_df))\n", - " )\n", - ")\n", - "\n", - "data_df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 12, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertificationlucky_numberageemployee_of_month_count
0JasonBourneTeacherPE2008-08-300.75YesPhysical ed6740
1JasonBourneTeacherDrafting2008-08-300.25YesPhysical ed6904
2AliciaKeysTeacherMusic2001-08-151.00YesInstr. music2470
3AdaLovelaceTeacherNaN1975-05-011.00YesPENDING9760
4DesusNiceAdministrationDean2013-06-061.00YesPENDING4692
5Chien-ShiungWuTeacherPhysics1930-03-200.50YesScience 6-127613
6Chien-ShiungWuTeacherChemistry1930-03-200.50YesScience 6-124470
7JamesJoyceTeacherEnglish1990-05-010.50NoEnglish 6-129343
8HedyLamarrTeacherScience1976-06-080.50NoPENDING6230
9CarlosBoozerCoachBasketball2015-08-05NaNNoPhysical ed1213
10YoungBoozerCoachNaN1995-01-01NaNNoPolitical sci.1831
11MichealLarsenTeacherEnglish2009-09-150.80NoVocal music5812
\n
", "text/plain": [ " first_name last_name employee_status subject hire_date \\\n", "0 Jason Bourne Teacher PE 2008-08-30 \n", @@ -854,12 +839,25 @@ "9 3 \n", "10 1 \n", "11 2 " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
first_namelast_nameemployee_statussubjecthire_datepercent_allocatedfull_timecertificationlucky_numberageemployee_of_month_count
0JasonBourneTeacherPE2008-08-300.75YesPhysical ed6740
1JasonBourneTeacherDrafting2008-08-300.25YesPhysical ed6904
2AliciaKeysTeacherMusic2001-08-151.00YesInstr. music2470
3AdaLovelaceTeacherNaN1975-05-011.00YesPENDING9760
4DesusNiceAdministrationDean2013-06-061.00YesPENDING4692
5Chien-ShiungWuTeacherPhysics1930-03-200.50YesScience 6-127613
6Chien-ShiungWuTeacherChemistry1930-03-200.50YesScience 6-124470
7JamesJoyceTeacherEnglish1990-05-010.50NoEnglish 6-129343
8HedyLamarrTeacherScience1976-06-080.50NoPENDING6230
9CarlosBoozerCoachBasketball2015-08-05NaNNoPhysical ed1213
10YoungBoozerCoachNaN1995-01-01NaNNoPolitical sci.1831
11MichealLarsenTeacherEnglish2009-09-150.80NoVocal music5812
\n
" + ] }, + "execution_count": 12, "metadata": {}, - "execution_count": 12 + "output_type": "execute_result" } + ], + "source": [ + "data_df = (\n", + " cleaned_df\n", + " .copy()\n", + " .add_columns(\n", + " lucky_number=np.random.randint(0, 10, len(cleaned_df)),\n", + " age=np.random.randint(10, 100, len(cleaned_df)),\n", + " employee_of_month_count=np.random.randint(0, 5, len(cleaned_df))\n", + " )\n", + ")\n", + "\n", + "data_df" ] }, { @@ -870,23 +868,13 @@ ] }, { - "source": [ - "stats_df = (\n", - " data_df.groupby('employee_status')\n", - " .agg(['mean', 'median'])\n", - " .collapse_levels()\n", - " .reset_index()\n", - ")\n", - "\n", - "stats_df" - ], "cell_type": "code", - "metadata": {}, "execution_count": 13, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
employee_statuspercent_allocated_meanpercent_allocated_medianlucky_number_meanlucky_number_medianage_meanage_medianemployee_of_month_count_meanemployee_of_month_count_median
0Administration1.0000001.04469.000000692.0000002
1CoachNaNNaN1152.000000522.0000002
2Teacher0.6444440.56659.222222611.3333330
\n
", "text/plain": [ " employee_status percent_allocated_mean percent_allocated_median \\\n", "0 Administration 1.000000 1.0 \n", @@ -902,12 +890,22 @@ "0 2.000000 2 \n", "1 2.000000 2 \n", "2 1.333333 0 " - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
employee_statuspercent_allocated_meanpercent_allocated_medianlucky_number_meanlucky_number_medianage_meanage_medianemployee_of_month_count_meanemployee_of_month_count_median
0Administration1.0000001.04469.000000692.0000002
1CoachNaNNaN1152.000000522.0000002
2Teacher0.6444440.56659.222222611.3333330
\n
" + ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 13 + "output_type": "execute_result" } + ], + "source": [ + "stats_df = (\n", + " data_df.groupby('employee_status')\n", + " .agg(['mean', 'median'])\n", + " .collapse_levels()\n", + " .reset_index()\n", + ")\n", + "\n", + "stats_df" ] } ], diff --git a/examples/notebooks/select_columns.ipynb b/examples/notebooks/select_columns.ipynb index 88a9e1a37..09d410833 100644 --- a/examples/notebooks/select_columns.ipynb +++ b/examples/notebooks/select_columns.ipynb @@ -13,13 +13,14 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import janitor\n", - "import numpy as np\n", "import datetime\n", "import re\n", - "from janitor import patterns\n", - "from pandas.api.types import is_datetime64_dtype" + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from pandas.api.types import is_datetime64_dtype\n", + "\n", + "from janitor import patterns" ] }, { diff --git a/examples/notebooks/sort_columns.ipynb b/examples/notebooks/sort_columns.ipynb index efbb2bc30..c143e2f24 100644 --- a/examples/notebooks/sort_columns.ipynb +++ b/examples/notebooks/sort_columns.ipynb @@ -13,9 +13,8 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd \n", - "import janitor\n", - "from numpy import nan" + "import pandas as pd\n", + "from numpy import nan\n" ] }, { diff --git a/examples/notebooks/sort_naturally.ipynb b/examples/notebooks/sort_naturally.ipynb index 672683650..806392c6c 100644 --- a/examples/notebooks/sort_naturally.ipynb +++ b/examples/notebooks/sort_naturally.ipynb @@ -13,9 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas_flavor as pf\n", - "import pandas as pd\n", - "import janitor" + "import pandas as pd" ] }, { diff --git a/examples/notebooks/teacher_pupil.ipynb b/examples/notebooks/teacher_pupil.ipynb index f2a38206e..3c62df006 100644 --- a/examples/notebooks/teacher_pupil.ipynb +++ b/examples/notebooks/teacher_pupil.ipynb @@ -151,7 +151,6 @@ } ], "source": [ - "import janitor\n", "import pandas as pd\n", "import pandas_flavor as pf\n", "\n", diff --git a/examples/notebooks/transform_column.ipynb b/examples/notebooks/transform_column.ipynb index fe53a2550..5a2b112f0 100644 --- a/examples/notebooks/transform_column.ipynb +++ b/examples/notebooks/transform_column.ipynb @@ -36,9 +36,8 @@ "metadata": {}, "outputs": [], "source": [ - "import janitor\n", - "import pandas as pd\n", - "import numpy as np" + "import numpy as np\n", + "import pandas as pd" ] }, { @@ -148,6 +147,7 @@ "source": [ "from random import choice\n", "\n", + "\n", "def make_strings(length: int):\n", " return \"\".join(choice(\"ABCDEFGHIJKLMNOPQRSTUVWXYZ\") for _ in range(length))\n", "\n",