diff --git a/talks/scipy2019/slides.ipynb b/talks/scipy2019/slides.ipynb index cead8b889..bf14fb799 100644 --- a/talks/scipy2019/slides.ipynb +++ b/talks/scipy2019/slides.ipynb @@ -398,8 +398,8 @@ } ], "source": [ - "from pyprojroot import here\n", "import pandas as pd\n", + "from pyprojroot import here\n", "\n", "df = pd.read_excel(here() / \"examples/notebooks/dirty_data.xlsx\")\n", "df\n", @@ -1142,22 +1142,17 @@ } ], "source": [ - "import datetime as dt \n", + "import datetime as dt\n", "\n", "# Get the \"hire date\" into shape.\n", - "df[\"hire_date\"] = (\n", - " pd.TimedeltaIndex(df[\"hire_date\"], unit=\"d\") \n", - " + dt.datetime(1899, 12, 30)\n", + "df[\"hire_date\"] = pd.TimedeltaIndex(df[\"hire_date\"], unit=\"d\") + dt.datetime(\n", + " 1899, 12, 30\n", ")\n", "\n", - "# Those certification columns don't look particularly good. Should just have one of them. \n", - "df['certification'] = df['certification'].combine_first(df['Certification.1'])\n", - "df = (\n", - " df\n", - " .drop(\n", - " [\"Certification.1\", \"Certification.2\"], \n", - " axis=1\n", - " )\n", + "# Those certification columns don't look particularly good.\n", + "# Should just have one of them.\n", + "df[\"certification\"] = df[\"certification\"].combine_first(df[\"Certification.1\"])\n", + "df = df.drop([\"Certification.1\", \"Certification.2\"], axis=1)\n", "df\n", "\n", "# Next problem: Missing a column" @@ -1389,9 +1384,13 @@ } ], "source": [ - "# Add a column for \"gratitude points\" given by students to the teachers.\n", + "# Add a column for \"gratitude points\"\n", + "# given by students to the teachers.\n", + "\n", "gratitude_points = [10, 50, 20, 1000, 392, 115, 12, 182, 1190, 582, 25, 317]\n", + "\n", "df = df.assign(gratitude_points=gratitude_points)\n", + "\n", "df\n", "\n", "# Next problem: Might want to log-transform." @@ -1653,7 +1652,9 @@ "import numpy as np\n", "\n", "# Finally, log10 transform the gratitude_points column.\n", + "\n", "df[\"gratitude_points_log\"] = df[\"gratitude_points\"].apply(np.log10)\n", + "\n", "df\n", "\n", "# So what does this code look like in totality?" @@ -1672,7 +1673,8 @@ "df = (\n", " pd.read_excel(\"../../examples/notebooks/dirty_data.xlsx\")\n", " # Remove the empty column and empty row\n", - " .drop(\"do not edit! --->\", axis=1).drop(7, axis=0)\n", + " .drop(\"do not edit! --->\", axis=1)\n", + " .drop(7, axis=0)\n", " .rename(\n", " mapper={\n", " \"First Name\": \"first_name\",\n", @@ -1684,16 +1686,17 @@ " \"Full time?\": \"full_time\",\n", " \"Certification\": \"certification\",\n", " },\n", - " axis=1\n", + " axis=1,\n", " )\n", ")\n", "# Correct hire date.\n", - "df[\"hire_date\"] = pd.TimedeltaIndex(df[\"hire_date\"], unit=\"d\") + dt.datetime(1899, 12, 30)\n", + "df[\"hire_date\"] = pd.TimedeltaIndex(df[\"hire_date\"], unit=\"d\") + dt.datetime(\n", + " 1899, 12, 30\n", + ")\n", "# Squash certification columns\n", - "df['certification'] = df['certification'].combine_first(df['Certification.1'])\n", + "df[\"certification\"] = df[\"certification\"].combine_first(df[\"Certification.1\"])\n", "df = (\n", - " df\n", - " .drop([\"Certification.1\", \"Certification.2\"], axis=1)\n", + " df.drop([\"Certification.1\", \"Certification.2\"], axis=1)\n", " # Add gratidude points.\n", " .assign(gratitude_points=gratitude_points)\n", ")\n", @@ -1759,7 +1762,6 @@ }, "outputs": [], "source": [ - "import janitor\n", "# Yes, the import name is \"janitor\", \n", "# but the package is \"pyjanitor\"" ] @@ -2133,10 +2135,11 @@ }, "outputs": [], "source": [ - "import pandas_flavor as pf\n", - "from numbers import Number\n", "from functools import partial\n", "\n", + "import pandas_flavor as pf\n", + "\n", + "\n", "@pf.register_dataframe_method\n", "def log_transform(df, column_name, base, dest_column_name=None):\n", " \"\"\"\n", @@ -2726,7 +2729,6 @@ } ], "source": [ - "import janitor.biology\n", "\n", "sequences = (\n", " pd.read_csv(here() / \"tests/test_data/sequences.tsv\", sep=\"\\t\")\n", @@ -2845,7 +2847,6 @@ } ], "source": [ - "import janitor.chemistry\n", "\n", "smiles = (\n", " pd.read_csv(here() / \"tests/test_data/corrected_smiles.txt\", sep=\"\\t\", header=None)\n", @@ -3154,9 +3155,9 @@ "metadata": { "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "pyjanitor-dev", + "display_name": "base", "language": "python", - "name": "pyjanitor-dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -3168,7 +3169,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.16" }, "rise": { "enable_chalkboard": true,