Skip to content

Commit

Permalink
fix scipy2019 slides ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
samuel.oranyeli committed Sep 7, 2024
1 parent 8c4384d commit de3f958
Showing 1 changed file with 29 additions and 28 deletions.
57 changes: 29 additions & 28 deletions talks/scipy2019/slides.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,8 @@
}
],
"source": [
"from pyprojroot import here\n",
"import pandas as pd\n",
"from pyprojroot import here\n",
"\n",
"df = pd.read_excel(here() / \"examples/notebooks/dirty_data.xlsx\")\n",
"df\n",
Expand Down Expand Up @@ -1142,22 +1142,17 @@
}
],
"source": [
"import datetime as dt \n",
"import datetime as dt\n",
"\n",
"# Get the \"hire date\" into shape.\n",
"df[\"hire_date\"] = (\n",
" pd.TimedeltaIndex(df[\"hire_date\"], unit=\"d\") \n",
" + dt.datetime(1899, 12, 30)\n",
"df[\"hire_date\"] = pd.TimedeltaIndex(df[\"hire_date\"], unit=\"d\") + dt.datetime(\n",
" 1899, 12, 30\n",
")\n",
"\n",
"# Those certification columns don't look particularly good. Should just have one of them. \n",
"df['certification'] = df['certification'].combine_first(df['Certification.1'])\n",
"df = (\n",
" df\n",
" .drop(\n",
" [\"Certification.1\", \"Certification.2\"], \n",
" axis=1\n",
" )\n",
"# Those certification columns don't look particularly good.\n",
"# Should just have one of them.\n",
"df[\"certification\"] = df[\"certification\"].combine_first(df[\"Certification.1\"])\n",
"df = df.drop([\"Certification.1\", \"Certification.2\"], axis=1)\n",
"df\n",
"\n",
"# Next problem: Missing a column"
Expand Down Expand Up @@ -1389,9 +1384,13 @@
}
],
"source": [
"# Add a column for \"gratitude points\" given by students to the teachers.\n",
"# Add a column for \"gratitude points\"\n",
"# given by students to the teachers.\n",
"\n",
"gratitude_points = [10, 50, 20, 1000, 392, 115, 12, 182, 1190, 582, 25, 317]\n",
"\n",
"df = df.assign(gratitude_points=gratitude_points)\n",
"\n",
"df\n",
"\n",
"# Next problem: Might want to log-transform."
Expand Down Expand Up @@ -1653,7 +1652,9 @@
"import numpy as np\n",
"\n",
"# Finally, log10 transform the gratitude_points column.\n",
"\n",
"df[\"gratitude_points_log\"] = df[\"gratitude_points\"].apply(np.log10)\n",
"\n",
"df\n",
"\n",
"# So what does this code look like in totality?"
Expand All @@ -1672,7 +1673,8 @@
"df = (\n",
" pd.read_excel(\"../../examples/notebooks/dirty_data.xlsx\")\n",
" # Remove the empty column and empty row\n",
" .drop(\"do not edit! --->\", axis=1).drop(7, axis=0)\n",
" .drop(\"do not edit! --->\", axis=1)\n",
" .drop(7, axis=0)\n",
" .rename(\n",
" mapper={\n",
" \"First Name\": \"first_name\",\n",
Expand All @@ -1684,16 +1686,17 @@
" \"Full time?\": \"full_time\",\n",
" \"Certification\": \"certification\",\n",
" },\n",
" axis=1\n",
" axis=1,\n",
" )\n",
")\n",
"# Correct hire date.\n",
"df[\"hire_date\"] = pd.TimedeltaIndex(df[\"hire_date\"], unit=\"d\") + dt.datetime(1899, 12, 30)\n",
"df[\"hire_date\"] = pd.TimedeltaIndex(df[\"hire_date\"], unit=\"d\") + dt.datetime(\n",
" 1899, 12, 30\n",
")\n",
"# Squash certification columns\n",
"df['certification'] = df['certification'].combine_first(df['Certification.1'])\n",
"df[\"certification\"] = df[\"certification\"].combine_first(df[\"Certification.1\"])\n",
"df = (\n",
" df\n",
" .drop([\"Certification.1\", \"Certification.2\"], axis=1)\n",
" df.drop([\"Certification.1\", \"Certification.2\"], axis=1)\n",
" # Add gratitude points.\n",
" .assign(gratitude_points=gratitude_points)\n",
")\n",
Expand Down Expand Up @@ -1759,7 +1762,6 @@
},
"outputs": [],
"source": [
"import janitor\n",
"# Yes, the import name is \"janitor\", \n",
"# but the package is \"pyjanitor\""
]
Expand Down Expand Up @@ -2133,10 +2135,11 @@
},
"outputs": [],
"source": [
"import pandas_flavor as pf\n",
"from numbers import Number\n",
"from functools import partial\n",
"\n",
"import pandas_flavor as pf\n",
"\n",
"\n",
"@pf.register_dataframe_method\n",
"def log_transform(df, column_name, base, dest_column_name=None):\n",
" \"\"\"\n",
Expand Down Expand Up @@ -2726,7 +2729,6 @@
}
],
"source": [
"import janitor.biology\n",
"\n",
"sequences = (\n",
" pd.read_csv(here() / \"tests/test_data/sequences.tsv\", sep=\"\\t\")\n",
Expand Down Expand Up @@ -2845,7 +2847,6 @@
}
],
"source": [
"import janitor.chemistry\n",
"\n",
"smiles = (\n",
" pd.read_csv(here() / \"tests/test_data/corrected_smiles.txt\", sep=\"\\t\", header=None)\n",
Expand Down Expand Up @@ -3154,9 +3155,9 @@
"metadata": {
"celltoolbar": "Slideshow",
"kernelspec": {
"display_name": "pyjanitor-dev",
"display_name": "base",
"language": "python",
"name": "pyjanitor-dev"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -3168,7 +3169,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.9.16"
},
"rise": {
"enable_chalkboard": true,
Expand Down

0 comments on commit de3f958

Please sign in to comment.