diff --git a/docs/examples/dplyr/dplyr.ipynb b/docs/examples/dplyr/dplyr.ipynb
index 394b79a..621285f 100644
--- a/docs/examples/dplyr/dplyr.ipynb
+++ b/docs/examples/dplyr/dplyr.ipynb
@@ -5,12 +5,6 @@
"metadata": {},
"source": [
"
\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
@@ -36,7 +30,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"library(dplyr)"
@@ -58,7 +56,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> summarize(avg = mean(mpg)) "
@@ -75,7 +77,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> count(cyl) "
@@ -141,7 +147,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> filter(mpg > 20)"
@@ -157,7 +167,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> distinct(gear)"
@@ -173,7 +187,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> slice(10:15)"
@@ -189,7 +207,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> slice_sample(n = 5, replace = TRUE)"
@@ -205,7 +227,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> slice_min(mpg, prop = 0.25)"
@@ -221,7 +247,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> slice_head(n = 5) "
@@ -257,7 +287,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> arrange(mpg)\n",
@@ -277,7 +311,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"cars |> add_row(speed = 1, dist = 1)"
@@ -301,7 +339,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> pull(wt)"
@@ -317,7 +359,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> select(mpg, wt)"
@@ -333,7 +379,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> relocate(mpg, cyl, after = last_col())"
@@ -349,7 +399,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> select(mpg:cyl)"
@@ -375,7 +429,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"df <- tibble(x_1 = c(1, 2), x_2 = c(3, 4), y = c(4, 5))"
@@ -391,7 +449,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"df |> summarize(across(everything(), mean))"
@@ -407,7 +469,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"df |> \n",
@@ -429,7 +495,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> mutate(gpm = 1 / mpg)\n",
@@ -446,7 +516,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"mtcars |> rename(miles_per_gallon = mpg)"
@@ -580,7 +654,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"a <- rownames_to_column(mtcars, var = \"C\")"
@@ -596,7 +674,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"column_to_rownames(a, var = \"C\")"
@@ -620,7 +702,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"x <- tribble(\n",
@@ -677,7 +763,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"left_join(x, y, by = join_by(A))\n",
@@ -694,7 +784,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"left_join(x, y, by = join_by(C == D))"
@@ -710,7 +804,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"left_join(x, y, by = join_by(C == D), suffix = c(\"1\", \"2\"))"
diff --git a/docs/examples/dplyr/dplyr.md b/docs/examples/dplyr/dplyr.md
new file mode 100644
index 0000000..5585ec8
--- /dev/null
+++ b/docs/examples/dplyr/dplyr.md
@@ -0,0 +1,437 @@
+
+
+# Dplyr
+
+
+![dplyr logo](https://rstudio.github.io/cheatsheets/html/images/logo-dplyr.png)
+
+
+[dplyr](https://dplyr.tidyverse.org/) functions work with pipes and expect **tidy data**. In tidy data:
+
+* Each **variable** is in its own **column**
+* Each **observation**, or **case**, is in its own **row**
+* **pipes** `x |> f(y)` becomes `f(x,y)`
+
+
+```R
+library(dplyr)
+```
+
+Summarize Cases
+-----------------------------------
+
+Apply **summary** functions to columns to create a new table of summary statistics. Summary functions take vectors as input and return one value back (see Summary Functions).
+
+* `summarize(.data, ...)`: Compute table of summaries.
+
+
+
+```R
+mtcars |> summarize(avg = mean(mpg))
+```
+
+* `count(.data, ..., wt = NULL, sort = FALSE, name = NULL)`: Count number of rows in each group defined by the variables in `...`. Also `tally()`, `add_count()`, and `add_tally()`.
+
+
+
+```R
+mtcars |> count(cyl)
+```
+
+Group Cases
+---------------------------
+
+* Use `group_by(.data, ..., .add = FALSE, .drop = TRUE)` to create a “grouped” copy of a table grouped by columns in `...`. dplyr functions will manipulate each “group” separately and combine the results.
+
+
+```R
+mtcars |>
+  group_by(cyl) |>
+  summarize(avg = mean(mpg))
+```
+
+* Use `rowwise(.data, ...)` to group data into individual rows. dplyr functions will compute results for each row. Also apply functions to list-columns. See tidyr cheatsheet for list-column workflow.
+
+```R
+starwars |>
+  rowwise() |>
+  mutate(film_count = length(films))
+```
+
+* `ungroup(x, ...)`: Returns ungrouped copy of table.
+
+Manipulate Cases
+-------------------------------------
+
+### Extract Cases
+
+Row functions return a subset of rows as a new table.
+
+* `filter(.data, ..., .preserve = FALSE)`: Extract rows that meet logical criteria.
+
+
+```R
+mtcars |> filter(mpg > 20)
+```
+
+* `distinct(.data, ..., .keep_all = FALSE)`: Remove rows with duplicate values.
+
+
+```R
+mtcars |> distinct(gear)
+```
+
+* `slice(.data, ..., .preserve = FALSE)`: Select rows by position.
+
+
+```R
+mtcars |> slice(10:15)
+```
+
+* `slice_sample(.data, ..., n, prop, weight_by = NULL, replace = FALSE)`: Randomly select rows. Use `n` to select a number of rows and `prop` to select a fraction of rows.
+
+
+```R
+mtcars |> slice_sample(n = 5, replace = TRUE)
+```
+
+* `slice_min(.data, order_by, ..., n, prop, with_ties = TRUE)` and `slice_max()`: Select rows with the lowest and highest values.
+
+
+```R
+mtcars |> slice_min(mpg, prop = 0.25)
+```
+
+* `slice_head(.data, ..., n, prop)` and `slice_tail()`: Select the first or last rows.
+
+
+```R
+mtcars |> slice_head(n = 5)
+```
+
+#### Logical and boolean operations to use with `filter()`
+
+* `==`
+* `<`
+* `<=`
+* `is.na()`
+* `%in%`
+* `|`
+* `xor()`
+* `!=`
+* `>`
+* `>=`
+* `!is.na()`
+* `!`
+* `&`
+* See `?base::Logic` and `?Comparison` for help.
+
+### Arrange cases
+
+* `arrange(.data, ..., .by_group = FALSE)`: Order rows by values of a column or columns (low to high), use with `desc()` to order from high to low.
+
+
+
+```R
+mtcars |> arrange(mpg)
+mtcars |> arrange(desc(mpg))
+```
+
+### Add Cases
+
+* `add_row(.data, ..., .before = NULL, .after = NULL)`: Add one or more rows to a table.
+
+
+
+```R
+cars |> add_row(speed = 1, dist = 1)
+```
+
+Manipulate Variables
+---------------------------------------------
+
+### Extract Variables
+
+Column functions return a set of columns as a new vector or table.
+
+* `pull(.data, var = -1, name = NULL, ...)`: Extract column values as a vector, by name or index.
+
+
+
+```R
+mtcars |> pull(wt)
+```
+
+* `select(.data, ...)`: Extract columns as a table.
+
+
+```R
+mtcars |> select(mpg, wt)
+```
+
+* `relocate(.data, ..., .before = NULL, .after = NULL)`: Move columns to new position.
+
+
+```R
+mtcars |> relocate(mpg, cyl, .after = last_col())
+```
+
+#### Use these helpers with `select()` and `across()`
+
+
+```R
+mtcars |> select(mpg:cyl)
+```
+
+* `contains(match)`
+* `num_range(prefix, range)`
+* `:`, e.g., `mpg:cyl`
+* `ends_with(match)`
+* `all_of(x)` or `any_of(x, ..., vars)`
+* `!`, e.g., `!gear`
+* `starts_with(match)`
+* `matches(match)`
+* `everything()`
+
+### Manipulate Multiple Variables at Once
+
+
+```R
+df <- tibble(x_1 = c(1, 2), x_2 = c(3, 4), y = c(4, 5))
+```
+
+* `across(.cols, .fns, ..., .names = NULL)`: summarize or mutate multiple columns in the same way.
+
+
+```R
+df |> summarize(across(everything(), mean))
+```
+
+* `c_across(.cols)`: Compute across columns in row-wise data.
+
+
+```R
+df |>
+ rowwise() |>
+ mutate(x_total = sum(c_across(1:2)))
+```
+
+### Make New Variables
+
+Apply **vectorized functions** to columns. Vectorized functions take vectors as input and return vectors of the same length as output (see Vectorized Functions).
+
+* `mutate(.data, ..., .keep = "all", .before = NULL, .after = NULL)`: Compute new column(s). Also `add_column()`.
+
+
+```R
+mtcars |> mutate(gpm = 1 / mpg)
+mtcars |> mutate(gpm = 1 / mpg, .keep = "none")
+```
+
+* `rename(.data, ...)`: Rename columns. Use `rename_with()` to rename with a function.
+
+
+```R
+mtcars |> rename(miles_per_gallon = mpg)
+```
+
+Vectorized Functions
+---------------------------------------------
+
+### To Use with `mutate()`
+
+`mutate()` applies vectorized functions to columns to create new columns. Vectorized functions take vectors as input and return vectors of the same length as output.
+
+### Offset
+
+* `dplyr::lag()`: offset elements by 1
+* `dplyr::lead()`: offset elements by -1
+
+### Cumulative Aggregate
+
+* `dplyr::cumall()`: cumulative `all()`
+* `dplyr::cumany()`: cumulative `any()`
+* `cummax()`: cumulative `max()`
+* `dplyr::cummean()`: cumulative `mean()`
+* `cummin()`: cumulative `min()`
+* `cumprod()`: cumulative `prod()`
+* `cumsum()`: cumulative `sum()`
+
+### Ranking
+
+* `dplyr::cume_dist()`: proportion of all values <=
+* `dplyr::dense_rank()`: rank with ties = min, no gaps
+* `dplyr::min_rank()`: rank with ties = min
+* `dplyr::ntile()`: bins into n bins
+* `dplyr::percent_rank()`: `min_rank()` scaled to \[0,1\]
+* `dplyr::row_number()`: rank with ties = “first”
+
+### Math
+
+* `+`, `-`, `/`, `^`, `%/%`, `%%`: arithmetic ops
+* `log()`, `log2()`, `log10()`: logs
+* `<`, `<=`, `>`, `>=`, `!=`, `==`: logical comparisons
+* `dplyr::between()`: x >= left & x <= right
+* `dplyr::near()`: safe `==` for floating point numbers
+
+### Miscellaneous
+
+* `dplyr::case_when()`: multi-case `if_else()`
+
+ starwars |>
+ mutate(type = case_when(
+ height > 200 | mass > 200 ~ "large",
+ species == "Droid" ~ "robot",
+ TRUE ~ "other"
+ ))
+
+* `dplyr::coalesce()`: first non-NA values by element across a set of vectors
+
+* `dplyr::if_else()`: element-wise if() + else()
+
+* `dplyr::na_if()`: replace specific values with NA
+
+* `pmax()`: element-wise max()
+
+* `pmin()`: element-wise min()
+
+
+Summary Functions
+---------------------------------------
+
+### To Use with `summarize()`
+
+`summarize()` applies summary functions to columns to create a new table. Summary functions take vectors as input and return single values as output.
+
+### Count
+
+* `dplyr::n()`: number of values/rows
+* `dplyr::n_distinct()`: # of uniques
+* `sum(!is.na())`: # of non-NAs
+
+### Position
+
+* `mean()`: mean, also `mean(!is.na())`
+* `median()`: median
+
+### Logical
+
+* `mean()`: proportion of TRUEs
+* `sum()`: # of TRUEs
+
+### Order
+
+* `dplyr::first()`: first value
+* `dplyr::last()`: last value
+* `dplyr::nth()`: value in the nth location of vector
+
+### Rank
+
+* `quantile()`: nth quantile
+* `min()`: minimum value
+* `max()`: maximum value
+
+### Spread
+
+* `IQR()`: Inter-Quartile Range
+* `mad()`: median absolute deviation
+* `sd()`: standard deviation
+* `var()`: variance
+
+Row Names
+-----------------------
+
+Tidy data does not use rownames, which store a variable outside of the columns. To work with the rownames, first move them into a column.
+
+* `tibble::rownames_to_column()`: Move row names into col.
+
+
+```R
+a <- rownames_to_column(mtcars, var = "C")
+```
+
+* `tibble::column_to_rownames()`: Move col into row names.
+
+
+```R
+column_to_rownames(a, var = "C")
+```
+
+* Also `tibble::has_rownames()` and `tibble::remove_rownames()`.
+
+Combine Tables
+---------------------------------
+
+
+```R
+x <- tribble(
+ ~A, ~B, ~C,
+ "a", "t", 1,
+ "b", "u", 2,
+ "c", "v", 3
+)
+
+y <- tribble(
+ ~A, ~B, ~D,
+ "a", "t", 3,
+ "b", "u", 2,
+ "d", "w", 1
+)
+```
+
+### Combine Variables
+
+* `bind_cols(..., .name_repair)`: Returns tables placed side by side as a single table. Column lengths must be equal. Columns will NOT be matched by id (to do that look at Relational Data below), so be sure to check that both tables are ordered the way you want before binding.
+
+### Combine Cases
+
+* `bind_rows(..., .id = NULL)`: Returns tables one on top of the other as a single table. Set `.id` to a column name to add a column of the original table names.
+
+### Relational Data
+
+Use a **“Mutating Join”** to join one table to columns from another, matching values with the rows that they correspond to. Each join retains a different combination of values from the tables.
+
+* `left_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = "na")`: Join matching values from `y` to `x`.
+* `right_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = "na")`: Join matching values from `x` to `y`.
+* `inner_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = "na")`: Join data. Retain only rows with matches.
+* `full_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = "na")`: Join data. Retain all values, all rows.
+
+Use a **“Filtering Join”** to filter one table against the rows of another.
+
+* `semi_join(x, y, by = NULL, copy = FALSE, ..., na_matches = "na")`: Return rows of `x` that have a match in `y`. Use to see what will be included in a join.
+* `anti_join(x, y, by = NULL, copy = FALSE, ..., na_matches = "na")`: Return rows of `x` that do not have a match in `y`. Use to see what will not be included in a join.
+
+Use a **“Nest Join”** to inner join one table to another into a nested data frame.
+
+* `nest_join(x, y, by = NULL, copy = FALSE, keep = FALSE, name = NULL, ...)`: Join data, nesting matches from `y` in a single new data frame column.
+
+### Column Matching for Joins
+
+* Use `by = join_by(col1, col2, ...)` to specify one or more common columns to match on.
+
+
+
+```R
+left_join(x, y, by = join_by(A))
+left_join(x, y, by = join_by(A, B))
+```
+
+* Use a logical statement, `by = join_by(col1 == col2)`, to match on columns that have different names in each table.
+
+
+```R
+left_join(x, y, by = join_by(C == D))
+```
+
+* Use `suffix` to specify the suffix to give to unmatched columns that have the same name in both tables.
+
+
+```R
+left_join(x, y, by = join_by(C == D), suffix = c("1", "2"))
+```
+
+### Set Operations
+
+* `intersect(x, y, ...)`: Rows that appear in both `x` and `y`.
+* `setdiff(x, y, ...)`: Rows that appear in `x` but not `y`.
+* `union(x, y, ...)`: Rows that appear in x or y, duplicates removed. `union_all()` retains duplicates.
+* Use `setequal()` to test whether two data sets contain the exact same rows (in any order).
diff --git a/docs/examples/forcats/forcats.ipynb b/docs/examples/forcats/forcats.ipynb
index ea6146c..d6bcd39 100644
--- a/docs/examples/forcats/forcats.ipynb
+++ b/docs/examples/forcats/forcats.ipynb
@@ -7,12 +7,6 @@
},
"source": [
"
\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
@@ -37,7 +31,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -83,7 +80,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -105,7 +105,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -131,7 +134,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -151,7 +157,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -171,7 +180,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -195,7 +207,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -218,7 +233,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -241,7 +259,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -262,7 +283,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -284,7 +308,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -305,7 +332,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -327,7 +357,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -348,7 +381,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -369,7 +405,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -390,7 +429,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -418,7 +460,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -440,7 +485,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -461,7 +509,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -482,7 +533,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -503,7 +557,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -527,7 +584,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -549,7 +609,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
@@ -570,7 +633,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "YuptQWIi4uk1"
+ "id": "YuptQWIi4uk1",
+ "vscode": {
+ "languageId": "r"
+ }
},
"outputs": [],
"source": [
diff --git a/docs/examples/forcats/forcats.md b/docs/examples/forcats/forcats.md
new file mode 100644
index 0000000..1ff8943
--- /dev/null
+++ b/docs/examples/forcats/forcats.md
@@ -0,0 +1,252 @@
+
+
+# Forcats
+
+
+![forcats logo](https://rstudio.github.io/cheatsheets/html/images/logo-forcats.png)
+
+
+The [forcats](https://forcats.tidyverse.org/) package provides tools for working with factors, which are R’s data structure for categorical data.
+
+
+```R
+library(forcats)
+```
+
+Factors
+-------------------
+
+R represents categorical data with factors. A **factor** is an integer vector with a **levels** attribute that stores a set of mappings between integers and categorical values. When you view a factor, R displays not the integers but the levels associated with them.
+
+For example, R will display `c("a", "c", "b", "a")` with levels `c("a", "b", "c")` but will store `c(1, 3, 2, 1)` where 1 = a, 2 = b, and 3 = c.
+
+R will display:
+
+```R
+[1] a c b a
+Levels: a b c
+```
+
+
+R will store:
+```R
+[1] 1 3 2 1
+attr(,"levels")
+[1] "a" "b" "c"
+```
+
+
+Create a factor with `factor()`:
+
+* `factor(x = character(), levels, labels = levels, exclude = NA, ordered = is.ordered(x), nmax = NA)`: Convert a vector to a factor. Also `as_factor()`.
+
+
+
+```R
+f <- factor(c("a", "c", "b", "a"), levels = c("a", "b", "c"))
+```
+
+Return its levels with `levels()`:
+
+* `levels(x)`: Return/set the levels of a factor.
+
+
+```R
+levels(f)
+levels(f) <- c("x", "y", "z")
+```
+
+Use `unclass()` to see its structure.
+
+Inspect Factors
+-----------------------------------
+
+* `fct_count(f, sort = FALSE, prop = FALSE)`: Count the number of values with each level.
+
+
+```R
+fct_count(f)
+```
+
+* `fct_match(f, lvls)`: Check for `lvls` in `f`.
+
+
+```R
+fct_match(f, "a")
+```
+
+* `fct_unique(f)`: Return the unique values, removing duplicates.
+
+
+```R
+fct_unique(f)
+```
+
+Combine Factors
+-----------------------------------
+
+* `fct_c(...)`: Combine factors with different levels. Also `fct_cross()`.
+
+
+
+```R
+f1 <- factor(c("a", "c"))
+f2 <- factor(c("b", "a"))
+fct_c(f1, f2)
+```
+
+* `fct_unify(fs, levels = lvls_union(fs))`: Standardize levels across a list of factors.
+
+
+
+```R
+fct_unify(list(f2, f1))
+```
+
+Change the order of levels
+---------------------------------------------------------
+
+* `fct_relevel(.f, ..., after = 0L)`: Manually reorder factor levels.
+
+
+```R
+fct_relevel(f, c("b", "c", "a"))
+```
+
+* `fct_infreq(f, ordered = NA)`: Reorder levels by the frequency in which they appear in the data (highest frequency first). Also `fct_inseq()`.
+
+
+
+```R
+f3 <- factor(c("c", "c", "a"))
+fct_infreq(f3)
+```
+
+* `fct_inorder(f, ordered = NA)`: Reorder levels by order in which they appear in the data.
+
+
+
+```R
+fct_inorder(f2)
+```
+
+* `fct_rev(f)`: Reverse level order.
+
+
+
+```R
+f4 <- factor(c("a","b","c"))
+fct_rev(f4)
+```
+
+* `fct_shift(f)`: Shift levels to left or right, wrapping around end.
+
+
+
+```R
+fct_shift(f4)
+```
+
+* `fct_shuffle(f, n = 1L)`: Randomly permute order of factor levels.
+
+
+
+```R
+fct_shuffle(f4)
+```
+
+* `fct_reorder(.f, .x, .fun = median, ..., .desc = FALSE)`: Reorder levels by their relationship with another variable.
+
+
+
+```R
+boxplot(weight ~ fct_reorder(group, weight), data = PlantGrowth)
+```
+
+* `fct_reorder2(.f, .x, .y, .fun = last2, ..., .desc = TRUE)`: Reorder levels by their final values when plotted with two other variables.
+
+
+
+```R
+ggplot(
+ diamonds,
+ aes(carat, price, color = fct_reorder2(color, carat, price))
+ ) +
+ geom_smooth()
+```
+
+Change the value of levels
+---------------------------------------------------------
+
+* `fct_recode(.f, ...)`: Manually change levels. Also `fct_relabel()` which obeys `purrr::map` syntax to apply a function or expression to each level.
+
+
+
+```R
+fct_recode(f, v = "a", x = "b", z = "c")
+fct_relabel(f, ~ paste0("x", .x))
+```
+
+* `fct_anon(f, prefix = "")`: Anonymize levels with random integers.
+
+
+
+```R
+fct_anon(f)
+```
+
+* `fct_collapse(.f, ..., other_level = NULL)`: Collapse levels into manually defined groups.
+
+
+
+```R
+fct_collapse(f, x = c("a", "b"))
+```
+
+* `fct_lump_min(f, min, w = NULL, other_level = "Other")`: Lumps together factors that appear fewer than `min` times. Also `fct_lump_n()`, `fct_lump_prop()`, and `fct_lump_lowfreq()`.
+
+
+
+```R
+fct_lump_min(f, min = 2)
+```
+
+* `fct_other(f, keep, drop, other_level = "Other")`: Replace levels with “other.”
+
+
+
+```R
+fct_other(f, keep = c("a", "b"))
+```
+
+Add or drop levels
+-----------------------------------------
+
+* `fct_drop(f, only)`: Drop unused levels.
+
+
+
+```R
+f5 <- factor(c("a","b"),c("a","b","x"))
+f6 <- fct_drop(f5)
+```
+
+* `fct_expand(f, ...)`: Add levels to a factor.
+
+
+
+```R
+fct_expand(f6, "x")
+```
+
+* `fct_na_value_to_level(f, level = "(Missing)")`: Assigns a level to NAs to ensure they appear in plots, etc.
+
+
+
+```R
+f <- factor(c("a", "b", NA))
+fct_na_value_to_level(f, level = "(Missing)")
+```
diff --git a/docs/examples/ggplot2/ggplot2.ipynb b/docs/examples/ggplot2/ggplot2.ipynb
index e9a4c06..4806241 100644
--- a/docs/examples/ggplot2/ggplot2.ipynb
+++ b/docs/examples/ggplot2/ggplot2.ipynb
@@ -5,12 +5,6 @@
"metadata": {},
"source": [
"
\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
@@ -31,7 +25,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"library(ggplot2)"
@@ -93,7 +91,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"a <- ggplot(economics, aes(date, unemploy))"
@@ -102,7 +104,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"b <- ggplot(seals, aes(x = long, y = lat))"
@@ -146,7 +152,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"c <- ggplot(mpg, aes(hwy))\n",
@@ -176,7 +186,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"d <- ggplot(mpg, aes(fl))"
@@ -194,7 +208,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"e <- ggplot(mpg, aes(cty, hwy))"
@@ -223,7 +241,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"f <- ggplot(mpg, aes(class, hwy))"
@@ -248,7 +270,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"g <- ggplot(diamonds, aes(cut, color))"
@@ -269,7 +295,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"h <- ggplot(diamonds, aes(carat, price))"
@@ -292,7 +322,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"i <- ggplot(economics, aes(date, unemploy))"
@@ -315,7 +349,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"df <- data.frame(grp = c(\"A\", \"B\"), fit = 4:5, se = 1:2)\n",
@@ -341,7 +379,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"murder_data <- data.frame(\n",
@@ -364,7 +406,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"seals$z <- with(seals, sqrt(delta_long^2 + delta_lat^2))\n",
@@ -397,7 +443,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"i + stat_density_2d(aes(fill = after_stat(level)), geom = \"polygon\")"
@@ -465,7 +515,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"n <- d + geom_bar(aes(fill = fl))\n",
@@ -530,7 +584,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"o <- c + geom_dotplot(aes(fill = ..x..))"
@@ -555,7 +613,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"p <- e + geom_point(aes(shape = fl, size = cyl))"
@@ -583,7 +645,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"u <- d + geom_bar()"
@@ -615,7 +681,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"s <- ggplot(mpg, aes(fl, fill = drv))"
@@ -642,7 +712,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"s + geom_bar(position = position_dodge(width = 1))"
@@ -678,7 +752,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"r + ggtitle(\"Title\") + theme(plot.title.postion = \"plot\")"
@@ -687,7 +765,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"r + theme(panel.background = element_rect(fill = \"blue\"))"
@@ -706,7 +788,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
"outputs": [],
"source": [
"t <- ggplot(mpg, aes(cty, hwy)) + geom_point()"
diff --git a/docs/examples/ggplot2/ggplot2.md b/docs/examples/ggplot2/ggplot2.md
new file mode 100644
index 0000000..b948c2e
--- /dev/null
+++ b/docs/examples/ggplot2/ggplot2.md
@@ -0,0 +1,560 @@
+
+
+# Ggplot2
+
+
+![ggplot2 logo](https://rstudio.github.io/cheatsheets/html/images/logo-ggplot2.png)
+
+
+
+Basics
+-----------------
+
+[ggplot2](https://ggplot2.tidyverse.org/) is based on the **grammar of graphics**, the idea that you can build every graph from the same components: a **data** set, a **coordinate system**, and **geoms**—visual marks that represent data points.
+
+
+```R
+library(ggplot2)
+```
+
+To display values, map variables in the data to visual properties of the geom (**aesthetics**) like **size**, **color**, and **x** and **y** locations.
+
+Complete the template below to build a graph.
+```R
+ggplot(data = <DATA>) +
+  <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>),
+    stat = <STAT>,
+    position = <POSITION>) +
+  <COORDINATE_FUNCTION> +
+  <FACET_FUNCTION> +
+  <SCALE_FUNCTION> +
+  <THEME_FUNCTION>
+
+```
+
+
+Data, a Geom Function, and Aes Mappings are required. Stat, Position, and the Coordinate, Facet, Scale, and Theme functions are not required and will supply sensible defaults.
+
+* `ggplot(data = mpg, aes(x = cty, y = hwy))`: Begins a plot that you finish by adding layers to. Add one geom function per layer.
+
+* `last_plot()`: Returns the last plot.
+
+* `ggsave("plot.png", width = 5, height = 5)`: Saves last plot as 5’ x 5’ file named “plot.png” in working directory. Matches file type to file extension.
+
+
+Aes
+-----------
+
+Common aesthetic values.
+
+* `color` and `fill`: String (`"red"`, `"#RRGGBB"`).
+
+* `linetype`: Integer or string (0 = `"blank"`, 1 = `"solid"`, 2 = `"dashed"`, 3 = `"dotted"`, 4 = `"dotdash"`, 5 = `"longdash"`, 6 = `"twodash"`).
+
+* `size`: Integer (line width in mm for outlines).
+
+* `linewidth`: Integer (line width in mm for lines).
+
+* `shape`: Integer/shape name or a single character (`"a"`).
+
+ * `shape` integer/name pairs: 0 = `"square open"`, 1 = `"circle open"`, 2 = `"triangle open"`, 3 = `"plus"`, 4 = `"cross"`, 5 = `"diamond open"`, 6 = `"triangle down open"`, 7 = `"square cross"`, 8 = `"asterisk"`, 9 = `"diamond plus"`, 10 = `"circle plus"`, 11 = `"star"`, 12 = `"square plus"`, 13 = `"circle cross"`, 14 = `"square triangle"`, 15 = `"square"`, 16 = `"circle"`, 17 = `"triangle"`, 18 = `"diamond"`, 19 = `"circle small"`, 20 = `"bullet"`, 21 = `"circle filled"`, 22 = `"square filled"`, 23 = `"diamond filled"`, 24 = `"triangle filled"`, 25 = `"triangle down filled"`
+
+Geoms
+---------------
+
+Use a geom function to represent data points, use the geom’s aesthetic properties to represent variables. Each function returns a layer.
+
+### Graphical Primitives
+
+
+```R
+a <- ggplot(economics, aes(date, unemploy))
+```
+
+
+```R
+b <- ggplot(seals, aes(x = long, y = lat))
+```
+
+* `a + geom_blank()` and `a + expand_limits()`: Ensure limits include values across all plots.
+
+* `b + geom_curve(aes(yend = lat + 1, xend = long + 1), curvature = 1)`: Draw a curved line from `(x, y)` to `(xend, yend)`. `aes()` arguments: `x`, `xend`, `y`, `yend`, `alpha`, `angle`, `color`, `curvature`, `linetype`, `size`.
+
+* `a + geom_path(lineend = "butt", linejoin = "round", linemitre = 1)`: Connect observations in the order they appear. `aes()` arguments: `x`, `y`, `alpha`, `color`, `group`, `linetype`, `size`.
+
+* `a + geom_polygon(aes(alpha = 50))`: Connect points into polygons. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `group`, `subgroup`, `linetype`, `size`.
+
+* `b + geom_rect(aes(xmin = long, ymin = lat, xmax = long + 1, ymax = lat + 1))`: Draw a rectangle by connecting four corners (`xmin`, `xmax`, `ymin`, `ymax`). `aes()` arguments: `xmax`, `xmin`, `ymax`, `ymin`, `alpha`, `color`, `fill`, `linetype`, `size`.
+
+* `a + geom_ribbon(aes(ymin = unemploy - 900, ymax = unemploy + 900))`: For each `x`, plot an interval from `ymin` to `ymax`. `aes()` arguments: `x`, `ymax`, `ymin`, `alpha`, `color`, `fill`, `group`, `linetype`, `size`.
+
+
+#### Line Segments
+
+Common aesthetics: `x`, `y`, `alpha`, `color`, `linetype`, `size`, `linewidth`.
+
+* `b + geom_abline(aes(intercept = 0, slope = 1))`: Draw a diagonal reference line with a given `slope` and `intercept`.
+
+* `b + geom_hline(aes(yintercept = lat))`: Draw a horizontal reference line with a given `yintercept`.
+
+* `b + geom_vline(aes(xintercept = long))`: Draw a vertical reference line with a given `xintercept`.
+
+* `b + geom_segment(aes(yend = lat + 1, xend = long + 1))`: Draw a straight line from `(x, y)` to `(xend, yend)`.
+
+* `b + geom_spoke(aes(angle = 1:1155, radius = 1))`: Draw line segments using polar coordinates (`angle` and `radius`).
+
+
+### One Variable - Continuous
+
+
+```R
+c <- ggplot(mpg, aes(hwy))
+c2 <- ggplot(mpg)
+```
+
+* `c + geom_area(stat = "bin")`: Draw an area plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `linetype`, `linewidth`.
+
+* `c + geom_density(kernel = "gaussian")`: Compute and draw kernel density estimates. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `group`, `linetype`, `linewidth`, `weight`.
+
+* `c + geom_dotplot()`: Draw a dot plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`.
+
+* `c + geom_freqpoly()`: Draw a frequency polygon. `aes()` arguments: `x`, `y`, `alpha`, `color`, `group`, `linetype`, `linewidth`.
+
+* `c + geom_histogram(binwidth = 5)`: Draw a histogram. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `linetype`, `linewidth`, `weight`.
+
+* `c2 + geom_qq(aes(sample = hwy))`: Draw a quantile-quantile plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `linetype`, `size`, `weight`.
+
+
+### One Variable - Discrete
+
+
+```R
+d <- ggplot(mpg, aes(fl))
+```
+
+* `d + geom_bar()`: Draw a bar chart. `aes()` arguments: `x`, `alpha`, `color`, `fill`, `linetype`, `linewidth`, `weight`.
+
+### Two Variables - Both Continuous
+
+
+```R
+e <- ggplot(mpg, aes(cty, hwy))
+```
+
+* `e + geom_label(aes(label = cty), nudge_x = 1, nudge_y = 1)`: Add text with a rectangle background. `aes()` arguments: `x`, `y`, `label`, `alpha`, `angle`, `color`, `family`, `fontface`, `hjust`, `lineheight`, `size`, `vjust`.
+
+* `e + geom_point()`: Draw a scatter plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `shape`, `size`, `stroke`.
+
+* `e + geom_quantile()`: Fit and draw quantile regression for the plot data. `aes()` arguments: `x`, `y`, `alpha`, `color`, `group`, `linetype`, `linewidth`, `weight`.
+
+* `e + geom_rug(sides = "bl")`: Draw a rug plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `linetype`, `linewidth`.
+
+* `e + geom_smooth(method = lm)`: Plot smoothed conditional means. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `group`, `linetype`, `linewidth`, `weight`.
+
+* `e + geom_text(aes(label = cty), nudge_x = 1, nudge_y = 1)`: Add text to a plot. `aes()` arguments: `x`, `y`, `label`, `alpha`, `angle`, `color`, `family`, `fontface`, `hjust`, `lineheight`, `size`, `vjust`.
+
+
+### Two Variables - One Discrete, One Continuous
+
+
+```R
+f <- ggplot(mpg, aes(class, hwy))
+```
+
+* `f + geom_col()`: Draw a bar plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `group`, `linetype`, `linewidth`.
+
+* `f + geom_boxplot()`: Draw a box plot. `aes()` arguments: `x`, `y`, `lower`, `middle`, `upper`, `ymax`, `ymin`, `alpha`, `color`, `fill`, `group`, `linetype`, `shape`, `linewidth`, `weight`.
+
+* `f + geom_dotplot(binaxis ="y", stackdir = "center")`: Draw a dot plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `group`.
+
+* `f + geom_violin(scale = "area")`: Draw a violin plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `group`, `linetype`, `linewidth`, `weight`.
+
+
+### Two Variables - Both Discrete
+
+
+```R
+g <- ggplot(diamonds, aes(cut, color))
+```
+
+* `g + geom_count()`: Plot a count of points in an area to address over plotting. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `shape`, `size`, `stroke`.
+
+* `e + geom_jitter(height = 2, width = 2)`: Jitter points in a plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `shape`, `size`.
+
+
+### Two Variables - Continuous Bivariate Distribution
+
+
+```R
+h <- ggplot(diamonds, aes(carat, price))
+```
+
+* `h + geom_bin2d(binwidth = c(0.25, 500))`: Draw a heatmap of 2D rectangular bin counts. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `linetype`, `size`, `weight`.
+
+* `h + geom_density_2d()`: Plot contours from 2D kernel density estimation. `aes()` arguments: `x`, `y`, `alpha`, `color`, `group`, `linetype`, `linewidth`.
+
+* `h + geom_hex()`: Draw a heatmap of 2D hexagonal bin counts. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `linewidth`.
+
+
+### Two Variables - Continuous Function
+
+
+```R
+i <- ggplot(economics, aes(date, unemploy))
+```
+
+* `i + geom_area()`: Draw an area plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `linetype`, `linewidth`.
+
+* `i + geom_line()`: Connect data points, ordered by the x axis variable. `aes()` arguments: `x`, `y`, `alpha`, `color`, `group`, `linetype`, `linewidth`.
+
+* `i + geom_step(direction = "hv")`: Draw a stairstep plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `group`, `linetype`, `linewidth`.
+
+
+### Two Variables - Visualizing Error
+
+
+```R
+df <- data.frame(grp = c("A", "B"), fit = 4:5, se = 1:2)
+j <- ggplot(df, aes(grp, fit, ymin = fit - se, ymax = fit + se))
+```
+
+* `j + geom_crossbar(fatten = 2)`: Draw a crossbar. `aes()` arguments: `x`, `y`, `ymax`, `ymin`, `alpha`, `color`, `fill`, `group`, `linetype`, `linewidth`.
+
+* `j + geom_errorbar()`: Draw an errorbar. Also `geom_errorbarh()`. `aes()` arguments: `x`, `ymax`, `ymin`, `alpha`, `color`, `group`, `linetype`, `linewidth`, `width`.
+
+* `j + geom_linerange()`: Draw a line range. `aes()` arguments: `x`, `ymin`, `ymax`, `alpha`, `color`, `group`, `linetype`, `linewidth`.
+
+* `j + geom_pointrange()`: Draw a point range. `aes()` arguments: `x`, `y`, `ymin`, `ymax`, `alpha`, `color`, `fill`, `group`, `linetype`, `shape`, `linewidth`.
+
+
+### Two Variables - Maps
+
+
+```R
+murder_data <- data.frame(
+ murder = USArrests$Murder,
+ state = tolower(rownames(USArrests))
+)
+map <- map_data("state")
+k <- ggplot(murder_data, aes(fill = murder))
+```
+
+* `k + geom_map(aes(map_id = state), map = map) + expand_limits(x = map$long, y = map$lat)`: Draw polygons as a map. `aes()` arguments: `map_id`, `alpha`, `color`, `fill`, `linetype`, `linewidth`.
+
+### Three Variables[](#three-variables)
+
+
+```R
+seals$z <- with(seals, sqrt(delta_long^2 + delta_lat^2))
+l <- ggplot(seals, aes(long, lat))
+```
+
+* `l + geom_contour(aes(z = z))`: Draw 2D contour plot. `aes()` arguments: `x`, `y`, `z`, `alpha`, `color`, `group`, `linetype`, `linewidth`, `weight`.
+
+* `l + geom_contour_filled(aes(fill = z))`: Draw 2D contour plot with the space between lines filled. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `group`, `linetype`, `linewidth`, `subgroup`.
+
+* `l + geom_raster(aes(fill = z), hjust = 0.5, vjust = 0.5, interpolate = FALSE)`: Draw a raster plot. `aes()` arguments: `x`, `y`, `alpha`, `fill`.
+
+* `l + geom_tile(aes(fill = z))`: Draw a tile plot. `aes()` arguments: `x`, `y`, `alpha`, `color`, `fill`, `linetype`, `linewidth`, `width`.
+
+
+Stats
+---------------
+
+An alternative way to build a layer.
+
+A stat builds new variables to plot (e.g., count, prop).
+
+Visualize a stat by changing the default stat of a geom function, `geom_bar(stat = "count")`, or by using a stat function, `stat_count(geom = "bar")`, which calls a default geom to make a layer (equivalent to a geom function). Use `after_stat(name)` syntax to map the stat variable `name` to an aesthetic.
+
+
+```R
+i + stat_density_2d(aes(fill = after_stat(level)), geom = "polygon")
+```
+
+In this example, `"polygon"` is the geom to use, `stat_density_2d()` is the stat function, `aes()` contains the geom mappings, and `level` is the variable created by stat.
+
+* `c + stat_bin(binwidth = 1, boundary = 10)`: `x`, `y` | `count`, `ncount`, `density`, `ndensity`
+
+* `c + stat_count(width = 1)`: `x`, `y` | `count`, `density`
+
+* `c + stat_density(adjust = 1, kernel = "gaussian")`: `x`, `y` | `count`, `density`, `scaled`
+
+* `e + stat_bin_2d(bins = 30, drop = T)`: `x`, `y`, `fill` | `count`, `density`
+
+* `e + stat_bin_hex(bins =30)`: `x`, `y`, `fill` | `count`, `density`
+
+* `e + stat_density_2d(contour = TRUE, n = 100)`: `x`, `y`, `color`, `linewidth` | `level`
+
+* `e + stat_ellipse(level = 0.95, segments = 51, type = "t")`
+
+* `l + stat_contour(aes(z = z))`: `x`, `y`, `z`, `order` | `level`
+
+* `l + stat_summary_hex(aes(z = z), bins = 30, fun = max)`: `x`, `y`, `z`, `fill` | `value`
+
+* `l + stat_summary_2d(aes(z = z), bins = 30, fun = mean)`: `x`, `y`, `z`, `fill` | `value`
+
+* `f + stat_boxplot(coef = 1.5)`: `x`, `y` | `lower`, `middle`, `upper`, `width`, `ymin`, `ymax`
+
+* `f + stat_ydensity(kernel = "gaussian", scale = "area")`: `x`, `y` | `density`, `scaled`, `count`, `n`, `violinwidth`, `width`
+
+* `e + stat_ecdf(n = 40)`: `x`, `y` | `x`, `y`
+
+* `e + stat_quantile(quantiles = c(0.1, 0.9), formula = y ~ log(x), method = "rq")`: `x`, `y` | `quantile`
+
+* `e + stat_smooth(method = "lm", formula = y ~ x, se = T, level = 0.95)`: `x`, `y` | `se`, `x`, `y`, `ymin`, `ymax`
+
+* `ggplot() + xlim(-5, 5) + stat_function(fun = dnorm, n = 20, geom = "point")`: `x` | `x`, `y`
+
+* `ggplot() + stat_qq(aes(sample = 1:100))`: `x`, `y`, `sample` | `sample`, `theoretical`
+
+* `e + stat_sum()`: `x`, `y`, `size` | `n`, `prop`
+
+* `e + stat_summary(fun.data = "mean_cl_boot")`
+
+* `h + stat_summary_bin(fun = "mean", geom = "bar")`
+
+* `e + stat_identity()`
+
+* `e + stat_unique()`
+
+
+Scales
+-----------------
+
+Override defaults with **scales** package.
+
+**Scales** map data values to the visual values of an aesthetic. To change a mapping, add a new scale.
+
+
+```R
+n <- d + geom_bar(aes(fill = fl))
+
+n + scale_fill_manual(
+  values = c(),
+ limits = c(),
+ breaks = c(),
+ name = "fuel",
+ labels = c("D", "E", "P", "R")
+)
+```
+
+In this example, `scale_` specifies a scale function, `fill` is the aesthetic to adjust, and `manual` is the prepackaged scale to use.
+
+`values` contains scale-specific arguments, `limits` specifies the range of values to include in mappings, `breaks` specifies the breaks to use in legend/axis, and `name` and `labels` specify the title and labels to use in the legend/axis.
+
+### General Purpose Scales
+
+Use with most aesthetics.
+
+* `scale_*_continuous()`: Map continuous values to visual ones.
+
+* `scale_*_discrete()`: Map discrete values to visual ones.
+
+* `scale_*_binned()`: Map continuous values to discrete bins.
+
+* `scale_*_identity()`: Use data values as visual ones.
+
+* `scale_*_manual(values = c())`: Map discrete values to manually chosen visual ones.
+
+* `scale_*_date(date_labels = "%m/%d", date_breaks = "2 weeks")`: Treat data values as dates.
+
+* `scale_*_datetime()`: Treat data values as date times. Same as `scale_*_date()`. See `?strptime` for label formats.
+
+
+### X & Y Location Scales
+
+Use with x or y aesthetics (x shown here).
+
+* `scale_x_log10()`: Plot `x` on log10 scale.
+
+* `scale_x_reverse()`: Reverse the direction of the x axis.
+
+* `scale_x_sqrt()`: Plot `x` on square root scale.
+
+
+### Color and Fill Scales (Discrete)
+
+* `n + scale_fill_brewer(palette = "Blues")`: Use color scales from ColorBrewer. For palette choices `RColorBrewer::display.brewer.all()`.
+
+* `n + scale_fill_grey(start = 0.2, end = 0.8, na.value = "red")`: Use a grey gradient color scale.
+
+
+### Color and Fill Scales (Continuous)
+
+
+```R
+o <- c + geom_dotplot(aes(fill = ..x..))
+```
+
+* `o + scale_fill_distiller(palette = "Blues")`: Interpolate a palette into a continuous scale.
+
+* `o + scale_fill_gradient(low = "red", high = "yellow")`: Create a two color gradient.
+
+* `o + scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 25)`: Create a diverging color gradient.
+
+* `o + scale_fill_gradientn(colors = topo.colors(6))`: Create a n-color gradient. Also `rainbow()`, `heat.colors()`, `terrain.colors()`, `cm.colors()`, `RColorBrewer::brewer.pal()`.
+
+
+### Shape and Size Scales
+
+
+```R
+p <- e + geom_point(aes(shape = fl, size = cyl))
+```
+
+* `p + scale_shape() + scale_size()`: Map discrete values to shape and size aesthetics.
+
+* `p + scale_shape_manual(values = c(3:7))`: Map discrete values to specified shape values.
+
+* `p + scale_radius(range = c(1,6))`: Map values to a shape’s radius.
+
+* `p + scale_size_area(max_size = 6)`: Like `scale_size()` but maps zero values to zero size.
+
+
+Shapes used here are the same as the ones listed in the Aes section.
+
+Coordinate Systems
+-----------------------------------------
+
+
+```R
+u <- d + geom_bar()
+```
+
+* `u + coord_cartesian(xlim = c(0, 5))`: `xlim`, `ylim`. The default Cartesian coordinate system.
+
+* `u + coord_fixed(ratio = 1/2)`: `ratio`, `xlim`, `ylim`. Cartesian coordinates with fixed aspect ratio between x and y units.
+
+* `ggplot(mpg, aes(y = fl)) + geom_bar()`: Flip Cartesian coordinates by switching x and y aesthetic mappings.
+
+* `u + coord_polar(theta = "x", direction = 1)`: `theta`, `start`, `direction`. Polar coordinates.
+
+* `u + coord_trans(y = "sqrt")`: `x`, `y`, `xlim`, `ylim`. Transformed Cartesian coordinates. Set `xtrans` and `ytrans` to the name of a window function.
+
+* `u + coord_quickmap(); u + coord_map(projection = "ortho", orientation = c(41, -74, 0))`: `projection`, `xlim`, `ylim`. Map projections from the **mapproj** package (`mercator` (default), `azequalarea`, `lagrange`, etc.).
+
+
+Position Adjustments
+---------------------------------------------
+
+Position adjustments determine how to arrange geoms that would otherwise occupy the same space.
+
+
+```R
+s <- ggplot(mpg, aes(fl, fill = drv))
+```
+
+* `s + geom_bar(position = "dodge")`: Arrange elements side by side.
+
+* `s + geom_bar(position = "fill")`: Stack elements on top of one another, normalize height.
+
+* `e + geom_point(position = "jitter")`: Add random noise to X and Y position of each element to avoid over plotting.
+
+* `e + geom_label(position = "nudge")`: Nudge labels away from points.
+
+* `s + geom_bar(position = "stack")`: Stack elements on top of one another.
+
+
+Each position adjustment can be recast as a function with manual `width` and `height` arguments:
+
+
+```R
+s + geom_bar(position = position_dodge(width = 1))
+```
+
+Themes
+-----------------
+
+* `u + theme_bw()`: White background with grid lines.
+
+* `u + theme_gray()`: Grey background with white grid lines (default theme).
+
+* `u + theme_dark()`: Dark grey background and grid lines for contrast.
+
+* `u + theme_classic()`: No grid lines.
+
+* `u + theme_light()`: Light grey axes and grid lines.
+
+* `u + theme_linedraw()`: Uses only black lines.
+
+* `u + theme_minimal()`: Minimal theme.
+
+* `u + theme_void()`: Empty theme.
+
+* `u + theme()`: Customize aspects of the theme such as axis, legend, panel, and facet properties.
+
+
+
+```R
+r + ggtitle("Title") + theme(plot.title.position = "plot")
+```
+
+
+```R
+r + theme(panel.background = element_rect(fill = "blue"))
+```
+
+Faceting
+---------------------
+
+Facets divide a plot into subplots based on the values of one or more discrete variables.
+
+
+```R
+t <- ggplot(mpg, aes(cty, hwy)) + geom_point()
+```
+
+* `t + facet_grid(. ~ fl)`: Facet into a column based on fl.
+
+* `t + facet_grid(year ~ .)`: Facet into rows based on year.
+
+* `t + facet_grid(year ~ fl)`: Facet into both rows and columns.
+
+* `t + facet_wrap(~ fl)`: Wrap facets into a rectangular layout.
+
+* `t + facet_grid(drv ~ fl, scales = "free")`: Set **scales** to let axis limits vary across facets. Also `"free_x"` for x axis limits adjust to individual facets and `"free_y"` for y axis limits adjust to individual facets.
+
+
+Set **labeller** to adjust facet label:
+
+* `t + facet_grid(. ~ fl, labeller = label_both)`: Labels each facet as “fl: c”, “fl: d”, etc.
+
+* `t + facet_grid(fl ~ ., labeller = label_bquote(alpha ^ .(fl)))`: Labels each facet as “𝛼c”, “𝛼d”, etc.
+
+
+Labels and Legends
+-----------------------------------------
+
+Use `labs()` to label elements of your plot.
+
+```R
+t + labs(x = "New x axis label",
+ y = "New y axis label",
+ title ="Add a title above the plot",
+ subtitle = "Add a subtitle below title",
+ caption = "Add a caption below plot",
+ alt = "Add alt text to the plot",
+         <aes> = "New legend title")
+```
+
+* `t + annotate(geom = "text", x = 8, y = 9, label = "A")`: Places a geom with manually selected aesthetics.
+
+* `p + guides(x = guide_axis(n.dodge = 2))`: Avoid crowded or overlapping labels with `guide_axis(n.dodge or angle)`.
+
+* `n + guides(fill = "none")`: Set legend type for each aesthetic: `colorbar`, `legend`, or `none` (no legend).
+
+* `n + theme(legend.position = "bottom")`: Place legend at “bottom”, “top”, “left”, or “right”.
+
+* `n + scale_fill_discrete(name = "Title", labels = c("A", "B", "C", "D", "E"))`: Set legend title and labels with a scale function.
+
+
+Zooming
+-------------------
+
+* `t + coord_cartesian(xlim = c(0, 100), ylim = c(10,20))`: Zoom without clipping (preferred).
+
+* `t + xlim(0, 100) + ylim(10, 20)` or `t + scale_x_continuous(limits = c(0, 100)) + scale_y_continuous(limits = c(10, 20))`: Zoom with clipping (removes unseen data points).
+
diff --git a/docs/examples/matplotlib/matplotlib.ipynb b/docs/examples/matplotlib/matplotlib.ipynb
index 1e30d04..aee053b 100644
--- a/docs/examples/matplotlib/matplotlib.ipynb
+++ b/docs/examples/matplotlib/matplotlib.ipynb
@@ -6,12 +6,6 @@
"metadata": {},
"source": [
"\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
diff --git a/docs/examples/matplotlib/matplotlib.md b/docs/examples/matplotlib/matplotlib.md
new file mode 100644
index 0000000..9826f30
--- /dev/null
+++ b/docs/examples/matplotlib/matplotlib.md
@@ -0,0 +1,618 @@
+
+
+# Matplotlib
+
+
+![matplotlib logo](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/matplotlib/matplotlib.png)
+
+[Matplotlib](https://matplotlib.org/) is a Python 2D plotting library which produces publication-quality
+figures in a variety of hardcopy formats and interactive environments across platforms.
+
+## Install and import Matplotlib
+
+```
+$ pip install matplotlib
+```
+
+
+```python
+# Import matplotlib convention
+import matplotlib.pyplot as plt
+```
+
+## Anatomy of a figure
+
+
+![anatomy of a figure](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/matplotlib/mlp_01.png)
+
+In Matplotlib, a figure refers to the overall canvas or window that contains one or more individual plots or subplots.
+ Understanding the anatomy of a Matplotlib figure is crucial for creating and customizing your visualizations effectively.
+
+
+```python
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Prepare Data
+x = np.linspace(0, 2*np.pi, 100)
+y = np.sin(x)
+
+# Create Plot
+fig, ax = plt.subplots()
+
+# Plot Data
+ax.plot(x, y)
+
+# Customize Plot
+ax.set_xlabel('X-axis')
+ax.set_ylabel('Y-axis')
+ax.set_title('Sine Function')
+ax.grid(True)
+
+# Save Plot
+plt.savefig('sine_plot.png')
+
+# Show Plot
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_5_0.png)
+
+
+
+## Basic Plots
+
+
+```python
+# Create a scatter plot
+X = np.random.uniform(0, 1, 100)
+Y = np.random.uniform(0, 1, 100)
+plt.scatter(X, Y)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_7_0.png)
+
+
+
+
+```python
+# Create a bar plot
+X = np.arange(10)
+Y = np.random.uniform(1, 10, 10)
+plt.bar(X, Y)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_8_0.png)
+
+
+
+
+```python
+# Create an image plot using imshow
+Z = np.random.uniform(0, 1, (8, 8))
+plt.imshow(Z)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_9_0.png)
+
+
+
+
+```python
+# Create a contour plot
+Z = np.random.uniform(0, 1, (8, 8))
+plt.contourf(Z)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_10_0.png)
+
+
+
+
+```python
+# Create a pie chart
+Z = np.random.uniform(0, 1, 4)
+plt.pie(Z)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_11_0.png)
+
+
+
+
+```python
+# Create a histogram
+Z = np.random.normal(0, 1, 100)
+plt.hist(Z)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_12_0.png)
+
+
+
+
+```python
+# Create an error bar plot
+X = np.arange(5)
+Y = np.random.uniform(0, 1, 5)
+plt.errorbar(X, Y, Y / 4)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_13_0.png)
+
+
+
+
+```python
+# Create a box plot
+Z = np.random.normal(0, 1, (100, 3))
+plt.boxplot(Z)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_14_0.png)
+
+
+
+## Tweak
+
+
+```python
+# Create a plot with a black solid line
+X = np.linspace(0, 10, 100)
+Y = np.sin(X)
+plt.plot(X, Y, color="black")
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_16_0.png)
+
+
+
+
+```python
+# Create a plot with a dashed line
+X = np.linspace(0, 10, 100)
+Y = np.sin(X)
+plt.plot(X, Y, linestyle="--")
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_17_0.png)
+
+
+
+
+```python
+# Create a plot with a thicker line
+X = np.linspace(0, 10, 100)
+Y = np.sin(X)
+plt.plot(X, Y, linewidth=5)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_18_0.png)
+
+
+
+
+```python
+# Create a plot with markers
+X = np.linspace(0, 10, 100)
+Y = np.sin(X)
+plt.plot(X, Y, marker="o")
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_19_0.png)
+
+
+
+## Organize
+
+
+```python
+# Create a plot with two lines on the same axes
+X = np.linspace(0, 10, 100)
+Y1, Y2 = np.sin(X), np.cos(X)
+plt.plot(X, Y1, X, Y2)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_21_0.png)
+
+
+
+
+```python
+# Create a figure with two subplots (vertically stacked)
+X = np.linspace(0, 10, 100)
+Y1, Y2 = np.sin(X), np.cos(X)
+fig, (ax1, ax2) = plt.subplots(2, 1)
+ax1.plot(X, Y1, color="C1")
+ax2.plot(X, Y2, color="C0")
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_22_0.png)
+
+
+
+
+```python
+# Create a figure with two subplots (horizontally aligned)
+X = np.linspace(0, 10, 100)
+Y1, Y2 = np.sin(X), np.cos(X)
+fig, (ax1, ax2) = plt.subplots(1, 2)
+ax1.plot(Y1, X, color="C1")
+ax2.plot(Y2, X, color="C0")
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_23_0.png)
+
+
+
+## Label
+
+
+```python
+# Create data and plot a sine wave
+X = np.linspace(0, 10, 100)
+Y = np.sin(X)
+plt.plot(X, Y)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_25_0.png)
+
+
+
+
+```python
+# Modify plot properties
+X = np.linspace(0, 10, 100)
+Y = np.sin(X)
+plt.plot(X, Y)
+plt.title("A Sine wave")
+plt.xlabel("Time")
+plt.ylabel(None)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_26_0.png)
+
+
+
+## Figure, axes & spines
+
+
+```python
+# Create a 3x3 grid of subplots
+fig, axs = plt.subplots(3, 3)
+
+# Set face colors for specific subplots
+axs[0, 0].set_facecolor("#ddddff")
+axs[2, 2].set_facecolor("#ffffdd")
+```
+
+
+
+![png](matplotlib_files/matplotlib_28_0.png)
+
+
+
+
+```python
+# Create a 3x3 grid of subplots
+fig, axs = plt.subplots(3, 3)
+
+# Add a grid specification and set face color for a specific subplot
+gs = fig.add_gridspec(3, 3)
+ax = fig.add_subplot(gs[0, :])
+ax.set_facecolor("#ddddff")
+```
+
+
+
+![png](matplotlib_files/matplotlib_29_0.png)
+
+
+
+
+```python
+# Create a figure with a single subplot
+fig, ax = plt.subplots()
+
+# Remove top and right spines from the subplot
+ax.spines["top"].set_color("None")
+ax.spines["right"].set_color("None")
+```
+
+
+
+![png](matplotlib_files/matplotlib_30_0.png)
+
+
+
+## Ticks & labels
+
+
+
+```python
+# Import the necessary libraries
+from matplotlib.ticker import MultipleLocator as ML
+from matplotlib.ticker import ScalarFormatter as SF
+
+
+# Create a figure with a single subplot
+fig, ax = plt.subplots()
+
+# Set minor tick locations and formatter for the x-axis
+ax.xaxis.set_minor_locator(ML(0.2))
+ax.xaxis.set_minor_formatter(SF())
+
+# Rotate minor tick labels on the x-axis
+ax.tick_params(axis='x', which='minor', rotation=90)
+```
+
+
+
+![png](matplotlib_files/matplotlib_32_0.png)
+
+
+
+## Lines & markers
+
+
+
+```python
+# Generate data and create a plot
+X = np.linspace(0.1, 10 * np.pi, 1000)
+Y = np.sin(X)
+plt.plot(X, Y, "C1o:", markevery=25, mec="1.0")
+
+# Display the plot
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_34_0.png)
+
+
+
+## Scales & projections
+
+
+
+```python
+# Create a figure with a single subplot
+fig, ax = plt.subplots()
+
+# Set x-axis scale to logarithmic
+ax.set_xscale("log")
+
+# Plot data with specified formatting
+ax.plot(X, Y, "C1o-", markevery=25, mec="1.0")
+
+# Display the plot
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_36_0.png)
+
+
+
+## Text & ornaments
+
+
+
+```python
+# Create a figure with a single subplot
+fig, ax = plt.subplots()
+
+# Fill the area between horizontal lines with a curve
+ax.fill_betweenx([-1, 1], [0], [2*np.pi])
+
+# Add a text annotation to the plot
+ax.text(0, -1, r" Period $\Phi$")
+
+# Display the plot
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_38_0.png)
+
+
+
+## Legend
+
+
+```python
+# Create a figure with a single subplot
+fig, ax = plt.subplots()
+
+# Plot sine and cosine curves with specified colors and labels
+ax.plot(X, np.sin(X), "C0", label="Sine")
+ax.plot(X, np.cos(X), "C1", label="Cosine")
+
+# Add a legend with customized positioning and formatting
+ax.legend(bbox_to_anchor=(0, 1, 1, 0.1), ncol=2, mode="expand", loc="lower left")
+
+# Display the plot
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_40_0.png)
+
+
+
+## Annotation
+
+
+```python
+# Create a figure with a single subplot
+fig, ax = plt.subplots()
+
+ax.plot(X, Y, "C1o:", markevery=25, mec="1.0")
+
+# Add an annotation "A" with an arrow
+ax.annotate("A", (X[250], Y[250]), (X[250], -1),
+ ha="center", va="center",
+ arrowprops={"arrowstyle": "->", "color": "C1"})
+
+# Display the plot
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_42_0.png)
+
+
+
+## Colors
+
+
+```python
+import math
+
+from matplotlib.patches import Rectangle
+import matplotlib.pyplot as plt
+import matplotlib.colors as mcolors
+
+
+def plot_colortable(colors, *, ncols=4, sort_colors=True):
+
+ cell_width = 212
+ cell_height = 22
+ swatch_width = 48
+ margin = 12
+
+ # Sort colors by hue, saturation, value and name.
+ if sort_colors is True:
+ names = sorted(
+ colors, key=lambda c: tuple(mcolors.rgb_to_hsv(mcolors.to_rgb(c))))
+ else:
+ names = list(colors)
+
+ n = len(names)
+ nrows = math.ceil(n / ncols)
+
+ width = cell_width * 4 + 2 * margin
+ height = cell_height * nrows + 2 * margin
+ dpi = 72
+
+ fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)
+ fig.subplots_adjust(margin/width, margin/height,
+ (width-margin)/width, (height-margin)/height)
+ ax.set_xlim(0, cell_width * 4)
+ ax.set_ylim(cell_height * (nrows-0.5), -cell_height/2.)
+ ax.yaxis.set_visible(False)
+ ax.xaxis.set_visible(False)
+ ax.set_axis_off()
+
+ for i, name in enumerate(names):
+ row = i % nrows
+ col = i // nrows
+ y = row * cell_height
+
+ swatch_start_x = cell_width * col
+ text_pos_x = cell_width * col + swatch_width + 7
+
+ ax.text(text_pos_x, y, name, fontsize=14,
+ horizontalalignment='left',
+ verticalalignment='center')
+
+ ax.add_patch(
+ Rectangle(xy=(swatch_start_x, y-9), width=swatch_width,
+ height=18, facecolor=colors[name], edgecolor='0.7')
+ )
+
+ return fig
+```
+
+
+```python
+# CSS Colors
+plot_colortable(mcolors.CSS4_COLORS)
+plt.show()
+```
+
+
+
+![png](matplotlib_files/matplotlib_45_0.png)
+
+
+
+
+```python
+# Get a list of registered colormap names (plt.colormaps() returns colormaps, not named colors)
+named_colors = plt.colormaps()
+print("Colors:",named_colors)
+```
+
+ Colors: ['magma', 'inferno', 'plasma', 'viridis', 'cividis', 'twilight', 'twilight_shifted', 'turbo', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'CMRmap', 'GnBu', 'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr', 'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn', 'Reds', 'Spectral', 'Wistia', 'YlGn', 'YlGnBu', 'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone', 'brg', 'bwr', 'cool', 'coolwarm', 'copper', 'cubehelix', 'flag', 'gist_earth', 'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow', 'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray', 'hot', 'hsv', 'jet', 'nipy_spectral', 'ocean', 'pink', 'prism', 'rainbow', 'seismic', 'spring', 'summer', 'terrain', 'winter', 'Accent', 'Dark2', 'Paired', 'Pastel1', 'Pastel2', 'Set1', 'Set2', 'Set3', 'tab10', 'tab20', 'tab20b', 'tab20c', 'magma_r', 'inferno_r', 'plasma_r', 'viridis_r', 'cividis_r', 'twilight_r', 'twilight_shifted_r', 'turbo_r', 'Blues_r', 'BrBG_r', 'BuGn_r', 'BuPu_r', 'CMRmap_r', 'GnBu_r', 'Greens_r', 'Greys_r', 'OrRd_r', 'Oranges_r', 'PRGn_r', 'PiYG_r', 'PuBu_r', 'PuBuGn_r', 'PuOr_r', 'PuRd_r', 'Purples_r', 'RdBu_r', 'RdGy_r', 'RdPu_r', 'RdYlBu_r', 'RdYlGn_r', 'Reds_r', 'Spectral_r', 'Wistia_r', 'YlGn_r', 'YlGnBu_r', 'YlOrBr_r', 'YlOrRd_r', 'afmhot_r', 'autumn_r', 'binary_r', 'bone_r', 'brg_r', 'bwr_r', 'cool_r', 'coolwarm_r', 'copper_r', 'cubehelix_r', 'flag_r', 'gist_earth_r', 'gist_gray_r', 'gist_heat_r', 'gist_ncar_r', 'gist_rainbow_r', 'gist_stern_r', 'gist_yarg_r', 'gnuplot_r', 'gnuplot2_r', 'gray_r', 'hot_r', 'hsv_r', 'jet_r', 'nipy_spectral_r', 'ocean_r', 'pink_r', 'prism_r', 'rainbow_r', 'seismic_r', 'spring_r', 'summer_r', 'terrain_r', 'winter_r', 'Accent_r', 'Dark2_r', 'Paired_r', 'Pastel1_r', 'Pastel2_r', 'Set1_r', 'Set2_r', 'Set3_r', 'tab10_r', 'tab20_r', 'tab20b_r', 'tab20c_r']
+
+
+## Save
+
+```python
+# Save the figure as a PNG file with higher resolution (300 dpi)
+fig.savefig("my-first-figure.png", dpi=300)
+
+# Save the figure as a PDF file
+fig.savefig("my-first-figure.pdf")
+```
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_10_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_10_0.png
new file mode 100644
index 0000000..58410b2
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_10_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_11_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_11_0.png
new file mode 100644
index 0000000..ace6aec
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_11_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_12_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_12_0.png
new file mode 100644
index 0000000..a7061bc
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_12_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_13_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_13_0.png
new file mode 100644
index 0000000..31061cf
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_13_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_14_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_14_0.png
new file mode 100644
index 0000000..64507d4
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_14_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_16_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_16_0.png
new file mode 100644
index 0000000..39a7fd3
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_16_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_17_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_17_0.png
new file mode 100644
index 0000000..5297425
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_17_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_18_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_18_0.png
new file mode 100644
index 0000000..409760f
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_18_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_19_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_19_0.png
new file mode 100644
index 0000000..ad8bd64
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_19_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_21_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_21_0.png
new file mode 100644
index 0000000..6ee3393
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_21_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_22_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_22_0.png
new file mode 100644
index 0000000..ad1d3e0
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_22_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_23_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_23_0.png
new file mode 100644
index 0000000..b5532ad
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_23_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_25_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_25_0.png
new file mode 100644
index 0000000..fa2b5b2
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_25_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_26_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_26_0.png
new file mode 100644
index 0000000..7443abc
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_26_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_28_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_28_0.png
new file mode 100644
index 0000000..1475438
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_28_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_29_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_29_0.png
new file mode 100644
index 0000000..eadc619
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_29_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_30_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_30_0.png
new file mode 100644
index 0000000..41f62e0
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_30_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_32_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_32_0.png
new file mode 100644
index 0000000..63ba5f2
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_32_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_34_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_34_0.png
new file mode 100644
index 0000000..bf2b678
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_34_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_36_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_36_0.png
new file mode 100644
index 0000000..c68c18c
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_36_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_38_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_38_0.png
new file mode 100644
index 0000000..4f30d23
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_38_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_40_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_40_0.png
new file mode 100644
index 0000000..940a37a
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_40_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_42_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_42_0.png
new file mode 100644
index 0000000..ae5b481
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_42_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_45_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_45_0.png
new file mode 100644
index 0000000..e6fba48
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_45_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_5_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_5_0.png
new file mode 100644
index 0000000..957282d
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_5_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_7_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_7_0.png
new file mode 100644
index 0000000..9139dab
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_7_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_8_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_8_0.png
new file mode 100644
index 0000000..7ff224e
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_8_0.png differ
diff --git a/docs/examples/matplotlib/matplotlib_files/matplotlib_9_0.png b/docs/examples/matplotlib/matplotlib_files/matplotlib_9_0.png
new file mode 100644
index 0000000..123f6ad
Binary files /dev/null and b/docs/examples/matplotlib/matplotlib_files/matplotlib_9_0.png differ
diff --git a/docs/examples/numpy/numpy.ipynb b/docs/examples/numpy/numpy.ipynb
index 210767a..57e1552 100644
--- a/docs/examples/numpy/numpy.ipynb
+++ b/docs/examples/numpy/numpy.ipynb
@@ -6,12 +6,6 @@
"metadata": {},
"source": [
"
\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
diff --git a/docs/examples/numpy/numpy.md b/docs/examples/numpy/numpy.md
new file mode 100644
index 0000000..885a895
--- /dev/null
+++ b/docs/examples/numpy/numpy.md
@@ -0,0 +1,927 @@
+
+
+# NumPy
+
+
![numpy logo](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/numpy/numpy2.png)
+
+[NumPy](https://numpy.org/) is the core library for scientific computing in
+Python. It provides a high-performance multidimensional array
+object, and tools for working with these arrays.
+
+## Install and import NumPy
+
+`
+$ pip install numpy
+`
+
+
+```python
+# Import NumPy convention
+import numpy as np
+```
+
+## NumPy Arrays
+
+
![numpy logo](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/numpy/np_01.png)
+
+
+
+```python
+# Create a 1D array
+a = np.array([1, 2, 3])
+
+# Create a 2D array with specified dtype
+b = np.array([
+ (1.5, 2, 3),
+ (4, 5, 6)
+], dtype=float)
+
+# Create a 3D array with specified dtype
+c = np.array([
+ [(1.5, 2, 3), (4, 5, 6)],
+ [(3, 2, 1), (4, 5, 6)]
+], dtype=float)
+
+print("Array a:")
+print(a)
+
+print("\nArray b:")
+print(b)
+
+print("\nArray c:")
+print(c)
+```
+
+ Array a:
+ [1 2 3]
+
+ Array b:
+ [[1.5 2. 3. ]
+ [4. 5. 6. ]]
+
+ Array c:
+ [[[1.5 2. 3. ]
+ [4. 5. 6. ]]
+
+ [[3. 2. 1. ]
+ [4. 5. 6. ]]]
+
+
+## Initial Placeholders
+
+
+
+```python
+# Create an array of zeros
+zeros_arr = np.zeros((3, 4))
+
+# Create an array of ones
+ones_arr = np.ones((2, 3, 4))
+
+# Create an array of evenly spaced values (step value)
+d = np.arange(10, 25, 5)
+
+# Create an array of evenly spaced values (number of samples)
+e = np.linspace(0, 2, 9)
+
+# Create a constant array
+f = np.full((2, 2), 7)
+
+# Create a 2x2 identity matrix
+g = np.eye(2)
+
+# Create an array with random values
+random_arr = np.random.random((2, 2))
+
+# Create an empty array
+empty_arr = np.empty((3, 2))
+
+print("zeros_arr:")
+print(zeros_arr)
+
+print("\nones_arr:")
+print(ones_arr)
+
+print("\nd:")
+print(d)
+
+print("\ne:")
+print(e)
+
+print("\nf:")
+print(f)
+
+print("\ng:")
+print(g)
+
+print("\nrandom_arr:")
+print(random_arr)
+
+print("\nempty_arr:")
+print(empty_arr)
+
+```
+
+ zeros_arr:
+ [[0. 0. 0. 0.]
+ [0. 0. 0. 0.]
+ [0. 0. 0. 0.]]
+
+ ones_arr:
+ [[[1. 1. 1. 1.]
+ [1. 1. 1. 1.]
+ [1. 1. 1. 1.]]
+
+ [[1. 1. 1. 1.]
+ [1. 1. 1. 1.]
+ [1. 1. 1. 1.]]]
+
+ d:
+ [10 15 20]
+
+ e:
+ [0. 0.25 0.5 0.75 1. 1.25 1.5 1.75 2. ]
+
+ f:
+ [[7 7]
+ [7 7]]
+
+ g:
+ [[1. 0.]
+ [0. 1.]]
+
+ random_arr:
+ [[0.00439382 0.02702873]
+ [0.19578698 0.34798592]]
+
+ empty_arr:
+ [[1.5 2. ]
+ [3. 4. ]
+ [5. 6. ]]
+
+
+## NumPy Data Types
+
+
+
+```python
+# Signed 64-bit integer types
+int64_type = np.int64
+
+# Standard double-precision floating point
+float32_type = np.float32
+
+# Complex numbers represented by 128 floats
+complex_type = np.complex128
+
+# Boolean type storing TRUE and FALSE values
+bool_type = np.bool_
+
+# Python object type
+object_type = np.object_
+
+# Fixed-length string type
+string_type = np.string_
+
+# Fixed-length unicode type
+unicode_type = np.unicode_
+
+print("int64_type:", int64_type)
+print("float32_type:", float32_type)
+print("complex_type:", complex_type)
+print("bool_type:", bool_type)
+print("object_type:", object_type)
+print("string_type:", string_type)
+print("unicode_type:", unicode_type)
+```
+
+ int64_type:
+ float32_type:
+ complex_type:
+ bool_type:
+ object_type:
+ string_type:
+ unicode_type:
+
+
+## Inspecting Array Properties
+
+
+```python
+# Array dimensions
+a_shape = a.shape
+
+# Length of array
+a_length = len(a)
+
+# Number of array dimensions
+b_ndim = b.ndim
+
+# Number of array elements
+e_size = e.size
+
+# Data type of array elements
+b_dtype = b.dtype
+
+# Name of data type
+b_dtype_name = b.dtype.name
+
+# Convert an array to a different type
+b_as_int = b.astype(int)
+
+print("a_shape:")
+print(a_shape)
+
+print("\na_length:")
+print(a_length)
+
+print("\nb_ndim:")
+print(b_ndim)
+
+print("\ne_size:")
+print(e_size)
+
+print("\nb_dtype:")
+print(b_dtype)
+
+print("\nb_dtype_name:")
+print(b_dtype_name)
+
+print("\nb_as_int:")
+print(b_as_int)
+
+```
+
+ a_shape:
+ (3,)
+
+ a_length:
+ 3
+
+ b_ndim:
+ 2
+
+ e_size:
+ 9
+
+ b_dtype:
+ float64
+
+ b_dtype_name:
+ float64
+
+ b_as_int:
+ [[1 2 3]
+ [4 5 6]]
+
+
+## Arithmetic Operations
+
+
+```python
+# Example values for arrays
+a = np.array([1, 2, 3])
+b = np.array([
+ (1.5, 2, 3),
+ (4, 5, 6)
+], dtype=float)
+e = np.array([2, 3, 4])
+d = np.arange(10, 25, 5)
+
+# Subtraction
+subtraction_result = a - b
+subtraction_np = np.subtract(a, b)
+
+# Addition
+addition_result = b + a
+addition_np = np.add(b, a)
+
+# Division
+division_result = a / b
+division_np = np.divide(a, b)
+
+# Multiplication
+multiplication_result = a * b
+multiplication_np = np.multiply(a, b)
+
+# Exponentiation
+exponentiation_result = np.exp(b)
+
+# Square root
+sqrt_result = np.sqrt(b)
+
+# Sine of an array
+sin_result = np.sin(a)
+
+# Element-wise cosine
+cos_result = np.cos(b)
+
+# Element-wise natural logarithm
+log_result = np.log(a)
+
+# Dot product
+dot_product_result = np.dot(e, d)
+
+print("subtraction_result:")
+print(subtraction_result)
+
+print("\nsubtraction_np:")
+print(subtraction_np)
+
+print("\naddition_result:")
+print(addition_result)
+
+print("\naddition_np:")
+print(addition_np)
+
+print("\ndivision_result:")
+print(division_result)
+
+print("\ndivision_np:")
+print(division_np)
+
+print("\nmultiplication_result:")
+print(multiplication_result)
+
+print("\nmultiplication_np:")
+print(multiplication_np)
+
+print("\nexponentiation_result:")
+print(exponentiation_result)
+
+print("\nsqrt_result:")
+print(sqrt_result)
+
+print("\nsin_result:")
+print(sin_result)
+
+print("\ncos_result:")
+print(cos_result)
+
+print("\nlog_result:")
+print(log_result)
+
+print("\ndot_product_result:")
+print(dot_product_result)
+```
+
+ subtraction_result:
+ [[-0.5 0. 0. ]
+ [-3. -3. -3. ]]
+
+ subtraction_np:
+ [[-0.5 0. 0. ]
+ [-3. -3. -3. ]]
+
+ addition_result:
+ [[2.5 4. 6. ]
+ [5. 7. 9. ]]
+
+ addition_np:
+ [[2.5 4. 6. ]
+ [5. 7. 9. ]]
+
+ division_result:
+ [[0.66666667 1. 1. ]
+ [0.25 0.4 0.5 ]]
+
+ division_np:
+ [[0.66666667 1. 1. ]
+ [0.25 0.4 0.5 ]]
+
+ multiplication_result:
+ [[ 1.5 4. 9. ]
+ [ 4. 10. 18. ]]
+
+ multiplication_np:
+ [[ 1.5 4. 9. ]
+ [ 4. 10. 18. ]]
+
+ exponentiation_result:
+ [[ 4.48168907 7.3890561 20.08553692]
+ [ 54.59815003 148.4131591 403.42879349]]
+
+ sqrt_result:
+ [[1.22474487 1.41421356 1.73205081]
+ [2. 2.23606798 2.44948974]]
+
+ sin_result:
+ [0.84147098 0.90929743 0.14112001]
+
+ cos_result:
+ [[ 0.0707372 -0.41614684 -0.9899925 ]
+ [-0.65364362 0.28366219 0.96017029]]
+
+ log_result:
+ [0. 0.69314718 1.09861229]
+
+ dot_product_result:
+ 145
+
+
+## Comparison Operations
+
+
+
+```python
+# Element-wise comparison for equality
+equality_comparison = a == b
+
+# Element-wise comparison for less than
+less_than_comparison = a < 2
+
+# Array-wise comparison using np.array_equal
+np_equal = np.array_equal(a, b)
+
+print("equality_comparison:")
+print(equality_comparison)
+
+print("\nless_than_comparison:")
+print(less_than_comparison)
+
+print("\nnp_equal:")
+print(np_equal)
+```
+
+ equality_comparison:
+ [[False True True]
+ [False False False]]
+
+ less_than_comparison:
+ [ True False False]
+
+ np_equal:
+ False
+
+
+## Aggregate Functions
+
+
+
+```python
+# Array-wise sum
+array_sum = a.sum()
+
+# Array-wise minimum value
+array_min = a.min()
+
+# Maximum value of an array row
+row_max = b.max(axis=0)
+
+# Cumulative sum of the elements
+cumulative_sum = b.cumsum(axis=1)
+
+# Mean
+array_mean = a.mean()
+
+# Median
+array_median = np.median(b)
+
+# Correlation coefficient between a and the first row of b
+corr_coefficient = np.corrcoef(a, b[0])
+
+# Standard deviation
+std_deviation = np.std(b)
+
+print("array_sum:")
+print(array_sum)
+
+print("\narray_min:")
+print(array_min)
+
+print("\nrow_max:")
+print(row_max)
+
+print("\ncumulative_sum:")
+print(cumulative_sum)
+
+print("\narray_mean:")
+print(array_mean)
+
+print("\narray_median:")
+print(array_median)
+
+print("\ncorr_coefficient:")
+print(corr_coefficient)
+
+print("\nstd_deviation:")
+print(std_deviation)
+```
+
+ array_sum:
+ 6
+
+ array_min:
+ 1
+
+ row_max:
+ [4. 5. 6.]
+
+ cumulative_sum:
+ [[ 1.5 3.5 6.5]
+ [ 4. 9. 15. ]]
+
+ array_mean:
+ 2.0
+
+ array_median:
+ 3.5
+
+ corr_coefficient:
+ [[1. 0.98198051]
+ [0.98198051 1. ]]
+
+ std_deviation:
+ 1.5920810978785667
+
+
+## Copying Arrays
+
+
+
+```python
+# Create a view of the array with the same data
+array_view = a.view()
+
+# Create a copy of the array
+array_copy = np.copy(a)
+
+# Create a deep copy of the array
+array_deep_copy = a.copy()
+
+# Sort an array
+a.sort()
+
+# Sort the elements of an array's axis
+c.sort(axis=0)
+
+print("array_view:")
+print(array_view)
+
+print("\narray_copy:")
+print(array_copy)
+
+print("\narray_deep_copy:")
+print(array_deep_copy)
+```
+
+ array_view:
+ [1 2 3]
+
+ array_copy:
+ [1 2 3]
+
+ array_deep_copy:
+ [1 2 3]
+
+
+## Sorting Arrays
+
+
+
+```python
+# Sort an array
+a.sort()
+
+# Sort the elements of an array's axis
+c.sort(axis=0)
+
+print("Sorted a:")
+print(a)
+
+print("\nSorted c (axis=0):")
+print(c)
+```
+
+ Sorted a:
+ [1 2 3]
+
+ Sorted c (axis=0):
+ [[[1.5 2. 1. ]
+ [4. 5. 6. ]]
+
+ [[3. 2. 3. ]
+ [4. 5. 6. ]]]
+
+
+## Subsetting, Slicing, and Indexing
+
+
+```python
+# Subsetting
+element_at_2nd_index = a[2]
+
+# Select the element at row 1, column 2
+element_row_1_col_2 = b[1, 2]
+
+# Slicing
+sliced_a = a[0:2]
+
+# Select items at rows 0 and 1 in column 1
+sliced_b = b[0:2, 1]
+
+# Select all items at row 0
+sliced_c = b[:1]
+
+# Reversed array
+reversed_a = a[::-1]
+
+# Boolean Indexing
+a_less_than_2 = a[a < 2]
+
+# Fancy Indexing
+fancy_indexing_result = b[
+ [1, 0, 1, 0],
+ [0, 1, 2, 0]
+]
+fancy_indexing_subset = b[[1, 0, 1, 0]][:, [0, 1, 2, 0]]
+
+print("element_at_2nd_index:")
+print(element_at_2nd_index)
+
+print("\nelement_row_1_col_2:")
+print(element_row_1_col_2)
+
+print("\nsliced_a:")
+print(sliced_a)
+
+print("\nsliced_b:")
+print(sliced_b)
+
+print("\nsliced_c:")
+print(sliced_c)
+
+print("\nreversed_a:")
+print(reversed_a)
+
+print("\na_less_than_2:")
+print(a_less_than_2)
+
+print("\nfancy_indexing_result:")
+print(fancy_indexing_result)
+
+print("\nfancy_indexing_subset:")
+print(fancy_indexing_subset)
+```
+
+ element_at_2nd_index:
+ 3
+
+ element_row_1_col_2:
+ 6.0
+
+ sliced_a:
+ [1 2]
+
+ sliced_b:
+ [2. 5.]
+
+ sliced_c:
+ [[1.5 2. 3. ]]
+
+ reversed_a:
+ [3 2 1]
+
+ a_less_than_2:
+ [1]
+
+ fancy_indexing_result:
+ [4. 2. 6. 1.5]
+
+ fancy_indexing_subset:
+ [[4. 5. 6. 4. ]
+ [1.5 2. 3. 1.5]
+ [4. 5. 6. 4. ]
+ [1.5 2. 3. 1.5]]
+
+
+## Array Manipulation
+
+
+
+```python
+# Example values for arrays
+a = np.array([3, 1, 2])
+b = np.array([
+ (1.5, 2, 3),
+ (4, 5, 6)
+], dtype=float)
+h = np.array([
+ [1, 2, 3],
+ [4, 5, 6]
+])
+g = np.array([7, 8, 9])
+d = np.array([4, 5, 6])
+e = np.array([10, 11])
+f = np.array([12, 13])
+c = np.array([
+ (3, 1, 2),
+ (6, 4, 5)
+], dtype=int)
+
+# Transposing Array
+transposed_b = np.transpose(b)
+transposed_b_T = transposed_b.T
+
+# Changing Array Shape
+flattened_h = h.ravel()
+reshaped_g = g.reshape(3, -2)
+
+# Adding/Removing Elements
+resized_h = np.resize(h, (2, 6))  # np.resize repeats the data to fill the new shape (ndarray.resize would raise here)
+appended_array = np.append(h, g)
+inserted_array = np.insert(a, 1, 5)
+deleted_array = np.delete(a, [1])
+
+# Combining Arrays
+concatenated_arrays = np.concatenate((a, d), axis=0)
+vstacked_arrays = np.vstack((a, b))
+hstacked_arrays = np.hstack((e, f))
+column_stacked_arrays = np.column_stack((a, d))
+c_stacked_arrays = np.c_[a, d]
+
+# Splitting Arrays
+hsplit_array = np.hsplit(a, 3)
+vsplit_array = np.vsplit(c, 2)
+
+print("transposed_b:")
+print(transposed_b)
+
+print("\ntransposed_b_T:")
+print(transposed_b_T)
+
+print("\nflattened_h:")
+print(flattened_h)
+
+print("\nreshaped_g:")
+print(reshaped_g)
+
+print("\nresized_h:")
+print(resized_h)
+
+print("\nappended_array:")
+print(appended_array)
+
+print("\ninserted_array:")
+print(inserted_array)
+
+print("\ndeleted_array:")
+print(deleted_array)
+
+print("\nconcatenated_arrays:")
+print(concatenated_arrays)
+
+print("\nvstacked_arrays:")
+print(vstacked_arrays)
+
+print("\nhstacked_arrays:")
+print(hstacked_arrays)
+
+print("\ncolumn_stacked_arrays:")
+print(column_stacked_arrays)
+
+print("\nc_stacked_arrays:")
+print(c_stacked_arrays)
+
+print("\nhsplit_array:")
+print(hsplit_array)
+
+print("\nvsplit_array:")
+print(vsplit_array)
+
+```
+
+ transposed_b:
+ [[1.5 4. ]
+ [2. 5. ]
+ [3. 6. ]]
+
+ transposed_b_T:
+ [[1.5 2. 3. ]
+ [4. 5. 6. ]]
+
+ flattened_h:
+ [1 2 3 4 5 6]
+
+ reshaped_g:
+ [[7]
+ [8]
+ [9]]
+
+ resized_h:
+ [[1 2 3 4 5 6]
+ [1 2 3 4 5 6]]
+
+ appended_array:
+ [1 2 3 4 5 6 7 8 9]
+
+ inserted_array:
+ [3 5 1 2]
+
+ deleted_array:
+ [3 2]
+
+ concatenated_arrays:
+ [3 1 2 4 5 6]
+
+ vstacked_arrays:
+ [[3. 1. 2. ]
+ [1.5 2. 3. ]
+ [4. 5. 6. ]]
+
+ hstacked_arrays:
+ [10 11 12 13]
+
+ column_stacked_arrays:
+ [[3 4]
+ [1 5]
+ [2 6]]
+
+ c_stacked_arrays:
+ [[3 4]
+ [1 5]
+ [2 6]]
+
+ hsplit_array:
+ [array([3]), array([1]), array([2])]
+
+ vsplit_array:
+ [array([[3, 1, 2]]), array([[6, 4, 5]])]
+
+
+## Asking for Help
+
+
+
+```python
+# Get information about a NumPy function or object
+np.info(np.ndarray.dtype)
+```
+
+ Data-type of the array's elements.
+
+ Parameters
+ ----------
+ None
+
+ Returns
+ -------
+ d : numpy dtype object
+
+ See Also
+ --------
+ numpy.dtype
+
+ Examples
+ --------
+ >>> x
+ array([[0, 1],
+ [2, 3]])
+ >>> x.dtype
+ dtype('int32')
+ >>> type(x.dtype)
+
+
+
+## Saving & Loading
+
+**On Disk**
+
+``` python
+# Save a NumPy array to a file
+a = np.array([1, 2, 3])
+np.save('my_array', a)
+
+# Save multiple NumPy arrays to a compressed file
+b = np.array([
+ (1.5, 2, 3),
+ (4, 5, 6)
+ ], dtype=float)
+np.savez('array.npz', a=a, b=b)
+
+# Load a NumPy array from a file
+loaded_array = np.load('my_array.npy')
+```
+
+**Text Files**
+
+``` python
+# Load data from a text file
+loaded_txt = np.loadtxt("myfile.txt")
+
+# Load data from a CSV file with specified delimiter
+loaded_csv = np.genfromtxt(
+ "my_file.csv",
+ delimiter=',')
+
+# Save a NumPy array to a text file
+a = np.array([1, 2, 3])
+np.savetxt(
+ "myarray.txt",
+ a,
+ delimiter=" ")
+```
diff --git a/docs/examples/pandas/pandas.ipynb b/docs/examples/pandas/pandas.ipynb
index 0a38b45..3695510 100644
--- a/docs/examples/pandas/pandas.ipynb
+++ b/docs/examples/pandas/pandas.ipynb
@@ -6,12 +6,6 @@
"metadata": {},
"source": [
"\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
diff --git a/docs/examples/pandas/pandas.md b/docs/examples/pandas/pandas.md
new file mode 100644
index 0000000..24fbcb6
--- /dev/null
+++ b/docs/examples/pandas/pandas.md
@@ -0,0 +1,1393 @@
+
+
+# Pandas
+
+
![pandas logo](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/pandas/pandas.png)
+
+[Pandas](https://pandas.pydata.org/) is built on NumPy and provides easy-to-use
+data structures and data analysis tools for the Python
+programming language.
+
+## Install and import Pandas
+
+`
+$ pip install pandas
+`
+
+
+```python
+# Import Pandas convention
+import pandas as pd
+```
+
+## Pandas Data Structures
+
+**Series**
+
+
![pandas series](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/pandas/serie.png)
+
+A **one-dimensional** labeled array capable of holding any data type.
+
+
+```python
+# Create a pandas Series
+s = pd.Series(
+ [3, -5, 7, 4],
+ index=['a', 'b', 'c', 'd']
+)
+
+# Print the pandas Series
+print("s:")
+s
+```
+
+ s:
+
+
+
+
+
+ a 3
+ b -5
+ c 7
+ d 4
+ dtype: int64
+
+
+
+**DataFrame**
+
+
![pandas dataframe](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/pandas/df.png)
+
+A **two-dimensional** labeled data structure with columns of potentially different types.
+
+
+```python
+# Create a pandas DataFrame
+data = {
+ 'Country': ['Belgium', 'India', 'Brazil'],
+ 'Capital': ['Brussels', 'New Delhi', 'Brasília'],
+ 'Population': [11190846, 1303171035, 207847528]
+}
+df = pd.DataFrame(
+ data,
+ columns=['Country', 'Capital', 'Population']
+)
+
+# Print the DataFrame 'df'
+print("\ndf:")
+df
+```
+
+
+ df:
+
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+ Capital |
+ Population |
+
+
+
+
+ 0 |
+ Belgium |
+ Brussels |
+ 11190846 |
+
+
+ 1 |
+ India |
+ New Delhi |
+ 1303171035 |
+
+
+ 2 |
+ Brazil |
+ Brasília |
+ 207847528 |
+
+
+
+
+
+
+
+## Getting Elements
+
+
+
+```python
+# Get one element from a Series
+s['b']
+```
+
+
+
+
+ -5
+
+
+
+
+```python
+# Get subset of a DataFrame
+df[1:]
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+ Capital |
+ Population |
+
+
+
+
+ 1 |
+ India |
+ New Delhi |
+ 1303171035 |
+
+
+ 2 |
+ Brazil |
+ Brasília |
+ 207847528 |
+
+
+
+
+
+
+
+## Selecting, Boolean Indexing & Setting
+
+
+
+```python
+# Select a single value by row & column position
+df.iloc[[0],[0]]
+# Output: 'Belgium'
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+
+
+
+
+ 0 |
+ Belgium |
+
+
+
+
+
+
+
+
+```python
+# Select a single value by row & column labels
+df.loc[[0], ['Country']]
+# Output: 'Belgium'
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+
+
+
+
+ 0 |
+ Belgium |
+
+
+
+
+
+
+
+
+```python
+# Select single row of subset of rows
+df.loc[2]
+# Output:
+# Country Brazil
+# Capital Brasília
+# Population 207847528
+```
+
+
+
+
+ Country Brazil
+ Capital Brasília
+ Population 207847528
+ Name: 2, dtype: object
+
+
+
+
+```python
+# Select a single column of subset of columns
+df.loc[:,'Capital']
+# Output:
+# 0 Brussels
+# 1 New Delhi
+# 2 Brasília
+```
+
+
+
+
+ 0 Brussels
+ 1 New Delhi
+ 2 Brasília
+ Name: Capital, dtype: object
+
+
+
+
+```python
+# Boolean indexing - Series s where value is not > 1
+s[~(s > 1)]
+```
+
+
+
+
+ b -5
+ dtype: int64
+
+
+
+
+```python
+# Boolean indexing - s where value is <-1 or >2
+s[(s < -1) | (s > 2)]
+```
+
+
+
+
+ a 3
+ b -5
+ c 7
+ d 4
+ dtype: int64
+
+
+
+
+```python
+# Use filter to adjust DataFrame
+df[df['Population'] > 1200000000]
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+ Capital |
+ Population |
+
+
+
+
+ 1 |
+ India |
+ New Delhi |
+ 1303171035 |
+
+
+
+
+
+
+
+
+```python
+# Setting index a of Series s to 6
+s['a'] = 6
+s
+```
+
+
+
+
+ a 6
+ b -5
+ c 7
+ d 4
+ dtype: int64
+
+
+
+## Dropping
+
+
+
+```python
+# Drop values from rows (axis=0)
+s.drop(['a', 'c'])
+```
+
+
+
+
+ b -5
+ d 4
+ dtype: int64
+
+
+
+
+```python
+# Drop values from columns (axis=1)
+df.drop('Country', axis=1)
+```
+
+
+
+
+
+
+
+
+
+ |
+ Capital |
+ Population |
+
+
+
+
+ 0 |
+ Brussels |
+ 11190846 |
+
+
+ 1 |
+ New Delhi |
+ 1303171035 |
+
+
+ 2 |
+ Brasília |
+ 207847528 |
+
+
+
+
+
+
+
+## Sort & Rank
+
+
+
+```python
+# Sort by labels along an axis
+df.sort_index()
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+ Capital |
+ Population |
+
+
+
+
+ 0 |
+ Belgium |
+ Brussels |
+ 11190846 |
+
+
+ 1 |
+ India |
+ New Delhi |
+ 1303171035 |
+
+
+ 2 |
+ Brazil |
+ Brasília |
+ 207847528 |
+
+
+
+
+
+
+
+
+```python
+# Sort by the values along an axis
+df.sort_values(by='Country')
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+ Capital |
+ Population |
+
+
+
+
+ 0 |
+ Belgium |
+ Brussels |
+ 11190846 |
+
+
+ 2 |
+ Brazil |
+ Brasília |
+ 207847528 |
+
+
+ 1 |
+ India |
+ New Delhi |
+ 1303171035 |
+
+
+
+
+
+
+
+
+```python
+# Assign ranks to entries
+df.rank()
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+ Capital |
+ Population |
+
+
+
+
+ 0 |
+ 1.0 |
+ 2.0 |
+ 1.0 |
+
+
+ 1 |
+ 3.0 |
+ 3.0 |
+ 3.0 |
+
+
+ 2 |
+ 2.0 |
+ 1.0 |
+ 2.0 |
+
+
+
+
+
+
+
+## Applying Functions
+
+
+
+```python
+# Define a function
+f = lambda x: x*2
+```
+
+
+```python
+# Apply function to DataFrame
+df.apply(f)
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+ Capital |
+ Population |
+
+
+
+
+ 0 |
+ BelgiumBelgium |
+ BrusselsBrussels |
+ 22381692 |
+
+
+ 1 |
+ IndiaIndia |
+ New DelhiNew Delhi |
+ 2606342070 |
+
+
+ 2 |
+ BrazilBrazil |
+ BrasíliaBrasília |
+ 415695056 |
+
+
+
+
+
+
+
+
+```python
+# Apply function element-wise
+df.applymap(f)
+```
+
+
+
+
+
+
+
+
+
+ |
+ Country |
+ Capital |
+ Population |
+
+
+
+
+ 0 |
+ BelgiumBelgium |
+ BrusselsBrussels |
+ 22381692 |
+
+
+ 1 |
+ IndiaIndia |
+ New DelhiNew Delhi |
+ 2606342070 |
+
+
+ 2 |
+ BrazilBrazil |
+ BrasíliaBrasília |
+ 415695056 |
+
+
+
+
+
+
+
+## Basic Information
+
+
+
+```python
+# Get the shape (rows, columns)
+df.shape
+```
+
+
+
+
+ (3, 3)
+
+
+
+
+```python
+# Describe index
+df.index
+```
+
+
+
+
+ RangeIndex(start=0, stop=3, step=1)
+
+
+
+
+```python
+# Describe DataFrame columns
+df.columns
+```
+
+
+
+
+ Index(['Country', 'Capital', 'Population'], dtype='object')
+
+
+
+
+```python
+# Info on DataFrame
+df.info()
+```
+
+
+ RangeIndex: 3 entries, 0 to 2
+ Data columns (total 3 columns):
+ # Column Non-Null Count Dtype
+ --- ------ -------------- -----
+ 0 Country 3 non-null object
+ 1 Capital 3 non-null object
+ 2 Population 3 non-null int64
+ dtypes: int64(1), object(2)
+ memory usage: 200.0+ bytes
+
+
+
+```python
+# Number of non-NA values
+df.count()
+```
+
+
+
+
+ Country 3
+ Capital 3
+ Population 3
+ dtype: int64
+
+
+
+## Summary
+
+
+```python
+# Sum of values
+sum_values = df['Population'].sum()
+
+# Cumulative sum of values
+cumulative_sum_values = df['Population'].cumsum()
+
+# Minimum/maximum values
+min_values = df['Population'].min()
+max_values = df['Population'].max()
+
+# Index of minimum/maximum values
+idx_min_values = df['Population'].idxmin()
+idx_max_values = df['Population'].idxmax()
+
+# Summary statistics
+summary_stats = df['Population'].describe()
+
+# Mean of values
+mean_values = df['Population'].mean()
+
+# Median of values
+median_values = df['Population'].median()
+
+print("Example DataFrame:")
+print(df)
+
+print("\nSum of values:")
+print(sum_values)
+
+print("\nCumulative sum of values:")
+print(cumulative_sum_values)
+
+print("\nMinimum values:")
+print(min_values)
+
+print("\nMaximum values:")
+print(max_values)
+
+print("\nIndex of minimum values:")
+print(idx_min_values)
+
+print("\nIndex of maximum values:")
+print(idx_max_values)
+
+print("\nSummary statistics:")
+print(summary_stats)
+
+print("\nMean values:")
+print(mean_values)
+
+print("\nMedian values:")
+print(median_values)
+
+```
+
+ Example DataFrame:
+ Country Capital Population
+ 0 Belgium Brussels 11190846
+ 1 India New Delhi 1303171035
+ 2 Brazil Brasília 207847528
+
+ Sum of values:
+ 1522209409
+
+ Cumulative sum of values:
+ 0 11190846
+ 1 1314361881
+ 2 1522209409
+ Name: Population, dtype: int64
+
+ Minimum values:
+ 11190846
+
+ Maximum values:
+ 1303171035
+
+ Index of minimum values:
+ 0
+
+ Index of maximum values:
+ 1
+
+ Summary statistics:
+ count 3.000000e+00
+ mean 5.074031e+08
+ std 6.961346e+08
+ min 1.119085e+07
+ 25% 1.095192e+08
+ 50% 2.078475e+08
+ 75% 7.555093e+08
+ max 1.303171e+09
+ Name: Population, dtype: float64
+
+ Mean values:
+ 507403136.3333333
+
+ Median values:
+ 207847528.0
+
+
+## Internal Data Alignment
+
+
+
+```python
+# Create Series with different indices
+s3 = pd.Series([7, -2, 3], index=['a', 'c', 'd'])
+s3
+```
+
+
+
+
+ a 7
+ c -2
+ d 3
+ dtype: int64
+
+
+
+
+```python
+# Add two Series with different indices
+result = s + s3
+result
+```
+
+
+
+
+ a 13.0
+ b NaN
+ c 5.0
+ d 7.0
+ dtype: float64
+
+
+
+## Arithmetic Operations with Fill Methods
+
+
+```python
+# Example Series
+s = pd.Series([3, -5, 7, 4], index=['a', 'b', 'c', 'd'])
+s3 = pd.Series([10, 2, 4, 8], index=['a', 'b', 'd', 'e'])
+
+# Perform arithmetic operations with fill methods
+result_add = s.add(s3, fill_value=0)
+result_sub = s.sub(s3, fill_value=2)
+result_div = s.div(s3, fill_value=4)
+result_mul = s.mul(s3, fill_value=3)
+
+print("result_add:")
+print(result_add)
+
+print("\nresult_sub:")
+print(result_sub)
+
+print("\nresult_div:")
+print(result_div)
+
+print("\nresult_mul:")
+print(result_mul)
+```
+
+ result_add:
+ a 13.0
+ b -3.0
+ c 7.0
+ d 8.0
+ e 8.0
+ dtype: float64
+
+ result_sub:
+ a -7.0
+ b -7.0
+ c 5.0
+ d 0.0
+ e -6.0
+ dtype: float64
+
+ result_div:
+ a 0.30
+ b -2.50
+ c 1.75
+ d 1.00
+ e 0.50
+ dtype: float64
+
+ result_mul:
+ a 30.0
+ b -10.0
+ c 21.0
+ d 16.0
+ e 24.0
+ dtype: float64
+
+
+## Asking For Help
+
+
+```python
+# Display help for a function or object
+help(pd.Series.loc)
+```
+
+ Help on property:
+
+ Access a group of rows and columns by label(s) or a boolean array.
+
+ ``.loc[]`` is primarily label based, but may also be used with a
+ boolean array.
+
+ Allowed inputs are:
+
+ - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
+ interpreted as a *label* of the index, and **never** as an
+ integer position along the index).
+ - A list or array of labels, e.g. ``['a', 'b', 'c']``.
+ - A slice object with labels, e.g. ``'a':'f'``.
+
+ .. warning:: Note that contrary to usual python slices, **both** the
+ start and the stop are included
+
+ - A boolean array of the same length as the axis being sliced,
+ e.g. ``[True, False, True]``.
+ - An alignable boolean Series. The index of the key will be aligned before
+ masking.
+ - An alignable Index. The Index of the returned selection will be the input.
+ - A ``callable`` function with one argument (the calling Series or
+ DataFrame) and that returns valid output for indexing (one of the above)
+
+ See more at :ref:`Selection by Label `.
+
+ Raises
+ ------
+ KeyError
+ If any items are not found.
+ IndexingError
+ If an indexed key is passed and its index is unalignable to the frame index.
+
+ See Also
+ --------
+ DataFrame.at : Access a single value for a row/column label pair.
+ DataFrame.iloc : Access group of rows and columns by integer position(s).
+ DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
+ Series/DataFrame.
+ Series.loc : Access group of values using labels.
+
+ Examples
+ --------
+ **Getting values**
+
+ >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
+ ... index=['cobra', 'viper', 'sidewinder'],
+ ... columns=['max_speed', 'shield'])
+ >>> df
+ max_speed shield
+ cobra 1 2
+ viper 4 5
+ sidewinder 7 8
+
+ Single label. Note this returns the row as a Series.
+
+ >>> df.loc['viper']
+ max_speed 4
+ shield 5
+ Name: viper, dtype: int64
+
+ List of labels. Note using ``[[]]`` returns a DataFrame.
+
+ >>> df.loc[['viper', 'sidewinder']]
+ max_speed shield
+ viper 4 5
+ sidewinder 7 8
+
+ Single label for row and column
+
+ >>> df.loc['cobra', 'shield']
+ 2
+
+ Slice with labels for row and single label for column. As mentioned
+ above, note that both the start and stop of the slice are included.
+
+ >>> df.loc['cobra':'viper', 'max_speed']
+ cobra 1
+ viper 4
+ Name: max_speed, dtype: int64
+
+ Boolean list with the same length as the row axis
+
+ >>> df.loc[[False, False, True]]
+ max_speed shield
+ sidewinder 7 8
+
+ Alignable boolean Series:
+
+ >>> df.loc[pd.Series([False, True, False],
+ ... index=['viper', 'sidewinder', 'cobra'])]
+ max_speed shield
+ sidewinder 7 8
+
+ Index (same behavior as ``df.reindex``)
+
+ >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
+ max_speed shield
+ foo
+ cobra 1 2
+ viper 4 5
+
+ Conditional that returns a boolean Series
+
+ >>> df.loc[df['shield'] > 6]
+ max_speed shield
+ sidewinder 7 8
+
+ Conditional that returns a boolean Series with column labels specified
+
+ >>> df.loc[df['shield'] > 6, ['max_speed']]
+ max_speed
+ sidewinder 7
+
+ Callable that returns a boolean Series
+
+ >>> df.loc[lambda df: df['shield'] == 8]
+ max_speed shield
+ sidewinder 7 8
+
+ **Setting values**
+
+ Set value for all items matching the list of labels
+
+ >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
+ >>> df
+ max_speed shield
+ cobra 1 2
+ viper 4 50
+ sidewinder 7 50
+
+ Set value for an entire row
+
+ >>> df.loc['cobra'] = 10
+ >>> df
+ max_speed shield
+ cobra 10 10
+ viper 4 50
+ sidewinder 7 50
+
+ Set value for an entire column
+
+ >>> df.loc[:, 'max_speed'] = 30
+ >>> df
+ max_speed shield
+ cobra 30 10
+ viper 30 50
+ sidewinder 30 50
+
+ Set value for rows matching callable condition
+
+ >>> df.loc[df['shield'] > 35] = 0
+ >>> df
+ max_speed shield
+ cobra 30 10
+ viper 0 0
+ sidewinder 0 0
+
+ **Getting values on a DataFrame with an index that has integer labels**
+
+ Another example using integers for the index
+
+ >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
+ ... index=[7, 8, 9], columns=['max_speed', 'shield'])
+ >>> df
+ max_speed shield
+ 7 1 2
+ 8 4 5
+ 9 7 8
+
+ Slice with integer labels for rows. As mentioned above, note that both
+ the start and stop of the slice are included.
+
+ >>> df.loc[7:9]
+ max_speed shield
+ 7 1 2
+ 8 4 5
+ 9 7 8
+
+ **Getting values with a MultiIndex**
+
+ A number of examples using a DataFrame with a MultiIndex
+
+ >>> tuples = [
+ ... ('cobra', 'mark i'), ('cobra', 'mark ii'),
+ ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
+ ... ('viper', 'mark ii'), ('viper', 'mark iii')
+ ... ]
+ >>> index = pd.MultiIndex.from_tuples(tuples)
+ >>> values = [[12, 2], [0, 4], [10, 20],
+ ... [1, 4], [7, 1], [16, 36]]
+ >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
+ >>> df
+ max_speed shield
+ cobra mark i 12 2
+ mark ii 0 4
+ sidewinder mark i 10 20
+ mark ii 1 4
+ viper mark ii 7 1
+ mark iii 16 36
+
+ Single label. Note this returns a DataFrame with a single index.
+
+ >>> df.loc['cobra']
+ max_speed shield
+ mark i 12 2
+ mark ii 0 4
+
+ Single index tuple. Note this returns a Series.
+
+ >>> df.loc[('cobra', 'mark ii')]
+ max_speed 0
+ shield 4
+ Name: (cobra, mark ii), dtype: int64
+
+ Single label for row and column. Similar to passing in a tuple, this
+ returns a Series.
+
+ >>> df.loc['cobra', 'mark i']
+ max_speed 12
+ shield 2
+ Name: (cobra, mark i), dtype: int64
+
+ Single tuple. Note using ``[[]]`` returns a DataFrame.
+
+ >>> df.loc[[('cobra', 'mark ii')]]
+ max_speed shield
+ cobra mark ii 0 4
+
+ Single tuple for the index with a single label for the column
+
+ >>> df.loc[('cobra', 'mark i'), 'shield']
+ 2
+
+ Slice from index tuple to single label
+
+ >>> df.loc[('cobra', 'mark i'):'viper']
+ max_speed shield
+ cobra mark i 12 2
+ mark ii 0 4
+ sidewinder mark i 10 20
+ mark ii 1 4
+ viper mark ii 7 1
+ mark iii 16 36
+
+ Slice from index tuple to index tuple
+
+ >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
+ max_speed shield
+ cobra mark i 12 2
+ mark ii 0 4
+ sidewinder mark i 10 20
+ mark ii 1 4
+ viper mark ii 7 1
+
+
+
+## Read and Write
+
+**CSV**
+
+```python
+# Read from CSV
+df_read = pd.read_csv(
+ 'file.csv',
+ header=None,
+ nrows=5
+)
+
+# Write to CSV
+df.to_csv('myDataFrame.csv')
+```
+
+**Excel**
+
+
+```python
+# Read from Excel
+df_read_excel = pd.read_excel('file.xlsx')
+
+# Write to Excel
+df.to_excel(
+ 'dir/myDataFrame.xlsx',
+ sheet_name='Sheet1'
+)
+
+# Read multiple sheets from the same file
+xlsx = pd.ExcelFile('file.xls')
+df_from_sheet1 = pd.read_excel(xlsx, 'Sheet1')
+```
+
+**SQL Query**
+
+```python
+from sqlalchemy import create_engine
+engine = create_engine('sqlite:///:memory:')
+
+# Read from SQL Query
+pd.read_sql("SELECT * FROM my_table;", engine)
+
+# Read from Database Table
+pd.read_sql_table('my_table', engine)
+
+# Read from SQL Query using read_sql_query()
+pd.read_sql_query("SELECT * FROM my_table;", engine)
+
+# Write DataFrame to SQL Table
+df.to_sql('myDf', engine)
+```
diff --git a/docs/examples/polars/polars.ipynb b/docs/examples/polars/polars.ipynb
index f5fbf46..d538a84 100644
--- a/docs/examples/polars/polars.ipynb
+++ b/docs/examples/polars/polars.ipynb
@@ -8,12 +8,6 @@
},
"source": [
"\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
diff --git a/docs/examples/polars/polars.md b/docs/examples/polars/polars.md
new file mode 100644
index 0000000..56760d2
--- /dev/null
+++ b/docs/examples/polars/polars.md
@@ -0,0 +1,454 @@
+
+
+# Polars
+
+
+![polars logo](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/polars/polars.png)
+
+[Polars](https://pola-rs.github.io/polars-book/) is a highly performant DataFrame library for manipulating structured data. The core is written in Rust, but the library is also available in Python.
+
+## Install and import Polars
+
+```bash
+$ pip install polars
+```
+
+
+```python
+# Import Polars convention
+import polars as pl
+```
+
+### Creating/reading DataFrames
+
+
+```python
+# Create DataFrame
+df = pl.DataFrame(
+ {
+ "nrs": [1, 2, 3, None, 5],
+ "names": ["foo", "ham", "spam", "egg", None],
+ "random": [0.3, 0.7, 0.1, 0.9, 0.6],
+ "groups": ["A", "A", "B", "C", "B"],
+ }
+)
+```
+
+
+```python
+# Read CSV
+df = pl.read_csv("https://j.mp/iriscsv", has_header=True)
+```
+
+
+```python
+# Read parquet
+df = pl.read_parquet("path.parquet", columns=["select", "columns"])
+```
+
+### Expressions
+Polars expressions can be performed in sequence. This improves readability of code.
+
+
+```python
+df.filter(pl.col("nrs") < 4).groupby("groups").agg(pl.all().sum())
+```
+
+### Subset Observations - rows
+
+
+```python
+# Filter: Extract rows that meet logical criteria.
+df.filter(pl.col("random") > 0.5)
+df.filter((pl.col("groups") == "B") & (pl.col("random") > 0.5))
+```
+
+
+```python
+# Sample
+# Randomly select fraction of rows.
+#df.sample(frac=0.5)
+
+# Randomly select n rows.
+df.sample(n=2)
+```
+
+
+```python
+# Select first n rows
+df.head(n=2)
+
+# Select last n rows.
+df.tail(n=2)
+```
+
+### Subset Variables - columns
+
+
+```python
+# Select multiple columns with specific names.
+df.select(["nrs", "names"])
+```
+
+
+```python
+# Select columns whose name matches regular expression regex.
+df.select(pl.col("^n.*$"))
+```
+
+### Subsets - rows and columns
+
+
+```python
+# Select rows 2-4.
+df[2:4, :]
+```
+
+
+```python
+# Select columns in positions 1 and 3 (first column is 0).
+df[:, [1, 3]]
+```
+
+
+```python
+# Select rows meeting logical condition, and only the specific columns.
+df[df["random"] > 0.5, ["names", "groups"]]
+```
+
+### Reshaping Data – Change layout, sorting, renaming
+
+
+```python
+df2 = pl.DataFrame(
+ {
+ "nrs": [6],
+ "names": ["wow"],
+ "random": [0.9],
+ "groups": ["B"],
+ }
+)
+
+df3 = pl.DataFrame(
+ {
+ "primes": [2, 3, 5, 7, 11],
+ }
+)
+```
+
+
+```python
+# Append rows of DataFrames.
+pl.concat([df, df2])
+```
+
+
+```python
+# Append columns of DataFrames
+pl.concat([df, df3], how="horizontal")
+```
+
+
+```python
+# Gather columns into rows
+df.melt(id_vars="nrs", value_vars=["names", "groups"])
+```
+
+
+```python
+# Spread rows into columns
+df.pivot(values="nrs", index="groups", columns="names")
+```
+
+
+```python
+# Order rows by values of a column (low to high)
+df.sort("random")
+```
+
+
+```python
+# Order rows by values of a column (high to low)
+df.sort("random", reverse=True)
+```
+
+
+```python
+# Rename the columns of a DataFrame
+df.rename({"nrs": "idx"})
+```
+
+
+```python
+# Drop columns from DataFrame
+df.drop(["names", "random"])
+```
+
+### Summarize Data
+
+
+```python
+# Count number of rows with each unique value of variable
+df["groups"].value_counts()
+```
+
+
+```python
+# # of rows in DataFrame
+len(df)
+# or
+df.height
+```
+
+
+```python
+# Tuple of # of rows, # of columns in DataFrame
+df.shape
+```
+
+
+```python
+# # of distinct values in a column
+df["groups"].n_unique()
+```
+
+
+```python
+# Basic descriptive and statistics for each column
+df.describe()
+```
+
+
+```python
+# Aggregation functions
+df.select(
+ [
+ # Sum values
+ pl.sum("random").alias("sum"),
+ # Minimum value
+ pl.min("random").alias("min"),
+ # Maximum value
+ pl.max("random").alias("max"),
+ # or
+ pl.col("random").max().alias("other_max"),
+ # Standard deviation
+ pl.std("random").alias("std_dev"),
+ # Variance
+ pl.var("random").alias("variance"),
+ # Median
+ pl.median("random").alias("median"),
+ # Mean
+ pl.mean("random").alias("mean"),
+ # Quantile
+ pl.quantile("random", 0.75).alias("quantile_0.75"),
+ # or
+ pl.col("random").quantile(0.75).alias("other_quantile_0.75"),
+ # First value
+ pl.first("random").alias("first"),
+ ]
+)
+```
+
+### Group Data
+
+
+```python
+# Group by values in column named "col", returning a GroupBy object
+df.groupby("groups")
+```
+
+
+```python
+# All of the aggregation functions from above can be applied to a group as well
+df.groupby(by="groups").agg(
+ [
+ # Sum values
+ pl.sum("random").alias("sum"),
+ # Minimum value
+ pl.min("random").alias("min"),
+ # Maximum value
+ pl.max("random").alias("max"),
+ # or
+ pl.col("random").max().alias("other_max"),
+ # Standard deviation
+ pl.std("random").alias("std_dev"),
+ # Variance
+ pl.var("random").alias("variance"),
+ # Median
+ pl.median("random").alias("median"),
+ # Mean
+ pl.mean("random").alias("mean"),
+ # Quantile
+ pl.quantile("random", 0.75).alias("quantile_0.75"),
+ # or
+ pl.col("random").quantile(0.75).alias("other_quantile_0.75"),
+ # First value
+ pl.first("random").alias("first"),
+ ]
+)
+```
+
+
+```python
+# Additional GroupBy functions
+df.groupby(by="groups").agg(
+ [
+ # Count the number of values in each group
+ pl.count("random").alias("size"),
+ # Sample one element in each group
+ pl.col("names").apply(lambda group_df: group_df.sample(1)),
+ ]
+)
+```
+
+### Handling Missing Data
+
+
+```python
+# Drop rows with any column having a null value
+df.drop_nulls()
+```
+
+
+```python
+# Replace null values with given value
+df.fill_null(42)
+```
+
+
+```python
+# Replace null values using forward strategy
+df.fill_null(strategy="forward")
+# Other fill strategies are "backward", "min", "max", "mean", "zero" and "one"
+```
+
+
+```python
+# Replace floating point NaN values with given value
+df.fill_nan(42)
+```
+
+### Make New Columns
+
+
+```python
+# Add a new column to the DataFrame
+df.with_column((pl.col("random") * pl.col("nrs")).alias("product"))
+```
+
+
+```python
+# Add several new columns to the DataFrame
+df.with_columns(
+ [
+ (pl.col("random") * pl.col("nrs")).alias("product"),
+ pl.col("names").str.lengths().alias("names_lengths"),
+ ]
+)
+```
+
+
+```python
+# Add a column at index 0 that counts the rows
+df.with_row_count()
+```
+
+### Rolling Functions
+
+
+```python
+# The following rolling functions are available
+import numpy as np
+
+df.select(
+ [
+ pl.col("random"),
+ # Rolling maximum value
+ pl.col("random").rolling_max(window_size=2).alias("rolling_max"),
+ # Rolling mean value
+ pl.col("random").rolling_mean(window_size=2).alias("rolling_mean"),
+ # Rolling median value
+ pl.col("random")
+ .rolling_median(window_size=2, min_periods=2)
+ .alias("rolling_median"),
+ # Rolling minimum value
+ pl.col("random").rolling_min(window_size=2).alias("rolling_min"),
+ # Rolling standard deviation
+ pl.col("random").rolling_std(window_size=2).alias("rolling_std"),
+ # Rolling sum values
+ pl.col("random").rolling_sum(window_size=2).alias("rolling_sum"),
+ # Rolling variance
+ pl.col("random").rolling_var(window_size=2).alias("rolling_var"),
+ # Rolling quantile
+ pl.col("random")
+ .rolling_quantile(quantile=0.75, window_size=2, min_periods=2)
+ .alias("rolling_quantile"),
+ # Rolling skew
+ pl.col("random").rolling_skew(window_size=2).alias("rolling_skew"),
+ # Rolling custom function
+ pl.col("random")
+ .rolling_apply(function=np.nanstd, window_size=2)
+ .alias("rolling_apply"),
+ ]
+)
+```
+
+### Window functions
+
+
+```python
+# Window functions allow to group by several columns simultaneously
+df.select(
+ [
+ "names",
+ "groups",
+ pl.col("random").sum().over("names").alias("sum_by_names"),
+ pl.col("random").sum().over("groups").alias("sum_by_groups"),
+ ]
+)
+```
+
+### Combine Data Sets
+
+
+```python
+df4 = pl.DataFrame(
+ {
+ "nrs": [1, 2, 5, 6],
+ "animals": ["cheetah", "lion", "leopard", "tiger"],
+ }
+)
+```
+
+
+```python
+# Inner join
+# Retains only rows with a match in the other set.
+df.join(df4, on="nrs")
+# or
+df.join(df4, on="nrs", how="inner")
+```
+
+
+```python
+# Left join
+# Retains each row from "left" set (df).
+df.join(df4, on="nrs", how="left")
+```
+
+
+```python
+# Outer join
+# Retains each row, even if no other matching row exists.
+df.join(df4, on="nrs", how="outer")
+```
+
+
+```python
+# Anti join
+# Contains all rows from df that do not have a match in df4.
+df.join(df4, on="nrs", how="anti")
+```
diff --git a/docs/examples/python/python.ipynb b/docs/examples/python/python.ipynb
index 86e0111..5fef061 100644
--- a/docs/examples/python/python.ipynb
+++ b/docs/examples/python/python.ipynb
@@ -6,12 +6,6 @@
"metadata": {},
"source": [
"
\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
diff --git a/docs/examples/python/python.md b/docs/examples/python/python.md
new file mode 100644
index 0000000..f80c601
--- /dev/null
+++ b/docs/examples/python/python.md
@@ -0,0 +1,455 @@
+
+
+# Python
+
+
+
+![python logo](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/python/python.png)
+
+[Python](https://www.python.org/) is a high-level, interpreted programming
+ language known for its simplicity, readability, and versatility.
+ Created by Guido van Rossum and first released in 1991,
+ Python has gained immense popularity in various domains,
+ including web development, data science, automation, and more.
+
+## Why Python?
+
+Python has experienced a remarkable surge in popularity over the years and has become one of the most
+ widely used programming languages across various fields.
+
+
+![python popularity survey](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/python/survey2.png)
+
+## Hello, World!
+
+
+
+```python
+print("Hello, World!")
+```
+
+ Hello, World!
+
+
+## Comments
+
+
+```python
+# This is a comment
+```
+
+## Variables and Data Types
+
+
+
+```python
+# Define an integer variable
+x = 5 # Integer
+
+# Define a float variable
+y = 3.14 # Float
+
+# Define a string variable
+name = "John" # String
+
+# Define a boolean variable
+is_student = True # Boolean
+
+# Print the values of the variables
+print("x:", x)
+print("y:", y)
+print("name:", name)
+print("is_student:", is_student)
+```
+
+ x: 5
+ y: 3.14
+ name: John
+ is_student: True
+
+
+## Basic Operations
+
+
+
+```python
+# Perform basic arithmetic operations
+sum_result = x + y # Add x and y
+sub_result = x - y # Subtract y from x
+mul_result = x * y # Multiply x and y
+div_result = x / y # Divide x by y
+
+# Print the results of the arithmetic operations
+print("sum_result:", sum_result)
+print("sub_result:", sub_result)
+print("mul_result:", mul_result)
+print("div_result:", div_result)
+```
+
+ sum_result: 8.14
+ sub_result: 1.8599999999999999
+ mul_result: 15.700000000000001
+ div_result: 1.592356687898089
+
+
+## String Operations
+
+
+
+
+```python
+# String concatenation
+full_name = name + " Doe" # Concatenate 'name' and " Doe"
+
+# Formatted string
+formatted_string = f"Hello, {name}!" # Create a formatted string using 'name'
+
+# Print the results of string operations
+print("full_name:", full_name)
+print("formatted_string:", formatted_string)
+```
+
+ full_name: John Doe
+ formatted_string: Hello, John!
+
+
+## Conditional Statements
+
+
+
+```python
+# Define the values of x and y
+x = 7
+y = 3.5
+
+# Check if x is greater than y
+if x > y:
+ print("x is greater")
+# If not, check if x is less than y
+elif x < y:
+ print("y is greater")
+# If neither condition is true, they must be equal
+else:
+ print("x and y are equal")
+```
+
+ x is greater
+
+
+## Lists
+
+### Creating Lists
+
+
+
+```python
+# Create a list with elements 1, 3, and 2
+x = [1, 3, 2]
+
+# Print the list
+print("x:", x)
+```
+
+ x: [1, 3, 2]
+
+
+### List Functions and Methods
+
+
+
+```python
+# Return a sorted copy of the list
+sorted_list = sorted(x) # Creates a new list with elements in sorted order
+
+# Sorts the list in place (replaces x)
+x.sort() # Modifies the existing list x to be sorted
+
+# Reverse the order of elements in x
+reversed_list = list(reversed(x)) # Creates a new list with elements in reversed order
+
+# Reverse the list in place
+x.reverse() # Modifies the existing list x to be reversed
+
+# Count the number of element 2 in the list
+count_2 = x.count(2) # Counts the occurrences of element 2 in the list x
+
+# Print the results of list operations
+print("sorted_list:", sorted_list)
+print("reversed_list:", reversed_list)
+print("count_2:", count_2)
+```
+
+ sorted_list: [1, 2, 3]
+ reversed_list: [3, 2, 1]
+ count_2: 1
+
+
+### Selecting List Elements
+
+
+
+```python
+# Select the 0th element in the list
+element_0 = x[0] # Assigns the first element of x to element_0
+
+# Select the last element in the list
+last_element = x[-1] # Assigns the last element of x to last_element
+
+# Select 1st (inclusive) to 3rd (exclusive)
+subset_1_to_3 = x[1:3] # Creates a subset containing elements from index 1 to 2
+
+# Select the 2nd to the end
+subset_2_to_end = x[2:] # Creates a subset containing elements from index 2 to the end
+
+# Select 0th to 3rd (exclusive)
+subset_0_to_3 = x[:3] # Creates a subset containing elements from index 0 to 2
+
+# Print the selected elements and subsets
+print("element_0:", element_0)
+print("last_element:", last_element)
+print("subset_1_to_3:", subset_1_to_3)
+print("subset_2_to_end:", subset_2_to_end)
+print("subset_0_to_3:", subset_0_to_3)
+```
+
+ element_0: 3
+ last_element: 1
+ subset_1_to_3: [2, 1]
+ subset_2_to_end: [1]
+ subset_0_to_3: [3, 2, 1]
+
+
+### Concatenating Lists
+
+
+
+```python
+# Define the x and y lists
+x = [1, 3, 6]
+y = [10, 15, 21]
+
+# Concatenate lists using '+'
+concatenated_list = x + y # Creates a new list by concatenating x and y
+
+# Replicate elements in a list using '*'
+replicated_list = 3 * x # Creates a new list with elements of x replicated 3 times
+
+# Print the results of list operations
+print("concatenated_list:", concatenated_list)
+print("replicated_list:", replicated_list)
+```
+
+ concatenated_list: [1, 3, 6, 10, 15, 21]
+ replicated_list: [1, 3, 6, 1, 3, 6, 1, 3, 6]
+
+
+## Dictionaries
+
+### Creating Dictionaries
+
+
+
+```python
+# Create a dictionary with key-value pairs
+my_dict = {'a': 1, 'b': 2, 'c': 3}
+
+# Print the dictionary
+print("my_dict:", my_dict)
+```
+
+ my_dict: {'a': 1, 'b': 2, 'c': 3}
+
+
+### Dictionary Functions and Methods
+
+
+
+```python
+# Get the keys of a dictionary
+keys = my_dict.keys() # Returns dict_keys(['a', 'b', 'c'])
+
+# Get the values of a dictionary
+values = my_dict.values() # Returns dict_values([1, 2, 3])
+
+# Get a value from a dictionary by specifying the key
+value_a = my_dict['a'] # Returns 1
+
+# Print the results of dictionary operations
+print("keys:", keys)
+print("values:", values)
+print("value_a:", value_a)
+```
+
+ keys: dict_keys(['a', 'b', 'c'])
+ values: dict_values([1, 2, 3])
+ value_a: 1
+
+
+## Loops
+
+
+```python
+# Define a list of numbers
+numbers = [1, 2, 3, 4, 5]
+
+# For loop
+print("Using a for loop:")
+for num in numbers:
+ print(num)
+```
+
+ Using a for loop:
+ 1
+ 2
+ 3
+ 4
+ 5
+
+
+
+```python
+# While loop
+x = 5
+print("Using a while loop:")
+while x > 0:
+ print(x)
+ x -= 1
+```
+
+ Using a while loop:
+ 5
+ 4
+ 3
+ 2
+ 1
+
+
+## Functions
+
+
+```python
+# Define a function that takes a name parameter
+def greet(name):
+ return f"Hello, {name}!"
+
+# Call the greet function with the argument "Alice"
+greeting = greet("Alice") # Calls the greet function and stores the result in greeting
+
+# Print the greeting
+print("greeting:", greeting)
+```
+
+ greeting: Hello, Alice!
+
+
+## Built-in Functions
+
+
+
+```python
+# Define a list of fruits and numbers
+fruits = ['apple', 'banana', 'orange', 'kiwi']
+numbers = [14, 27, 8, 42, 5]
+
+# Get the length of the list 'fruits'
+len_fruits = len(fruits)
+
+# Find the maximum value in the list 'numbers'
+max_number = max(numbers)
+
+# Find the minimum value in the list 'numbers'
+min_number = min(numbers)
+
+# Print the results
+print("Length of fruits list:", len_fruits)
+print("Maximum value in numbers list:", max_number)
+print("Minimum value in numbers list:", min_number)
+```
+
+ Length of fruits list: 4
+ Maximum value in numbers list: 42
+ Minimum value in numbers list: 5
+
+
+## Importing Modules
+
+
+
+```python
+import math
+
+# Calculate square root using math module
+sqrt_result = math.sqrt(x)
+
+# Generate a random number between 1 and 10
+from random import randint
+random_number = randint(1, 10)
+
+# Reusing the math module for another calculation
+sqrt_result_reuse = math.sqrt(x)
+
+# Print the results
+print("sqrt_result:", sqrt_result)
+print("random_number:", random_number)
+print("sqrt_result_reuse:", sqrt_result_reuse)
+```
+
+ sqrt_result: 0.0
+ random_number: 6
+ sqrt_result_reuse: 0.0
+
+
+## Classes and Objects
+
+
+
+```python
+class Dog:
+ def __init__(self, name, age):
+ self.name = name
+ self.age = age
+
+ def bark(self):
+ return "Woof!"
+
+# Create an instance of the Dog class
+my_dog = Dog("Buddy", 3)
+
+# Print the attributes of the instance
+print("my_dog name:", my_dog.name)
+print("my_dog age:", my_dog.age)
+
+# Call the bark method of the instance
+bark_result = my_dog.bark()
+print("bark_result:", bark_result)
+```
+
+ my_dog name: Buddy
+ my_dog age: 3
+ bark_result: Woof!
+
+
+## Input/Output and File Handling
+
+**Input/Output**
+
+```python
+# Get user input and display it
+user_input = input("Enter a number: ")
+print("You entered:", user_input)
+```
+
+
+**File Handling**
+
+```python
+# Read content from a file
+with open("file.txt", "r") as file:
+ content = file.read()
+
+# Write content to a new file
+with open("new_file.txt", "w") as new_file:
+ new_file.write("Hello, world!")
+```
diff --git a/docs/examples/scikit-learn/sklearn.ipynb b/docs/examples/scikit-learn/sklearn.ipynb
index a089da0..0d5b9c6 100644
--- a/docs/examples/scikit-learn/sklearn.ipynb
+++ b/docs/examples/scikit-learn/sklearn.ipynb
@@ -6,12 +6,6 @@
"metadata": {},
"source": [
"
\n",
- "
\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- " \n",
"
\n",
"
\n",
" \n",
diff --git a/docs/examples/scikit-learn/sklearn.md b/docs/examples/scikit-learn/sklearn.md
new file mode 100644
index 0000000..6f101ce
--- /dev/null
+++ b/docs/examples/scikit-learn/sklearn.md
@@ -0,0 +1,589 @@
+
+
+# Scikit-Learn
+
+
![scikit-learn logo](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/scikit-learn/scikit-learn.png)
+
+[Scikit-learn](https://scikit-learn.org/) is an open source Python library that
+ implements a range of
+machine learning,
+ preprocessing, cross-validation and visualization
+algorithms using a unified interface.
+
+## Install and import Scikit-Learn
+
+```bash
+$ pip install scikit-learn
+```
+
+
+```python
+# Import Scikit-Learn convention
+import sklearn
+```
+
+## Scikit-learn Example
+
+
![scikit-learn decision tree example](https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/scikit-learn/sk-tree.png)
+
+
+```python
+from sklearn import neighbors, datasets, preprocessing
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+
+# Load the Iris dataset
+iris = datasets.load_iris()
+
+# Split the dataset into features (X) and target (y)
+X, y = iris.data[:, :2], iris.target
+
+# Split the dataset into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)
+
+# Standardize the features using StandardScaler
+scaler = preprocessing.StandardScaler().fit(X_train)
+X_train = scaler.transform(X_train)
+X_test = scaler.transform(X_test)
+
+# Create a K-Nearest Neighbors classifier
+knn = neighbors.KNeighborsClassifier(n_neighbors=5)
+
+# Train the classifier on the training data
+knn.fit(X_train, y_train)
+
+# Predict the target values on the test data
+y_pred = knn.predict(X_test)
+
+# Calculate the accuracy of the classifier
+accuracy = accuracy_score(y_test, y_pred)
+
+# Print the accuracy
+print("Accuracy:", accuracy)
+```
+
+ Accuracy: 0.631578947368421
+
+
+## Loading The Data
+
+
+
+```python
+from sklearn import datasets
+
+# Load the Iris dataset
+iris = datasets.load_iris()
+
+# Split the dataset into features (X) and target (y)
+X, y = iris.data, iris.target
+
+# Print the lengths of X and y
+print("Size of X:", X.shape) # (150, 4)
+print("Size of y:", y.shape) # (150, )
+```
+
+ Size of X: (150, 4)
+ Size of y: (150,)
+
+
+## Training And Test Data
+
+
+
+```python
+# Import train_test_split from sklearn
+from sklearn.model_selection import train_test_split
+
+# Split the data into training and test sets with test_size=0.2 (20% for test set)
+X, y = iris.data, iris.target
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+
+# Print the sizes of the arrays
+print("Size of X_train:", X_train.shape)
+print("Size of X_test: ", X_test.shape)
+print("Size of y_train:", y_train.shape)
+print("Size of y_test: ", y_test.shape)
+```
+
+ Size of X_train: (120, 4)
+ Size of X_test: (30, 4)
+ Size of y_train: (120,)
+ Size of y_test: (30,)
+
+
+## Create instances of the models
+
+
+
+```python
+# Import necessary classes from sklearn libraries
+from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.svm import SVC
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+
+# Create instances of supervised learning models
+# Logistic Regression classifier (max_iter=1000)
+lr = LogisticRegression(max_iter=1000)
+
+# k-Nearest Neighbors classifier with 5 neighbors
+knn = KNeighborsClassifier(n_neighbors=5)
+
+# Support Vector Machine classifier
+svc = SVC()
+
+# Create instances of unsupervised learning models
+# k-Means clustering with 3 clusters and 10 initialization attempts
+k_means = KMeans(n_clusters=3, n_init=10)
+
+# Principal Component Analysis with 2 components
+pca = PCA(n_components=2)
+```
+
+## Model Fitting
+
+
+
+
+```python
+# Fit models to the data
+lr.fit(X_train, y_train)
+knn.fit(X_train, y_train)
+svc.fit(X_train, y_train)
+k_means.fit(X_train)
+pca.fit_transform(X_train)
+
+# Print the instances and models
+print("lr:", lr)
+print("knn:", knn)
+print("svc:", svc)
+print("k_means:", k_means)
+print("pca:", pca)
+```
+
+ lr: LogisticRegression(max_iter=1000)
+ knn: KNeighborsClassifier()
+ svc: SVC()
+ k_means: KMeans(n_clusters=3, n_init=10)
+ pca: PCA(n_components=2)
+
+
+## Prediction
+
+
+
+```python
+# Predict using different supervised estimators
+y_pred_svc = svc.predict(X_test)
+y_pred_lr = lr.predict(X_test)
+y_pred_knn_proba = knn.predict_proba(X_test)
+
+
+# Predict labels using KMeans in clustering algorithms
+y_pred_kmeans = k_means.predict(X_test)
+
+# Print the results
+print("Supervised Estimators:")
+print("SVC predictions:", y_pred_svc)
+print("Logistic Regression predictions:", y_pred_lr)
+print("KNeighborsClassifier probabilities:\n", y_pred_knn_proba[:5],"\n ...")
+
+print("\nUnsupervised Estimators:")
+print("KMeans predictions:", y_pred_kmeans)
+```
+
+ Supervised Estimators:
+ SVC predictions: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]
+ Logistic Regression predictions: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]
+ KNeighborsClassifier probabilities:
+ [[0. 0. 1.]
+ [0. 1. 0.]
+ [1. 0. 0.]
+ [0. 0. 1.]
+ [1. 0. 0.]]
+ ...
+
+ Unsupervised Estimators:
+ KMeans predictions: [2 2 0 1 0 1 0 2 2 2 1 2 2 2 2 0 2 2 0 0 2 2 0 0 2 0 0 2 2 0]
+
+
+## Preprocessing The Data
+
+### Standardization
+
+
+```python
+from sklearn.preprocessing import StandardScaler
+
+# Create an instance of the StandardScaler and fit it to training data
+scaler = StandardScaler().fit(X_train)
+
+# Transform the training and test data using the scaler
+standardized_X = scaler.transform(X_train)
+standardized_X_test = scaler.transform(X_test)
+
+# Print the variables
+print("\nStandardized X_train:\n", standardized_X[:5],"\n ...")
+print("\nStandardized X_test:\n", standardized_X_test[:5],"\n ...")
+```
+
+
+ Standardized X_train:
+ [[ 0.61303014 0.10850105 0.94751783 0.736072 ]
+ [-0.56776627 -0.12400121 0.38491447 0.34752959]
+ [-0.80392556 1.03851009 -1.30289562 -1.33615415]
+ [ 0.25879121 -0.12400121 0.60995581 0.736072 ]
+ [ 0.61303014 -0.58900572 1.00377816 1.25412853]]
+ ...
+
+ Standardized X_test:
+ [[-0.09544771 -0.58900572 0.72247648 1.5131568 ]
+ [ 0.14071157 -1.98401928 0.10361279 -0.30004108]
+ [-0.44968663 2.66602591 -1.35915595 -1.33615415]
+ [ 1.6757469 -0.35650346 1.39760052 0.736072 ]
+ [-1.04008484 0.80600783 -1.30289562 -1.33615415]]
+ ...
+
+
+### Normalization
+
+
+```python
+from sklearn.preprocessing import Normalizer
+scaler = Normalizer().fit(X_train)
+normalized_X = scaler.transform(X_train)
+normalized_X_test = scaler.transform(X_test)
+
+# Print the variables
+print("\nNormalized X_train:\n", normalized_X[:5],"\n ...")
+print("\nNormalized X_test:\n", normalized_X_test[:5],"\n ...")
+```
+
+
+ Normalized X_train:
+ [[0.69804799 0.338117 0.59988499 0.196326 ]
+ [0.69333409 0.38518561 0.57777841 0.1925928 ]
+ [0.80641965 0.54278246 0.23262105 0.03101614]
+ [0.71171214 0.35002236 0.57170319 0.21001342]
+ [0.69417747 0.30370264 0.60740528 0.2386235 ]]
+ ...
+
+ Normalized X_test:
+ [[0.67767924 0.32715549 0.59589036 0.28041899]
+ [0.78892752 0.28927343 0.52595168 0.13148792]
+ [0.77867447 0.59462414 0.19820805 0.02831544]
+ [0.71366557 0.28351098 0.61590317 0.17597233]
+ [0.80218492 0.54548574 0.24065548 0.0320874 ]]
+ ...
+
+
+### Binarization
+
+
+```python
+import numpy as np
+from sklearn.preprocessing import Binarizer
+
+# Create a sample data array
+data = np.array([[1.5, 2.7, 0.8],
+ [0.2, 3.9, 1.2],
+ [4.1, 1.0, 2.5]])
+
+# Create a Binarizer instance with a threshold of 2.0
+binarizer = Binarizer(threshold=2.0)
+
+# Apply binarization to the data
+binarized_data = binarizer.transform(data)
+
+print("Original data:")
+print(data)
+print("\nBinarized data:")
+print(binarized_data)
+```
+
+ Original data:
+ [[1.5 2.7 0.8]
+ [0.2 3.9 1.2]
+ [4.1 1. 2.5]]
+
+ Binarized data:
+ [[0. 1. 0.]
+ [0. 1. 0.]
+ [1. 0. 1.]]
+
+
+### Encoding Categorical Features
+
+
+
+```python
+from sklearn.preprocessing import LabelEncoder
+
+# Sample data: categorical labels
+labels = ['cat', 'dog', 'dog', 'fish', 'cat', 'dog', 'fish']
+
+# Create a LabelEncoder instance
+label_encoder = LabelEncoder()
+
+# Fit and transform the labels
+encoded_labels = label_encoder.fit_transform(labels)
+
+# Print the original labels and their encoded versions
+print("Original labels:", labels)
+print("Encoded labels:", encoded_labels)
+
+# Decode the encoded labels back to the original labels
+decoded_labels = label_encoder.inverse_transform(encoded_labels)
+print("Decoded labels:", decoded_labels)
+```
+
+ Original labels: ['cat', 'dog', 'dog', 'fish', 'cat', 'dog', 'fish']
+ Encoded labels: [0 1 1 2 0 1 2]
+ Decoded labels: ['cat' 'dog' 'dog' 'fish' 'cat' 'dog' 'fish']
+
+
+### Imputing Missing Values
+
+
+
+```python
+import numpy as np
+from sklearn.impute import SimpleImputer
+
+# Sample data with missing values
+data = np.array([[1.0, 2.0, np.nan],
+ [4.0, np.nan, 6.0],
+ [7.0, 8.0, 9.0]])
+
+# Create a SimpleImputer instance with strategy='mean'
+imputer = SimpleImputer(strategy='mean')
+
+# Fit and transform the imputer on the data
+imputed_data = imputer.fit_transform(data)
+
+print("Original data:")
+print(data)
+print("\nImputed data:")
+print(imputed_data)
+```
+
+ Original data:
+ [[ 1. 2. nan]
+ [ 4. nan 6.]
+ [ 7. 8. 9.]]
+
+ Imputed data:
+ [[1. 2. 7.5]
+ [4. 5. 6. ]
+ [7. 8. 9. ]]
+
+
+### Generating Polynomial Features
+
+
+```python
+import numpy as np
+from sklearn.preprocessing import PolynomialFeatures
+
+# Sample data
+data = np.array([[1, 2],
+ [3, 4],
+ [5, 6]])
+
+# Create a PolynomialFeatures instance of degree 2
+poly = PolynomialFeatures(degree=2)
+
+# Transform the data to include polynomial features
+poly_data = poly.fit_transform(data)
+
+print("Original data:")
+print(data)
+print("\nPolynomial features:")
+print(poly_data)
+```
+
+ Original data:
+ [[1 2]
+ [3 4]
+ [5 6]]
+
+ Polynomial features:
+ [[ 1. 1. 2. 1. 2. 4.]
+ [ 1. 3. 4. 9. 12. 16.]
+ [ 1. 5. 6. 25. 30. 36.]]
+
+
+## Classification Metrics
+
+
+
+```python
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+
+# Accuracy Score
+accuracy_knn = knn.score(X_test, y_test)
+print("Accuracy Score (knn):", knn.score(X_test, y_test))
+
+accuracy_y_pred = accuracy_score(y_test, y_pred_lr)
+print("Accuracy Score (y_pred):", accuracy_y_pred)
+
+# Classification Report
+classification_rep_y_pred = classification_report(y_test, y_pred_lr)
+print("Classification Report (y_pred):\n", classification_rep_y_pred)
+
+classification_rep_y_pred_lr = classification_report(y_test, y_pred_lr)
+print("Classification Report (y_pred_lr):\n", classification_rep_y_pred_lr)
+
+# Confusion Matrix
+conf_matrix_y_pred_lr = confusion_matrix(y_test, y_pred_lr)
+print("Confusion Matrix (y_pred_lr):\n", conf_matrix_y_pred_lr)
+```
+
+ Accuracy Score (knn): 0.9666666666666667
+ Accuracy Score (y_pred): 1.0
+ Classification Report (y_pred):
+ precision recall f1-score support
+
+ 0 1.00 1.00 1.00 11
+ 1 1.00 1.00 1.00 13
+ 2 1.00 1.00 1.00 6
+
+ accuracy 1.00 30
+ macro avg 1.00 1.00 1.00 30
+ weighted avg 1.00 1.00 1.00 30
+
+ Classification Report (y_pred_lr):
+ precision recall f1-score support
+
+ 0 1.00 1.00 1.00 11
+ 1 1.00 1.00 1.00 13
+ 2 1.00 1.00 1.00 6
+
+ accuracy 1.00 30
+ macro avg 1.00 1.00 1.00 30
+ weighted avg 1.00 1.00 1.00 30
+
+ Confusion Matrix (y_pred_lr):
+ [[11 0 0]
+ [ 0 13 0]
+ [ 0 0 6]]
+
+
+## Regression Metrics
+
+
+
+```python
+from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+
+# True values (ground truth)
+y_true = [3, -0.5, 2]
+
+# Predicted values
+y_pred = [2.8, -0.3, 1.8]
+
+# Calculate Mean Absolute Error
+mae = mean_absolute_error(y_true, y_pred)
+print("Mean Absolute Error:", mae)
+
+# Calculate Mean Squared Error
+mse = mean_squared_error(y_true, y_pred)
+print("Mean Squared Error:", mse)
+
+# Calculate R² Score
+r2 = r2_score(y_true, y_pred)
+print("R² Score:", r2)
+```
+
+ Mean Absolute Error: 0.20000000000000004
+ Mean Squared Error: 0.040000000000000015
+ R² Score: 0.9815384615384616
+
+
+## Clustering Metrics
+
+
+
+```python
+from sklearn.metrics import adjusted_rand_score, homogeneity_score, v_measure_score
+
+# Adjusted Rand Index
+adjusted_rand_index = adjusted_rand_score(y_test, y_pred_kmeans)
+print("Adjusted Rand Index:", adjusted_rand_index)
+
+# Homogeneity Score
+homogeneity = homogeneity_score(y_test, y_pred_kmeans)
+print("Homogeneity Score:", homogeneity)
+
+# V-Measure Score
+v_measure = v_measure_score(y_test, y_pred_kmeans)
+print("V-Measure Score:", v_measure)
+```
+
+ Adjusted Rand Index: 0.7657144139494176
+ Homogeneity Score: 0.7553796021571243
+ V-Measure Score: 0.8005552543570766
+
+
+## Cross-Validation
+
+
+
+```python
+# Import necessary library
+from sklearn.model_selection import cross_val_score
+
+# Cross-validation with KNN estimator
+knn_scores = cross_val_score(knn, X_train, y_train, cv=4)
+print(knn_scores)
+
+# Cross-validation with Linear Regression estimator
+lr_scores = cross_val_score(lr, X, y, cv=2)
+print(lr_scores)
+```
+
+ [0.96666667 0.93333333 1. 0.93333333]
+ [0.96 0.96]
+
+
+## Grid Search
+
+
+```python
+# Import necessary library
+from sklearn.model_selection import GridSearchCV
+
+# Define parameter grid
+params = {
+ 'n_neighbors': np.arange(1, 3),
+ 'weights': ['uniform', 'distance']
+}
+
+# Create GridSearchCV object
+grid = GridSearchCV(estimator=knn, param_grid=params)
+
+# Fit the grid to the data
+grid.fit(X_train, y_train)
+
+# Print the best parameters found
+print("Best parameters:", grid.best_params_)
+
+# Print the best cross-validation score
+print("Best cross-validation score:", grid.best_score_)
+
+# Print the accuracy on the test set using the best parameters
+best_knn = grid.best_estimator_
+test_accuracy = best_knn.score(X_test, y_test)
+print("Test set accuracy:", test_accuracy)
+```
+
+ Best parameters: {'n_neighbors': 1, 'weights': 'uniform'}
+ Best cross-validation score: 0.9416666666666667
+ Test set accuracy: 1.0
+
diff --git a/jupyter2md.ipynb b/jupyter2md.ipynb
new file mode 100644
index 0000000..aa67df9
--- /dev/null
+++ b/jupyter2md.ipynb
@@ -0,0 +1,64 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Convertido: docs/examples\\dplyr\\dplyr.ipynb\n",
+ "Convertido: docs/examples\\forcats\\forcats.ipynb\n",
+ "Convertido: docs/examples\\ggplot2\\ggplot2.ipynb\n",
+ "Convertido: docs/examples\\matplotlib\\matplotlib.ipynb\n",
+ "Convertido: docs/examples\\numpy\\numpy.ipynb\n",
+ "Convertido: docs/examples\\pandas\\pandas.ipynb\n",
+ "Convertido: docs/examples\\polars\\polars.ipynb\n",
+ "Convertido: docs/examples\\python\\python.ipynb\n",
+ "Convertido: docs/examples\\scikit-learn\\sklearn.ipynb\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import glob\n",
+ "import subprocess\n",
+ "\n",
+ "base_path = \"docs/examples\"\n",
+ "notebook_files = glob.glob(f\"{base_path}/*/*.ipynb\")\n",
+ "\n",
+ "for notebook in notebook_files:\n",
+ " md_file = notebook.replace(\".ipynb\", \".md\")\n",
+ " command = [\"jupyter\", \"nbconvert\", \"--to\", \"markdown\", notebook]\n",
+ " try:\n",
+ " subprocess.run(command, check=True, capture_output=True, text=True)\n",
+ " print(f\"Convertido: {notebook}\")\n",
+ " except subprocess.CalledProcessError as e:\n",
+ " print(f\"Error al convertir {notebook}: {e.stderr}\")\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "docs-PxIqdmp3-py3.10",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pages/dplyr.py b/pages/dplyr.py
index 3e1f642..79d34d9 100644
--- a/pages/dplyr.py
+++ b/pages/dplyr.py
@@ -60,7 +60,7 @@ def cs_sidebar():
# Define the cs_body() function
-def cs_body():
+def st_body():
"""
Create content sections for the main body of the Streamlit cheat sheet with dplyr examples.
"""
@@ -401,9 +401,71 @@ def cs_body():
''')
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/dplyr/dplyr.md"
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+# Define the cs_body() function
+def cs_body():
+ """
+    Create content sections for the main body of the Streamlit cheat sheet with dplyr examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
+
+
# Run the main function if the script is executed directly
if __name__ == '__main__':
diff --git a/pages/forcats.py b/pages/forcats.py
index aac20c7..7fbd291 100644
--- a/pages/forcats.py
+++ b/pages/forcats.py
@@ -56,7 +56,7 @@ def cs_sidebar():
# Define the cs_body() function
-def cs_body():
+def st_body():
"""
Create content sections for the main body of the Streamlit cheat sheet with forcats examples.
"""
@@ -215,6 +215,69 @@ def cs_body():
''')
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/forcats/forcats.md"
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+
+
+
+# Define the cs_body() function
+def cs_body():
+ """
+    Create content sections for the main body of the Streamlit cheat sheet with forcats examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
diff --git a/pages/ggplot2.py b/pages/ggplot2.py
index a66f0f4..5354a61 100644
--- a/pages/ggplot2.py
+++ b/pages/ggplot2.py
@@ -57,7 +57,7 @@ def cs_sidebar():
# Define the cs_body() function
-def cs_body():
+def st_body():
"""
Create content sections for the main body of the Streamlit cheat sheet with ggplot2 examples.
"""
@@ -272,6 +272,70 @@ def cs_body():
''')
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/ggplot2/ggplot2.md"
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+
+
+
+# Define the cs_body() function
+def cs_body():
+ """
+    Create content sections for the main body of the Streamlit cheat sheet with ggplot2 examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
+
# Run the main function if the script is executed directly
if __name__ == '__main__':
main()
diff --git a/pages/matplotlib.py b/pages/matplotlib.py
index a974132..d7719ef 100644
--- a/pages/matplotlib.py
+++ b/pages/matplotlib.py
@@ -93,7 +93,7 @@ def cs_sidebar():
# Define the cs_body() function
-def cs_body():
+def st_body():
"""
Create content sections for the main body of the Streamlit cheat sheet with NumPy examples.
"""
@@ -390,9 +390,71 @@ def cs_body():
arrowprops={"arrowstyle": "->", "color": "C1"})
''')
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/matplotlib/matplotlib.md"
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+# Define the cs_body() function
+def cs_body():
+ """
+    Create content sections for the main body of the Streamlit cheat sheet with Matplotlib examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
+
+
# Run the main function if the script is executed directly
if __name__ == '__main__':
diff --git a/pages/numpy.py b/pages/numpy.py
index 19ab16d..1d0e7ed 100644
--- a/pages/numpy.py
+++ b/pages/numpy.py
@@ -78,7 +78,7 @@ def cs_sidebar():
# Define the cs_body() function
-def cs_body():
+def st_body():
"""
Create content sections for the main body of the Streamlit cheat sheet with NumPy examples.
"""
@@ -387,6 +387,70 @@ def cs_body():
vsplit_array = np.vsplit(c, 2)
''')
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/numpy/numpy.md"
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+
+
+
+# Define the cs_body() function
+def cs_body():
+ """
+    Create content sections for the main body of the Streamlit cheat sheet with NumPy examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
+
+
# Run the main function if the script is executed directly
if __name__ == '__main__':
main()
diff --git a/pages/pandas.py b/pages/pandas.py
index 70c1a96..7ec5678 100644
--- a/pages/pandas.py
+++ b/pages/pandas.py
@@ -97,7 +97,7 @@ def cs_sidebar():
# Define the cs_body() function
-def cs_body():
+def st_body():
"""
Create content sections for the main body of the Streamlit cheat sheet with Pandas examples.
"""
@@ -328,6 +328,71 @@ def cs_body():
result_mul = s.mul(s3, fill_value=3)
''')
+
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/pandas/pandas.md"
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+
+
+
+# Define the cs_body() function
+def cs_body():
+ """
+    Create content sections for the main body of the Streamlit cheat sheet with Pandas examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
+
# Run the main function if the script is executed directly
if __name__ == '__main__':
main()
diff --git a/pages/polars.py b/pages/polars.py
index e93adb6..477c34e 100644
--- a/pages/polars.py
+++ b/pages/polars.py
@@ -99,7 +99,7 @@ def cs_sidebar():
# Define the cs_body() function
-def cs_body():
+def st_body():
"""
Create content sections for the main body of the Streamlit cheat sheet with Polars examples.
"""
@@ -412,6 +412,70 @@ def cs_body():
)
''')
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/polars/polars.md"
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+
+
+
+# Define the cs_body() function
+def cs_body():
+ """
+    Create content sections for the main body of the Streamlit cheat sheet with Polars examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
+
+
# Run the main function if the script is executed directly
if __name__ == '__main__':
diff --git a/pages/python.py b/pages/python.py
index 96acc2b..5c04dd6 100644
--- a/pages/python.py
+++ b/pages/python.py
@@ -3,6 +3,8 @@
import base64
import requests
+
+
# Initial page config
st.set_page_config(
page_title='Python Cheat Sheet',
@@ -14,6 +16,7 @@
st.markdown("# 📗 Python Cheat Sheet")
+
def main():
"""
Main function to set up the Streamlit app layout.
@@ -60,11 +63,10 @@ def cs_sidebar():
return None
-# Define the cs_body() function
-def cs_body():
- """
- Create content sections for the main body of the Streamlit cheat sheet with Python examples.
- """
+
+# Define Streamlit Body
+def st_body():
+
col1, col2, col3 = st.columns(3) # Create columns for layout
#######################################
@@ -322,6 +324,68 @@ def bark(self):
my_dog = Dog("Buddy", 3)
''')
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/python/python.md"
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+
+
+
+# Define the cs_body() function
+def cs_body():
+ """
+ Create content sections for the main body of the Streamlit cheat sheet with Python examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
# Run the main function if the script is executed directly
if __name__ == '__main__':
diff --git a/pages/scikit-learn.py b/pages/scikit-learn.py
index e22b2af..d327f18 100644
--- a/pages/scikit-learn.py
+++ b/pages/scikit-learn.py
@@ -98,7 +98,7 @@ def cs_sidebar():
# Define the cs_body() function
-def cs_body():
+def st_body():
"""
Create content sections for the main body of the Streamlit cheat sheet with Scikit-learn examples.
"""
@@ -414,8 +414,71 @@ def cs_body():
help(sklearn.cluster.KMeans)
''')
+def st_pdf():
+ # HTML para incrustar el iframe
+ iframe_html = """
+
+ """
+
+ # Usar st.components para mostrar el HTML
+ st.components.v1.html(iframe_html, height=500)
+
+
+
+def st_markdown():
+ # Ruta al archivo .md
+ md_file_path = "docs/examples/scikit-learn/sklearn.md"
+
+
+ # Leer el contenido del archivo .md
+ with open(md_file_path, "r", encoding="utf-8") as file:
+ md_content = file.read()
+
+ # Mostrar el contenido Markdown
+ st.markdown(md_content, unsafe_allow_html=True)
+
+# Define the cs_body() function
+def cs_body():
+ """
+    Create content sections for the main body of the Streamlit cheat sheet with Scikit-learn examples.
+ """
+
+ tab1, tab2, tab3 = st.tabs(["🚀 streamlit", "📄 pdf", "💻 notebook"])
+
+ with tab1:
+ st_body()
+ with tab2:
+ st_pdf()
+ with tab3:
+ st_markdown()
+
+css = '''
+
+ '''
+
+st.markdown(css, unsafe_allow_html=True)
+
# Run the main function if the script is executed directly
if __name__ == '__main__':
main()
diff --git "a/\360\237\217\240Home.py" "b/\360\237\217\240Home.py"
index df7a677..377e69f 100644
--- "a/\360\237\217\240Home.py"
+++ "b/\360\237\217\240Home.py"
@@ -1,4 +1,7 @@
import streamlit as st
+import base64
+
+import streamlit.components.v1 as components
# Page configuration
st.set_page_config(
@@ -25,22 +28,19 @@
)
st.markdown(
"""
-Welcome to the ultimate repository of **Data Science Cheat Sheets**, carefully crafted for Python and R enthusiasts.
-Whether you're a beginner or an expert, these cheat sheets provide a **multi-dimensional learning experience** in three flexible formats:
-"""
-)
+Welcome to the **Ultimate Data Science Cheat Sheet Repository**, thoughtfully designed for Python and R enthusiasts.
+Whether you're just starting out or are an experienced professional, these cheat sheets offer a **comprehensive and flexible learning experience** in three convenient formats:
-# Cheat Sheet Formats
-st.markdown(
- """
-| **PDF** | **Streamlit** | **Google Colab** |
-|---------|---------------|------------------|
-| ![Open in PDF](https://img.shields.io/badge/Open%20in%20PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white) | ![Open in Streamlit](https://static.streamlit.io/badges/streamlit_badge_black_white.svg) | ![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg) |
+- 🚀 **Streamlit**: Interactive and user-friendly.
+- 📄 **PDF**: Downloadable and easy to reference.
+- 💻 **Google Colab**: Ready-to-run for hands-on learning.
"""
)
+
+
# Section: Python
st.markdown("")
st.markdown("")
@@ -153,6 +153,7 @@
)
+
css = '''