Skip to content

Commit

Permalink
docs: Revise and improve 'Concepts' section (#19087)
Browse files Browse the repository at this point in the history
  • Loading branch information
rodrigogiraoserrao authored Oct 7, 2024
1 parent 1e28cc7 commit d7c93bb
Show file tree
Hide file tree
Showing 29 changed files with 963 additions and 519 deletions.
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ impl LazyFrame {

/// Return a String describing the logical plan.
///
/// If `optimized` is `true`, explains the optimized plan. If `optimized` is `false,
/// If `optimized` is `true`, explains the optimized plan. If `optimized` is `false`,
/// explains the naive, un-optimized plan.
pub fn explain(&self, optimized: bool) -> PolarsResult<String> {
if optimized {
Expand Down
1 change: 1 addition & 0 deletions docs/source/_build/API_REFERENCE_LINKS.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ python:
is_duplicated: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.is_duplicated.html
sample: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.sample.html
head: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.head.html
glimpse: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.glimpse.html
tail: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.tail.html
describe: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.describe.html
col: https://docs.pola.rs/api/python/stable/reference/expressions/col.html
Expand Down
42 changes: 0 additions & 42 deletions docs/source/src/python/user-guide/concepts/data-structures.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Example script made of small sections, each fenced by a pair of
# `# --8<-- [start:NAME]` / `# --8<-- [end:NAME]` marker comments. Every section
# builds or inspects a small polars Series/DataFrame and prints the result.
# NOTE(review): the markers look like snippet-include anchors used by the docs
# build -- confirm before renaming, moving, or adding text inside any of them.
# --8<-- [start:series]
import polars as pl

s = pl.Series("ints", [1, 2, 3, 4, 5])
print(s)
# --8<-- [end:series]

# Same values twice: `s1` leaves the dtype to polars, `s2` requests pl.UInt64
# explicitly; only the two dtypes are printed.
# --8<-- [start:series-dtype]
s1 = pl.Series("ints", [1, 2, 3, 4, 5])
s2 = pl.Series("uints", [1, 2, 3, 4, 5], dtype=pl.UInt64)
print(s1.dtype, s2.dtype)
# --8<-- [end:series-dtype]

# Four-row frame (one string column, one date column, two float columns).
# Every later section in this script reads this same `df`.
# --8<-- [start:df]
from datetime import date

df = pl.DataFrame(
{
"name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
"birthdate": [
date(1997, 1, 10),
date(1985, 2, 15),
date(1983, 3, 22),
date(1981, 4, 30),
],
"weight": [57.9, 72.5, 53.6, 83.1], # (kg)
"height": [1.56, 1.77, 1.65, 1.75], # (m)
}
)

print(df)
# --8<-- [end:df]

# Prints the `schema` attribute of `df`.
# --8<-- [start:schema]
print(df.schema)
# --8<-- [end:schema]

# Prints the result of `head(3)` on the four-row frame.
# --8<-- [start:head]
print(df.head(3))
# --8<-- [end:head]

# `glimpse` is called with return_as_string=True and its return value is
# printed -- presumably the flag makes it return text instead of printing
# directly; verify against the polars API reference.
# --8<-- [start:glimpse]
print(df.glimpse(return_as_string=True))
# --8<-- [end:glimpse]

# Prints the result of `tail(3)` on the four-row frame.
# --8<-- [start:tail]
print(df.tail(3))
# --8<-- [end:tail]

# NOTE(review): the section below seeds Python's `random` module rather than
# passing a seed to `df.sample`. That only makes the output reproducible if
# `sample` derives its default seed from `random` -- confirm against the polars
# version used to build the docs.
# --8<-- [start:sample]
import random

random.seed(42) # For reproducibility.

print(df.sample(2))
# --8<-- [end:sample]

# Prints the result of `describe()` for `df`.
# --8<-- [start:describe]
print(df.describe())
# --8<-- [end:describe]
105 changes: 97 additions & 8 deletions docs/source/src/python/user-guide/concepts/expressions.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,105 @@
# --8<-- [start:expression]
import polars as pl

pl.col("weight") / (pl.col("height") ** 2)
# --8<-- [end:expression]

# --8<-- [start:print-expr]
bmi_expr = pl.col("weight") / (pl.col("height") ** 2)
print(bmi_expr)
# --8<-- [end:print-expr]

# --8<-- [start:df]
from datetime import date

df = pl.DataFrame(
{
"foo": [1, 2, 3, None, 5],
"bar": [1.5, 0.9, 2.0, 0.0, None],
"name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
"birthdate": [
date(1997, 1, 10),
date(1985, 2, 15),
date(1983, 3, 22),
date(1981, 4, 30),
],
"weight": [57.9, 72.5, 53.6, 83.1], # (kg)
"height": [1.56, 1.77, 1.65, 1.75], # (m)
}
)

# --8<-- [start:example1]
pl.col("foo").sort().head(2)
# --8<-- [end:example1]
print(df)
# --8<-- [end:df]

# --8<-- [start:select-1]
result = df.select(
bmi=bmi_expr,
avg_bmi=bmi_expr.mean(),
ideal_max_bmi=25,
)
print(result)
# --8<-- [end:select-1]

# --8<-- [start:select-2]
result = df.select(deviation=(bmi_expr - bmi_expr.mean()) / bmi_expr.std())
print(result)
# --8<-- [end:select-2]

# --8<-- [start:with_columns-1]
result = df.with_columns(
bmi=bmi_expr,
avg_bmi=bmi_expr.mean(),
ideal_max_bmi=25,
)
print(result)
# --8<-- [end:with_columns-1]

# --8<-- [start:filter-1]
result = df.filter(
pl.col("birthdate").is_between(date(1982, 12, 31), date(1996, 1, 1)),
pl.col("height") > 1.7,
)
print(result)
# --8<-- [end:filter-1]

# --8<-- [start:group_by-1]
result = df.group_by(
(pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
).agg(pl.col("name"))
print(result)
# --8<-- [end:group_by-1]

# --8<-- [start:group_by-2]
result = df.group_by(
(pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
(pl.col("height") < 1.7).alias("short?"),
).agg(pl.col("name"))
print(result)
# --8<-- [end:group_by-2]

# --8<-- [start:group_by-3]
result = df.group_by(
(pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
(pl.col("height") < 1.7).alias("short?"),
).agg(
pl.len(),
pl.col("height").max().alias("tallest"),
pl.col("weight", "height").mean().name.prefix("avg_"),
)
print(result)
# --8<-- [end:group_by-3]

# --8<-- [start:example2]
df.select(pl.col("foo").sort().head(2), pl.col("bar").filter(pl.col("foo") == 1).sum())
# --8<-- [end:example2]
# --8<-- [start:expression-expansion-1]
expr = (pl.col(pl.Float64) * 1.1).name.suffix("*1.1")
result = df.select(expr)
print(result)
# --8<-- [end:expression-expansion-1]

# --8<-- [start:expression-expansion-2]
df2 = pl.DataFrame(
{
"ints": [1, 2, 3, 4],
"letters": ["A", "B", "C", "D"],
}
)
result = df2.select(expr)
print(result)
# --8<-- [end:expression-expansion-2]
25 changes: 25 additions & 0 deletions docs/source/src/python/user-guide/concepts/lazy-vs-eager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# --8<-- [start:import]
import polars as pl

# --8<-- [end:import]

# --8<-- [start:eager]

df = pl.read_csv("docs/assets/data/iris.csv")
Expand All @@ -18,3 +21,25 @@

df = q.collect()
# --8<-- [end:lazy]

# --8<-- [start:explain]
print(q.explain())
# --8<-- [end:explain]

# --8<-- [start:explain-expression-expansion]
schema = pl.Schema(
{
"int_1": pl.Int16,
"int_2": pl.Int32,
"float_1": pl.Float64,
"float_2": pl.Float64,
"float_3": pl.Float64,
}
)

print(
pl.LazyFrame(schema=schema)
.select((pl.col(pl.Float64) * 1.1).name.suffix("*1.1"))
.explain()
)
# --8<-- [end:explain-expression-expansion]
6 changes: 3 additions & 3 deletions docs/source/src/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ path = "user-guide/getting-started.rs"
required-features = ["polars/lazy", "polars/temporal", "polars/round_series", "polars/strings"]

[[bin]]
name = "user-guide-concepts-data-structures"
path = "user-guide/concepts/data-structures.rs"
name = "user-guide-concepts-data-types-and-structures"
path = "user-guide/concepts/data-types-and-structures.rs"

[[bin]]
name = "user-guide-concepts-contexts"
Expand All @@ -41,7 +41,7 @@ required-features = ["polars/lazy"]
[[bin]]
name = "user-guide-concepts-expressions"
path = "user-guide/concepts/expressions.rs"
required-features = ["polars/lazy"]
required-features = ["polars/lazy", "polars/temporal", "polars/is_between"]
[[bin]]
name = "user-guide-concepts-lazy-vs-eager"
path = "user-guide/concepts/lazy-vs-eager.rs"
Expand Down
51 changes: 0 additions & 51 deletions docs/source/src/rust/user-guide/concepts/data-structures.rs

This file was deleted.

Loading

0 comments on commit d7c93bb

Please sign in to comment.