From 049a91ec75d6ddfe2e3f98b7174a2e5cb38c9ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodrigo=20Gir=C3=A3o=20Serr=C3=A3o?= <5621605+rodrigogiraoserrao@users.noreply.github.com> Date: Tue, 1 Oct 2024 15:04:27 +0100 Subject: [PATCH] Simplify snippet hierarchy. Since there is now a coherent story throughout the whole 'getting started' page it no longer makes sense to have three different files with examples for each language. --- .../expressions.py => getting-started.py} | 0 .../user-guide/getting-started/joins.py | 29 -------- .../getting-started/reading-writing.py | 23 ------ docs/source/src/rust/Cargo.toml | 12 +--- .../expressions.rs => getting-started.rs} | 0 .../rust/user-guide/getting-started/joins.rs | 29 -------- .../getting-started/reading-writing.rs | 35 --------- docs/source/user-guide/getting-started.md | 72 +++++++++---------- 8 files changed, 39 insertions(+), 161 deletions(-) rename docs/source/src/python/user-guide/{getting-started/expressions.py => getting-started.py} (100%) delete mode 100644 docs/source/src/python/user-guide/getting-started/joins.py delete mode 100644 docs/source/src/python/user-guide/getting-started/reading-writing.py rename docs/source/src/rust/user-guide/{getting-started/expressions.rs => getting-started.rs} (100%) delete mode 100644 docs/source/src/rust/user-guide/getting-started/joins.rs delete mode 100644 docs/source/src/rust/user-guide/getting-started/reading-writing.rs diff --git a/docs/source/src/python/user-guide/getting-started/expressions.py b/docs/source/src/python/user-guide/getting-started.py similarity index 100% rename from docs/source/src/python/user-guide/getting-started/expressions.py rename to docs/source/src/python/user-guide/getting-started.py diff --git a/docs/source/src/python/user-guide/getting-started/joins.py b/docs/source/src/python/user-guide/getting-started/joins.py deleted file mode 100644 index fd7dcc19eb4a..000000000000 --- a/docs/source/src/python/user-guide/getting-started/joins.py +++ /dev/null 
@@ -1,29 +0,0 @@ -# --8<-- [start:setup] -import numpy as np -import polars as pl - -# --8<-- [end:setup] - -# --8<-- [start:join] -df = pl.DataFrame( - { - "a": range(8), - "b": np.random.rand(8), - "d": [1.0, 2.0, float("nan"), float("nan"), 0.0, -5.0, -42.0, None], - } -) - -df2 = pl.DataFrame( - { - "x": range(8), - "y": ["A", "A", "A", "B", "B", "C", "X", "X"], - } -) -joined = df.join(df2, left_on="a", right_on="x") -print(joined) -# --8<-- [end:join] - -# --8<-- [start:hstack] -stacked = df.hstack(df2) -print(stacked) -# --8<-- [end:hstack] diff --git a/docs/source/src/python/user-guide/getting-started/reading-writing.py b/docs/source/src/python/user-guide/getting-started/reading-writing.py deleted file mode 100644 index e0c250f3f9de..000000000000 --- a/docs/source/src/python/user-guide/getting-started/reading-writing.py +++ /dev/null @@ -1,23 +0,0 @@ -# --8<-- [start:setup] -import polars as pl -from datetime import datetime - -df = pl.DataFrame( - { - "date": [ - datetime(2025, 1, 1), - datetime(2025, 1, 2), - datetime(2025, 1, 3), - ], - "float": [4.0, 5.0, 6.0], - } -) - -print(df) -# --8<-- [end:setup] - -# --8<-- [start:csv] -df.write_csv("docs/assets/data/output.csv") -df_csv = pl.read_csv("docs/assets/data/output.csv") -print(df_csv) -# --8<-- [end:csv] diff --git a/docs/source/src/rust/Cargo.toml b/docs/source/src/rust/Cargo.toml index 50647bb70dd4..061c60d02948 100644 --- a/docs/source/src/rust/Cargo.toml +++ b/docs/source/src/rust/Cargo.toml @@ -26,16 +26,10 @@ path = "home/example.rs" required-features = ["polars/lazy", "polars/csv"] [[bin]] -name = "user-guide-getting-started-expressions" -path = "user-guide/getting-started/expressions.rs" +name = "user-guide-getting-started" +path = "user-guide/getting-started.rs" required-features = ["polars/lazy", "polars/temporal", "polars/round_series", "polars/strings"] -[[bin]] -name = "user-guide-getting-started-joins" -path = "user-guide/getting-started/joins.rs" -required-features = 
["polars/polars-ops"] -[[bin]] -name = "user-guide-getting-started-reading-writing" -path = "user-guide/getting-started/reading-writing.rs" + [[bin]] name = "user-guide-concepts-data-structures" path = "user-guide/concepts/data-structures.rs" diff --git a/docs/source/src/rust/user-guide/getting-started/expressions.rs b/docs/source/src/rust/user-guide/getting-started.rs similarity index 100% rename from docs/source/src/rust/user-guide/getting-started/expressions.rs rename to docs/source/src/rust/user-guide/getting-started.rs diff --git a/docs/source/src/rust/user-guide/getting-started/joins.rs b/docs/source/src/rust/user-guide/getting-started/joins.rs deleted file mode 100644 index a5f36c73f342..000000000000 --- a/docs/source/src/rust/user-guide/getting-started/joins.rs +++ /dev/null @@ -1,29 +0,0 @@ -use polars::prelude::*; - -fn main() -> Result<(), Box<dyn std::error::Error>> { - // --8<-- [start:join] - use rand::Rng; - let mut rng = rand::thread_rng(); - - let df: DataFrame = df!( - "a" => 0..8, - "b"=> (0..8).map(|_| rng.gen::<f64>()).collect::<Vec<f64>>(), - "d"=> [Some(1.0), Some(2.0), Some(f64::NAN), Some(f64::NAN), Some(0.0), Some(-5.0), Some(-42.), None] - ) - .unwrap(); - let df2: DataFrame = df!( - "x" => 0..8, - "y"=> &["A", "A", "A", "B", "B", "C", "X", "X"], - ) - .unwrap(); - let joined = df.join(&df2, ["a"], ["x"], JoinType::Left.into())?; - println!("{}", joined); - // --8<-- [end:join] - - // --8<-- [start:hstack] - let stacked = df.hstack(df2.get_columns())?; - println!("{}", stacked); - // --8<-- [end:hstack] - - Ok(()) -} diff --git a/docs/source/src/rust/user-guide/getting-started/reading-writing.rs b/docs/source/src/rust/user-guide/getting-started/reading-writing.rs deleted file mode 100644 index 31b66c4153e1..000000000000 --- a/docs/source/src/rust/user-guide/getting-started/reading-writing.rs +++ /dev/null @@ -1,35 +0,0 @@ -fn main() -> Result<(), Box<dyn std::error::Error>> { - // --8<-- [start:setup] - use std::fs::File; - - use chrono::prelude::*; - use polars::prelude::*; - - let mut df: 
DataFrame = df!( - "date" => &[ - NaiveDate::from_ymd_opt(2025, 1, 1).unwrap().and_hms_opt(0, 0, 0).unwrap(), - NaiveDate::from_ymd_opt(2025, 1, 2).unwrap().and_hms_opt(0, 0, 0).unwrap(), - NaiveDate::from_ymd_opt(2025, 1, 3).unwrap().and_hms_opt(0, 0, 0).unwrap(), - ], - "float" => &[4.0, 5.0, 6.0], - ) - .unwrap(); - println!("{}", df); - // --8<-- [end:setup] - - // --8<-- [start:csv] - let mut file = File::create("docs/assets/data/output.csv").expect("could not create file"); - CsvWriter::new(&mut file) - .include_header(true) - .with_separator(b',') - .finish(&mut df)?; - let df_csv = CsvReadOptions::default() - .with_infer_schema_length(None) - .with_has_header(true) - .try_into_reader_with_file_path(Some("docs/assets/data/output.csv".into()))? - .finish()?; - println!("{}", df_csv); - // --8<-- [end:csv] - - Ok(()) -} diff --git a/docs/source/user-guide/getting-started.md b/docs/source/user-guide/getting-started.md index 080431d41168..69aa8a02d8da 100644 --- a/docs/source/user-guide/getting-started.md +++ b/docs/source/user-guide/getting-started.md @@ -24,18 +24,18 @@ This chapter is here to help you get started with Polars. It covers all the fund Polars supports reading and writing for common file formats (e.g. csv, json, parquet), cloud storage (S3, Azure Blob, BigQuery) and databases (e.g. postgres, mysql). Below, we create a small dataframe and show how to write it to disk and read it back. -{{code_block('user-guide/getting-started/expressions','df',['DataFrame'])}} +{{code_block('user-guide/getting-started','df',['DataFrame'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:df" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:df" ``` In the example below we write the dataframe to a csv file called `output.csv`. After that, we read it back using `read_csv` and then print the result for inspection. 
-{{code_block('user-guide/getting-started/expressions','csv',['read_csv','write_csv'])}} +{{code_block('user-guide/getting-started','csv',['read_csv','write_csv'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:csv" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:csv" ``` For more examples on the CSV file format and other data formats, see the [IO section](io/index.md) of the user guide. @@ -67,20 +67,20 @@ For a more detailed exploration of contexts and expressions see the respective u The context `select` allows you to select and manipulate columns from a dataframe. In the simplest case, each expression you provide will map to a column in the result dataframe: -{{code_block('user-guide/getting-started/expressions','select',['select','alias','Expr.dt'])}} +{{code_block('user-guide/getting-started','select',['select','alias','Expr.dt'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:select" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:select" ``` Polars also supports a feature called “expression expansion”, in which one expression acts as shorthand for multiple expressions. In the example below, we use expression expansion to manipulate the columns “weight” and “height” with a single expression. 
When using expression expansion you can use `.name.suffix` to add a suffix to the names of the original columns: -{{code_block('user-guide/getting-started/expressions','expression-expansion',['select','alias','Expr.name'])}} +{{code_block('user-guide/getting-started','expression-expansion',['select','alias','Expr.name'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:expression-expansion" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:expression-expansion" ``` You can check other sections of the user guide to learn more about [basic operations](expressions/operators.md) or [column selections](expressions/column-selections.md). @@ -90,10 +90,10 @@ You can check other sections of the user guide to learn more about [basic operat The context `with_columns` is very similar to the context `select` but `with_columns` adds columns to the dataframe instead of selecting them. Notice how the resulting dataframe contains the four columns of the original dataframe plus the two new columns introduced by the expressions inside `with_columns`: -{{code_block('user-guide/getting-started/expressions','with_columns',['with_columns'])}} +{{code_block('user-guide/getting-started','with_columns',['with_columns'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:with_columns" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:with_columns" ``` In the example above we also decided to use named expressions instead of the method `alias` to specify the names of the new columns. @@ -103,18 +103,18 @@ Other contexts like `select` and `group_by` also accept named expressions. 
The context `filter` allows us to create a second dataframe with a subset of the rows of the original one: -{{code_block('user-guide/getting-started/expressions','filter',['filter','Expr.dt'])}} +{{code_block('user-guide/getting-started','filter',['filter','Expr.dt'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:filter" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:filter" ``` You can also provide multiple predicate expressions as separate parameters, which is more convenient than putting them all together with `&`: -{{code_block('user-guide/getting-started/expressions','filter-multiple',['filter','is_between'])}} +{{code_block('user-guide/getting-started','filter-multiple',['filter','is_between'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:filter-multiple" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:filter-multiple" ``` ### `group_by` @@ -122,10 +122,10 @@ You can also provide multiple predicate expressions as separate parameters, whic The context `group_by` can be used to group together the rows of the dataframe that share the same value across one or more expressions. 
The example below counts how many people were born in each decade: -{{code_block('user-guide/getting-started/expressions','group_by',['group_by','alias','Expr.dt'])}} +{{code_block('user-guide/getting-started','group_by',['group_by','alias','Expr.dt'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:group_by" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:group_by" ``` The keyword argument `maintain_order` forces Polars to present the resulting groups in the same order as they appear in the original dataframe. @@ -133,10 +133,10 @@ This slows down the grouping operation but is used here to ensure reproducibilit After using the context `group_by` we can use `agg` to compute aggregations over the resulting groups: -{{code_block('user-guide/getting-started/expressions','group_by-agg',['group_by','agg'])}} +{{code_block('user-guide/getting-started','group_by-agg',['group_by','agg'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:group_by-agg" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:group_by-agg" ``` ### More complex queries @@ -144,10 +144,10 @@ After using the context `group_by` we can use `agg` to compute aggregations over Contexts and the expressions within can be chained to create more complex queries according to your needs. 
In the example below we combine some of the contexts we have seen so far to create a more complex query: -{{code_block('user-guide/getting-started/expressions','complex',['group_by','agg','select','with_columns','Expr.str','Expr.list'])}} +{{code_block('user-guide/getting-started','complex',['group_by','agg','select','with_columns','Expr.str','Expr.list'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:complex" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:complex" ``` ## Combining dataframes @@ -160,10 +160,10 @@ In this section, we show an example of a join and an example of a concatenation. Polars provides many different join algorithms. The example below shows how to use a left outer join to combine two dataframes when a column can be used as a unique identifier to establish a correspondence between rows across the dataframes: -{{code_block('user-guide/getting-started/expressions','join',['join'])}} +{{code_block('user-guide/getting-started','join',['join'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:join" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:join" ``` Polars provides many different join algorithms that you can learn about in the [joins section of the user guide](transformations/joins.md). @@ -173,10 +173,10 @@ Polars provides many different join algorithms that you can learn about in the [ Concatenating dataframes creates a taller or wider dataframe, depending on the method used. 
Assuming we have a second dataframe with data from other people, we could use vertical concatenation to create a taller dataframe: -{{code_block('user-guide/getting-started/expressions','concat',['concat'])}} +{{code_block('user-guide/getting-started','concat',['concat'])}} -```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/getting-started/expressions.py:concat" +```python exec="on" result="text" session="getting-started" +--8<-- "python/user-guide/getting-started.py:concat" ``` Polars provides vertical and horizontal concatenation, as well as diagonal concatenation.