From 73223f13b467c6d54cdfa501e9889d61921b57ee Mon Sep 17 00:00:00 2001
From: ritchie
Date: Thu, 6 Mar 2025 11:58:46 +0100
Subject: [PATCH] docs(python): Document `read_().lazy()` antipattern

---
 py-polars/polars/io/csv/functions.py     | 10 ++++++----
 py-polars/polars/io/ipc/functions.py     |  8 ++++++++
 py-polars/polars/io/ndjson.py            | 11 +++++++++++
 py-polars/polars/io/parquet/functions.py |  9 ++++++++-
 4 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/py-polars/polars/io/csv/functions.py b/py-polars/polars/io/csv/functions.py
index 162c1080f73c..b274eb70a541 100644
--- a/py-polars/polars/io/csv/functions.py
+++ b/py-polars/polars/io/csv/functions.py
@@ -228,6 +228,12 @@ def read_csv(
     --------
     scan_csv : Lazily read from a CSV file or multiple files via glob patterns.
 
+    Warnings
+    --------
+    Calling `read_csv().lazy()` is an antipattern: it forces Polars to materialize
+    the full CSV file, so no optimizations can be pushed down into the reader.
+    Prefer ``scan_csv`` if you want to work with ``LazyFrame``s.
+
     Notes
     -----
     If the schema is inferred incorrectly (e.g. as `pl.Int64` instead of `pl.Float64`),
@@ -235,10 +241,6 @@ def read_csv(
     `infer_schema_length` or override the inferred dtype for those columns with
     `schema_overrides`.
 
-    This operation defaults to a `rechunk` operation at the end, meaning that all data
-    will be stored continuously in memory. Set `rechunk=False` if you are benchmarking
-    the csv-reader. A `rechunk` is an expensive operation.
-
     Examples
     --------
     >>> pl.read_csv("data.csv", separator="|")  # doctest: +SKIP
diff --git a/py-polars/polars/io/ipc/functions.py b/py-polars/polars/io/ipc/functions.py
index c933728810c9..515150062c23 100644
--- a/py-polars/polars/io/ipc/functions.py
+++ b/py-polars/polars/io/ipc/functions.py
@@ -94,8 +94,16 @@ def read_ipc(
     -------
     DataFrame
 
+    See Also
+    --------
+    scan_ipc : Lazily read from an IPC file or multiple files via glob patterns.
+
     Warnings
     --------
+    Calling `read_ipc().lazy()` is an antipattern: it forces Polars to materialize
+    the full IPC file, so no optimizations can be pushed down into the reader.
+    Prefer ``scan_ipc`` if you want to work with ``LazyFrame``s.
+
     If `memory_map` is set, the bytes on disk are mapped 1:1 to memory.
     That means that you cannot write to the same filename.
     E.g. `pl.read_ipc("my_file.arrow").write_ipc("my_file.arrow")` will fail.
diff --git a/py-polars/polars/io/ndjson.py b/py-polars/polars/io/ndjson.py
index a99e4eb4e1f5..39095240ec14 100644
--- a/py-polars/polars/io/ndjson.py
+++ b/py-polars/polars/io/ndjson.py
@@ -117,6 +117,17 @@ def read_ndjson(
     include_file_paths
         Include the path of the source file(s) as a column with this name.
 
+    See Also
+    --------
+    scan_ndjson : Lazily read from an NDJSON file or multiple files via glob patterns.
+
+    Warnings
+    --------
+    Calling `read_ndjson().lazy()` is an antipattern: it forces Polars to
+    materialize the full NDJSON file, so no optimizations can be pushed down
+    into the reader. Prefer ``scan_ndjson`` if you want to work with
+    ``LazyFrame``s.
+
     Examples
     --------
     >>> from io import StringIO
diff --git a/py-polars/polars/io/parquet/functions.py b/py-polars/polars/io/parquet/functions.py
index e0a0e6ccb951..831f11d12d13 100644
--- a/py-polars/polars/io/parquet/functions.py
+++ b/py-polars/polars/io/parquet/functions.py
@@ -175,7 +175,14 @@ def read_parquet(
 
     See Also
     --------
-    scan_parquet
+    scan_parquet : Lazily read from a Parquet file or multiple files via glob patterns.
     scan_pyarrow_dataset
+
+    Warnings
+    --------
+    Calling `read_parquet().lazy()` is an antipattern: it forces Polars to
+    materialize the full Parquet file, so no optimizations can be pushed down
+    into the reader. Prefer ``scan_parquet`` if you want to work with
+    ``LazyFrame``s.
     """
     if schema is not None:
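
For reference, a minimal sketch of the antipattern these docstrings warn about, next to the preferred lazy scan; the file name `data.csv` and the columns `id` and `value` are illustrative only, not part of the patch:

    import polars as pl

    # Antipattern: read_csv() eagerly materializes the whole CSV in memory,
    # so the later .lazy() cannot push any optimizations into the reader.
    df_eager = (
        pl.read_csv("data.csv")
        .lazy()
        .filter(pl.col("value") > 0)
        .select("id", "value")
        .collect()
    )

    # Preferred: scan_csv() returns a LazyFrame up front, so the filter and
    # projection are pushed down and only the needed rows/columns are read.
    df_lazy = (
        pl.scan_csv("data.csv")
        .filter(pl.col("value") > 0)
        .select("id", "value")
        .collect()
    )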