From 4b6ab874e0a23a69ed99a803504920739534e9c3 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 25 Feb 2025 19:52:13 +0000 Subject: [PATCH] Updated documentation for DataFrame.projection_requires_validation field. --- datafusion/core/src/dataframe/mod.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 326022efc184..f4bf055eea8b 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -184,7 +184,21 @@ pub struct DataFrame { // Box the (large) SessionState to reduce the size of DataFrame on the stack session_state: Box, plan: LogicalPlan, - // whether we can skip validation for projection ops + // Whether projection ops require validation or not. When this flag is false, + // the `with_column` and `with_column_renamed` functions can apply an optimization + // where the recursive work required to columnize and normalize expressions is + // skipped. Since these function calls are often chained or + // called many times in dataframe operations, this can result in a significant + // performance gain. + // + // This flag can be set to false only when the dataframe function + // call results in the last operation being a + // `LogicalPlanBuilder::from(plan).project(fields)?.build()` or + // `LogicalPlanBuilder::from(plan).project_with_validation(fields)?.build()` + // call. This requirement guarantees that the plan has had all columnization + // and normalization applied to existing expressions and only new expressions + // will require that work. Any operation that updates the plan in any way + // via anything other than a `project` call should set this to true. projection_requires_validation: bool, }