Skip to content

Commit

Permalink
Added flag to indicate whether it is safe to project without validati…
Browse files Browse the repository at this point in the history
…on or not.
  • Loading branch information
Omega359 committed Feb 17, 2025
1 parent 953e722 commit 85cc464
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 9 deletions.
38 changes: 29 additions & 9 deletions datafusion/core/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ pub struct DataFrame {
// Box the (large) SessionState to reduce the size of DataFrame on the stack
session_state: Box<SessionState>,
plan: LogicalPlan,
// whether we can skip validation for projection ops
projection_requires_validation: bool,
}

impl DataFrame {
Expand All @@ -195,6 +197,7 @@ impl DataFrame {
Self {
session_state: Box::new(session_state),
plan,
projection_requires_validation: true,
}
}

Expand Down Expand Up @@ -332,6 +335,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan: project_plan,
projection_requires_validation: false,
})
}

Expand Down Expand Up @@ -437,6 +441,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -477,6 +482,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -547,6 +553,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: !is_grouping_set,
})
}

Expand All @@ -559,6 +566,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -597,6 +605,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
})
}

Expand Down Expand Up @@ -634,6 +643,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -672,6 +682,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -703,6 +714,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -744,6 +756,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -944,6 +957,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
})
}

Expand Down Expand Up @@ -993,6 +1007,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
})
}

Expand Down Expand Up @@ -1060,6 +1075,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -1119,6 +1135,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -1154,6 +1171,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -1425,6 +1443,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
})
}

Expand Down Expand Up @@ -1477,6 +1496,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -1512,6 +1532,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: true,
})
}

Expand Down Expand Up @@ -1557,6 +1578,7 @@ impl DataFrame {
DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
}
.collect()
.await
Expand Down Expand Up @@ -1626,6 +1648,7 @@ impl DataFrame {
DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
}
.collect()
.await
Expand Down Expand Up @@ -1695,6 +1718,7 @@ impl DataFrame {
DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
}
.collect()
.await
Expand Down Expand Up @@ -1728,14 +1752,6 @@ impl DataFrame {

let mut col_exists = false;
let new_column = expr.alias(name);
// if the existing plan is a projection we can skip validation
// this is only really true as long as only the dataframe api
// is used. If the logical plan is built first then .with_column
// is called that wouldn't be a valid assumption
let existing_requires_validation = match plan {
LogicalPlan::Projection(_) => false,
_ => true,
};
let mut fields: Vec<(Expr, bool)> = plan
.schema()
.iter()
Expand All @@ -1749,7 +1765,7 @@ impl DataFrame {
.as_ref()
.filter(|s| *s == &e.to_string())
.is_none()
.then_some((e, existing_requires_validation))
.then_some((e, self.projection_requires_validation))
}
})
.collect();
Expand All @@ -1765,6 +1781,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan: project_plan,
projection_requires_validation: false,
})
}

Expand Down Expand Up @@ -1837,6 +1854,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan: project_plan,
projection_requires_validation: false,
})
}

Expand Down Expand Up @@ -1902,6 +1920,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
})
}

Expand Down Expand Up @@ -1937,6 +1956,7 @@ impl DataFrame {
Ok(DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
})
}
}
Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/dataframe/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ impl DataFrame {
DataFrame {
session_state: self.session_state,
plan,
projection_requires_validation: self.projection_requires_validation,
}
.collect()
.await
Expand Down

0 comments on commit 85cc464

Please sign in to comment.