From 8a4bad46540598c6acdf432bde08c2a4c76c5039 Mon Sep 17 00:00:00 2001 From: Mustafa Akur <106137913+mustafasrepo@users.noreply.github.com> Date: Fri, 26 Jan 2024 09:21:38 +0300 Subject: [PATCH] Add new test (#8992) --- .../optimizer/src/optimize_projections.rs | 31 ++++++++++--------- datafusion/sqllogictest/test_files/select.slt | 20 ++++++++++++ 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/datafusion/optimizer/src/optimize_projections.rs b/datafusion/optimizer/src/optimize_projections.rs index f87f5fdea99f..103599564252 100644 --- a/datafusion/optimizer/src/optimize_projections.rs +++ b/datafusion/optimizer/src/optimize_projections.rs @@ -218,6 +218,22 @@ fn optimize_projections( // Only use the absolutely necessary aggregate expressions required // by the parent: let mut new_aggr_expr = get_at_indices(&aggregate.aggr_expr, &aggregate_reqs); + + // Aggregations always need at least one aggregate expression. + // With a nested count, we don't require any column as input, but + // still need to create a correct aggregate, which may be optimized + // out later. As an example, consider the following query: + // + // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...]) + // + // which always returns 1. + if new_aggr_expr.is_empty() + && new_group_bys.is_empty() + && !aggregate.aggr_expr.is_empty() + { + new_aggr_expr = vec![aggregate.aggr_expr[0].clone()]; + } + let all_exprs_iter = new_group_bys.iter().chain(new_aggr_expr.iter()); let schema = aggregate.input.schema(); let necessary_indices = indices_referred_by_exprs(schema, all_exprs_iter)?; @@ -238,21 +254,6 @@ fn optimize_projections( let (aggregate_input, _) = add_projection_on_top_if_helpful(aggregate_input, necessary_exprs)?; - // Aggregations always need at least one aggregate expression. - // With a nested count, we don't require any column as input, but - // still need to create a correct aggregate, which may be optimized - // out later. As an example, consider the following query: - // - // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...]) - // - // which always returns 1. - if new_aggr_expr.is_empty() - && new_group_bys.is_empty() - && !aggregate.aggr_expr.is_empty() - { - new_aggr_expr = vec![aggregate.aggr_expr[0].clone()]; - } - // Create a new aggregate plan with the updated input and only the // absolutely necessary fields: return Aggregate::try_new( diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 5216b14cb2d2..50c62eff7772 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1527,3 +1527,23 @@ SELECT to_timestamp('I AM NOT A TIMESTAMP'); query error DataFusion error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type SELECT CAST('' AS int); + +# See issue: https://github.com/apache/arrow-datafusion/issues/8978 +statement ok +create table users (id int, name varchar); + +statement ok +insert into users values (1, 'Tom'); + +statement ok +create view v as select count(id) from users; + +query I +select * from v; +---- +1 + +query I +select count(1) from v; +---- +1