Skip to content

Commit

Permalink
Revert "Test + workaround for SanityCheck plan"
Browse files (browse the repository at this point in the history)
This reverts commit 3819810.
  • Loading branch information
wiedld committed Feb 3, 2025
1 parent 6762c73 commit 7b9263f
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 37 deletions.
10 changes: 0 additions & 10 deletions datafusion/physical-optimizer/src/sanity_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported};
use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion_physical_plan::joins::SymmetricHashJoinExec;
use datafusion_physical_plan::sorts::sort::SortExec;
use datafusion_physical_plan::union::UnionExec;
use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};

use crate::PhysicalOptimizerRule;
Expand Down Expand Up @@ -137,14 +135,6 @@ pub fn check_plan_sanity(
plan.required_input_ordering(),
plan.required_input_distribution(),
) {
// TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492
if child.as_any().downcast_ref::<UnionExec>().is_some() {
continue;
}
if child.as_any().downcast_ref::<SortExec>().is_some() {
continue;
}

let child_eq_props = child.equivalence_properties();
if let Some(sort_req) = sort_req {
if !child_eq_props.ordering_satisfy_requirement(&sort_req) {
Expand Down
76 changes: 49 additions & 27 deletions datafusion/sqllogictest/test_files/union.slt
Original file line number Diff line number Diff line change
Expand Up @@ -538,9 +538,6 @@ physical_plan
# Clean up after the test
########

statement ok
drop table t

statement ok
drop table t1;

Expand Down Expand Up @@ -781,36 +778,61 @@ select make_array(make_array(1)) x UNION ALL SELECT make_array(arrow_cast(make_a
[[-1]]
[[1]]

###
# Test for https://github.com/apache/datafusion/issues/11492
###

# Input data is
# a,b,c
# 1,2,3

statement ok
CREATE EXTERNAL TABLE t (
a INT,
b INT,
c INT
CREATE EXTERNAL TABLE aggregate_test_100 (
c1 VARCHAR NOT NULL,
c2 TINYINT NOT NULL,
c3 SMALLINT NOT NULL,
c4 SMALLINT,
c5 INT,
c6 BIGINT NOT NULL,
c7 SMALLINT NOT NULL,
c8 INT NOT NULL,
c9 BIGINT UNSIGNED NOT NULL,
c10 VARCHAR NOT NULL,
c11 FLOAT NOT NULL,
c12 DOUBLE NOT NULL,
c13 VARCHAR NOT NULL
)
STORED AS CSV
LOCATION '../core/tests/data/example.csv'
WITH ORDER (a ASC)
LOCATION '../../testing/data/csv/aggregate_test_100.csv'
OPTIONS ('format.has_header' 'true');

query T
SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT 'bar' as a from t) ORDER BY a;
----
1
bar
statement ok
set datafusion.execution.batch_size = 2;

query I
SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT NULL as a from t) ORDER BY a;
# Constant value tracking across union
query TT
explain
SELECT * FROM(
(
SELECT * FROM aggregate_test_100 WHERE c1='a'
)
UNION ALL
(
SELECT * FROM aggregate_test_100 WHERE c1='a'
))
ORDER BY c1
----
1
NULL
logical_plan
01)Sort: aggregate_test_100.c1 ASC NULLS LAST
02)--Union
03)----Filter: aggregate_test_100.c1 = Utf8("a")
04)------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], partial_filters=[aggregate_test_100.c1 = Utf8("a")]
05)----Filter: aggregate_test_100.c1 = Utf8("a")
06)------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], partial_filters=[aggregate_test_100.c1 = Utf8("a")]
physical_plan
01)CoalescePartitionsExec
02)--UnionExec
03)----CoalesceBatchesExec: target_batch_size=2
04)------FilterExec: c1@0 = a
05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true
07)----CoalesceBatchesExec: target_batch_size=2
08)------FilterExec: c1@0 = a
09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true

# Clean up after the test
statement ok
drop table t
drop table aggregate_test_100;

0 comments on commit 7b9263f

Please sign in to comment.