From 2945e977190e00b18a129788561dfb3685c2d757 Mon Sep 17 00:00:00 2001 From: Arttu Voutilainen Date: Wed, 5 Mar 2025 16:18:30 +0100 Subject: [PATCH] fix: set proto-to-rel type nullability handling --- .../main/java/io/substrait/relation/Set.java | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/io/substrait/relation/Set.java b/core/src/main/java/io/substrait/relation/Set.java index 4efef01e2..0e0960450 100644 --- a/core/src/main/java/io/substrait/relation/Set.java +++ b/core/src/main/java/io/substrait/relation/Set.java @@ -2,6 +2,10 @@ import io.substrait.proto.SetRel; import io.substrait.type.Type; +import io.substrait.type.TypeCreator; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; import org.immutables.value.Value; @Value.Immutable @@ -42,7 +46,38 @@ public static Set.SetOp fromProto(SetRel.SetOp proto) { @Override protected Type.Struct deriveRecordType() { - return getInputs().get(0).getRecordType(); + // The different inputs may have schemas that differ in nullability, but not in type. + // In that case we should return a schema that is nullable where any of the inputs is nullable. + Stream inputRecordTypes = getInputs().stream().map(Rel::getRecordType); + return inputRecordTypes + .reduce( + (a, b) -> { + if (a.equals(b)) { + return a; // Short-circuit trivial case + } + + if (a.fields().size() != b.fields().size()) { + throw new IllegalArgumentException( + "Set's input records have different number of fields"); + } + + List fields = new ArrayList<>(); + for (int i = 0; i < a.fields().size(); i++) { + // We intentionally don't validate that the types match, to maintain backwards + // compatibility. We should, but then we'll need to handle things like VARCHAR + // vs FIXEDCHAR (comes up in Isthmus tests). We also don't recurse into nullability + // of the inner fields, in case the type itself is a struct or list or map. + Type typeA = a.fields().get(i); + Type typeB = b.fields().get(i); + fields.add(!typeB.nullable() ? TypeCreator.asNullable(typeA) : typeA); + } + + return Type.Struct.builder() + .fields(fields) + .nullable(a.nullable() || b.nullable()) + .build(); + }) + .orElseThrow(() -> new IllegalStateException("A Set relation needs at least one input")); } @Override