Skip to content

Commit

Permalink
Fix non-determinist behaviour of join operator with bag array as key (#…
Browse files Browse the repository at this point in the history
…5189)



Signed-off-by: Rob Syme <rob.syme@gmail.com>
Signed-off-by: Paolo Di Tommaso <paolo.ditommaso@gmail.com>
Signed-off-by: Ben Sherman <bentshermann@gmail.com>
  • Loading branch information
robsyme authored Sep 4, 2024
1 parent d59f5fa commit e7dc0d6
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 42 deletions.
97 changes: 55 additions & 42 deletions modules/nextflow/src/main/groovy/nextflow/util/ArrayBag.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -63,53 +63,66 @@ class ArrayBag<E> implements Bag<E>, List<E>, KryoSerializable {
InvokerHelper.inspect(this)
}

// E getAt( int index ) {
// target.get(index)
// }
//
// void putAt( int index, E value ) {
// target.set(index, value)
// }
//
// @Override
// int hashCode() {
// int h = 0;
// Iterator<E> i = target.iterator();
// while (i.hasNext()) {
// E obj = i.next();
// if (obj != null)
// h += obj.hashCode();
// }
// return h;
// }
//
// @Override
// boolean equals(Object o) {
// if ( o.is(this) )
// return true;
//
// if (!(o instanceof ArrayBag))
// return false;
//
// Collection other = ((ArrayBag)o).target
// if (other.size() != target.size())
// return false;
//
// try {
// return target.containsAll(other);
// }
// catch (ClassCastException unused) {
// return false;
// }
// catch (NullPointerException unused) {
// return false;
// }
// }
@Override
int hashCode() {
int h = 0;
Iterator<E> i = target.iterator();
while (i.hasNext()) {
E obj = i.next();
if (obj != null)
h += obj.hashCode();
}
return h;
}

/**
* NOTE!!! this method implements an equality is NOT used when invoking `equals` method
* or using `==` operator from Groovy code. This because the Groovy runtime implements its
* own equality logic both for {@link Map} and {@link Collection} logic.
*
* See
* https://issues.apache.org/jira/browse/GROOVY-9003
*
* However this is still applied when equality is checked by Java compiled code e.g. Java SDK.
* For this reason is necessary to implement the expected equals (and hashCode) semantic by `join`
* (and other) operators.
*
* See issue
* https://github.com/nextflow-io/nextflow/issues/5187
*
* @return
* {@code true} is the content of the bag is equals to another bag irrespective the elements order,
* {@code false} otherwise
*/
@Override
boolean equals(Object o) {
if ( o.is(this) )
return true;

if (!(o instanceof ArrayBag))
return false;

Collection other = ((ArrayBag)o).target
if (other.size() != target.size())
return false;

try {
return target.containsAll(other);
}
catch (ClassCastException unused) {
return false;
}
catch (NullPointerException unused) {
return false;
}
}

@Override
void read (Kryo kryo, Input input) {
target = kryo.readObject(input,ArrayList)
}

@Override
void write (Kryo kryo, Output output) {
kryo.writeObject(output, target)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ import nextflow.Channel
import nextflow.Global
import nextflow.Session
import nextflow.exception.AbortOperationException
import nextflow.util.ArrayBag
import spock.lang.Specification

/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
Expand Down Expand Up @@ -207,6 +209,35 @@ class JoinOpTest extends Specification {

}

def 'should be able to use identical ArrayBags join key' () {
given:
def key1 = new ArrayBag(["key"])
def key2 = new ArrayBag(["key"])
def ch1 = Channel.of([key1, "foo"])
def ch2 = Channel.of([key2, "bar"])

when:
def op = new JoinOp(ch1 as DataflowReadChannel, ch2 as DataflowReadChannel)
List result = op.apply().toList().getVal()

then:
!result.isEmpty()
}

def 'should differentiate nonidentical ArrayBags join key' () {
given:
def key1 = new ArrayBag(["key", "key", "quay"])
def key2 = new ArrayBag(["quay", "quay", "key"])
def ch1 = Channel.of([key1, "foo"])
def ch2 = Channel.of([key2, "bar"])

when:
def op = new JoinOp(ch1 as DataflowReadChannel, ch2 as DataflowReadChannel)
List result = op.apply().toList().getVal()

then:
result.isEmpty()
}

def 'should not fail on mismatches' () {
given:
Expand Down
31 changes: 31 additions & 0 deletions modules/nextflow/src/test/groovy/nextflow/util/ArrayBagTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,35 @@ class ArrayBagTest extends Specification {
String.valueOf(bag) == '[1, 2, 3]'
}

def 'hashCode should be invariant to order' () {
given:
def bag1 = new ArrayBag([1,2,3])
def bag2 = new ArrayBag([3,1,2])
def bag3 = new ArrayBag([4,1,2])

expect:
bag1.hashCode() == bag2.hashCode()
bag1.hashCode() != bag3.hashCode()

/**
* NOTE!!! equality cannot be checked due to groovy overriding the equals implementation
* see {@link ArrayBag#equals(java.lang.Object)}
*/
}

def 'should access map entry using bag as key' () {
given:
def bag1 = new ArrayBag([1,2,3])
def bag2 = new ArrayBag([3,1,2])
def bag3 = new ArrayBag([4,1,2])
and:
def map = [(bag1):'foo']

expect:
map.get(bag1) == 'foo'
map.get(bag2) == 'foo'
map.get(bag3) == null

}

}

0 comments on commit e7dc0d6

Please sign in to comment.