diff --git a/convex-core/src/main/java/convex/core/data/AHashSet.java b/convex-core/src/main/java/convex/core/data/AHashSet.java index c3e3756d9..51a5a73bb 100644 --- a/convex-core/src/main/java/convex/core/data/AHashSet.java +++ b/convex-core/src/main/java/convex/core/data/AHashSet.java @@ -81,12 +81,12 @@ public ASet disjAll(ACollection b) { AHashSet result=this; long n=b.count(); for (long i=0; i excludeRef(Ref valueRef); + public abstract AHashSet excludeHash(Hash hash); public abstract AHashSet includeRef(Ref ref) ; @@ -98,7 +98,7 @@ public AHashSet conj(ACell a) { @Override public ASet exclude(ACell a) { - return excludeRef(Ref.get(a)); + return excludeHash(Cells.getHash(a)); } @Override @@ -142,8 +142,6 @@ public T getByHash(Hash hash) { if (ref==null) return null; return ref.getValue(); } - - protected abstract AHashSet includeRef(Ref e, int i); /** * Tests if this Set contains a given hash diff --git a/convex-core/src/main/java/convex/core/data/ASet.java b/convex-core/src/main/java/convex/core/data/ASet.java index f96df155b..8c51f5cd8 100644 --- a/convex-core/src/main/java/convex/core/data/ASet.java +++ b/convex-core/src/main/java/convex/core/data/ASet.java @@ -51,6 +51,12 @@ public final byte getTag() { */ public abstract ASet exclude(ACell a) ; + /** + * Gets the Hash of the first element in this set + * @return Hash of the first element (leaf implementations return null when empty) + */ + protected abstract Hash getFirstHash(); + /** * Updates the set to include all the given elements. 
* Can be used to implement union of sets diff --git a/convex-core/src/main/java/convex/core/data/Format.java b/convex-core/src/main/java/convex/core/data/Format.java index 74169bf2f..278cf4144 100644 --- a/convex-core/src/main/java/convex/core/data/Format.java +++ b/convex-core/src/main/java/convex/core/data/Format.java @@ -598,7 +598,7 @@ private static T read(byte tag, Blob blob, int offset) throws } catch (BadFormatException e) { throw e; } catch (IndexOutOfBoundsException e) { - throw new BadFormatException("Read out of bounds when decoding with tag 0x"+Utils.toHexString(tag)); + throw new BadFormatException("Read out of bounds when decoding with tag 0x"+Utils.toHexString(tag),e); } catch (MissingDataException e) { throw e; } catch (Exception e) { diff --git a/convex-core/src/main/java/convex/core/data/MapLeaf.java b/convex-core/src/main/java/convex/core/data/MapLeaf.java index e1bec1e3d..6577c3557 100644 --- a/convex-core/src/main/java/convex/core/data/MapLeaf.java +++ b/convex-core/src/main/java/convex/core/data/MapLeaf.java @@ -752,6 +752,7 @@ public MapLeaf slice(long start, long end) { @Override protected Hash getFirstHash() { + if (count==0) return null; return entries[0].getKeyHash(); } diff --git a/convex-core/src/main/java/convex/core/data/MapTree.java b/convex-core/src/main/java/convex/core/data/MapTree.java index fb176e20a..e31a963d2 100644 --- a/convex-core/src/main/java/convex/core/data/MapTree.java +++ b/convex-core/src/main/java/convex/core/data/MapTree.java @@ -91,7 +91,7 @@ static MapTree create(int shift, AHashM int digit=child.getFirstHash().getHexDigit(shift); mask|=(1<(rs,shift,mask,count); diff --git a/convex-core/src/main/java/convex/core/data/Maps.java b/convex-core/src/main/java/convex/core/data/Maps.java index c2fc8c6c7..b9d46e573 100644 --- a/convex-core/src/main/java/convex/core/data/Maps.java +++ b/convex-core/src/main/java/convex/core/data/Maps.java @@ -154,6 +154,10 @@ public static AHashMap read(Blob b, int public static int 
MAX_ENCODING_SIZE = Math.max(MapTree.MAX_ENCODING_LENGTH, MapLeaf.MAX_ENCODING_LENGTH); + public static Hash getFirstHash(AHashMap map) { + return map.getFirstHash(); + } + diff --git a/convex-core/src/main/java/convex/core/data/SetLeaf.java b/convex-core/src/main/java/convex/core/data/SetLeaf.java index c0e8fb9ce..dbc90e518 100644 --- a/convex-core/src/main/java/convex/core/data/SetLeaf.java +++ b/convex-core/src/main/java/convex/core/data/SetLeaf.java @@ -179,8 +179,8 @@ public SetLeaf exclude(ACell key) { } @Override - public SetLeaf excludeRef(Ref key) { - int i = seekKeyRef(key.getHash()); + public SetLeaf excludeHash(Hash hash) { + int i = seekKeyRef(hash); if (i < 0) return this; // not found return excludeAt(i); } @@ -544,12 +544,7 @@ protected boolean containsAll(SetLeaf b) { } @Override - public AHashSet includeRef(Ref ref) { - return includeRef(ref,0); - } - - @Override - protected AHashSet includeRef(Ref e, int shift) { + public AHashSet includeRef(Ref e) { int n=elements.length; Hash h=e.getHash(); int pos=0; @@ -573,7 +568,7 @@ protected AHashSet includeRef(Ref e, int shift) { } else { // Maximum size exceeded, so need to expand to tree. // Shift required since this might not be the tree root! 
- return SetTree.create(newEntries, shift); + return SetTree.create(newEntries); } } @@ -593,7 +588,7 @@ public T get(long index) { @Override public AHashSet toCanonical() { if (count<=MAX_ELEMENTS) return this; - return SetTree.create(elements, 0); + return SetTree.create(elements); } @SuppressWarnings("unchecked") @@ -609,6 +604,12 @@ public ASet slice(long start, long end) { return new SetLeaf(nrefs); } + @Override + protected Hash getFirstHash() { + if (count==0) return null; + return elements[0].getHash(); + } + diff --git a/convex-core/src/main/java/convex/core/data/SetTree.java b/convex-core/src/main/java/convex/core/data/SetTree.java index d3d3d1ef9..737f620aa 100644 --- a/convex-core/src/main/java/convex/core/data/SetTree.java +++ b/convex-core/src/main/java/convex/core/data/SetTree.java @@ -1,5 +1,8 @@ package convex.core.data; +import java.util.Arrays; +import java.util.Comparator; + import convex.core.exceptions.BadFormatException; import convex.core.exceptions.InvalidDataException; import convex.core.exceptions.Panic; @@ -72,13 +75,14 @@ private static long computeCount(Ref>[] children) * @return New SetTree node */ @SuppressWarnings("unchecked") - public static SetTree create(Ref[] elementRefs, int shift) { + public static SetTree create(Ref[] elementRefs) { int n = elementRefs.length; if (n <= SetLeaf.MAX_ELEMENTS) { throw new IllegalArgumentException( "Insufficient distinct entries for TreeMap construction: " + elementRefs.length); } + int shift=computeShift(elementRefs); // construct full child array Ref>[] children = new Ref[16]; for (int i = 0; i < n; i++) { @@ -88,12 +92,72 @@ public static SetTree create(Ref[] elementRefs, int shif if (ref == null) { children[ix] = SetLeaf.create(e).getRef(); } else { - AHashSet newChild=ref.getValue().includeRef(e,shift+1); + AHashSet newChild=ref.getValue().includeRef(e); children[ix] = newChild.getRef(); } } return (SetTree) createFull(children, shift); } + + /** + * Create SetTree with specific children at 
specified shift level + * Children must branch at the given shift level + */ + @SafeVarargs + static SetTree create(int shift, AHashSet ... children) { + int n=children.length; + Arrays.sort(children,shiftComparator(shift)); + @SuppressWarnings("unchecked") + Ref>[] rs=new Ref[n]; + long count=0; + short mask=0; + for (int i=0; i child=children[i]; + rs[i]=Ref.get(child); + count+=child.count; + int digit=child.getFirstHash().getHexDigit(shift); + mask|=(1<(rs,shift,mask,count); + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) + static Comparator[] COMPARATORS=new Comparator[64]; + + @SuppressWarnings("rawtypes") + private static Comparator shiftComparator(int shift) { + if (COMPARATORS[shift]==null) { + COMPARATORS[shift]=new Comparator() { + @Override + public int compare(AHashSet o1, AHashSet o2) { + int d1= o1.getFirstHash().getHexDigit(shift); + int d2= o2.getFirstHash().getHexDigit(shift); + return d1-d2; + } + }; + }; + return COMPARATORS[shift]; + } + + + /** + * Computes the common shift for a vector of entries. 
+ * This is the shift at which the first split occurs, i.e. the length of the common prefix + * @param es Entries + * @return Shift at which the entries first diverge + */ + protected static int computeShift(Ref[] es) { + int shift=63; // max possible + Hash h=es[0].getHash(); + int n=es.length; + for (int i=1; i getRefByHash(Hash hash) { return children[i].getValue().getRefByHash(hash); } - @SuppressWarnings("unchecked") @Override public AHashSet exclude(ACell key) { - return excludeRef((Ref) Ref.get(key)); + return excludeHash(Cells.getHash(key)); } @Override - public AHashSet excludeRef(Ref keyRef) { - int digit = keyRef.getHash().getHexDigit(shift); + public AHashSet excludeHash(Hash hash) { + int digit =hash.getHexDigit(shift); int i = Bits.indexForDigit(digit, mask); if (i < 0) return this; // not present // dissoc entry from child AHashSet child = children[i].getValue(); - AHashSet newChild = child.excludeRef(keyRef); + AHashSet newChild = child.excludeHash(hash); if (child == newChild) return this; // no removal, no change AHashSet result=(newChild.isEmpty())?dissocChild(i):replaceChild(i, newChild.getRef()); @@ -242,6 +305,10 @@ public AHashSet toCanonical() { @SuppressWarnings("unchecked") private AHashSet dissocChild(int i) { int bsize = children.length; + if (bsize==2) { + // can just return the remaining child + return children[1-i].getValue(); + } AHashSet child = children[i].getValue(); Ref>[] newBlocks = (Ref>[]) new Ref[bsize - 1]; System.arraycopy(children, 0, newBlocks, 0, i); @@ -297,14 +364,21 @@ public static int digitForIndex(int index, short mask) { @Override public SetTree include(ACell value) { Ref keyRef = (Ref) Ref.get(value); - return includeRef(keyRef, shift); + return includeRef(keyRef); } + @Override - protected SetTree includeRef(Ref e, int shift) { - if (this.shift != shift) { - throw new Error("Invalid shift!"); + public SetTree includeRef(Ref e) { + Hash kh= e.getHash(); + int cshift= kh.commonHexPrefixLength(getFirstHash(), Hash.HEX_LENGTH); + + if (cshift 
newLeaf=SetLeaf.create(e); + return create(cshift,newLeaf,this); } + Ref keyRef = e; int digit = keyRef.getHash().getHexDigit(shift); int i = Bits.indexForDigit(digit, mask); @@ -315,16 +389,11 @@ protected SetTree includeRef(Ref e, int shift) { } else { // location needs update AHashSet child = children[i].getValue(); - AHashSet newChild = child.includeRef(e, shift + 1); + AHashSet newChild = child.includeRef(e); if (child == newChild) return this; return (SetTree) replaceChild(i, newChild.getRef()); } } - - @Override - public AHashSet includeRef(Ref ref) { - return includeRef(ref,shift); - } @Override public int encode(byte[] bs, int pos) { @@ -514,7 +583,7 @@ private AHashSet mergeWith(SetLeaf b, int setOp, int shift) { if (newE != null) { // include only new keys where function result is not null. Re-use existing // entry if possible. - result = result.includeRef(newE, shift); + result = result.includeRef(newE); } } return result; @@ -575,7 +644,7 @@ protected void validateWithPrefix(Hash base, int digit, int position) throws Inv Hash firstHash; try { - firstHash=getElementRef(0).getHash(); + firstHash=getFirstHash(); } catch (ClassCastException e) { throw new InvalidDataException("Bad child type:" +e.getMessage(), this); } @@ -600,9 +669,8 @@ protected void validateWithPrefix(Hash base, int digit, int position) throws Inv if (child instanceof SetTree) { SetTree childTree=(SetTree) child; - int expectedShift=shift+1; - if (childTree.shift!=expectedShift) { - throw new InvalidDataException("Wrong child shift ["+childTree.shift+"], expected ["+expectedShift+"]",this); + if (childTree.shift<=shift) { + throw new InvalidDataException("Wrong child shift ["+childTree.shift+"], expected greater than ["+shift+"]",this); } } @@ -624,7 +692,10 @@ protected void validateWithPrefix(Hash base, int digit, int position) throws Inv private boolean isValidStructure() { if (count <= SetLeaf.MAX_ELEMENTS) return false; - if (children.length != Integer.bitCount(mask & 0xFFFF)) 
return false; + int n=children.length; + if (n<2) return false; + + if (n != Integer.bitCount(mask & 0xFFFF)) return false; for (int i = 0; i < children.length; i++) { Ref> child = children[i]; if (child == null) return false; @@ -731,4 +802,13 @@ public ASet slice(long start, long end) { return result; } + // Cache of first hash, we don't want to descend tree repeatedly to find this + private Hash firstHash; + + @Override + protected Hash getFirstHash() { + if (firstHash==null) firstHash=children[0].getValue().getFirstHash(); + return firstHash; + } + } diff --git a/convex-core/src/main/java/convex/core/data/Sets.java b/convex-core/src/main/java/convex/core/data/Sets.java index f0bee5913..dbc26b903 100644 --- a/convex-core/src/main/java/convex/core/data/Sets.java +++ b/convex-core/src/main/java/convex/core/data/Sets.java @@ -108,7 +108,7 @@ public static ASet read(Blob b, int pos) throws BadFormatEx public static AHashSet createWithShift(int shift, ArrayList> values) { AHashSet result=Sets.empty(); for (Ref v: values) { - result=result.includeRef(v, shift); + result=result.includeRef(v); } return result; } diff --git a/convex-core/src/main/java/convex/core/data/impl/KeySet.java b/convex-core/src/main/java/convex/core/data/impl/KeySet.java index bcab6c59f..8aec9b648 100644 --- a/convex-core/src/main/java/convex/core/data/impl/KeySet.java +++ b/convex-core/src/main/java/convex/core/data/impl/KeySet.java @@ -7,6 +7,7 @@ import convex.core.data.ASet; import convex.core.data.Hash; import convex.core.data.MapEntry; +import convex.core.data.Maps; import convex.core.data.Ref; import convex.core.exceptions.InvalidDataException; import convex.core.exceptions.TODOException; @@ -156,4 +157,10 @@ public int getRefCount() { throw new UnsupportedOperationException(); } + @Override + protected Hash getFirstHash() { + // Delegates to the wrapped map through the Maps.getFirstHash helper + return Maps.getFirstHash(map); + } + }