Skip to content

Commit

Permalink
touch-up changes mostly to clean up code contributed by others.
Browse files Browse the repository at this point in the history
  • Loading branch information
leerho committed Feb 25, 2025
1 parent 8dbbc8e commit 9e5b51c
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 10 deletions.
51 changes: 49 additions & 2 deletions src/main/java/org/apache/datasketches/theta/CompactSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,61 @@ else if (serVer == 2) {
"Corrupted: Serialization Version " + serVer + " not recognized.");
}

/**
 * Wraps the sketch image held in the given byte array and refers to it directly,
 * using the DEFAULT_UPDATE_SEED. The image is referenced in place; no data is copied
 * onto the java heap.
 * Wrapping enables fast read-only merging and access to all the public read-only API.
 *
 * <p>Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have
 * been explicitly stored as direct sketches can be wrapped.
 * Wrapping a sketch image of an earlier serial version results in a heapify operation
 * instead, because those early versions were never designed to "wrap".</p>
 *
 * <p>Wrapping an image of any subclass of this class that is empty or contains only a
 * single item results in the heapified form of an empty or single item sketch, respectively.
 * This is actually faster and consumes less overall memory.</p>
 *
 * <p>The image is expected to have been created using the DEFAULT_UPDATE_SEED.
 * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field,
 * so the resulting heapified CompactSketch will be given the hash of DEFAULT_UPDATE_SEED.</p>
 *
 * @param bytes a byte array image of a Sketch that was created using the DEFAULT_UPDATE_SEED.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 *
 * @return a CompactSketch backed by the given byte array except as above.
 */
public static CompactSketch wrap(final byte[] bytes) {
  // NOTE(review): seed enforcement is switched off for this overload, so the image's
  // seed hash is presumably not validated against DEFAULT_UPDATE_SEED -- confirm against
  // the private wrap(byte[], long, boolean) implementation.
  final boolean enforceSeed = false;
  return wrap(bytes, ThetaUtil.DEFAULT_UPDATE_SEED, enforceSeed);
}


/**
 * Wraps the sketch image held in the given byte array and refers to it directly,
 * using the caller-supplied expectedSeed. The image is referenced in place; no data is
 * copied onto the java heap.
 * Wrapping enables fast read-only merging and access to all the public read-only API.
 *
 * <p>Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have
 * been explicitly stored as direct sketches can be wrapped.
 * Wrapping a sketch image of an earlier serial version results in a heapify operation
 * instead, because those early versions were never designed to "wrap".</p>
 *
 * <p>Wrapping an image of any subclass of this class that is empty or contains only a
 * single item results in the heapified form of an empty or single item sketch, respectively.
 * This is actually faster and consumes less overall memory.</p>
 *
 * <p>This method checks if the given expectedSeed was used to create the source image.
 * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field,
 * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.</p>
 *
 * @param bytes a byte array image of a Sketch that was created using the given expectedSeed.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given byte array image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a CompactSketch backed by the given byte array except as above.
 */
public static CompactSketch wrap(final byte[] bytes, final long expectedSeed) {
  // Unlike the single-argument overload, this one requests seed enforcement.
  final boolean enforceSeed = true;
  return wrap(bytes, expectedSeed, enforceSeed);
}

private static CompactSketch wrap(final byte[] bytes, final long seed, final boolean enforceSeed) {
final int serVer = bytes[PreambleUtil.SER_VER_BYTE];
final int familyId = bytes[PreambleUtil.FAMILY_BYTE];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public int getCurrentBytes() {

private static final int START_PACKED_DATA_EXACT_MODE = 8;
private static final int START_PACKED_DATA_ESTIMATION_MODE = 16;

@Override
public int getRetainedEntries(final boolean valid) { //compact is always valid
// number of entries is stored using variable length encoding
Expand Down Expand Up @@ -132,7 +132,7 @@ long[] getCache() {
final int numEntries = getRetainedEntries();
final long[] cache = new long[numEntries];
int i = 0;
HashIterator it = iterator();
final HashIterator it = iterator();
while (it.next()) {
cache[i++] = it.get();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -504,21 +504,21 @@ private void moveDataToTgt(final long[] arr, final int count) {
}

private void moveDataToTgt(final Sketch sketch) {
int count = sketch.getRetainedEntries();
final int count = sketch.getRetainedEntries();
int tmpCnt = 0;
if (wmem_ != null) { //Off Heap puts directly into mem
final int preBytes = CONST_PREAMBLE_LONGS << 3;
final int lgArrLongs = lgArrLongs_;
final long thetaLong = thetaLong_;
HashIterator it = sketch.iterator();
final HashIterator it = sketch.iterator();
while (it.next()) {
final long hash = it.get();
if (continueCondition(thetaLong, hash)) { continue; }
hashInsertOnlyMemory(wmem_, lgArrLongs, hash, preBytes);
tmpCnt++;
}
} else { //On Heap. Assumes HT exists and is large enough
HashIterator it = sketch.iterator();
final HashIterator it = sketch.iterator();
while (it.next()) {
final long hash = it.get();
if (continueCondition(thetaLong_, hash)) { continue; }
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/apache/datasketches/theta/Sketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ public String toString(final boolean sketchSummary, final boolean dataDetail, fi
final int w = width > 0 ? width : 8; // default is 8 wide
if (curCount > 0) {
sb.append("### SKETCH DATA DETAIL");
HashIterator it = iterator();
final HashIterator it = iterator();
int j = 0;
while (it.next()) {
final long h = it.get();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public byte[] toByteArray() {
long[] getCache() {
final long[] cache = new long[getRetainedEntries()];
int i = 0;
HashIterator it = iterator();
final HashIterator it = iterator();
while (it.next()) {
cache[i++] = it.get();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ public void checkHLL8Heap() {
} else {
sk = new HllSketch(lgK, tgtHllType);
}
String type = tgtHllType.toString();
String store = direct ? "Memory" : "Heap";
for (int i = 1; i <= N; i++) {
sk.update(i);
Expand Down

0 comments on commit 9e5b51c

Please sign in to comment.