diff --git a/pom.xml b/pom.xml index 37bf7fe..e2e231c 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ org.scijava pom-scijava - 38.0.1 + 40.0.0 @@ -111,10 +111,14 @@ sign,deploy-to-scijava - 3.3.0 - 4.2.1 - 4.1.1 - 1.3.5 + 3.3.2-SNAPSHOT + 4.2.2-SNAPSHOT + 1.1.2-SNAPSHOT + 2.2.0 + 7.0.3-SNAPSHOT + 4.2.6-SNAPSHOT + 4.1.2-SNAPSHOT + 1.4.1-SNAPSHOT 1.0.0-preview.20191208 1.4.1 @@ -148,6 +152,10 @@ org.janelia.saalfeldlab n5-zarr + + org.apache.commons + commons-compress + net.thisptr jackson-jq @@ -161,6 +169,7 @@ compile + org.janelia.saalfeldlab n5 @@ -212,8 +221,6 @@ ${commons-io.version} test - - io.findify s3mock_2.12 @@ -226,6 +233,16 @@ ${jaxb-api.version} test + + org.openjdk.jmh + jmh-core + test + + + org.openjdk.jmh + jmh-generator-annprocess + test + net.imagej diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java index 8873ca1..0a188a7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java @@ -52,25 +52,26 @@ import org.janelia.saalfeldlab.n5.universe.metadata.N5ViewerMultiscaleMetadataParser; import org.janelia.saalfeldlab.n5.universe.metadata.canonical.CanonicalMetadataParser; import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMetadataParser; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v05.OmeNgffV05MetadataParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import se.sawano.java.text.AlphanumericComparator; /** - * This class aids in detecting and parsing datsets in an N5 container. + * This class aids in detecting and parsing datasets in an N5 container. *

- * An N5DatasetDiscoverer specifies the types of {@link N5MetadataParser}s
- * to attempt, and an {@link ExecutorService} that enables parsing in parallel.
- * The parsers are passed to the constructor in a list.
- * Group parsers are called after all others are called, and should
- * be used when a parsers result depends on its children.
+ * An N5DatasetDiscoverer specifies the types of {@link N5MetadataParser}s to
+ * attempt, and an {@link ExecutorService} that enables parsing in parallel. The
+ * parsers are passed to the constructor in a list. Group parsers are called
+ * after all others are called, and should be used when a parser's result depends
+ * on its children.
  * <p>

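[Editor's note, not part of the patch: a minimal usage sketch of the discoverer described above, assuming an already-open N5Reader named n5; the base path and thread count are arbitrary.]

    // illustrative only: default parsers, parallel parsing via an executor
    final ExecutorService exec = Executors.newFixedThreadPool(4);
    final N5DatasetDiscoverer discoverer = new N5DatasetDiscoverer(
            n5, exec,
            Arrays.asList(N5DatasetDiscoverer.DEFAULT_PARSERS),
            Arrays.asList(N5DatasetDiscoverer.DEFAULT_GROUP_PARSERS));
    final N5TreeNode root = discoverer.discoverAndParseRecursive("/"); // throws IOException on failure
    exec.shutdown();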
* The {@link discoverAndParseRecursive} method returns a {@link N5TreeNode} - * containing all child nodes, each of which contains pased metadata. - * For each group/dataset, the parsers will be called in order, - * and will return the first non-empty result. As such - * parsers should be ordered from most- to least-strict. + * containing all child nodes, each of which contains parsed metadata. For each + * group/dataset, the parsers will be called in order, and will return the first + * non-empty result. As such parsers should be ordered from most- to + * least-strict. * * @author Caleb Hulbert * @author John Bogovic @@ -78,489 +79,546 @@ */ public class N5DatasetDiscoverer { - private static final Logger LOG = LoggerFactory.getLogger(N5DatasetDiscoverer.class); - - - public static final N5MetadataParser[] DEFAULT_PARSERS = new N5MetadataParser[] { - new N5CosemMetadataParser(), - new N5SingleScaleMetadataParser(), - new CanonicalMetadataParser(), - new N5GenericSingleScaleMetadataParser() - }; - - public static final N5MetadataParser[] DEFAULT_GROUP_PARSERS = new N5MetadataParser[] { - new OmeNgffMetadataParser(), - new N5CosemMultiScaleMetadata.CosemMultiScaleParser(), - new N5ViewerMultiscaleMetadataParser(), - new CanonicalMetadataParser(), - }; - - private final List> metadataParsers; - private final List> groupParsers; - - private final Comparator comparator; - - private final Predicate filter; - - private final ExecutorService executor; - - private N5TreeNode root; - - private String groupSeparator; - - private N5Reader n5; - - /** - * Creates an N5 discoverer with alphanumeric sorting order of groups/datasets (such as, s9 goes before s10). - * - * @param executor the executor - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer(final ExecutorService executor, - final List> metadataParsers, - final List> groupParsers) { - - this(executor, - Optional.of(new AlphanumericComparator(Collator.getInstance())), - null, - metadataParsers, - groupParsers); - } - - public N5DatasetDiscoverer( - final N5Reader n5, - final ExecutorService executor, - final List> metadataParsers, - final List> groupParsers) { - - this(n5, - executor, - Optional.of(new AlphanumericComparator(Collator.getInstance())), - null, - metadataParsers, - groupParsers); - } - - /** - * Creates an N5 discoverer. - * - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer( - final List> metadataParsers, - final List> groupParsers) { - - this(Executors.newSingleThreadExecutor(), - Optional.of(new AlphanumericComparator(Collator.getInstance())), - null, - metadataParsers, - groupParsers); - } - - /** - * Creates an N5 discoverer. 
- * - * @param n5 n5 reader - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer(final N5Reader n5, - final List> metadataParsers, - final List> groupParsers) { - - this(n5, - Executors.newSingleThreadExecutor(), - Optional.of(new AlphanumericComparator(Collator.getInstance())), - null, - metadataParsers, - groupParsers); - } - - public N5DatasetDiscoverer( - final ExecutorService executor, - final Predicate filter, - final List> metadataParsers, - final List> groupParsers) { - - this(executor, - Optional.of(new AlphanumericComparator(Collator.getInstance())), - filter, - metadataParsers, - groupParsers); - } - - public N5DatasetDiscoverer( - final N5Reader n5, - final ExecutorService executor, - final Predicate filter, - final List> metadataParsers, - final List> groupParsers) { - - this(n5, - executor, - Optional.of(new AlphanumericComparator(Collator.getInstance())), - filter, - metadataParsers, - groupParsers); - } - - public N5DatasetDiscoverer( - final ExecutorService executor, - final Optional> comparator, - final List> metadataParsers, - final List> groupParsers) { - - this(executor, comparator, null, metadataParsers, groupParsers); - } - - public N5DatasetDiscoverer( - final N5Reader n5, - final ExecutorService executor, - final Optional> comparator, - final List> metadataParsers, - final List> groupParsers) { - - this(n5, executor, comparator, null, metadataParsers, groupParsers); - } - - /** - * Creates an N5 discoverer. - *

- * If the optional parameter {@code comparator} is specified, the groups and datasets - * will be listed in the order determined by this comparator. - * - * @param executor the executor - * @param comparator optional string comparator - * @param filter the dataset filter - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer( - final ExecutorService executor, - final Optional> comparator, - final Predicate filter, - final List> metadataParsers, - final List> groupParsers) { - - this.executor = executor; - this.comparator = comparator.orElseGet(null); - this.filter = filter; - this.metadataParsers = metadataParsers; - this.groupParsers = groupParsers; - } - - /** - * Creates an N5 discoverer. - *

- * If the optional parameter {@code comparator} is specified, the groups and datasets - * will be listed in the order determined by this comparator. - * - * @param n5 the n5 reader - * @param executor the executor - * @param comparator optional string comparator - * @param filter the dataset filter - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer( - final N5Reader n5, - final ExecutorService executor, - final Optional> comparator, - final Predicate filter, - final List> metadataParsers, - final List> groupParsers) { - - this.n5 = n5; - this.executor = executor; - this.comparator = comparator.orElseGet(null); - this.filter = filter; - this.metadataParsers = metadataParsers; - this.groupParsers = groupParsers; - } - - public static void parseMetadata(final N5Reader n5, final N5TreeNode node, - final List> metadataParsers) throws IOException { - - parseMetadata(n5, node, metadataParsers, new ArrayList<>()); - } - - /** - * Parses metadata for a node using the given parsers, stopping after the first success. - * - * @param n5 the N5Reader - * @param node the tree node - * @param metadataParsers list of metadata parsers - * @param groupParsers list of group parsers - * @throws IOException the exception - */ - public static void parseMetadata(final N5Reader n5, final N5TreeNode node, - final List> metadataParsers, - final List> groupParsers) throws IOException { - - // Go through all parsers to populate metadata - for (final N5MetadataParser parser : metadataParsers) { - try { - Optional parsedMeta; - parsedMeta = parser.apply(n5, node); - - parsedMeta.ifPresent(node::setMetadata); - if (parsedMeta.isPresent()) - break; - } catch (final Exception ignored) { - } - } - - // this may be a group (e.g. multiscale pyramid) try to parse groups - if ((node.getMetadata() == null) && !node.childrenList().isEmpty() && groupParsers != null) { - for (final N5MetadataParser gp : groupParsers) { - final Optional groupMeta = gp.apply(n5, node); - groupMeta.ifPresent(node::setMetadata); - if (groupMeta.isPresent()) - break; - } - } - } - - public static boolean trim(final N5TreeNode node ) { - return trim( node, x -> {}); - } - - /** - * Removes branches of the N5 container tree that do not contain any nodes that can be opened - * (nodes with metadata). 
- * - * @param node the node - * @param callback the callback function - * @return {@code true} if the branch contains a node that can be opened, {@code false} otherwise - */ - public static boolean trim(final N5TreeNode node, final Consumer callback ) { - - final List children = node.childrenList(); - if (children.isEmpty()) { - return node.getMetadata() != null; - } - - boolean ret = false; - for (final Iterator it = children.iterator(); it.hasNext(); ) { - final N5TreeNode childNode = it.next(); - if (!trim(childNode, callback)) { - it.remove(); - callback.accept(childNode); - } else { - ret = true; - } - } - - return ret || node.getMetadata() != null; - } - - public static void sort(final N5TreeNode node, final Comparator comparator, - final Consumer callback) { - - final List children = node.childrenList(); - children.sort(Comparator.comparing(N5TreeNode::toString, comparator)); - - if( callback != null ) { - callback.accept( node ); - } - - for (final N5TreeNode childNode : node.childrenList()) { - sort(childNode, comparator, callback ); - } - } - - public void sort(final N5TreeNode node, final Consumer callback) { - if (comparator != null) { - sort(node, comparator, callback); - } - } - - public void sort(final N5TreeNode node) { - if (comparator != null) - sort(node, comparator, null); - } - - /** - * Recursively discovers and parses metadata for datasets that are children - * of the given base path using {@link N5Reader#deepList}. Returns an {@link N5TreeNode} - * that can be displayed as a JTree. - * - * @param base the base path - * @return the n5 tree node - * @throws IOException the io exception - */ - public N5TreeNode discoverAndParseRecursive(final String base ) throws IOException { - - return discoverAndParseRecursive(base, x -> {}); - } + private static final Logger LOG = LoggerFactory.getLogger(N5DatasetDiscoverer.class); + + public static final N5MetadataParser[] DEFAULT_PARSERS = new N5MetadataParser[]{ + new N5CosemMetadataParser(), + new N5SingleScaleMetadataParser(), + new CanonicalMetadataParser(), + new N5GenericSingleScaleMetadataParser() + }; + + public static final N5MetadataParser[] DEFAULT_GROUP_PARSERS = new N5MetadataParser[]{ + new OmeNgffV05MetadataParser(), + new OmeNgffMetadataParser(), + new N5CosemMultiScaleMetadata.CosemMultiScaleParser(), + new N5ViewerMultiscaleMetadataParser(), + new CanonicalMetadataParser(), + }; + + private final List> metadataParsers; + private final List> groupParsers; + + private final Comparator comparator; + + private final Predicate filter; + + private final ExecutorService executor; + + private N5TreeNode root; + + private String groupSeparator; + + private N5Reader n5; + + /** + * Creates an N5 discoverer with alphanumeric sorting order of + * groups/datasets (such as, s9 goes before s10). + * + * @param executor + * the executor + * @param metadataParsers + * metadata parsers + * @param groupParsers + * group parsers + */ + public N5DatasetDiscoverer(final ExecutorService executor, + final List> metadataParsers, + final List> groupParsers) { + + this(executor, + Optional.of(new AlphanumericComparator(Collator.getInstance())), + null, + metadataParsers, + groupParsers); + } + + public N5DatasetDiscoverer( + final N5Reader n5, + final ExecutorService executor, + final List> metadataParsers, + final List> groupParsers) { + + this(n5, + executor, + Optional.of(new AlphanumericComparator(Collator.getInstance())), + null, + metadataParsers, + groupParsers); + } + + /** + * Creates an N5 discoverer. 
+ * + * @param metadataParsers + * metadata parsers + * @param groupParsers + * group parsers + */ + public N5DatasetDiscoverer( + final List> metadataParsers, + final List> groupParsers) { + + this(Executors.newSingleThreadExecutor(), + Optional.of(new AlphanumericComparator(Collator.getInstance())), + null, + metadataParsers, + groupParsers); + } + + /** + * Creates an N5 discoverer. + * + * @param n5 + * n5 reader + * @param metadataParsers + * metadata parsers + * @param groupParsers + * group parsers + */ + public N5DatasetDiscoverer(final N5Reader n5, + final List> metadataParsers, + final List> groupParsers) { + + this(n5, + Executors.newSingleThreadExecutor(), + Optional.of(new AlphanumericComparator(Collator.getInstance())), + null, + metadataParsers, + groupParsers); + } + + public N5DatasetDiscoverer( + final ExecutorService executor, + final Predicate filter, + final List> metadataParsers, + final List> groupParsers) { + + this(executor, + Optional.of(new AlphanumericComparator(Collator.getInstance())), + filter, + metadataParsers, + groupParsers); + } + + public N5DatasetDiscoverer( + final N5Reader n5, + final ExecutorService executor, + final Predicate filter, + final List> metadataParsers, + final List> groupParsers) { + + this(n5, + executor, + Optional.of(new AlphanumericComparator(Collator.getInstance())), + filter, + metadataParsers, + groupParsers); + } + + public N5DatasetDiscoverer( + final ExecutorService executor, + final Optional> comparator, + final List> metadataParsers, + final List> groupParsers) { + + this(executor, comparator, null, metadataParsers, groupParsers); + } + + public N5DatasetDiscoverer( + final N5Reader n5, + final ExecutorService executor, + final Optional> comparator, + final List> metadataParsers, + final List> groupParsers) { + + this(n5, executor, comparator, null, metadataParsers, groupParsers); + } + + /** + * Creates an N5 discoverer. + *

+ * If the optional parameter {@code comparator} is specified, the groups and + * datasets will be listed in the order determined by this comparator. + * + * @param executor + * the executor + * @param comparator + * optional string comparator + * @param filter + * the dataset filter + * @param metadataParsers + * metadata parsers + * @param groupParsers + * group parsers + */ + public N5DatasetDiscoverer( + final ExecutorService executor, + final Optional> comparator, + final Predicate filter, + final List> metadataParsers, + final List> groupParsers) { + + this.executor = executor; + this.comparator = comparator.orElseGet(null); + this.filter = filter; + this.metadataParsers = metadataParsers; + this.groupParsers = groupParsers; + } + + /** + * Creates an N5 discoverer. + *

+ * If the optional parameter {@code comparator} is specified, the groups and + * datasets will be listed in the order determined by this comparator. + * + * @param n5 + * the n5 reader + * @param executor + * the executor + * @param comparator + * optional string comparator + * @param filter + * the dataset filter + * @param metadataParsers + * metadata parsers + * @param groupParsers + * group parsers + */ + public N5DatasetDiscoverer( + final N5Reader n5, + final ExecutorService executor, + final Optional> comparator, + final Predicate filter, + final List> metadataParsers, + final List> groupParsers) { + + this.n5 = n5; + this.executor = executor; + this.comparator = comparator.orElseGet(null); + this.filter = filter; + this.metadataParsers = metadataParsers; + this.groupParsers = groupParsers; + } + + public static void parseMetadata(final N5Reader n5, final N5TreeNode node, + final List> metadataParsers) throws IOException { + + parseMetadata(n5, node, metadataParsers, new ArrayList<>()); + } + + /** + * Parses metadata for a node using the given parsers, stopping after the + * first success. + * + * @param n5 + * the N5Reader + * @param node + * the tree node + * @param metadataParsers + * list of metadata parsers + * @param groupParsers + * list of group parsers + * @throws IOException + * the exception + */ + public static void parseMetadata(final N5Reader n5, final N5TreeNode node, + final List> metadataParsers, + final List> groupParsers) throws IOException { + + // Go through all parsers to populate metadata + for (final N5MetadataParser parser : metadataParsers) { + try { + Optional parsedMeta; + parsedMeta = parser.apply(n5, node); + + parsedMeta.ifPresent(node::setMetadata); + if (parsedMeta.isPresent()) + break; + } catch (final Exception ignored) {} + } + + // this may be a group (e.g. multiscale pyramid) try to parse groups + if ((node.getMetadata() == null) && !node.childrenList().isEmpty() && groupParsers != null) { + for (final N5MetadataParser gp : groupParsers) { + try { + final Optional groupMeta = gp.apply(n5, node); + groupMeta.ifPresent(node::setMetadata); + if (groupMeta.isPresent()) + break; + } catch(Exception ignored ) {} + } + } + } + + public static boolean trim(final N5TreeNode node) { + + return trim(node, x -> {}); + } + + /** + * Removes branches of the N5 container tree that do not contain any nodes + * that can be opened (nodes with metadata). 
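[Editor's note, not part of the patch: continuing the sketch above, a hedged example of how the trim and sort helpers described here might be applied to a parsed tree; the callback and comparator are illustrative choices, not requirements.]

    // illustrative only: prune branches with no openable metadata, then sort children alphanumerically
    N5DatasetDiscoverer.trim(root, removed -> System.out.println("pruned " + removed.getPath()));
    N5DatasetDiscoverer.sort(root, new AlphanumericComparator(Collator.getInstance()), null);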
+ * + * @param node + * the node + * @param callback + * the callback function + * @return {@code true} if the branch contains a node that can be opened, + * {@code false} otherwise + */ + public static boolean trim(final N5TreeNode node, final Consumer callback) { + + final List children = node.childrenList(); + if (children.isEmpty()) { + return node.getMetadata() != null; + } + + boolean ret = false; + for (final Iterator it = children.iterator(); it.hasNext();) { + final N5TreeNode childNode = it.next(); + if (!trim(childNode, callback)) { + it.remove(); + callback.accept(childNode); + } else { + ret = true; + } + } + + return ret || node.getMetadata() != null; + } + + public static void sort(final N5TreeNode node, final Comparator comparator, + final Consumer callback) { + + final List children = node.childrenList(); + children.sort(Comparator.comparing(N5TreeNode::toString, comparator)); + + if (callback != null) { + callback.accept(node); + } + + for (final N5TreeNode childNode : node.childrenList()) { + sort(childNode, comparator, callback); + } + } + + public void sort(final N5TreeNode node, final Consumer callback) { + + if (comparator != null) { + sort(node, comparator, callback); + } + } + + public void sort(final N5TreeNode node) { + + if (comparator != null) + sort(node, comparator, null); + } + + /** + * Recursively discovers and parses metadata for datasets that are children + * of the given base path using {@link N5Reader#deepList}. Returns an + * {@link N5TreeNode} that can be displayed as a JTree. + * + * @param base + * the base path + * @return the n5 tree node + * @throws IOException + * the io exception + */ + public N5TreeNode discoverAndParseRecursive(final String base) throws IOException { + + return discoverAndParseRecursive(base, x -> {}); + } - public N5TreeNode discoverAndParseRecursive(final String base, final Consumer callback ) throws IOException { + public N5TreeNode discoverAndParseRecursive(final String base, final Consumer callback) throws IOException { groupSeparator = n5.getGroupSeparator(); root = new N5TreeNode(base); - discoverAndParseRecursive(root, callback ); + discoverAndParseRecursive(root, callback); return root; } - public N5TreeNode discoverAndParseRecursive(final N5TreeNode root ) throws IOException { - return discoverAndParseRecursive( root, x -> {}) ; - } + public N5TreeNode discoverAndParseRecursive(final N5TreeNode root) throws IOException { - public N5TreeNode discoverAndParseRecursive(final N5TreeNode root, final Consumer callback ) throws IOException { + return discoverAndParseRecursive(root, x -> {}); + } + + public N5TreeNode discoverAndParseRecursive(final N5TreeNode root, final Consumer callback) throws IOException { + + groupSeparator = n5.getGroupSeparator(); - groupSeparator = n5.getGroupSeparator(); + String[] datasetPaths; + try { + datasetPaths = n5.deepList(root.getPath(), executor); + N5TreeNode.fromFlatList(root, datasetPaths, groupSeparator); + } catch (final Exception e) { + return null; + } + callback.accept(root); - String[] datasetPaths; - try { - datasetPaths = n5.deepList(root.getPath(), executor); - N5TreeNode.fromFlatList(root, datasetPaths, groupSeparator); - } catch (final Exception e) { - return null; + parseMetadataRecursive(root, callback); + sortAndTrimRecursive(root, callback); + + return root; } - callback.accept(root); - parseMetadataRecursive(root,callback); - sortAndTrimRecursive(root,callback); + public N5TreeNode discoverAndParseRecursive(final N5TreeNode root, final Predicate filter, + final 
Consumer callback) throws IOException { - return root; - } + groupSeparator = n5.getGroupSeparator(); - /** - * Returns the name of the dataset, removing the full path - * and leading groupSeparator. - * - * @param fullPath - * @return dataset name - */ - private String normalDatasetName(final String fullPath) { + String[] datasetPaths; + try { + datasetPaths = n5.deepList(root.getPath(), filter, executor); + N5TreeNode.fromFlatList(root, datasetPaths, groupSeparator); + } catch (final Exception e) { + return null; + } + callback.accept(root); - return fullPath.replaceAll("(^" + groupSeparator + "*)|(" + groupSeparator + "*$)", ""); - } + parseMetadataRecursive(root, callback); + sortAndTrimRecursive(root, callback); - public N5TreeNode parse(final String dataset) { + return root; + } - final N5TreeNode node = new N5TreeNode(dataset); - return parse(node); - } + public N5TreeNode parse(final String dataset) { - public N5TreeNode parse(final N5TreeNode node) { - // Go through all parsers to populate metadata - for (final N5MetadataParser parser : metadataParsers) { - try { - final Optional metadata = parser.apply(n5, node); - if (metadata.isPresent()) { - node.setMetadata(metadata.get()); - break; - } - } catch (final Exception e) { - } - } - return node; - } - - public void sortAndTrimRecursive(final N5TreeNode node) { - sortAndTrimRecursive( node, x -> { }); - } - - public void sortAndTrimRecursive(final N5TreeNode node, final Consumer callback ) { - trim(node, callback); - - if (comparator != null) - sort(node, callback); - - for (final N5TreeNode c : node.childrenList()) - sortAndTrimRecursive(c, callback); - } - - public void filterRecursive(final N5TreeNode node) { - - if (filter == null) - return; - - if (!filter.test(node)) - node.setMetadata(null); - - for (final N5TreeNode c : node.childrenList()) - filterRecursive(c); - } - - /** - * Parses metadata for the given node and all children in parallel using this object's executor. - * - * @param rootNode the root node - */ - public void parseMetadataRecursive(final N5TreeNode rootNode) { - parseMetadataRecursive( rootNode, x -> {}); - } - - /** - * Parses metadata for the given node and all children in parallel using this object's executor. - * The given function is called for every node after parsing is completed, successful or not. - * - * @param rootNode the root node - * @param callback the callback function - */ - public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer callback) { - /* depth first, check if we have children */ - final List children = rootNode.childrenList(); - final ArrayList> childrenFutures = new ArrayList>(); - if (!children.isEmpty()) { - /* If possible, parallelize the metadata parsing. */ - if (executor instanceof ThreadPoolExecutor) { - final ThreadPoolExecutor threadPoolExec = (ThreadPoolExecutor)this.executor; - for (final N5TreeNode child : children) { - final boolean useExec; - synchronized (executor) { - /* Since the parents wait for the children to finish, if there aren't enough threads to parse all the children (DFS), - * this could lock up. So we check if there are any extra threads; if not, execute if current thread. 
*/ - useExec = (threadPoolExec.getActiveCount() < threadPoolExec.getMaximumPoolSize() - 1); - } - if (useExec) { - childrenFutures.add(this.executor.submit(() -> parseMetadataRecursive(child, callback))); - } else { - parseMetadataRecursive(child,callback); - } - } - } else { - for (final N5TreeNode child : children) { - parseMetadataRecursive(child,callback); + final N5TreeNode node = new N5TreeNode(dataset); + return parse(node); + } + + public N5TreeNode parse(final N5TreeNode node) { + + // Go through all parsers to populate metadata + for (final N5MetadataParser parser : metadataParsers) { + try { + final Optional metadata = parser.apply(n5, node); + if (metadata.isPresent()) { + node.setMetadata(metadata.get()); + break; + } + } catch (final Exception e) {} } - } + return node; } - for (final Future childrenFuture : childrenFutures) { - try { - childrenFuture.get(); - } catch (InterruptedException | ExecutionException e) { - LOG.error("Error encountered during metadata parsing", e); - throw new RuntimeException(e); - } + public void sortAndTrimRecursive(final N5TreeNode node) { + + sortAndTrimRecursive(node, x -> {}); } - try { - N5DatasetDiscoverer.parseMetadata(n5, rootNode, metadataParsers, groupParsers); - } catch (final Exception e) { + public void sortAndTrimRecursive(final N5TreeNode node, final Consumer callback) { + + trim(node, callback); + + if (comparator != null) + sort(node, callback); + + for (final N5TreeNode c : node.childrenList()) + sortAndTrimRecursive(c, callback); } - LOG.debug("parsed metadata for: {}:\t found: {}", rootNode.getPath(), rootNode.getMetadata() == null ? "NONE" : rootNode.getMetadata().getClass().getSimpleName()); - callback.accept(rootNode); + public void filterRecursive(final N5TreeNode node) { + + if (filter == null) + return; - if( rootNode.getMetadata() instanceof N5MetadataGroup ) { + if (!filter.test(node)) + node.setMetadata(null); - // spatial metadata groups may update their children metadata, and to be safe, - // run the callback on its children - @SuppressWarnings("unchecked") - final N5MetadataGroup grpMeta = (N5MetadataGroup)rootNode.getMetadata(); - for( final N5Metadata child : grpMeta.getChildrenMetadata() ) - { - rootNode.getDescendant(child.getPath()).ifPresent( x -> { - callback.accept( x ); - }); + for (final N5TreeNode c : node.childrenList()) + filterRecursive(c); + } + + /** + * Parses metadata for the given node and all children in parallel using + * this object's executor. + * + * @param rootNode + * the root node + */ + public void parseMetadataRecursive(final N5TreeNode rootNode) { + + parseMetadataRecursive(rootNode, x -> {}); + } + + /** + * Parses metadata for the given node and all children in parallel using + * this object's executor. The given function is called for every node after + * parsing is completed, successful or not. + * + * @param rootNode + * the root node + * @param callback + * the callback function + */ + public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer callback) { + + /* depth first, check if we have children */ + final List children = rootNode.childrenList(); + final ArrayList> childrenFutures = new ArrayList>(); + if (!children.isEmpty()) { + /* If possible, parallelize the metadata parsing. 
*/ + if (executor instanceof ThreadPoolExecutor) { + final ThreadPoolExecutor threadPoolExec = (ThreadPoolExecutor)this.executor; + for (final N5TreeNode child : children) { + final boolean useExec; + synchronized (executor) { + /* + * Since the parents wait for the children to finish, if + * there aren't enough threads to parse all the children + * (DFS), this could lock up. So we check if there are + * any extra threads; if not, execute if current thread. + */ + useExec = (threadPoolExec.getActiveCount() < threadPoolExec.getMaximumPoolSize() - 1); + } + if (useExec) { + childrenFutures.add(this.executor.submit(() -> parseMetadataRecursive(child, callback))); + } else { + parseMetadataRecursive(child, callback); + } + } + } else { + for (final N5TreeNode child : children) { + parseMetadataRecursive(child, callback); + } + } + } + + for (final Future childrenFuture : childrenFutures) { + try { + childrenFuture.get(); + } catch (InterruptedException | ExecutionException e) { + LOG.error("Error encountered during metadata parsing", e); + throw new RuntimeException(e); + } + } + + try { + N5DatasetDiscoverer.parseMetadata(n5, rootNode, metadataParsers, groupParsers); + } catch (final Exception e) {} + LOG.debug("parsed metadata for: {}:\t found: {}", rootNode.getPath(), + rootNode.getMetadata() == null ? "NONE" : rootNode.getMetadata().getClass().getSimpleName()); + + callback.accept(rootNode); + + if (rootNode.getMetadata() instanceof N5MetadataGroup) { + + // spatial metadata groups may update their children metadata, and + // to be safe, + // run the callback on its children + @SuppressWarnings("unchecked") + final N5MetadataGroup grpMeta = (N5MetadataGroup)rootNode.getMetadata(); + for (final N5Metadata child : grpMeta.getChildrenMetadata()) { + rootNode.getDescendant(child.getPath()).ifPresent(x -> { + callback.accept(x); + }); + } } } - } public static final List> fromParsers(final N5MetadataParser[] parsers) { @@ -583,7 +641,7 @@ public static N5TreeNode discover(final N5Reader n5, final List T openN5Container( final URI uri, final TriFunction openWithKva) { - final KeyValueAccess kva = getKeyValueAccess(uri); - if (kva == null) - throw new N5Exception("Cannot get KeyValueAccess at " + uri); - return openWithKva.apply(storageFormat, kva, uri.toString()); + // in the cae of HDF5, the KeyValueAccess may be null + return openWithKva.apply(storageFormat, getKeyValueAccess(uri), uri.toString()); } private T openN5Container( final String containerUri, final BiFunction openWithFormat, final TriFunction openWithKva) { + - final Pair storageAndUri; + final Pair storageAndUri; + URI uri; try { - storageAndUri = StorageFormat.parseUri(containerUri); - } catch (final URISyntaxException e) { + storageAndUri = StorageFormat.getStorageFromNestedScheme(containerUri); + uri = parseUriFromString(storageAndUri.getB()); + } catch (final N5Exception e) { throw new N5Exception("Unable to open " + containerUri + " as N5 Container", e); } final StorageFormat format = storageAndUri.getA(); - final URI uri = storageAndUri.getB(); + if (format != null) return openWithFormat.apply(format, uri); else @@ -620,6 +710,7 @@ enum KeyValueAccessBackend implements Predicate, BiFunction Pattern.compile("\\.zarr(3?)$", Pattern.CASE_INSENSITIVE).matcher(new File(uri.getPath()).toString()).find()), ZARR(Pattern.compile("zarr", Pattern.CASE_INSENSITIVE), uri -> Pattern.compile("\\.zarr$", Pattern.CASE_INSENSITIVE).matcher(new File(uri.getPath()).toString()).find()), N5(Pattern.compile("n5", Pattern.CASE_INSENSITIVE), uri -> 
Pattern.compile("\\.n5$", Pattern.CASE_INSENSITIVE).matcher(new File(uri.getPath()).toString()).find()), HDF5(Pattern.compile("h(df)?5", Pattern.CASE_INSENSITIVE), uri -> { @@ -627,7 +718,7 @@ public enum StorageFormat { return hasHdf5Extension || HDF5Utils.isHDF5(uri.getPath()); }); - static final Pattern STORAGE_SCHEME_PATTERN = Pattern.compile("^(\\s*(?(n5|h(df)?5|zarr)):(//)?)?(?.*)$", Pattern.CASE_INSENSITIVE); + static final Pattern STORAGE_SCHEME_PATTERN = Pattern.compile("^(\\s*(?(n5|h(df)?5|zarr|zarr3)):(//)?)?(?.*)$", Pattern.CASE_INSENSITIVE); private final static String STORAGE_SCHEME_GROUP = "storageScheme"; private final static String URI_GROUP = "uri"; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/container/ContainerMetadataNode.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/container/ContainerMetadataNode.java index c074aae..7ad714a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/container/ContainerMetadataNode.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/container/ContainerMetadataNode.java @@ -21,14 +21,17 @@ import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Reader; import org.janelia.saalfeldlab.n5.N5URI; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardParameters; +import org.janelia.saalfeldlab.n5.universe.N5TreeNode; +import org.janelia.saalfeldlab.n5.universe.translation.JqUtils; import com.google.gson.Gson; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonSyntaxException; import com.google.gson.reflect.TypeToken; -import org.janelia.saalfeldlab.n5.universe.N5TreeNode; -import org.janelia.saalfeldlab.n5.universe.translation.JqUtils; public class ContainerMetadataNode implements GsonN5Writer { @@ -69,12 +72,12 @@ public ContainerMetadataNode( final ContainerMetadataNode other ) { public HashMap getContainerAttributes() { return attributes; } - + @Override public JsonElement getAttributes(final String pathName) throws N5Exception.N5IOException { final String groupPath = N5URI.normalizeGroupPath(pathName); - Optional nodeOpt = getNode(groupPath); + final Optional nodeOpt = getNode(groupPath); if( nodeOpt.isPresent() ) { final ContainerMetadataNode node = nodeOpt.get(); @@ -148,11 +151,12 @@ public void addPathsRecursive() { /** * Adds path attributes to this node and recursively to its children. 
* + * * @param thisPath path to a node */ public void addPathsRecursive( String thisPath ) { path = thisPath; - for ( String childPath : children.keySet() ) + for ( final String childPath : children.keySet() ) children.get(childPath).addPathsRecursive( thisPath + "/" + childPath ); } @@ -181,7 +185,7 @@ public Optional getChild(final String relativePath ) { } public ContainerMetadataNode childRelative(final String normRelativePath) { - String childName = normRelativePath.substring( 0, normRelativePath.indexOf('/')); + final String childName = normRelativePath.substring( 0, normRelativePath.indexOf('/')); if( children.containsKey(childName) ) return children.get(childName); else @@ -223,7 +227,7 @@ public boolean exists(String pathName) { @Override public String[] list(String pathName) throws N5Exception.N5IOException { - Optional node = getNode(pathName); + final Optional node = getNode(pathName); if( node.isPresent() ) { final Set set = node.get().getChildren().keySet(); return set.toArray( new String[ set.size() ]); @@ -240,8 +244,8 @@ public void setAttribute( final String pathName, final String key, final T a @Override public void setAttributes(String pathName, Map attributes) { final Type mapType = new TypeToken>(){}.getType(); - JsonElement json = gson.toJsonTree(attributes); - HashMap map = gson.fromJson(json, mapType); + final JsonElement json = gson.toJsonTree(attributes); + final HashMap map = gson.fromJson(json, mapType); getNode( pathName ).ifPresent( x -> x.attributes.putAll(map) ); } @@ -285,7 +289,7 @@ public void createGroup(String pathName) { else relativePath = normPath; - String[] parts = relativePath.split(groupSeparator); + final String[] parts = relativePath.split(groupSeparator); createGroupHelper( this, parts, 0 ); addPathsRecursive(); } @@ -295,7 +299,7 @@ private static void createGroupHelper( ContainerMetadataNode node, String[] part if( i >= parts.length ) return; - String childRelpath = parts[i]; + final String childRelpath = parts[i]; ContainerMetadataNode child; if( !node.children.containsKey( childRelpath )) { child = new ContainerMetadataNode(); @@ -347,7 +351,15 @@ public DataBlock readBlock(String pathName, DatasetAttributes datasetAttribut return null; } + @Override + public Shard readShard(String datasetPath, A datasetAttributes, + long... 
shardGridPosition) { + return null; + } + @Override + public void writeShard(String datasetPath, A datasetAttributes, + Shard shard) throws N5Exception { } @SuppressWarnings("unchecked") public static ContainerMetadataNode build( @@ -355,13 +367,13 @@ public static ContainerMetadataNode build( if (n5 instanceof GsonN5Reader) { try { return buildGson((N)n5, dataset, gson ); - } catch (Exception e) { + } catch (final Exception e) { } } else { try { return buildN5( n5, dataset, gson ); - } catch (Exception e) { + } catch (final Exception e) { } } return null; @@ -383,7 +395,7 @@ public static ContainerMetadataNode buildGs containerRoot.addPathsRecursive(dataset); return containerRoot; - } catch (N5Exception e) { + } catch (final N5Exception e) { e.printStackTrace(); } return null; @@ -396,7 +408,7 @@ public static ContainerMetadataNode buildHelper(final N final List children = baseNode.childrenList(); final HashMap childMap = new HashMap<>(); - for (N5TreeNode child : children) + for (final N5TreeNode child : children) childMap.put(child.getNodeName(), buildHelper(n5, child)); if ( attrs != null ) @@ -417,7 +429,7 @@ public static ContainerMetadataNode buildN5(final T n5, fin containerRoot.addPathsRecursive(dataset); return containerRoot; - } catch (N5Exception e) { + } catch (final N5Exception e) { e.printStackTrace(); } return null; @@ -428,7 +440,7 @@ public static ContainerMetadataNode buildHelperN5(final N5Reader n5, N5TreeNode final List children = baseNode.childrenList(); final HashMap childMap = new HashMap<>(); - for (N5TreeNode child : children) + for (final N5TreeNode child : children) childMap.put(child.getNodeName(), buildHelperN5(n5, child, gson)); if (attrs.isPresent()) @@ -441,13 +453,13 @@ public static Optional> getMetadataMapN5(final N5Re final Gson gson) { try { final HashMap attrs = new HashMap<>(); - Map> attrClasses = n5.listAttributes(dataset); - for (String k : attrClasses.keySet()) { + final Map> attrClasses = n5.listAttributes(dataset); + for (final String k : attrClasses.keySet()) { if( attrClasses.get(k).equals(String.class)) { - String s = n5.getAttribute(dataset, k, String.class ); - Optional elem = stringToJson( s, gson ); + final String s = n5.getAttribute(dataset, k, String.class ); + final Optional elem = stringToJson( s, gson ); if( elem.isPresent()) attrs.put( k, elem.get()); else @@ -460,7 +472,7 @@ public static Optional> getMetadataMapN5(final N5Re if (attrs != null) return Optional.of(attrs); - } catch (Exception e) { + } catch (final Exception e) { } return Optional.empty(); } @@ -468,9 +480,9 @@ public static Optional> getMetadataMapN5(final N5Re public static Optional stringToJson(String s, final Gson gson) { try { - JsonObject elem = gson.fromJson(s, JsonObject.class); + final JsonObject elem = gson.fromJson(s, JsonObject.class); return Optional.of(elem); - } catch (JsonSyntaxException e) { + } catch (final JsonSyntaxException e) { return Optional.empty(); } } @@ -495,4 +507,11 @@ public void setAttributes(String groupPath, JsonElement attributes) throws N5Exc // TODO Auto-generated method stub } + @Override + public String getAttributesKey() { + + return "attributes.json"; + } + + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/MultiscalesAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/MultiscalesAdapter.java new file mode 100644 index 0000000..ae89462 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/MultiscalesAdapter.java @@ 
-0,0 +1,66 @@ +package org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v05; + +import com.google.gson.*; +import org.janelia.saalfeldlab.n5.universe.metadata.MetadataUtils; +import org.janelia.saalfeldlab.n5.universe.metadata.axes.Axis; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMultiScaleMetadata; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMultiScaleMetadata.OmeNgffDataset; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMultiScaleMetadata.OmeNgffDownsamplingMetadata; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.coordinateTransformations.CoordinateTransformation; + +import java.lang.reflect.Type; + +public class MultiscalesAdapter implements JsonDeserializer, JsonSerializer< OmeNgffMultiScaleMetadata > +{ + @Override + public OmeNgffMultiScaleMetadata deserialize( final JsonElement json, final Type typeOfT, final JsonDeserializationContext context ) throws JsonParseException + { + if (!json.isJsonObject()) + return null; + + final JsonObject jobj = json.getAsJsonObject(); + if (!jobj.has("axes") && !jobj.has("datasets")) + return null; + + // name and type may be null + final String name = MetadataUtils.getStringNullable(jobj.get("name")); + final String type = MetadataUtils.getStringNullable(jobj.get("type")); + final String version = "0.5"; + + final Axis[] axes = context.deserialize(jobj.get("axes"), Axis[].class); + final OmeNgffDataset[] datasets = context.deserialize(jobj.get("datasets"), OmeNgffDataset[].class); + final CoordinateTransformation[] coordinateTransformations = context + .deserialize(jobj.get("coordinateTransformations"), CoordinateTransformation[].class); + final OmeNgffDownsamplingMetadata metadata = context.deserialize(jobj.get("metadata"), + OmeNgffDownsamplingMetadata.class); + + return new OmeNgffMultiScaleMetadata(axes.length, "", name, type, version, axes, datasets, null, + coordinateTransformations, metadata, false); + } + + @Override + public JsonElement serialize( final OmeNgffMultiScaleMetadata src, final Type typeOfSrc, final JsonSerializationContext context ) + { + final JsonObject obj = new JsonObject(); + obj.addProperty("name", src.name); + obj.addProperty("type", src.type); + //TODO: add property version to attributes/ome/ + //obj.addProperty("version", src.version); + obj.add("axes", context.serialize(src.axes)); + obj.add("datasets", context.serialize(src.datasets)); + + if( src.coordinateTransformations != null ) + if( src.coordinateTransformations.length == 0 ) + obj.add("coordinateTransformations", context.serialize(new JsonArray())); // empty array + else + obj.add("coordinateTransformations", context.serialize(src.coordinateTransformations)); + else + obj.add("coordinateTransformations", context.serialize(new JsonArray())); // empty array + + if( src.metadata != null ) + obj.add("metadata", context.serialize(src.metadata)); + + return obj; + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/OmeNgffV05Metadata.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/OmeNgffV05Metadata.java new file mode 100644 index 0000000..34199bd --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/OmeNgffV05Metadata.java @@ -0,0 +1,75 @@ +package org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v05; + +import org.janelia.saalfeldlab.n5.universe.metadata.SpatialMultiscaleMetadata; +import org.janelia.saalfeldlab.n5.universe.metadata.axes.Axis; 
+import org.janelia.saalfeldlab.n5.universe.metadata.axes.AxisUtils; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.NgffSingleScaleAxesMetadata; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMultiScaleMetadata; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMultiScaleMetadata.OmeNgffDataset; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.coordinateTransformations.CoordinateTransformation; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.coordinateTransformations.ScaleCoordinateTransformation; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.coordinateTransformations.TranslationCoordinateTransformation; + +public class OmeNgffV05Metadata extends SpatialMultiscaleMetadata +{ + public final OmeNgffMultiScaleMetadata[] multiscales; + + public OmeNgffV05Metadata(final String path, final OmeNgffMultiScaleMetadata[] multiscales) + { + // assumes children metadata are the same for all multiscales, which should be true + super(path, multiscales[0].getChildrenMetadata()); + this.multiscales = multiscales; + } + + /** + * Creates an OmeNgffMetadata object for writing. + * See {@link AxisUtils#defaultAxes(String...)} for convenient creation of axes. + * + * @param numDimensions number of dimensions + * @param name a name for this dataset + * @param axes an array of axes (length numDimensions) + * @param scalePaths relative paths to children containing scale level arrays + * @param scales array of absolute resolutions. size: [numScales][numDimensions] + * @param translations array of translations. size: [numScales][numDimensions]. May be null. + * @return OmeNgffMetadata + */ + public static OmeNgffV05Metadata buildForWriting( final int numDimensions, + final String name, + final Axis[] axes, + final String[] scalePaths, + final double[][] scales, + final double[][] translations) { + + // TODO make this a constructor? (yes, says Caleb, and John) + + assert scalePaths.length == scales.length; + assert translations == null || scalePaths.length == translations.length; + + final int numScales = scalePaths.length; + final String version = "0.5"; + final String type = ""; + final OmeNgffDataset[] datasets = new OmeNgffDataset[numScales]; + for( int i = 0; i < numScales; i++ ) { + + final ScaleCoordinateTransformation s = new ScaleCoordinateTransformation(scales[i]); + TranslationCoordinateTransformation t = null; + if( translations != null && translations[i] != null ) + t = new TranslationCoordinateTransformation(translations[i]); + + datasets[i] = new OmeNgffDataset(); + datasets[i].path = scalePaths[i]; + datasets[i].coordinateTransformations = t == null ? 
+ new CoordinateTransformation[]{ s } : + new CoordinateTransformation[]{ s, t }; + } + + final CoordinateTransformation[] cts = null; + final OmeNgffMultiScaleMetadata ms = new OmeNgffMultiScaleMetadata( + numDimensions, "", name, + type, version, axes, + datasets, null, cts, null); + + return new OmeNgffV05Metadata("", new OmeNgffMultiScaleMetadata[]{ ms }); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/OmeNgffV05MetadataParser.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/OmeNgffV05MetadataParser.java new file mode 100644 index 0000000..78514b2 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v05/OmeNgffV05MetadataParser.java @@ -0,0 +1,144 @@ +package org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v05; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import org.apache.commons.lang3.ArrayUtils; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.N5Reader; +import org.janelia.saalfeldlab.n5.N5Writer; +import org.janelia.saalfeldlab.n5.universe.N5TreeNode; +import org.janelia.saalfeldlab.n5.universe.metadata.MetadataUtils; +import org.janelia.saalfeldlab.n5.universe.metadata.N5DatasetMetadata; +import org.janelia.saalfeldlab.n5.universe.metadata.N5MetadataParser; +import org.janelia.saalfeldlab.n5.universe.metadata.N5MetadataWriter; +import org.janelia.saalfeldlab.n5.universe.metadata.axes.Axis; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.AxisAdapter; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.DatasetAdapter; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.NgffSingleScaleAxesMetadata; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMultiScaleMetadata; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMultiScaleMetadata.OmeNgffDataset; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.coordinateTransformations.CoordinateTransformation; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.coordinateTransformations.CoordinateTransformationAdapter; +import org.janelia.saalfeldlab.n5.zarr.ZarrDatasetAttributes; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +public class OmeNgffV05MetadataParser implements N5MetadataParser, N5MetadataWriter { + + private final Gson gson; + + public OmeNgffV05MetadataParser(final boolean reverse) { + + gson = gsonBuilder().create(); + } + + public OmeNgffV05MetadataParser() { + + this(false); + } + + public static GsonBuilder gsonBuilder() { + + return new GsonBuilder() + .registerTypeAdapter(CoordinateTransformation.class, new CoordinateTransformationAdapter()) + .registerTypeAdapter(OmeNgffDataset.class, new DatasetAdapter()) + .registerTypeAdapter(Axis.class, new AxisAdapter()) + .registerTypeAdapter(OmeNgffMultiScaleMetadata.class, new MultiscalesAdapter()); + } + + @Override + public Optional parseMetadata(final N5Reader n5, final N5TreeNode node) { + + OmeNgffMultiScaleMetadata[] multiscales; + try { + final JsonElement version = n5.getAttribute(node.getPath(), "ome/version", JsonElement.class); + assert version.getAsString().equals("0.5"); + final JsonElement base = n5.getAttribute(node.getPath(), "ome/multiscales", JsonElement.class); + multiscales = gson.fromJson(base, 
OmeNgffMultiScaleMetadata[].class); + } catch (final Exception e) { + throw e; + //return Optional.empty(); + } + + if (multiscales == null || multiscales.length == 0) { + return Optional.empty(); + } + + int nd = -1; + final Map scaleLevelNodes = new HashMap<>(); + for (final N5TreeNode childNode : node.childrenList()) { + if (childNode.isDataset() && childNode.getMetadata() != null) { + scaleLevelNodes.put(childNode.getPath(), childNode); + if (nd < 0) + nd = ((N5DatasetMetadata) childNode.getMetadata()).getAttributes().getNumDimensions(); + } + } + + if (nd < 0) + return Optional.empty(); + + /* + * Need to replace all children with new children with the metadata from + * this object + */ + for (int j = 0; j < multiscales.length; j++) { + + final OmeNgffMultiScaleMetadata ms = multiscales[j]; + + final String[] paths = ms.getPaths(); + final DatasetAttributes[] attrs = new DatasetAttributes[ms.getPaths().length]; + + final N5DatasetMetadata[] dsetMeta = new N5DatasetMetadata[paths.length]; + for (int i = 0; i < paths.length; i++) { + dsetMeta[i] = ((N5DatasetMetadata)scaleLevelNodes.get(MetadataUtils.canonicalPath(node, paths[i])).getMetadata()); + attrs[i] = dsetMeta[i].getAttributes(); + } + + // maybe axes can be flipped first? + ArrayUtils.reverse(ms.axes); + + final NgffSingleScaleAxesMetadata[] msChildrenMeta = OmeNgffMultiScaleMetadata.buildMetadata( + nd, node.getPath(), ms.datasets, attrs, ms.coordinateTransformations, ms.metadata, ms.axes); + + MetadataUtils.updateChildrenMetadata(node, msChildrenMeta, false); + + // axes need to be flipped after the child is created + // is this actually true? + // ArrayUtils.reverse(ms.axes); + + multiscales[j] = new OmeNgffMultiScaleMetadata(ms, msChildrenMeta); + } + + return Optional.of(new OmeNgffV05Metadata(node.getPath(), multiscales)); + } + + @Override + public void writeMetadata(final OmeNgffV05Metadata t, final N5Writer n5, final String groupPath) throws Exception { + + final OmeNgffMultiScaleMetadata[] ms = t.multiscales; + final JsonElement jsonElem = gson.toJsonTree(ms); + + // need to reverse axes + for (final JsonElement e : jsonElem.getAsJsonArray().asList()) { + final JsonArray axes = e.getAsJsonObject().get("axes").getAsJsonArray(); + Collections.reverse(axes.asList()); + } + + n5.setAttribute(groupPath, "ome.multiscales", jsonElem); + } + + public static boolean cOrder(final DatasetAttributes datasetAttributes) { + + if (datasetAttributes instanceof ZarrDatasetAttributes) { + final ZarrDatasetAttributes zattrs = (ZarrDatasetAttributes)datasetAttributes; + return zattrs.isRowMajor(); + } + return false; + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/translation/TranslatedN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/translation/TranslatedN5Reader.java index 08a212a..09836b8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/translation/TranslatedN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/translation/TranslatedN5Reader.java @@ -1,34 +1,37 @@ package org.janelia.saalfeldlab.n5.universe.translation; -import com.google.gson.Gson; -import com.google.gson.JsonElement; - import java.net.URI; import java.util.Map; + import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.GsonN5Reader; import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Reader; import org.janelia.saalfeldlab.n5.N5URI; +import org.janelia.saalfeldlab.n5.shard.Shard; +import 
org.janelia.saalfeldlab.n5.shard.ShardParameters; import org.janelia.saalfeldlab.n5.universe.container.ContainerMetadataNode; +import com.google.gson.Gson; +import com.google.gson.JsonElement; + public class TranslatedN5Reader implements GsonN5Reader { - + private final N5Reader n5; protected final InvertibleTreeTranslation translation; public TranslatedN5Reader( final N5Reader n5Base, final Gson gson, - final String fwdTranslation, + final String fwdTranslation, final String invTranslation ) { this.n5 = n5Base; - ContainerMetadataNode root = ContainerMetadataNode.build(n5Base, gson); + final ContainerMetadataNode root = ContainerMetadataNode.build(n5Base, gson); root.addPathsRecursive(); translation = new InvertibleTreeTranslation(root, gson, fwdTranslation, invTranslation); } - + public InvertibleTreeTranslation getTranslation() { return translation; } @@ -46,16 +49,16 @@ public JsonElement getAttributes(final String pathName) throws N5Exception.N5IOE /** * Returns the path in the original container given the path in the translated container. - * + * * @param pathName the path in the translated container * @return the path in the original container */ public String originalPath( String pathName ) { - ContainerMetadataNode pathNode = new ContainerMetadataNode(); + final ContainerMetadataNode pathNode = new ContainerMetadataNode(); pathNode.createGroup(pathName); pathNode.addPathsRecursive(); - ContainerMetadataNode translatedPathNode = translation.getInverseTranslationFunction().apply(pathNode); + final ContainerMetadataNode translatedPathNode = translation.getInverseTranslationFunction().apply(pathNode); translatedPathNode.addPathsRecursive(); final String path = translatedPathNode.flattenLeaves().findFirst().get().getPath(); return N5URI.normalizeGroupPath(path); @@ -66,6 +69,14 @@ public DataBlock readBlock(String pathName, DatasetAttributes datasetAttribut return n5.readBlock( originalPath( pathName ), datasetAttributes, gridPosition); } + + + @Override + public Shard readShard(String datasetPath, A datasetAttributes, + long... 
shardGridPosition) { + + return n5.readShard(originalPath(datasetPath), datasetAttributes, shardGridPosition); + } @Override public boolean exists(String pathName) { @@ -97,4 +108,11 @@ public Gson getGson() { return translation.getGson(); } + @Override + public String getAttributesKey() { + + // TODO fix + return "attributes.json"; + } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/translation/TranslatedN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/translation/TranslatedN5Writer.java index 0017a55..08667be 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/translation/TranslatedN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/translation/TranslatedN5Writer.java @@ -1,31 +1,37 @@ package org.janelia.saalfeldlab.n5.universe.translation; -import com.google.gson.Gson; import java.util.List; import java.util.Map; + import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Writer; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardParameters; import org.janelia.saalfeldlab.n5.universe.container.ContainerMetadataNode; import org.janelia.saalfeldlab.n5.universe.container.ContainerMetadataWriter; +import com.google.gson.Gson; + public class TranslatedN5Writer extends TranslatedN5Reader implements N5Writer { - + protected N5Writer writer; - + protected ContainerMetadataWriter containerWriter; public TranslatedN5Writer( N5Writer n5Base, Gson gson, String fwdTranslation, String invTranslation) { super(n5Base, gson, fwdTranslation, invTranslation); - + this.writer = n5Base; containerWriter = new ContainerMetadataWriter( n5Base, translation.getTranslated() ); } - + public N5Writer getBaseWriter() { return writer; } - + @Override public void setAttribute( final String pathName, final String key, final T attribute) { translation.setTranslatedAttribute( pathName, key, attribute ); @@ -49,7 +55,7 @@ public void createGroup(String pathName) { @Override public boolean remove(String pathName) { - boolean success = writer.remove(originalPath(pathName)); + final boolean success = writer.remove(originalPath(pathName)); if( success ) { translation.getTranslated().remove(pathName); translation.updateOriginal(); @@ -59,7 +65,7 @@ public boolean remove(String pathName) { @Override public boolean remove() { - boolean success = writer.remove(); + final boolean success = writer.remove(); if( success ) { translation.rootOrig = new ContainerMetadataNode(); translation.rootTranslated = new ContainerMetadataNode(); @@ -111,4 +117,11 @@ public boolean deleteBlock(String pathName, long... 
gridPosition) { return writer.deleteBlock(originalPath(pathName), gridPosition); } + @Override + public void writeShard(String datasetPath, A datasetAttributes, + Shard shard) throws N5Exception { + + writer.writeShard(originalPath(datasetPath), datasetAttributes, shard); + } + } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java index f84884a..f5c9bc1 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java @@ -10,6 +10,8 @@ import org.janelia.saalfeldlab.n5.universe.N5Factory.StorageFormat; import org.janelia.saalfeldlab.n5.zarr.ZarrKeyValueReader; import org.janelia.saalfeldlab.n5.zarr.ZarrKeyValueWriter; +import org.janelia.saalfeldlab.n5.zarr.v3.ZarrV3KeyValueReader; +import org.janelia.saalfeldlab.n5.zarr.v3.ZarrV3KeyValueWriter; import org.junit.Test; import java.io.File; @@ -49,26 +51,31 @@ public void testStorageFormatGuesses() throws URISyntaxException { assertEquals("h5 extension == h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(h5Ext)); assertNotEquals("h5 extension != n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(h5Ext)); assertNotEquals("h5 extension != zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(h5Ext)); + assertNotEquals("h5 extension != zarr", StorageFormat.ZARR3, N5Factory.StorageFormat.guessStorageFromUri(h5Ext)); assertEquals("hdf5 extension == h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(hdf5Ext)); assertNotEquals("hdf5 extension != n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(hdf5Ext)); assertNotEquals("hdf5 extension != zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(hdf5Ext)); + assertNotEquals("hdf5 extension != zarr3", StorageFormat.ZARR3, N5Factory.StorageFormat.guessStorageFromUri(hdf5Ext)); assertNotEquals("n5 extension != h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(n5Ext)); assertEquals("n5 extension == n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(n5Ext)); assertNotEquals("n5 extension != zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(n5Ext)); + assertNotEquals("n5 extension != zarr3", StorageFormat.ZARR3, N5Factory.StorageFormat.guessStorageFromUri(n5Ext)); assertNotEquals("n5 extension slash != h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(n5ExtSlash)); assertEquals("n5 extension slash == n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(n5ExtSlash)); - assertNotEquals("n5 extension slash != zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(n5ExtSlash)); + assertNotEquals("n5 extension slash != zarr", StorageFormat.ZARR3, N5Factory.StorageFormat.guessStorageFromUri(n5ExtSlash)); assertNotEquals("zarr extension != h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(zarrExt)); assertNotEquals("zarr extension != n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(zarrExt)); - assertEquals("zarr extension == zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(zarrExt)); + assertNotEquals("zarr extension == zarr2", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(zarrExt)); + assertEquals("zarr extension == zarr3", StorageFormat.ZARR3, N5Factory.StorageFormat.guessStorageFromUri(zarrExt)); assertNotEquals("zarr extension slash != 
h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(zarrExtSlash)); assertNotEquals("zarr extension slash != n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(zarrExtSlash)); - assertEquals("zarr extension slash == zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(zarrExtSlash)); + assertNotEquals("zarr extension slash != zarr2", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(zarrExtSlash)); + assertEquals("zarr extension slash == zarr", StorageFormat.ZARR3, N5Factory.StorageFormat.guessStorageFromUri(zarrExtSlash)); assertNull("unknown extension != h5", N5Factory.StorageFormat.guessStorageFromUri(unknownExt)); assertNull("unknown extension != n5", N5Factory.StorageFormat.guessStorageFromUri(unknownExt)); @@ -95,8 +102,8 @@ public void testWriterTypeByExtension() throws IOException, URISyntaxException { N5HDF5Writer.class, N5KeyValueWriter.class, N5KeyValueWriter.class, - ZarrKeyValueWriter.class, - ZarrKeyValueWriter.class + ZarrV3KeyValueWriter.class, + ZarrV3KeyValueWriter.class }; for (int i = 0; i < ext.length; i++) { @@ -119,12 +126,13 @@ public void testWriterTypeByPrefix() throws URISyntaxException, IOException { try { tmp = Files.createTempDirectory("factory-test-").toFile(); - final String[] prefix = new String[]{"h5", "hdf5", "n5", "zarr"}; + final String[] prefix = new String[]{"h5", "hdf5", "n5", "zarr", "zarr3"}; final Class[] readerTypes = new Class[]{ N5HDF5Writer.class, N5HDF5Writer.class, N5KeyValueWriter.class, - ZarrKeyValueWriter.class + ZarrKeyValueWriter.class, + ZarrV3KeyValueWriter.class }; for (int i = 0; i < prefix.length; i++) { @@ -137,7 +145,7 @@ public void testWriterTypeByPrefix() throws URISyntaxException, IOException { } // ensure that prefix is preferred to extensions - final String[] extensions = new String[]{".h5", ".hdf5", ".n5", ".zarr"}; + final String[] extensions = new String[]{".h5", ".hdf5", ".n5", ".zarr", ".zarr"}; for (int i = 0; i < prefix.length; i++) { for (int j = 0; j < extensions.length; j++) { @@ -187,15 +195,13 @@ public void testDefaultForAmbiguousWriters() throws IOException { tmpEmptyDir.mkdirs(); tmpEmptyDir.deleteOnExit(); - - final Class[] writerTypes = new Class[]{ null, N5HDF5Writer.class, ZarrKeyValueWriter.class, - ZarrKeyValueWriter.class, - ZarrKeyValueWriter.class, - ZarrKeyValueWriter.class + N5KeyValueWriter.class, + ZarrV3KeyValueWriter.class, + ZarrV3KeyValueWriter.class }; for (int i = 0; i < paths.length; i++) { @@ -208,7 +214,47 @@ public void testDefaultForAmbiguousWriters() throws IOException { tmp.delete(); } } + + @Test + public void testForExistingWriters() throws IOException { + + final N5Factory factory = new N5Factory(); + + File tmp = null; + try { + tmp = Files.createTempDirectory("factory-test-").toFile(); + + final String[] paths = new String[]{ + "h5WithWeirdExtension.zarr", + "zarr2WithWeirdExtension.n5", + "zarr3WithWeirdExtension.jpg", + "n5WithWeirdExtension.h5" + }; + + final Path tmpPath = tmp.toPath(); + factory.openWriter(StorageFormat.HDF5, tmpPath.resolve(paths[0]).toFile().getCanonicalPath()).close(); + factory.openWriter(StorageFormat.ZARR, tmpPath.resolve(paths[1]).toFile().getCanonicalPath()).close(); + factory.openWriter(StorageFormat.ZARR3, tmpPath.resolve(paths[2]).toFile().getCanonicalPath()).close(); + factory.openWriter(StorageFormat.N5, tmpPath.resolve(paths[3]).toFile().getCanonicalPath()).close(); + + final Class[] writerTypes = new Class[]{ + N5HDF5Writer.class, + ZarrKeyValueWriter.class, + 
ZarrV3KeyValueWriter.class, + N5KeyValueWriter.class + }; + + for (int i = 0; i < paths.length; i++) { + final String prefixUri = tmpPath.resolve(paths[i]).normalize().toUri().toString(); + checkWriterTypeFromFactory( factory, prefixUri, writerTypes[i], " with path " + paths[i]); + } + + } finally { + tmp.delete(); + } + } + @Test public void testDefaultForAmbiguousReaders() throws IOException { @@ -223,9 +269,10 @@ public void testDefaultForAmbiguousReaders() throws IOException { "a_non_hdf5_file", "an_hdf5_file", "a_zarr_directory", + "a_zarr3_directory", "an_n5_directory", "an_empty_directory", - "a_non_existent_path" + "a_non_existent_path", }; final Path tmpPath = tmp.toPath(); @@ -236,18 +283,18 @@ public void testDefaultForAmbiguousReaders() throws IOException { factory.openWriter(StorageFormat.HDF5, tmpPath.resolve(paths[1]).toFile().getCanonicalPath()).close(); factory.openWriter(StorageFormat.ZARR, tmpPath.resolve(paths[2]).toFile().getCanonicalPath()).close(); - factory.openWriter(StorageFormat.N5, tmpPath.resolve(paths[3]).toFile().getCanonicalPath()).close(); + factory.openWriter(StorageFormat.ZARR3, tmpPath.resolve(paths[3]).toFile().getCanonicalPath()).close(); + factory.openWriter(StorageFormat.N5, tmpPath.resolve(paths[4]).toFile().getCanonicalPath()).close(); - final File tmpEmptyDir = tmpPath.resolve(paths[4]).toFile(); + final File tmpEmptyDir = tmpPath.resolve(paths[5]).toFile(); tmpEmptyDir.mkdirs(); tmpEmptyDir.deleteOnExit(); - - final Class[] readerTypes = new Class[]{ null, N5HDF5Reader.class, ZarrKeyValueReader.class, + ZarrV3KeyValueReader.class, N5KeyValueReader.class, N5KeyValueReader.class, null @@ -262,7 +309,7 @@ public void testDefaultForAmbiguousReaders() throws IOException { tmp.delete(); } } - + private void checkWriterTypeFromFactory(N5Factory factory, String uri, Class expected, String messageSuffix) { if (expected == null) { @@ -271,7 +318,7 @@ private void checkWriterTypeFromFactory(N5Factory factory, String uri, Class } final N5Writer n5 = factory.openWriter(uri); - assertNotNull( "null n5 for " + uri, n5); + assertNotNull("null n5 for " + uri, n5); assertEquals(expected.getName() + messageSuffix, expected, n5.getClass()); n5.remove(); } @@ -284,7 +331,7 @@ private void checkReaderTypeFromFactory(N5Factory factory, String uri, Class } final N5Reader n5 = factory.openReader(uri); - assertNotNull( "null n5 for " + uri, n5); + assertNotNull("null n5 for " + uri, n5); assertEquals(expected.getName() + messageSuffix, expected, n5.getClass()); } } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/PartialReadBenchmarks.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/PartialReadBenchmarks.java new file mode 100644 index 0000000..0c93cd3 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/PartialReadBenchmarks.java @@ -0,0 +1,184 @@ +package org.janelia.saalfeldlab.n5.universe.benchmarks; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import java.util.stream.IntStream; + +import org.apache.commons.io.output.ByteArrayOutputStream; +import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.LockedChannel; +import org.openjdk.jmh.annotations.Benchmark; +import 
org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +@State(Scope.Benchmark) +@Warmup(iterations = 5, time = 1000, timeUnit = TimeUnit.MICROSECONDS) +@Measurement(iterations = 50, time = 1000, timeUnit = TimeUnit.MICROSECONDS) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Fork(1) +public class PartialReadBenchmarks { + + // @Param(value = {"1000000", "10000000", "100000000"}) + @Param(value = {"10000000"}) + protected int objectSizeBytes; + + @Param(value = {"1000"}) + protected int readSizeBytes; + + @Param(value = {"10", "50", "100"}) + protected int numSubReads; + + @Param(value = {"partial", "complete"}) + protected String mode; + + protected String baseDir; + protected KeyValueAccess kva; + protected Random random; + + protected int[] startPositions; + + public PartialReadBenchmarks() {} + + @TearDown(Level.Trial) + public void teardown() { + + for (final int sz : sizes()) { + new File(baseDir, "" + sz).delete(); + new File(baseDir).delete(); + } + } + + @Setup(Level.Trial) + public void setup() { + + random = new Random(1); + kva = new FileSystemKeyValueAccess(FileSystems.getDefault()); + + File tmpFile; + try { + tmpFile = Files.createTempDirectory("partialReadBenchmark-").toFile(); + baseDir = tmpFile.getCanonicalPath(); + } catch (final IOException e) { + e.printStackTrace(); + } + + startPositions = IntStream.generate(() -> { + + return random.nextInt(objectSizeBytes - readSizeBytes); + }).limit(numSubReads).toArray(); + + for (final int sz : sizes()) { + String path; + try { + path = new File(baseDir, "" + sz).getCanonicalPath(); + write(path, sz); + } catch (final IOException e) {} + } + } + + protected void write(String path, int numBytes) { + + final byte[] data = new byte[numBytes]; + random.nextBytes(data); + + System.out.println("write to path: " + path); + + try (final LockedChannel ch = kva.lockForWriting(path)) { + final OutputStream os = ch.newOutputStream(); + os.write(data); + os.flush(); + os.close(); + } catch (final IOException e) { + e.printStackTrace(); + } + } + + private byte[] read(String path, int startByte, int numBytes) throws IOException { + + try (final LockedChannel ch = kva.lockForReading(path, startByte, numBytes)) { + final InputStream is = ch.newInputStream(); + final byte[] data = new byte[numBytes]; + is.read(data); + is.close(); // not strictly needed + return data; + } + } + + private byte[] readComplete(String path) throws IOException { + + try (final ByteArrayOutputStream result = new ByteArrayOutputStream()) { + + try (final LockedChannel ch = kva.lockForReading(path)) { + final InputStream is = ch.newInputStream(); + byte[] buffer = new byte[1024]; + for (int length; (length = is.read(buffer)) != -1;) { + result.write(buffer, 0, length); + } + return result.toByteArray(); + } + } + } + + @Benchmark + public void 
run(Blackhole blackhole) throws IOException { + + final String path = new File(baseDir, "" + objectSizeBytes).getCanonicalPath(); + if (mode.equals("partial")) { + for (int i = 0; i < numSubReads; i++) { + blackhole.consume(read(path, startPositions[i], readSizeBytes)); + } + } else { + blackhole.consume(readComplete(path)); + } + } + + public int[] sizes() { + + try { + final Param ann = PartialReadBenchmarks.class.getDeclaredField("objectSizeBytes") + .getAnnotation(Param.class); + System.out.println(Arrays.toString(ann.value())); + return Arrays.stream(ann.value()).mapToInt(Integer::parseInt).toArray(); + + } catch (final NoSuchFieldException e) { + e.printStackTrace(); + } catch (final SecurityException e) { + e.printStackTrace(); + } + + return null; + } + + public static void main(String... args) throws RunnerException { + + final Options options = new OptionsBuilder().include(PartialReadBenchmarks.class.getSimpleName() + "\\.") + .build(); + + new Runner(options).run(); + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/S3PartialReadBenchmarks.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/S3PartialReadBenchmarks.java new file mode 100644 index 0000000..3d74ba5 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/S3PartialReadBenchmarks.java @@ -0,0 +1,160 @@ +package org.janelia.saalfeldlab.n5.universe.benchmarks; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.Random; +import java.util.concurrent.TimeUnit; + +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.LockedChannel; +import org.janelia.saalfeldlab.n5.s3.AmazonS3KeyValueAccess; +import org.janelia.saalfeldlab.n5.s3.AmazonS3Utils; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import com.amazonaws.services.s3.AmazonS3; + +@State(Scope.Benchmark) +@Warmup(iterations = 1, time = 100, timeUnit = TimeUnit.MICROSECONDS) +@Measurement(iterations = 2, time = 100, timeUnit = TimeUnit.MICROSECONDS) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Fork(1) +public class S3PartialReadBenchmarks { + + // TODO do a better job sharing functionality with file system benchmark? 
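+ // Each benchmark invocation splits one read of an objectSizeBytes S3 object into
+ // numSubReads sequential range reads of objectSizeBytes / numSubReads bytes each (see run() below).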
+ + @Param(value = {"1000000"}) + protected int objectSizeBytes; + + @Param(value = {"1", "10", "50", "100"}) + protected int numSubReads; + + protected String baseDir; + protected KeyValueAccess kva; + protected Random random; + + public S3PartialReadBenchmarks() {} + + @TearDown(Level.Trial) + public void teardown() { + + for (final int sz : sizes()) { + try { + kva.delete(baseDir + "/" + sz); + kva.delete(baseDir); + } catch (final IOException e) {} + } + } + + @Setup(Level.Trial) + public void setup() throws InterruptedException { + + random = new Random(); + + URI uri; + try { + uri = new URI("s3://n5-zarr-benchmarks"); + } catch (final URISyntaxException e) { + e.printStackTrace(); + return; + } + + final AmazonS3 s3 = AmazonS3Utils.createS3(uri.toASCIIString(), null, + AmazonS3Utils.getS3Credentials(null, false), null, null); + kva = new AmazonS3KeyValueAccess(s3, uri, false); + + baseDir = uri + "/partialReadBenchmarkData/" + random.nextInt(99999); + for (final int sz : sizes()) { + final String path = baseDir + "/" + sz; + write(path, sz); + Thread.sleep(500); + } + } + + private void read(String path, int startByte, int numBytes) { + + try (final LockedChannel ch = kva.lockForReading(path, startByte, numBytes)) { + final InputStream is = ch.newInputStream(); + final byte[] data = new byte[numBytes]; + is.read(data); + is.close(); // not strictly needed + } catch (final IOException e) { + e.printStackTrace(); + } + } + + @Benchmark + public void run() throws IOException { + + final String path = baseDir + "/" + objectSizeBytes; + + // read + final int numBytesToRead = objectSizeBytes / numSubReads; + int start = 0; + for (int i = 0; i < numSubReads; i++) { + read(path, start, numBytesToRead); + start += numBytesToRead; + } + } + + protected void write(String path, int numBytes) { + + final byte[] data = new byte[numBytes]; + random.nextBytes(data); + + try (final LockedChannel ch = kva.lockForWriting(path)) { + final OutputStream os = ch.newOutputStream(); + os.write(data); + os.flush(); + os.close(); + } catch (final IOException e) { + e.printStackTrace(); + } + } + + public int[] sizes() { + + try { + final Param ann = S3PartialReadBenchmarks.class.getDeclaredField("objectSizeBytes") + .getAnnotation(Param.class); + System.out.println(Arrays.toString(ann.value())); + return Arrays.stream(ann.value()).mapToInt(Integer::parseInt).toArray(); + + } catch (final NoSuchFieldException e) { + e.printStackTrace(); + } catch (final SecurityException e) { + e.printStackTrace(); + } + + return null; + } + + public static void main(String... 
args) throws RunnerException { + + final Options options = new OptionsBuilder().include(S3PartialReadBenchmarks.class.getSimpleName() + "\\.") + .build(); + + new Runner(options).run(); + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/ShardReadBenchmarks.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/ShardReadBenchmarks.java new file mode 100644 index 0000000..a9e10a5 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/ShardReadBenchmarks.java @@ -0,0 +1,248 @@ +package org.janelia.saalfeldlab.n5.universe.benchmarks; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.janelia.saalfeldlab.n5.ByteArrayDataBlock; +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.GsonKeyValueN5Writer; +import org.janelia.saalfeldlab.n5.GzipCompression; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.N5Exception; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; +import org.janelia.saalfeldlab.n5.shard.InMemoryShard; +import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.shard.VirtualShard; +import org.janelia.saalfeldlab.n5.universe.N5Factory; +import org.janelia.saalfeldlab.n5.util.GridIterator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + + +import net.imglib2.type.numeric.integer.UnsignedByteType; + +@State(Scope.Benchmark) +@Warmup(iterations = 5, time=100, timeUnit = TimeUnit.MICROSECONDS) +@Measurement(iterations = 25, time=100, timeUnit = TimeUnit.MICROSECONDS) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Fork(1) +public class ShardReadBenchmarks { + + @Param(value = {"SHARD_WHOLE", "SHARD_BATCH", "BLOCK" }) + protected String readMethod; + +// @Param(value = {"10", "100", "1000"}) + @Param(value = {"1000"}) + protected int blocksPerShard; + +// @Param(value = {"10", "100", "1000"}) + @Param(value = {"1", "100"}) + protected int numBlockReads; + +// @Param(value = {"1000", "10000", "100000"}) + @Param(value = {"10000"}) + protected int blockSize; + + @Param(value = {"END"}) + protected String 
indexLocation; + + protected String baseDir; + protected String dset = "shards"; + + protected GsonKeyValueN5Writer n5Writer; + protected int[] blockIndexes; + protected List blockPositions; + private ShardedDatasetAttributes attrs; + + protected Random random; + + + public ShardReadBenchmarks() {} + + @TearDown(Level.Trial) + public void teardown() { + + n5Writer.remove(); + } + + @Setup(Level.Trial) + public void setup() { + + random = new Random(); + +// File tmpFile; +// try { +// tmpFile = Files.createTempDirectory("shardReadBenchmark-").toFile(); +// baseDir = tmpFile.getCanonicalPath(); +// } catch (final IOException e) { +// e.printStackTrace(); +// } + + baseDir = "s3://n5-zarr-benchmarks/shardBenchmarks.zarr"; + + write(baseDir); + + // the block indexes to read, a random + List indexes = IntStream.range(0, blocksPerShard).boxed().collect(Collectors.toList()); + Collections.shuffle(indexes); + blockIndexes = indexes.stream().mapToInt(Integer::intValue).limit(numBlockReads).toArray(); + blockPositions = Arrays.stream(blockIndexes).mapToObj(i -> { + final long[] p = new long[1]; + GridIterator.indexToPosition(i, new int[]{blocksPerShard}, p); + return p; + }).collect(Collectors.toList()); + + } + + protected void write(String path) { + + n5Writer = (GsonKeyValueN5Writer) new N5Factory().openWriter("zarr3:"+path); + attrs = new ShardedDatasetAttributes(dimensions(), shardSize(), blockSize(), DataType.INT8, + new Codec[]{new BytesCodec(), new GzipCompression()}, + new DeterministicSizeCodec[]{new BytesCodec(), new Crc32cChecksumCodec()}, + indexLocation()); + + n5Writer.remove(dset); + n5Writer.createDataset(dset, attrs); + + System.out.println("writing to: " + path); + + final InMemoryShard shard = new InMemoryShard(attrs, new long[1]); + n5Writer.writeBlocks(dset, attrs, dataBlocks(shard)); + } + + protected DataBlock[] dataBlocks(final Shard shard) { + + final DataBlock[] blocks = new DataBlock[blocksPerShard]; + + int i = 0; + Iterator it = shard.blockPositionIterator(); + while( it.hasNext()) { + final long[] position = it.next(); + + final byte[] arr = new byte[blockSize]; + random.nextBytes(arr); + + blocks[i++] = new ByteArrayDataBlock(blockSize(), position, arr); + } + return blocks; + } + + protected long[] dimensions() { + return new long[] { blockSize * blocksPerShard }; + } + + protected int[] shardSize() { + return GridIterator.long2int(dimensions()); + } + + protected int[] blockSize() { + return new int[] { blockSize }; + } + + protected IndexLocation indexLocation() { + return IndexLocation.valueOf(indexLocation); + } + + private void readShardBatch(Blackhole blackhole, String path) { + + VirtualShard shard = (VirtualShard) n5Writer.getShard(path, attrs, 0); + List> blks = shard.getBlocks(blockIndexes); + for( DataBlock blk : blks) + blackhole.consume(blk); + + } + + private void readShardWhole(Blackhole blackhole, String path) { + + InMemoryShard shard; + try { + final KeyValueAccess kva = n5Writer.getKeyValueAccess(); + final String shardPath = n5Writer.absoluteDataBlockPath(path, 0); + shard = InMemoryShard.readShard(kva, shardPath, new long[]{ 0 }, attrs); + for (DataBlock blk : shard.getBlocks(blockIndexes)) + blackhole.consume(blk); + + } catch (IOException e) { } + + } + + private void readBlock(Blackhole blackhole, String path) { + + VirtualShard shard = (VirtualShard) n5Writer.getShard(path, attrs, 0); + DataBlock blk; + for( long[] blockPosition : blockPositions ) { + blk = shard.getBlock(blockPosition); + blackhole.consume(blk); + } + } + + @Benchmark + 
public void run(Blackhole blackhole) throws IOException { + + if (readMethod.equals("SHARD_BATCH")) + readShardBatch(blackhole, dset); + else if (readMethod.equals("SHARD_WHOLE")) + readShardWhole(blackhole, dset); + else if (readMethod.equals("BLOCK")) + readBlock(blackhole, dset); + } + + public int[] sizes() { + + try { + final Param ann = ShardReadBenchmarks.class.getDeclaredField("objectSizeBytes") + .getAnnotation(Param.class); + System.out.println(Arrays.toString(ann.value())); + return Arrays.stream(ann.value()).mapToInt(Integer::parseInt).toArray(); + + } catch (final NoSuchFieldException e) { + e.printStackTrace(); + } catch (final SecurityException e) { + e.printStackTrace(); + } + + return null; + } + + public static void main(String... args) throws RunnerException { + + final Options options = new OptionsBuilder().include(ShardReadBenchmarks.class.getSimpleName() + "\\.") + .build(); + + new Runner(options).run(); + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/ShardWriteBenchmarks.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/ShardWriteBenchmarks.java new file mode 100644 index 0000000..6eba64e --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/benchmarks/ShardWriteBenchmarks.java @@ -0,0 +1,179 @@ +package org.janelia.saalfeldlab.n5.universe.benchmarks; + +import java.io.File; +import java.io.IOException; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.janelia.saalfeldlab.n5.ByteArrayDataBlock; +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; +import org.janelia.saalfeldlab.n5.GzipCompression; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.N5Writer; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; +import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; +import org.janelia.saalfeldlab.n5.shard.InMemoryShard; +import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.universe.N5Factory; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * Compare the speed of writing in append-only mode vs "packed" (read-write) mode. 
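+ * The "append" mode writes each block individually with writeBlock; the "readWrite" mode
+ * reads the existing shard back with readShard, copies it into an InMemoryShard, adds the
+ * new block, and rewrites the whole shard with writeShard.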
+ */ +@State(Scope.Benchmark) +@Warmup(iterations = 5, time = 100, timeUnit = TimeUnit.MICROSECONDS) +@Measurement(iterations = 50, time = 100, timeUnit = TimeUnit.MICROSECONDS) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Fork(1) +public class ShardWriteBenchmarks { + + @Param(value = {"1000"}) + protected int blockSize; + + @Param(value = {"50"}) + protected int numBlocks; + + @Param(value = {"50", "75"}) + protected int numBlockWrites; + + @Param(value = {"append", "readWrite"}) + protected String writeMode; + + protected String baseDir; + protected KeyValueAccess kva; + protected Random random; + + protected N5Writer n5; + private ShardedDatasetAttributes attributes; + protected List> blocks; + + public ShardWriteBenchmarks() {} + + @TearDown(Level.Trial) + public void teardown() { + + System.out.println("teardown"); + n5.remove(); + n5.close(); + } + + @Setup(Level.Trial) + public void setup() { + + random = new Random(); + kva = new FileSystemKeyValueAccess(FileSystems.getDefault()); + + File tmpFile; + try { + tmpFile = Files.createTempDirectory("shardWriteBenchmark-").toFile(); + baseDir = tmpFile.getCanonicalPath(); + } catch (final IOException e) { + e.printStackTrace(); + } + + genDataBlocks(); + genAttributes(); + + n5 = new N5Factory().openWriter(baseDir); + n5.createDataset("", attributes); + } + + protected void genAttributes() { + + final long[] dimensions = new long[]{blockSize * numBlocks}; + final int[] blkSize = new int[]{blockSize}; + final int[] shardSize = new int[]{blockSize * numBlocks}; + + attributes = new ShardedDatasetAttributes( + dimensions, + shardSize, + blkSize, + DataType.UINT8, + new Codec[]{new N5BlockCodec(), new GzipCompression(4)}, + new DeterministicSizeCodec[]{new BytesCodec(), new Crc32cChecksumCodec()}, + IndexLocation.END + ); + } + + protected void genDataBlocks() { + + blocks = IntStream.range(0, numBlockWrites).mapToObj( i -> { + + final byte[] data = new byte[blockSize]; + random.nextBytes(data); + + final int position = i % numBlocks; + return new ByteArrayDataBlock(new int[]{blockSize}, new long[] {position}, data); + + }).collect(Collectors.toList()); + } + + public void append() { + + for (DataBlock blk : blocks) { + n5.writeBlock(baseDir, attributes, blk); + } + } + + public void readWrite() { + + for (DataBlock blk : blocks) { + + @SuppressWarnings("unchecked") + final Shard vshard = (Shard) n5.readShard("", attributes, 0); + final InMemoryShard shard = InMemoryShard.fromShard(vshard); + shard.addBlock(blk); + + n5.writeShard("", attributes, shard); + } + } + + @Benchmark + public void run() throws IOException { + + if( writeMode.equals("readWrite")) { + readWrite(); + } + else { + append(); + } + } + + public static void main(String... 
args) throws RunnerException { + + final Options options = new OptionsBuilder().include(ShardWriteBenchmarks.class.getSimpleName() + "\\.") + .build(); + + new Runner(options).run(); + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/CodecDemo.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/CodecDemo.java new file mode 100644 index 0000000..c05d0a8 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/CodecDemo.java @@ -0,0 +1,177 @@ +package org.janelia.saalfeldlab.n5.universe.demo; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.FilterInputStream; +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.function.IntUnaryOperator; + +import org.janelia.saalfeldlab.n5.GsonUtils; +import org.janelia.saalfeldlab.n5.GzipCompression; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +public class CodecDemo { + + public static void main(String[] args) throws IOException { + + final String message = "my son is also named bort"; + + encodeDecode(message); + customCodecs(message); + composeCodecs(message); + serializeCodec(); + } + + public static void encodeDecode(final String message) throws IOException { + final Codec.BytesCodec codec = new GzipCompression(); + + // encode + byte[] encodedData; + ByteArrayOutputStream out = new ByteArrayOutputStream(); + + OutputStream encodedOs = codec.encode(out); + + encodedOs.write(message.getBytes()); + encodedOs.close(); + encodedData = out.toByteArray(); + + // decode + byte[] decodedData = new byte[message.getBytes().length]; + ByteArrayInputStream is = new ByteArrayInputStream(encodedData); + InputStream decodedIs = codec.decode(is); + decodedIs.read(decodedData); + + System.out.println("\nGzip Codec:"); + System.out.println("original message: " + message); + System.out.println("encoded message: " + new String(encodedData)); + System.out.println("decoded message: " + new String(decodedData)); + } + + public static void customCodecs(final String message) throws IOException { + + final FunctionCodec addCodec = new FunctionCodec( x -> x + 3, x -> x - 3); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + OutputStream encodedOut = addCodec.encode(out); + encodedOut.write(message.getBytes()); + + System.out.println("\nAdd Codec:"); + System.out.println(message); + System.out.println(new String(out.toByteArray())); + } + + public static void composeCodecs(final String message) throws IOException { + + final FunctionCodec subtractCodec = new FunctionCodec(x -> x - 32, x -> x + 32); + final FunctionCodec noNegativesCodec = new FunctionCodec(x -> x > 0 ? x : 32, x -> x); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + + OutputStream encodedOut = Codec.encode(out, noNegativesCodec, subtractCodec); + encodedOut.write(message.getBytes()); + + System.out.println("\nComposed Codec:"); + System.out.println(message); + System.out.println(new String(out.toByteArray())); + } + + public static void serializeCodec() throws IOException { + + final FancyCodec codec = new FancyCodec(); + + final GsonBuilder gsonBuilder = new GsonBuilder(); + GsonUtils.registerGson(gsonBuilder); + final Gson gson = gsonBuilder.create(); + + System.out.println( gson.toJson(codec) ); + } + + /* + * Not actually useful. 
For demonstration purposes only. + */ + public static class FunctionCodec implements Codec.BytesCodec { + + private static final long serialVersionUID = 999L; + + public static final String TYPE = "add"; + + private IntUnaryOperator encode; + private IntUnaryOperator decode; + + public FunctionCodec(IntUnaryOperator encode, IntUnaryOperator decode) { + this.encode = encode; + this.decode = decode; + } + + @Override + public String getType() { + return TYPE; + } + + @Override + public InputStream decode(InputStream in) throws IOException { + return new FilterInputStream(in) { + + public int read() throws IOException { + return decode.applyAsInt(in.read()); + } + }; + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { + + return new FilterOutputStream(out) { + public void write(int b) throws IOException { + out.write(encode.applyAsInt(b)); + } + }; + } + } + + @NameConfig.Name("fancy") + public static class FancyCodec implements Codec.BytesCodec { + + private static final long serialVersionUID = -1785908861729837317L; + + @NameConfig.Parameter + private final int rizz; + + @NameConfig.Parameter(optional = true) + private final String swag; + + public FancyCodec() { + this(99, "hella"); + } + + public FancyCodec(int rizz, String swag) { + + this.rizz = rizz; + this.swag = swag; + } + + @Override + public String getType() { + return "fancy"; + } + + @Override + public InputStream decode(InputStream in) throws IOException { + return in; + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { + return out; + } + + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/ShardWritingDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/ShardWritingDemos.java new file mode 100644 index 0000000..2b96be7 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/ShardWritingDemos.java @@ -0,0 +1,286 @@ +package org.janelia.saalfeldlab.n5.universe.demo; + +import java.io.IOException; +import java.nio.ByteOrder; +import java.util.Arrays; +import java.util.function.IntUnaryOperator; +import java.util.stream.IntStream; + +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.GsonKeyValueN5Writer; +import org.janelia.saalfeldlab.n5.IntArrayDataBlock; +import org.janelia.saalfeldlab.n5.N5Writer; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; +import org.janelia.saalfeldlab.n5.imglib2.N5Utils; +import org.janelia.saalfeldlab.n5.shard.InMemoryShard; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.shard.VirtualShard; +import org.janelia.saalfeldlab.n5.universe.N5Factory; +import org.janelia.scicomp.n5.zstandard.ZstandardCompression; + +import net.imglib2.img.array.ArrayCursor; +import net.imglib2.img.array.ArrayImg; +import net.imglib2.img.array.ArrayImgs; +import net.imglib2.img.basictypeaccess.array.IntArray; +import net.imglib2.type.numeric.integer.IntType; + +public class ShardWritingDemos { + + public static void main(String[] args) throws IOException { + + highLevel(); + + midLevel(); + midLevelBatch(); + + lowLevelVirtual(); + lowLevelBatch(); + } + + public static void highLevel() { + + final ArrayImg img = generateData(32, 27); + + try (final N5Writer zarr = new 
N5Factory().openWriter("zarr3:/home/john/tests/codeReview/sharded.zarr")) { + + N5Utils.save(img, zarr, + "highLevel", // dataset path + new int[] { 16, 9 }, // shard size + new int[] { 4, 3 }, // block size + new ZstandardCompression()); + } + } + + public static void midLevel() { + + final long[] imageSize = new long[] { 32, 27 }; + final int[] shardSize = new int[] { 16, 9 }; + final int[] blockSize = new int[] { 4, 3 }; + final int numBlockElements = Arrays.stream(blockSize).reduce(1, (x, y) -> x * y); + + try( final N5Writer zarr = new N5Factory().openWriter("zarr3:/home/john/tests/codeReview/sharded.zarr") ) { + + /* + * If you need control over everything, this is here. + */ + final ShardedDatasetAttributes attributes = new ShardedDatasetAttributes( + imageSize, + shardSize, + blockSize, + DataType.INT32, + new Codec[]{ + // codecs applied to image data + new BytesCodec(ByteOrder.BIG_ENDIAN) + }, + new DeterministicSizeCodec[]{ + // codecs applied to the shard index, must not be compressors + new BytesCodec(ByteOrder.LITTLE_ENDIAN), + new Crc32cChecksumCodec() + }, + IndexLocation.START + ); + + // manually create a dataset + zarr.createDataset("midLevel", attributes); + + // manually write a few blocks + zarr.writeBlock("midLevel", attributes, + new IntArrayDataBlock( blockSize, new long[] {1,1}, generateArray(numBlockElements, x -> 1))); + + zarr.writeBlock("midLevel", attributes, + new IntArrayDataBlock( blockSize, new long[] {0,1}, generateArray(numBlockElements, x -> 2))); + + zarr.writeBlock("midLevel", attributes, + new IntArrayDataBlock( blockSize, new long[] {4,0}, generateArray(numBlockElements, x -> 3))); + } + + } + + public static void midLevelBatch() { + + final String dset = "midLevelBatch"; + final long[] imageSize = new long[] { 32, 27 }; + final int[] shardSize = new int[] { 16, 9 }; + final int[] blockSize = new int[] { 4, 3 }; + final int numBlockElements = Arrays.stream(blockSize).reduce(1, (x, y) -> x * y); + + try( final N5Writer zarr = new N5Factory().openWriter("zarr3:/home/john/tests/codeReview/sharded.zarr") ) { + + /* + * If you need control over everything, this is here. + */ + final ShardedDatasetAttributes attributes = new ShardedDatasetAttributes( + imageSize, + shardSize, + blockSize, + DataType.INT32, + new Codec[]{ + // codecs applied to image data + new BytesCodec(ByteOrder.BIG_ENDIAN) + }, + new DeterministicSizeCodec[]{ + // codecs applied to the shard index, must not be compressors + new BytesCodec(ByteOrder.LITTLE_ENDIAN), + new Crc32cChecksumCodec() + }, + IndexLocation.START + ); + + // manually create a dataset + zarr.createDataset(dset, attributes); + + // Manually write several blocks + // In this case, the blocks that belong to the same shard are combined before writing + + // should this be a collection? + // alternatively, this could be a generator over blocks + // eventually need to also pass an ExecutorService here + + // could also be a generator of a collection + // we'd like to have an iterator over "writable units" + // where the writable units are blocks are shards. 
+ // downstream code would be responsible for filling those units + zarr.writeBlocks(dset, attributes, + new IntArrayDataBlock( blockSize, new long[] {1,1}, generateArray(numBlockElements, x -> 1)), + new IntArrayDataBlock( blockSize, new long[] {0,1}, generateArray(numBlockElements, x -> 2)), + new IntArrayDataBlock( blockSize, new long[] {4,0}, generateArray(numBlockElements, x -> 3)) + ); + + } + + } + + public static void lowLevelVirtual() { + + final String dset = "lowLevelVirtual"; + final long[] imageSize = new long[] { 32, 27 }; + final int[] shardSize = new int[] { 16, 9 }; + final int[] blockSize = new int[] { 4, 3 }; + final int numBlockElements = Arrays.stream(blockSize).reduce(1, (x, y) -> x * y); + + try( final GsonKeyValueN5Writer zarr = (GsonKeyValueN5Writer)new N5Factory().openWriter("zarr3:/home/john/tests/codeReview/sharded.zarr") ) { + + final ShardedDatasetAttributes attributes = new ShardedDatasetAttributes( + imageSize, + shardSize, + blockSize, + DataType.INT32, + new Codec[]{ + // codecs applied to image data + new BytesCodec(ByteOrder.BIG_ENDIAN) + }, + new DeterministicSizeCodec[]{ + // codecs applied to the shard index, must not be compressors + new BytesCodec(ByteOrder.LITTLE_ENDIAN), + new Crc32cChecksumCodec() + }, + IndexLocation.END + ); + + // manually create a dataset + zarr.createDataset(dset, attributes); + + /* + * Programmer's responsibility to create shards, and to determine + * which blocks go in which shard. + */ + final VirtualShard shard00 = new VirtualShard<>( + attributes, + new long[]{0,0}, + zarr.getKeyValueAccess(), + zarr.absoluteDataBlockPath(dset, 0, 0) // path for this shard + ); + + // + + // write to disk + + // the block location here could be relative to the shard + shard00.writeBlock(new IntArrayDataBlock(blockSize, new long[] {1,1}, generateArray(numBlockElements, x -> 1))); + // write to disk + shard00.writeBlock(new IntArrayDataBlock(blockSize, new long[] {0,1}, generateArray(numBlockElements, x -> 2))); + + final VirtualShard shard10 = new VirtualShard<>( + attributes, + new long[]{1,0}, + zarr.getKeyValueAccess(), + zarr.absoluteDataBlockPath(dset, 1, 0) // path for this shard + ); + shard10.writeBlock(new IntArrayDataBlock( blockSize, new long[] {4,0}, generateArray(numBlockElements, x -> 3))); + } + } + + public static void lowLevelBatch() throws IOException { + + final String dset = "lowLevelBatch"; + final long[] imageSize = new long[] { 32, 27 }; + final int[] shardSize = new int[] { 16, 9 }; + final int[] blockSize = new int[] { 4, 3 }; + final int numBlockElements = Arrays.stream(blockSize).reduce(1, (x, y) -> x * y); + + try( final GsonKeyValueN5Writer zarr = (GsonKeyValueN5Writer)new N5Factory().openWriter("zarr3:/home/john/tests/codeReview/sharded.zarr") ) { + + final ShardedDatasetAttributes attributes = new ShardedDatasetAttributes( + imageSize, + shardSize, + blockSize, + DataType.INT32, + new Codec[]{ + // codecs applied to image data + new BytesCodec(ByteOrder.BIG_ENDIAN) + }, + new DeterministicSizeCodec[]{ + // codecs applied to the shard index, must not be compressors + new BytesCodec(ByteOrder.LITTLE_ENDIAN), + new Crc32cChecksumCodec() + }, + IndexLocation.END + ); + + // manually create a dataset + zarr.createDataset("lowLevelBatch", attributes); + + /* + * Programmer's responsibility to create shards, and to determine + * which blocks go in which shard. 
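+ * Unlike lowLevelVirtual above, which writes each block through a VirtualShard as it is
+ * added, here the blocks are collected in an InMemoryShard and the complete shard is
+ * written to disk with a single write call.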
+ */ + final InMemoryShard shard00 = new InMemoryShard<>(attributes, new long[] { 0, 0 }); + shard00.addBlock(new IntArrayDataBlock(blockSize, new long[] { 1, 1 }, generateArray(numBlockElements, x -> 1))); + shard00.addBlock(new IntArrayDataBlock(blockSize, new long[] { 0, 1 }, generateArray(numBlockElements, x -> 2))); + + // write to disk + shard00.write(zarr.getKeyValueAccess(), zarr.absoluteDataBlockPath(dset, 0, 0)); + + final InMemoryShard shard10 = new InMemoryShard<>( attributes, new long[]{1,0}); + shard10.addBlock(new IntArrayDataBlock( blockSize, new long[] {4,0}, generateArray(numBlockElements, x -> 3))); + + // write to disk + shard10.write(zarr.getKeyValueAccess(), zarr.absoluteDataBlockPath(dset, 1, 0)); + } + } + + public static int[] generateArray( int size ) { + return IntStream.range(0, size).toArray(); + } + + public static int[] generateArray( int size, IntUnaryOperator f ) { + return IntStream.range(0, size).map(f) .toArray(); + } + + public static ArrayImg generateData( long... size) { + + ArrayImg img = ArrayImgs.ints(size); + int i = 0; + final ArrayCursor c = img.cursor(); + while (c.hasNext()) { + c.next().set(i++); + } + return img; + } + + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/StartImageJ.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/StartImageJ.java index 06d28d5..6a20838 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/StartImageJ.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/demo/StartImageJ.java @@ -1,31 +1,3 @@ -/*- - * #%L - * Mastodon - * %% - * Copyright (C) 2014 - 2024 Tobias Pietzsch, Jean-Yves Tinevez - * %% - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * #L% - */ package org.janelia.saalfeldlab.n5.universe.demo; import org.scijava.Context;