From a2139e36d5e7355f79097257567214d62d193eb4 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 12 Feb 2025 15:38:18 +0100 Subject: [PATCH 01/15] M0 implementation Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 30 +++++- .../nextflow/data/cid/CidObserver.groovy | 95 +++++++++++++++++-- .../groovy/nextflow/data/cid/CidStore.groovy | 4 + .../nextflow/data/cid/DefaultCidStore.groovy | 4 + .../nextflow/data/cid/model/DataType.groovy | 2 +- .../{TaskOutput.groovy => Output.groovy} | 6 +- .../nextflow/data/cid/model/TaskRun.groovy | 1 + .../nextflow/data/cid/model/Workflow.groovy | 36 +++++++ .../data/cid/model/WorkflowRun.groovy | 35 +++++++ .../nextflow/data/config/DataConfig.groovy | 2 +- .../nextflow/processor/PublishDir.groovy | 25 ++++- .../trace/DefaultObserverFactory.groovy | 4 +- .../src/main/nextflow/file/FileHelper.groovy | 24 ++++- 13 files changed, 252 insertions(+), 16 deletions(-) rename modules/nextflow/src/main/groovy/nextflow/data/cid/model/{TaskOutput.groovy => Output.groovy} (94%) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 09152bc301..ca846bfcd8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -16,6 +16,8 @@ package nextflow +import nextflow.util.CacheHelper + import java.nio.file.Files import java.nio.file.Path import java.nio.file.Paths @@ -254,6 +256,14 @@ class Session implements ISession { private boolean statsEnabled + private volatile boolean cidEnabled + + boolean getCidEnabled() { cidEnabled } + + private HashCode executionHash + + String getExecutionHash() { executionHash } + private WorkflowMetadata workflowMetadata private WorkflowStatsObserver 
statsObserver @@ -393,6 +403,10 @@ class Session implements ISession { // -- file porter config this.filePorter = new FilePorter(this) + if (config.cid) { + this.cidEnabled = true + } + } protected Path cloudCachePath(Map cloudcache, Path workDir) { @@ -405,12 +419,27 @@ class Session implements ISession { } return result } + private HashCode generateExecutionHash(ScriptFile scriptFile){ + List keys = [generateScriptHash(scriptFile).toString(), scriptFile?.repository, scriptFile?.commitId, uniqueId, (Map)config.params] + return CacheHelper.hasher(keys).hash() + } + + private HashCode generateScriptHash(ScriptFile scriptFile){ + List keys = [ scriptFile?.scriptId ] + for( Path p : ScriptMeta.allScriptNames().values() ){ + keys << CacheHelper.hasher(p.text).hash().toString() + } + return CacheHelper.hasher(keys).hash() + } /** * Initialize the session workDir, libDir, baseDir and scriptName variables */ Session init( ScriptFile scriptFile, List args=null ) { + if(cidEnabled) { + this.executionHash = generateExecutionHash(scriptFile) + } if(!workDir.mkdirs()) throw new AbortOperationException("Cannot create work-dir: $workDir -- Make sure you have write permissions or specify a different directory by using the `-w` command line option") log.debug "Work-dir: ${workDir.toUriString()} [${FileHelper.getPathFsType(workDir)}]" @@ -439,7 +468,6 @@ class Session implements ISession { binding.setArgs( new ScriptRunner.ArgsList(args) ) cache = CacheFactory.create(uniqueId,runName).open() - return this } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 348c9ad1d5..0e11526754 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -17,6 +17,11 @@ package nextflow.data.cid +import com.google.common.hash.HashCode +import nextflow.data.cid.model.Workflow +import 
nextflow.data.cid.model.WorkflowRun +import nextflow.file.FileHelper + import java.nio.file.Files import java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes @@ -25,7 +30,7 @@ import groovy.json.JsonOutput import groovy.transform.CompileStatic import nextflow.Session import nextflow.data.cid.model.DataType -import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.Output import nextflow.data.config.DataConfig import nextflow.processor.TaskHandler import nextflow.processor.TaskRun @@ -42,19 +47,43 @@ import nextflow.util.CacheHelper class CidObserver implements TraceObserver { private CidStore store + private Session session @Override void onFlowCreate(Session session) { + this.session = session store = new DefaultCidStore() store.open(DataConfig.create(session)) } + void onFlowBegin() { + storeWorkflowRun() + } + + protected void storeWorkflowRun() { + final workflow = new Workflow( + DataType.Workflow, + session.workflowMetadata.scriptFile.toString(), + session.workflowMetadata.scriptId.toString(), + session.workflowMetadata.repository, + session.workflowMetadata.commitId + ) + final value = new WorkflowRun( + DataType.WorkflowRun, + workflow, + session.uniqueId.toString(), + session.runName, + session.params + ) + final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) + store.save("${session.executionHash}/.data.json", content) + } @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { storeTaskInfo(handler.task) } - void storeTaskInfo(TaskRun task) { + protected void storeTaskInfo(TaskRun task) { // store the task run entry storeTaskRun(task) // store all task outputs files @@ -76,7 +105,9 @@ class CidObserver implements TraceObserver { DataType.Task, task.id.value, task.getName(), - task.hash.toString() ) + task.hash.toString(), + convertToReferences(task.inputFilesMap) + ) // store in the underlying persistence final key = "${value.hash}/.data.json" store.save(key, 
JsonOutput.prettyPrint(JsonOutput.toJson(value))) @@ -86,14 +117,13 @@ class CidObserver implements TraceObserver { final attrs = readAttributes(path) final rel = task.workDir.relativize(path).toString() final cid = "${task.hash}/${rel}" - final uri = "cid://${cid}" final key = "${cid}/.data.json" final hash = CacheHelper.hasher(path).hash().toString() - final value = new TaskOutput( + final value = new Output( DataType.Output, - uri, - path.toUriString(), + path.toString(), hash, + "cid://$task.hash", attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) @@ -104,4 +134,55 @@ class CidObserver implements TraceObserver { protected BasicFileAttributes readAttributes(Path path) { Files.readAttributes(path, BasicFileAttributes) } + + @Override + void onFilePublish(Path destination, Path source){ + final hash = CacheHelper.hasher(destination).hash().toString() + final rel = session.outputDir.relativize(destination).toString() + final key = "${rel}/.data.json" + final sourceReference = getSourceReference(source) + final attrs = readAttributes(destination) + final value = new Output( + DataType.Output, + destination.toString(), + hash, + sourceReference, + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis() ) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } + + String getSourceReference(Path source){ + final hash = FileHelper.getTaskHashFromPath(source, session.workDir) + if (hash) { + final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() + return "cid://$hash/$target" + } + return null + } + + @Override + void onFilePublish(Path destination){ + final hash = CacheHelper.hasher(destination).hash().toString() + final rel = session.outputDir.relativize(destination).toString() + final attrs = readAttributes(destination) + final value = new Output( + DataType.Output, + destination.toString(), + hash, + session.executionHash, + attrs.size(), + 
attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis() ) + store.save(rel, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } + + protected Map convertToReferences(Map inputs) { + Map references = new HashMap() + inputs.each { name, path -> + final ref = getSourceReference(path) + references.put(name, ref ? ref : path.toString())} + return references + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy index 6591e67b0c..67017f0bf0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy @@ -17,6 +17,7 @@ package nextflow.data.cid +import java.nio.file.Path import java.util.function.Consumer import groovy.transform.CompileStatic @@ -36,4 +37,7 @@ interface CidStore { Object load(String key) + Path getPath() + + } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index ae6faaeceb..1e7c22cec8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -64,4 +64,8 @@ class DefaultCidStore implements CidStore { location.resolve(key).text } + @Override + Path getPath(){ location } + + } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy index 23cfc19d03..ccacbb145d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy @@ -22,5 +22,5 @@ package nextflow.data.cid.model * @author Paolo Di Tommaso */ enum DataType { - Task, Workflow, Output + Task, Workflow, WorkflowRun, Output } diff --git 
a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy similarity index 94% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy rename to modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy index 6467d36c6f..738f843cc6 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy @@ -26,11 +26,11 @@ import groovy.transform.CompileStatic */ @Canonical @CompileStatic -class TaskOutput { +class Output { DataType type - String uri - String realPath + String path String hash + String source long size long createdAt long modifiedAt diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy index 2b91df426c..fea557ee08 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -31,5 +31,6 @@ class TaskRun { int id String name String hash + Map inputs List annotations } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy new file mode 100644 index 0000000000..a52ddbd814 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy @@ -0,0 +1,36 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + + +/** + * + * @author Jorge Ejarque = 2) { + final bucket = relativePath.getName(0).toString() + if (bucket.size() == 2) { + final strHash = bucket + relativePath.getName(1).toString() + try { + return HashCode.fromString(strHash) + } catch (Throwable e) { + log.debug("String '${strHash}' is not a valid hash", e) + } + } + } + } + return null + } } From fddc5f77b35d358ce0bfd72a7bce1a119c44993b Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 12 Feb 2025 16:58:04 +0100 Subject: [PATCH 02/15] fix tests Signed-off-by: jorgee --- .../nextflow/data/cid/CidObserver.groovy | 2 +- .../nextflow/processor/PublishDir.groovy | 2 +- .../nextflow/data/cid/CidObserverTest.groovy | 31 ++++++++++++------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 0e11526754..ef1757e2ca 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -106,7 +106,7 @@ class CidObserver implements TraceObserver { task.id.value, task.getName(), task.hash.toString(), - convertToReferences(task.inputFilesMap) + task.inputFilesMap ? 
convertToReferences(task.inputFilesMap): null ) // store in the underlying persistence final key = "${value.hash}/.data.json" diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index fc82cb80e0..e36fa051b1 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -146,7 +146,7 @@ class PublishDir { final resolved = value instanceof Closure ? value.call() : value if( resolved instanceof String || resolved instanceof GString ) nullPathWarn = checkNull(resolved.toString()) - if( session.cidEnabled ){ + if( session?.cidEnabled ){ final resolvedPath = FileHelper.toPath(resolved) if (resolvedPath.isAbsolute()){ log.warn("CID store is enabled but publish dir is set to an absolute path ($resolvedPath). Outputs in this path will not published in the CID store") diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index fe06bb0bb7..233520c96a 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -17,6 +17,9 @@ package nextflow.data.cid +import groovy.json.JsonOutput +import nextflow.util.CacheHelper + import java.nio.file.Files import java.nio.file.attribute.BasicFileAttributes import java.nio.file.attribute.FileTime @@ -36,7 +39,7 @@ class CidObserverTest extends Specification { def 'should save task run' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [cid:[store:[location:folder.toString()]]] def session = Mock(Session) { getConfig()>>config } def observer = new CidObserver() observer.onFlowCreate(session) @@ -51,7 +54,7 @@ class CidObserverTest 
extends Specification { when: observer.storeTaskRun(task) then: - folder.resolve(hash.toString()).text == '{"id":100,"name":"foo","hash":"15cd5b07","annotations":null}' + folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint('{"type":"Task","id":100,"name":"foo","hash":"15cd5b07","inputs": null,"annotations":null}') cleanup: folder?.deleteDir() @@ -60,7 +63,7 @@ class CidObserverTest extends Specification { def 'should save task output' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [cid:[store:[location:folder.toString()]]] def session = Mock(Session) { getConfig()>>config } def observer = Spy(new CidObserver()) observer.onFlowCreate(session) @@ -71,6 +74,7 @@ class CidObserverTest extends Specification { def outFile = workDir.resolve('foo/bar/file.bam') Files.createDirectories(outFile.parent) outFile.text = 'some data' + def fileHash = CacheHelper.hasher(outFile).hash().toString() and: def hash = HashCode.fromInt(123456789) and: @@ -81,21 +85,24 @@ class CidObserverTest extends Specification { getWorkDir() >> workDir } and: - def ts1 = Instant.ofEpochMilli(1737914400) - def ts2 = Instant.ofEpochMilli(1737914500) - def attrs = Mock(BasicFileAttributes) { - size() >> 100 - creationTime() >> FileTime.from(ts1) - lastModifiedTime() >> FileTime.from(ts2) - } + def attrs = Files.readAttributes(outFile, BasicFileAttributes) + def expectedString = '{"type":"Output",' + + '"path":"' + outFile.toString() + '",' + + '"hash":"'+ fileHash + '",' + + '"source":"cid://15cd5b07",' + + '"size":'+attrs.size() + ',' + + '"createdAt":' + attrs.creationTime().toMillis() + ',' + + '"modifiedAt":'+ attrs.lastModifiedTime().toMillis() + ',' + + '"annotations":null}' + and: observer.readAttributes(outFile) >> attrs when: observer.storeTaskOutput(task, outFile) then: - folder.resolve("${hash}/foo/bar/file.bam").text - == 
'{"uri":"cid://15cd5b07/foo/bar/file.bam","size":100,"createdAt":1737914400,"modifiedAt":1737914500,"annotations":null}' + folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text + == JsonOutput.prettyPrint(expectedString) cleanup: folder?.deleteDir() From fe780a854148210ef519da565110fa5d71fbfef3 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 12 Feb 2025 19:34:46 +0100 Subject: [PATCH 03/15] fix tests Signed-off-by: jorgee --- modules/nf-commons/src/main/nextflow/file/FileHelper.groovy | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy index db577b6e08..430222ade5 100644 --- a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy @@ -256,6 +256,9 @@ class FileHelper { } static Path toCanonicalPath(value) { + if( value==null ) + return null + Path result = toPath(value) if( result.fileSystem != FileSystems.default ) { From f9f7ed221777811051e9cefec1c48577ef997546 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 14 Feb 2025 14:16:14 +0100 Subject: [PATCH 04/15] first M1 updates Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 14 ++ .../main/groovy/nextflow/cli/CmdCid.groovy | 166 +++++++++++++++++- .../main/groovy/nextflow/cli/CmdLog.groovy | 1 + .../nextflow/data/cid/CidObserver.groovy | 25 +-- .../nextflow/data/cid/DefaultCidStore.groovy | 16 +- .../nextflow/data/cid/model/TaskRun.groovy | 1 - .../nextflow/data/cid/model/Workflow.groovy | 4 +- .../nextflow/processor/PublishDir.groovy | 15 +- .../nextflow/script/ScriptRunner.groovy | 3 +- .../groovy/nextflow/util/HistoryFile.groovy | 46 ++++- .../nextflow/dag/mermaid.dag.template.html | 2 +- 11 files changed, 250 insertions(+), 43 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 
ca846bfcd8..78a7e9293b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -16,6 +16,9 @@ package nextflow +import nextflow.data.cid.CidStore +import nextflow.data.cid.DefaultCidStore +import nextflow.data.config.DataConfig import nextflow.util.CacheHelper import java.nio.file.Files @@ -262,6 +265,10 @@ class Session implements ISession { private HashCode executionHash + private CidStore cidStore + + CidStore getCidStore() { cidStore } + String getExecutionHash() { executionHash } private WorkflowMetadata workflowMetadata @@ -405,6 +412,8 @@ class Session implements ISession { if (config.cid) { this.cidEnabled = true + this.cidStore = new DefaultCidStore() + this.cidStore.open(DataConfig.create(this)) } } @@ -439,6 +448,11 @@ class Session implements ISession { if(cidEnabled) { this.executionHash = generateExecutionHash(scriptFile) + this.outputDir = cidStore.getPath().resolve(executionHash.toString()) + log.warn("CID store enabled. 
Defined output directory will be ignored and set to ${outputDir}.") + if( !HistoryFile.disabled() && HistoryFile.DEFAULT.exists() ) { + HistoryFile.DEFAULT.updateCidHash(runName,executionHash.toString()) + } } if(!workDir.mkdirs()) throw new AbortOperationException("Cannot create work-dir: $workDir -- Make sure you have write permissions or specify a different directory by using the `-w` command line option") log.debug "Work-dir: ${workDir.toUriString()} [${FileHelper.getPathFsType(workDir)}]" diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index d6d242fd6d..72d92bd0a5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -18,10 +18,24 @@ package nextflow.cli import com.beust.jcommander.Parameter +import groovy.json.JsonSlurper +import groovy.transform.Canonical import groovy.transform.CompileStatic +import nextflow.Session +import nextflow.config.ConfigBuilder +import nextflow.dag.MermaidHtmlRenderer +import nextflow.data.cid.CidStore +import nextflow.data.cid.DefaultCidStore +import nextflow.data.cid.model.DataType +import nextflow.data.config.DataConfig import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins +import java.nio.file.Path +import java.nio.file.Paths + +import static nextflow.data.cid.CidObserver.* + /** * * @author Paolo Di Tommaso @@ -33,13 +47,15 @@ class CmdCid extends CmdBase { interface SubCmd { String getName() - void apply(List result) - void usage(List result) + void apply(List args) + void usage() } private List commands = new ArrayList<>() CmdCid() { + commands << new CmdShow() + commands << new CmdLineage() } @@ -75,4 +91,150 @@ class CmdCid extends CmdBase { msg += " -- Did you mean one of these?\n" + matches.collect { " $it"}.join('\n') throw new AbortOperationException(msg) } + + class CmdShow implements SubCmd{ + + @Override + String 
getName() { + return 'show' + } + + @Override + void apply(List args) { + if (args.size() != 1) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + final config = new ConfigBuilder() + .setOptions(getLauncher().getOptions()) + .setBaseDir(Paths.get('.')) + .build() + final session = new Session(config) + final store = session.cidStore + println store.load("${args[0]}/$METADATA_FILE").toString() + } + + @Override + void usage() { + println 'Usage: nextflow cid show ' + } + } + + + class CmdLineage implements SubCmd { + + @Canonical + class Edge { + String source + String destination + String label + } + + @Override + String getName() { 'lineage' } + + @Override + void apply(List args) { + if (args.size() != 2) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + try { + final config = new ConfigBuilder() + .setOptions(getLauncher().getOptions()) + .setBaseDir(Paths.get('.')) + .build() + final session = new Session(config) + final store = session.cidStore + final template = readTemplate() + final network = getLineage(store, args[0]) + Path file = Path.of(args[1]) + file.text = template.replace('REPLACE_WITH_NETWORK_DATA', network) + println("Linage graph for ${args[0]} rendered in ${args[1]}") + } catch (Throwable e) { + println("ERROR: rendering lineage graph. 
${e.getLocalizedMessage()}") + } + } + + private String getLineage(CidStore store, String dataCid) { + def lines = [] as List + lines << "flowchart BT".toString() + + final nodesToRender = new LinkedList() + nodesToRender.add(dataCid) + final edgesToRender = new LinkedList() + while (!nodesToRender.isEmpty()) { + final node = nodesToRender.removeFirst() + processNode(lines, node, nodesToRender, edgesToRender, store) + } + lines << "" + edgesToRender.each { lines << " ${it.source} -->${it.destination}".toString() } + lines << "" + return lines.join('\n') + } + + private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { + final slurper = new JsonSlurper() + final cidObject = slurper.parse(store.load("$nodeToRender/$METADATA_FILE").toString().toCharArray()) as Map + switch (DataType.valueOf(cidObject.type as String)) { + case DataType.Output: + lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); + final source = cidObject.source as String + if (source) { + if (source.startsWith(CID_PROT)) { + final cid = source.substring(CID_PROT.size()) + nodes.add(cid) + edges.add(new Edge(cid, nodeToRender)) + } else { + lines << " ${source}@{shape: document, label: \"${source}\"}".toString(); + edges.add(new Edge(source, nodeToRender)) + } + } + + break; + case DataType.WorkflowRun: + lines << "${nodeToRender}@{shape: processes, label: \"${cidObject.runName}\"}".toString() + final parameters = cidObject.params as Map + parameters.values().each { + lines << " ${it}@{shape: document, label: \"${it}\"}".toString(); + edges.add(new Edge(it.toString(), nodeToRender)) + } + break; + case DataType.Task: + lines << " ${nodeToRender}@{shape: process, label: \"${cidObject.name}\"}".toString() + final parameters = cidObject.inputs as Map + parameters.values().each { String source -> + if (source.startsWith(CID_PROT)) { + final cid = source.substring(CID_PROT.size()) + nodes.add(cid) + edges.add(new 
Edge(cid, nodeToRender)) + } else { + lines << " ${source}@{shape: document, label: \"${source}\"}".toString(); + edges.add(new Edge(source, nodeToRender)) + } + } + break; + default: + throw new Exception("Unrecognized type reference ${cidObject.type}") + } + } + + private String readTemplate() { + final writer = new StringWriter() + final res = MermaidHtmlRenderer.class.getResourceAsStream('mermaid.dag.template.html') + int ch + while( (ch=res.read()) != -1 ) { + writer.append(ch as char) + } + writer.toString() + } + + @Override + void usage() { + println 'Usage: nextflow cid lineage ' + } + + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy index 66d88980d1..0c6d4356f3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy @@ -203,6 +203,7 @@ class CmdLog extends CmdBase implements CacheBase { .head('STATUS') .head('REVISION ID') .head('SESSION ID') + .head('CID HASH') .head('COMMAND') history.eachRow { List row -> diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index ef1757e2ca..cf739b96a1 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -21,6 +21,7 @@ import com.google.common.hash.HashCode import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowRun import nextflow.file.FileHelper +import nextflow.script.ScriptMeta import java.nio.file.Files import java.nio.file.Path @@ -31,7 +32,6 @@ import groovy.transform.CompileStatic import nextflow.Session import nextflow.data.cid.model.DataType import nextflow.data.cid.model.Output -import nextflow.data.config.DataConfig import nextflow.processor.TaskHandler import nextflow.processor.TaskRun import 
nextflow.script.params.FileOutParam @@ -45,15 +45,15 @@ import nextflow.util.CacheHelper */ @CompileStatic class CidObserver implements TraceObserver { - + public static final String METADATA_FILE = '.data.json' + public static final String CID_PROT = 'cid://' private CidStore store private Session session @Override void onFlowCreate(Session session) { this.session = session - store = new DefaultCidStore() - store.open(DataConfig.create(session)) + this.store = session.cidStore } void onFlowBegin() { @@ -64,7 +64,7 @@ class CidObserver implements TraceObserver { final workflow = new Workflow( DataType.Workflow, session.workflowMetadata.scriptFile.toString(), - session.workflowMetadata.scriptId.toString(), + ScriptMeta.allScriptNames().values().collect { it.toString()}, session.workflowMetadata.repository, session.workflowMetadata.commitId ) @@ -76,7 +76,7 @@ class CidObserver implements TraceObserver { session.params ) final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) - store.save("${session.executionHash}/.data.json", content) + store.save("${session.executionHash}/$METADATA_FILE", content) } @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { @@ -109,7 +109,7 @@ class CidObserver implements TraceObserver { task.inputFilesMap ? 
convertToReferences(task.inputFilesMap): null ) // store in the underlying persistence - final key = "${value.hash}/.data.json" + final key = "${value.hash}/$METADATA_FILE" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } @@ -117,13 +117,13 @@ class CidObserver implements TraceObserver { final attrs = readAttributes(path) final rel = task.workDir.relativize(path).toString() final cid = "${task.hash}/${rel}" - final key = "${cid}/.data.json" + final key = "${cid}/$METADATA_FILE" final hash = CacheHelper.hasher(path).hash().toString() final value = new Output( DataType.Output, path.toString(), hash, - "cid://$task.hash", + "$CID_PROT$task.hash", attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) @@ -139,7 +139,7 @@ class CidObserver implements TraceObserver { void onFilePublish(Path destination, Path source){ final hash = CacheHelper.hasher(destination).hash().toString() final rel = session.outputDir.relativize(destination).toString() - final key = "${rel}/.data.json" + final key = "$session.executionHash/${rel}/$METADATA_FILE" final sourceReference = getSourceReference(source) final attrs = readAttributes(destination) final value = new Output( @@ -157,7 +157,7 @@ class CidObserver implements TraceObserver { final hash = FileHelper.getTaskHashFromPath(source, session.workDir) if (hash) { final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() - return "cid://$hash/$target" + return "$CID_PROT$hash/$target" } return null } @@ -166,6 +166,7 @@ class CidObserver implements TraceObserver { void onFilePublish(Path destination){ final hash = CacheHelper.hasher(destination).hash().toString() final rel = session.outputDir.relativize(destination).toString() + final key = "$session.executionHash/${rel}/$METADATA_FILE" final attrs = readAttributes(destination) final value = new Output( DataType.Output, @@ -175,7 +176,7 @@ class CidObserver implements TraceObserver { attrs.size(), 
attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) - store.save(rel, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } protected Map convertToReferences(Map inputs) { diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index 1e7c22cec8..958cc4ef49 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -34,18 +34,20 @@ import nextflow.exception.AbortOperationException @CompileStatic class DefaultCidStore implements CidStore { + private Path metaLocation private Path location void open(DataConfig config) { - location = config.store.location.resolve('.meta') - if( !Files.exists(location) && !Files.createDirectories(location) ) { - throw new AbortOperationException("Unable to create CID store directory: $location") + location = config.store.location + metaLocation = location.resolve('.meta') + if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { + throw new AbortOperationException("Unable to create CID store directory: $metaLocation") } } @Override void save(String key, Object value) { - final path = location.resolve(key) + final path = metaLocation.resolve(key) Files.createDirectories(path.parent) log.debug "Save CID file path: $path" path.text = value @@ -53,15 +55,15 @@ class DefaultCidStore implements CidStore { @Override void list(String key, Consumer consumer) { - for( Path it : Files.walk(location.resolve(key)) ) { - final fileKey = location.relativize(it).toString() + for( Path it : Files.walk(metaLocation.resolve(key)) ) { + final fileKey = metaLocation.relativize(it).toString() consumer.accept(fileKey) } } @Override Object load(String key) { - location.resolve(key).text + metaLocation.resolve(key).text } @Override diff --git 
a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy index fea557ee08..22318cdeda 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -30,7 +30,6 @@ class TaskRun { DataType type int id String name - String hash Map inputs List annotations } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy index a52ddbd814..643af9ec7e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy @@ -29,8 +29,8 @@ import groovy.transform.CompileStatic @CompileStatic class Workflow { DataType type - String scriptFile - String scriptId + String mainScriptFile + List otherScriptFiles String repository String commitId } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index e36fa051b1..fce784b543 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -149,11 +149,11 @@ class PublishDir { if( session?.cidEnabled ){ final resolvedPath = FileHelper.toPath(resolved) if (resolvedPath.isAbsolute()){ - log.warn("CID store is enabled but publish dir is set to an absolute path ($resolvedPath). Outputs in this path will not published in the CID store") + log.warn("CID store is enabled but 'publishDir' is set to an absolute path ($resolvedPath). 
Outputs in this path will not published in the CID store") this.path = FileHelper.toCanonicalPath(resolved) } else{ - this.path = session.outputDir.resolve(session.executionHash).resolve(resolvedPath) + this.path = session.outputDir.resolve(resolvedPath) } } else { @@ -385,17 +385,6 @@ class PublishDir { throw new IllegalArgumentException("Not a valid publish target path: `$target` [${target?.class?.name}]") } - private Path resolveRelative(String target){ - //If comes from a task - if (session.cidEnabled && sourceDir && sourceDir.startsWith(session.workDir)){ - log.debug("Must add taskhash") - String taskHash = FileHelper.getTaskHashFromPath(sourceDir, session.workDir) - if( taskHash ) - return path.resolve(Path.of(taskHash, target.toString())) - } - return path.resolve(target) - } - protected void safeProcessFile(Path source, Path target) { try { retryableProcessFile(source, target) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy index 498d50f41f..1ac700e44f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy @@ -284,7 +284,8 @@ class ScriptRunner { } def revisionId = scriptFile.commitId ?: scriptFile.scriptId - HistoryFile.DEFAULT.write( name, session.uniqueId, revisionId, cli ) + def executionHash = session.executionHash ?: '-' + HistoryFile.DEFAULT.write( name, session.uniqueId, revisionId, executionHash, cli ) } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy index 15d5cb83ca..92a8e93cbe 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy @@ -61,14 +61,14 @@ class HistoryFile extends File { super(file.toString()) } - void write( String name, UUID key, String 
revisionId, args, Date date = null ) { + void write( String name, UUID key, String revisionId, String cidHash, args, Date date = null ) { assert key assert args != null withFileLock { def timestamp = date ?: new Date() def value = args instanceof Collection ? args.join(' ') : args - this << new Record(timestamp: timestamp, runName: name, revisionId: revisionId, sessionId: key, command: value).toString() << '\n' + this << new Record(timestamp: timestamp, runName: name, revisionId: revisionId, sessionId: key, cidHash: cidHash, command: value).toString() << '\n' } } @@ -350,6 +350,41 @@ class HistoryFile extends File { } + void updateCidHash(String name, String hashCode) { + assert name + assert hashCode + try { + withFileLock {updateCidHash0(name, hashCode) } + } + catch( Throwable e ) { + log.warn "Can't update history file: $this",e + } + } + + private void updateCidHash0(String name, String hashCode){ + def newHistory = new StringBuilder() + + this.readLines().each { line -> + try { + def current = line ? 
Record.parse(line) : null + if( current?.runName == name ) { + current.cidHash = hashCode + newHistory << current.toString() << '\n' + } + else { + newHistory << line << '\n' + } + } + catch( IllegalArgumentException e ) { + log.warn("Can't read history file: $this", e) + } + } + + // rewrite the history content + this.setText(newHistory.toString()) + } + + @EqualsAndHashCode(includes = 'runName,sessionId') static class Record { Date timestamp @@ -358,6 +393,7 @@ class HistoryFile extends File { String status String revisionId UUID sessionId + String cidHash String command Record(String sessionId, String name=null) { @@ -380,6 +416,7 @@ class HistoryFile extends File { line << (status ?: '-') line << (revisionId ?: '-') line << (sessionId.toString()) + line << (cidHash ?: '-') line << (command ?: '-') } @@ -393,7 +430,7 @@ class HistoryFile extends File { if( cols.size() == 2 ) return new Record(cols[0]) - if( cols.size()==7 ) { + if( cols.size()== 8 ) { return new Record( timestamp: TIMESTAMP_FMT.parse(cols[0]), @@ -402,7 +439,8 @@ class HistoryFile extends File { status: cols[3] && cols[3] != '-' ? 
cols[3] : null, revisionId: cols[4], sessionId: UUID.fromString(cols[5]), - command: cols[6] + cidHash: cols[6], + command: cols[7] ) } diff --git a/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html b/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html index 0ab1d9475e..ebbf8e834a 100644 --- a/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html +++ b/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html @@ -36,7 +36,7 @@ REPLACE_WITH_NETWORK_DATA From 0c2492e5b742a88eb87a11995cd634e5a79f74d1 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 14 Feb 2025 14:57:15 +0100 Subject: [PATCH 05/15] fix tests Signed-off-by: jorgee --- .../nextflow/data/cid/CidObserver.groovy | 3 +- .../groovy/nextflow/cli/CmdLogTest.groovy | 4 +- .../nextflow/data/cid/CidObserverTest.groovy | 17 ++++- .../nextflow/util/HistoryFileTest.groovy | 76 ++++++++++++------- 4 files changed, 67 insertions(+), 33 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index cf739b96a1..a313e925e9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -105,11 +105,10 @@ class CidObserver implements TraceObserver { DataType.Task, task.id.value, task.getName(), - task.hash.toString(), task.inputFilesMap ? 
convertToReferences(task.inputFilesMap): null ) // store in the underlying persistence - final key = "${value.hash}/$METADATA_FILE" + final key = "${task.hash}/$METADATA_FILE" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy index 8deff84359..56aced51b9 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy @@ -97,7 +97,7 @@ class CmdLogTest extends Specification { cache.close() def history = new HistoryFile(folder.resolve(HistoryFile.defaultFileName())) - history.write(runName,uuid,'b3d3aca8eb','run') + history.write(runName,uuid,'b3d3aca8eb','-','run') when: def log = new CmdLog(basePath: folder, args: [runName]) @@ -167,7 +167,7 @@ class CmdLogTest extends Specification { cache.close() def history = new HistoryFile(folder.resolve(HistoryFile.defaultFileName())) - history.write(runName,uuid,'b3d3aca8eb','run') + history.write(runName,uuid,'b3d3aca8eb','-','run') when: diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index 233520c96a..80c3295f5d 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -18,6 +18,7 @@ package nextflow.data.cid import groovy.json.JsonOutput +import nextflow.data.config.DataConfig import nextflow.util.CacheHelper import java.nio.file.Files @@ -40,7 +41,12 @@ class CidObserverTest extends Specification { given: def folder = Files.createTempDirectory('test') def config = [cid:[store:[location:folder.toString()]]] - def session = Mock(Session) { getConfig()>>config } + def store = new DefaultCidStore(); + def session = Mock(Session) { + getConfig()>>config + 
getCidStore()>>store + } + store.open(DataConfig.create(session)) def observer = new CidObserver() observer.onFlowCreate(session) and: @@ -54,7 +60,7 @@ class CidObserverTest extends Specification { when: observer.storeTaskRun(task) then: - folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint('{"type":"Task","id":100,"name":"foo","hash":"15cd5b07","inputs": null,"annotations":null}') + folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint('{"type":"Task","id":100,"name":"foo","inputs": null,"annotations":null}') cleanup: folder?.deleteDir() @@ -64,7 +70,12 @@ class CidObserverTest extends Specification { given: def folder = Files.createTempDirectory('test') def config = [cid:[store:[location:folder.toString()]]] - def session = Mock(Session) { getConfig()>>config } + def store = new DefaultCidStore(); + def session = Mock(Session) { + getConfig()>>config + getCidStore()>>store + } + store.open(DataConfig.create(session)) def observer = Spy(new CidObserver()) observer.onFlowCreate(session) and: diff --git a/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy b/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy index 4233f744b1..c867304d70 100644 --- a/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy @@ -32,10 +32,10 @@ class HistoryFileTest extends Specification { b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sample.fa b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sample.fa -resume 58d8dd16-ce77-4507-ba1a-ec1ccc9bd2e8\tnextflow run examples/basic.nf --in data/sample.fa -2016-07-24 16:43:16\t-\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 
16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t-\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' def 'should support custom base dir' () { @@ -66,9 +66,9 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa def d1 = new Date(now - 50_000) def d2 = new Date(now - 30_000) def d3 = new Date(now - 10_000) - history.write( 'hello_world', id1, 'abc', [1,2,3], d1 ) - history.write( 'super_star', id2, '123', [1,2,3], d2 ) - history.write( 'slow_food', id3, 'xyz', [1,2,3], d3 ) + history.write( 'hello_world', id1, 'abc', '-', [1,2,3], d1 ) + history.write( 'super_star', id2, '123', '-', [1,2,3], d2 ) + history.write( 'slow_food', id3, 'xyz', '-', [1,2,3], d3 ) then: history.getLast() == new HistoryRecord(sessionId: id3, runName: 'slow_food', timestamp: d3, command: '1 2 3') @@ -243,9 +243,9 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa then: history.text == ''' 58d8dd16-ce77-4507-ba1a-ec1ccc9bd2e8\tnextflow run examples/basic.nf --in data/sample.fa - 2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello - 2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume - 2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello + 
2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello + 2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume + 2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' .stripIndent() } @@ -306,14 +306,38 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.findAllRunNames() == ['evil_pike', 'gigantic_keller', 'small_cirum', 'modest_bartik'] as Set } + def 'should update cid hash ' () { + given: + def source = ''' +2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +''' + def file = Files.createTempFile('test',null) + file.deleteOnExit() + file.text = source + def history = new HistoryFile(file) + + + when: + history.updateCidHash('evil_pike','cid_hash') + then: + history.text == ''' +2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\tcid_hash\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +''' + } def 'should update the history entries ' () { given: def source = ''' -2016-07-24 
16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' def file = Files.createTempFile('test',null) file.deleteOnExit() @@ -326,10 +350,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('evil_pike',true,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 
09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' when: @@ -337,10 +361,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('small_cirum',false,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' when: @@ -348,10 +372,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('gigantic_keller',true,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t16s\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow 
run hello +2016-07-24 16:43:34\t16s\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' } From 41ac817f87ded4dd6e2fcf2f7734dafc7cbd82a5 Mon Sep 17 00:00:00 2001 From: jorgee Date: Mon, 17 Feb 2025 18:15:43 +0100 Subject: [PATCH 06/15] update descriptions Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdCid.groovy | 9 +- .../nextflow/data/cid/CidObserver.groovy | 58 +++++++++--- .../nextflow/data/cid/model/DataType.groovy | 2 +- .../nextflow/data/cid/model/Output.groovy | 2 +- .../nextflow/data/cid/model/TaskRun.groovy | 11 ++- .../data/cid/model/WorkflowRun.groovy | 2 +- .../nextflow/processor/TaskProcessor.groovy | 4 +- .../groovy/nextflow/processor/TaskRun.groovy | 4 + .../nextflow/util/PathNormalizer.groovy | 93 +++++++++++++++++++ 9 files changed, 159 insertions(+), 26 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index 72d92bd0a5..a27bfdfec7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -179,7 +179,8 @@ class CmdCid extends CmdBase { final slurper = new JsonSlurper() final cidObject = slurper.parse(store.load("$nodeToRender/$METADATA_FILE").toString().toCharArray()) as Map switch (DataType.valueOf(cidObject.type as String)) { - case DataType.Output: + case DataType.TaskOutput: + case DataType.WorkflowOutput: lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); final source = cidObject.source as String if (source) { @@ -202,10 +203,10 @@ class CmdCid 
extends CmdBase { edges.add(new Edge(it.toString(), nodeToRender)) } break; - case DataType.Task: + case DataType.TaskRun: lines << " ${nodeToRender}@{shape: process, label: \"${cidObject.name}\"}".toString() - final parameters = cidObject.inputs as Map - parameters.values().each { String source -> + final parameters = cidObject.inputs as List + parameters.each { String source -> if (source.startsWith(CID_PROT)) { final cid = source.substring(CID_PROT.size()) nodes.add(cid) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index a313e925e9..f59b582f60 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -17,11 +17,12 @@ package nextflow.data.cid -import com.google.common.hash.HashCode +import groovy.util.logging.Slf4j import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowRun import nextflow.file.FileHelper import nextflow.script.ScriptMeta +import nextflow.util.PathNormalizer import java.nio.file.Files import java.nio.file.Path @@ -43,6 +44,7 @@ import nextflow.util.CacheHelper * * @author Paolo Di Tommaso */ +@Slf4j @CompileStatic class CidObserver implements TraceObserver { public static final String METADATA_FILE = '.data.json' @@ -61,10 +63,12 @@ class CidObserver implements TraceObserver { } protected void storeWorkflowRun() { + final normalizer = new PathNormalizer(session.workflowMetadata) + final mainScript = normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()) final workflow = new Workflow( DataType.Workflow, - session.workflowMetadata.scriptFile.toString(), - ScriptMeta.allScriptNames().values().collect { it.toString()}, + mainScript, + ScriptMeta.allScriptNames().values().collect {normalizer.normalizePath(it.normalize())}, session.workflowMetadata.repository, session.workflowMetadata.commitId ) @@ 
-73,19 +77,36 @@ class CidObserver implements TraceObserver { workflow, session.uniqueId.toString(), session.runName, - session.params + getNormalizedParams(session.params, normalizer) ) final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) store.save("${session.executionHash}/$METADATA_FILE", content) } + + private static Map getNormalizedParams(Map params, PathNormalizer normalizer){ + final normalizedParams = new HashMap() + params.each{String key, Object value -> + log.debug("Managing parameter $key , class ${value.class}") + if (value instanceof Path) + normalizedParams.put(key,normalizer.normalizePath(value as Path)) + else if (value instanceof String || value instanceof GString) + normalizedParams.put(key,normalizer.normalizePath(value.toString())) + else + normalizedParams.put(key, value) + } + return normalizedParams + } + + @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { storeTaskInfo(handler.task) } protected void storeTaskInfo(TaskRun task) { + final pathNormalizer = new PathNormalizer(session.workflowMetadata) // store the task run entry - storeTaskRun(task) + storeTaskRun(task, pathNormalizer) // store all task outputs files final outputs = task.getOutputsByType(FileOutParam) for( Map.Entry entry : outputs ) { @@ -100,12 +121,19 @@ class CidObserver implements TraceObserver { } } - protected void storeTaskRun(TaskRun task) { + protected void storeTaskRun(TaskRun task, PathNormalizer normalizer) { final value = new nextflow.data.cid.model.TaskRun( - DataType.Task, - task.id.value, + DataType.TaskRun, + session.uniqueId.toString(), task.getName(), - task.inputFilesMap ? convertToReferences(task.inputFilesMap): null + session.stubRun ? task.stubSource: task.source, + task.inputFilesMap ? convertToReferences(task.inputFilesMap, normalizer): null, + task.isContainerEnabled() ? 
task.getContainerFingerprint(): null, + normalizer.normalizePath(task.getCondaEnv()), + normalizer.normalizePath(task.getSpackEnv()), + task.config?.getArchitecture()?.toString(), + task.processor.getTaskGlobalVars(task), + task.processor.getTaskBinEntries(task.source).collect { Path p -> normalizer.normalizePath(p.normalize()) } ) // store in the underlying persistence final key = "${task.hash}/$METADATA_FILE" @@ -119,7 +147,7 @@ class CidObserver implements TraceObserver { final key = "${cid}/$METADATA_FILE" final hash = CacheHelper.hasher(path).hash().toString() final value = new Output( - DataType.Output, + DataType.TaskOutput, path.toString(), hash, "$CID_PROT$task.hash", @@ -142,7 +170,7 @@ class CidObserver implements TraceObserver { final sourceReference = getSourceReference(source) final attrs = readAttributes(destination) final value = new Output( - DataType.Output, + DataType.WorkflowOutput, destination.toString(), hash, sourceReference, @@ -168,7 +196,7 @@ class CidObserver implements TraceObserver { final key = "$session.executionHash/${rel}/$METADATA_FILE" final attrs = readAttributes(destination) final value = new Output( - DataType.Output, + DataType.WorkflowOutput, destination.toString(), hash, session.executionHash, @@ -178,11 +206,11 @@ class CidObserver implements TraceObserver { store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } - protected Map convertToReferences(Map inputs) { - Map references = new HashMap() + protected List convertToReferences(Map inputs, PathNormalizer normalizer) { + List references = new LinkedList() inputs.each { name, path -> final ref = getSourceReference(path) - references.put(name, ref ? ref : path.toString())} + references.add(ref ? 
ref : normalizer.normalizePath(path))} return references } } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy index ccacbb145d..955f131e9e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy @@ -22,5 +22,5 @@ package nextflow.data.cid.model * @author Paolo Di Tommaso */ enum DataType { - Task, Workflow, WorkflowRun, Output + TaskRun, Workflow, WorkflowRun, TaskOutput, WorkflowOutput } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy index 738f843cc6..610168b129 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy @@ -29,7 +29,7 @@ import groovy.transform.CompileStatic class Output { DataType type String path - String hash + String checksum String source long size long createdAt diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy index 22318cdeda..857f2ee099 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -28,8 +28,15 @@ import groovy.transform.CompileStatic @CompileStatic class TaskRun { DataType type - int id + String sessionId String name - Map inputs + String source + List inputs + String container + String conda + String spack + String architecture + Map globalVars + List binEntries List annotations } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy index cdc67e50ba..e99cdd8425 100644 --- 
a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy @@ -29,7 +29,7 @@ import groovy.transform.CompileStatic class WorkflowRun { DataType type Workflow workflow - String uniqueId + String sessionId String name Map params } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 05a668c82e..b0bf67aaca 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -2274,7 +2274,7 @@ class TaskProcessor { * @return The list of paths of scripts in the project bin folder referenced in the task command */ @Memoized - protected List getTaskBinEntries(String script) { + public List getTaskBinEntries(String script) { List result = [] def tokenizer = new StringTokenizer(script," \t\n\r\f()[]{};&|<>`") while( tokenizer.hasMoreTokens() ) { @@ -2307,7 +2307,7 @@ class TaskProcessor { log.info(buffer.toString()) } - protected Map getTaskGlobalVars(TaskRun task) { + public Map getTaskGlobalVars(TaskRun task) { final result = task.getGlobalVars(ownerScript.binding) final directives = getTaskExtensionDirectiveVars(task) result.putAll(directives) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy index bde46722f1..df3395d9e8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy @@ -979,5 +979,9 @@ class TaskRun implements Cloneable { CondaConfig getCondaConfig() { return processor.session.getCondaConfig() } + + String getStubSource(){ + return config?.getStubBlock()?.source + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy 
b/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy new file mode 100644 index 0000000000..7da3c5a925 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy @@ -0,0 +1,93 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.util + +import groovy.transform.CompileStatic +import nextflow.script.WorkflowMetadata + +import java.nio.file.Path + +/** + * + * @author Ben Sherman + */ +@CompileStatic +class PathNormalizer { + + private URL repository + + private String commitId + + private String projectDir + + private String workDir + + PathNormalizer(WorkflowMetadata metadata) { + repository = metadata.repository ? new URL(metadata.repository) : null + commitId = metadata.commitId + projectDir = metadata.projectDir.normalize().toUriString() + workDir = metadata.workDir.normalize().toUriString() + } + + /** + * Normalize paths against the original remote URL, or + * work directory, where appropriate. 
+ * + * @param path + */ + String normalizePath(Path path) { + normalizePath(path.toUriString()) + } + + String normalizePath(String path) { + if(!path) + return null + // replace work directory with relative path + if( path.startsWith(workDir) ) + return path.replace(workDir, 'work') + + // replace project directory with source URL (if applicable) + if( repository && path.startsWith(projectDir) ) + return getProjectSourceUrl(path) + + // encode local absolute paths as file URLs + if( path.startsWith('/') ) + return 'file://' + path + + return path + } + + /** + * Get the source URL for a project asset. + * + * @param path + */ + private String getProjectSourceUrl(String path) { + switch( repository.host ) { + case 'bitbucket.org': + return path.replace(projectDir, "${repository}/src/${commitId}") + case 'github.com': + return path.replace(projectDir, "${repository}/tree/${commitId}") + case 'gitlab.com': + return path.replace(projectDir, "${repository}/-/tree/${commitId}") + default: + return path + } + } + +} From cdc31163d021cd29a381f38d76f537e3ae252ad3 Mon Sep 17 00:00:00 2001 From: jorgee Date: Mon, 17 Feb 2025 18:53:31 +0100 Subject: [PATCH 07/15] fix test Signed-off-by: jorgee --- .../nextflow/data/cid/CidObserverTest.groovy | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index 80c3295f5d..a5c1a3c426 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -19,9 +19,12 @@ package nextflow.data.cid import groovy.json.JsonOutput import nextflow.data.config.DataConfig +import nextflow.processor.TaskProcessor import nextflow.util.CacheHelper +import nextflow.util.PathNormalizer import java.nio.file.Files +import java.nio.file.Path import 
java.nio.file.attribute.BasicFileAttributes import java.nio.file.attribute.FileTime import java.time.Instant @@ -42,9 +45,11 @@ class CidObserverTest extends Specification { def folder = Files.createTempDirectory('test') def config = [cid:[store:[location:folder.toString()]]] def store = new DefaultCidStore(); + def uniqueId = UUID.randomUUID() def session = Mock(Session) { getConfig()>>config getCidStore()>>store + getUniqueId()>>uniqueId } store.open(DataConfig.create(session)) def observer = new CidObserver() @@ -52,15 +57,31 @@ class CidObserverTest extends Specification { and: def hash = HashCode.fromInt(123456789) and: + def processor = Mock(TaskProcessor){ + getTaskGlobalVars(_) >> [:] + getTaskBinEntries(_) >> [] + } def task = Mock(TaskRun) { getId() >> TaskId.of(100) getName() >> 'foo' getHash() >> hash + getProcessor() >> processor + getSource() >> 'echo task source' + } + def normalizer = Mock(PathNormalizer.class) { + normalizePath( _ as Path) >> {Path p -> p?.toString()} + normalizePath( _ as String) >> {String p -> p} } + def expectedString = '{"type":"TaskRun",' + + '"sessionId":"'+uniqueId.toString() + '",' + + '"name":"foo","source":"echo task source",' + + '"inputs": null,"container": null,"conda": null,' + + '"spack": null,"architecture": null,' + + '"globalVars": {},"binEntries": [],"annotations":null}' when: - observer.storeTaskRun(task) + observer.storeTaskRun(task, normalizer) then: - folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint('{"type":"Task","id":100,"name":"foo","inputs": null,"annotations":null}') + folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint(expectedString) cleanup: folder?.deleteDir() @@ -97,9 +118,9 @@ class CidObserverTest extends Specification { } and: def attrs = Files.readAttributes(outFile, BasicFileAttributes) - def expectedString = '{"type":"Output",' + + def expectedString = '{"type":"TaskOutput",' + '"path":"' + outFile.toString() + '",' + - 
'"hash":"'+ fileHash + '",' + + '"checksum":"'+ fileHash + '",' + '"source":"cid://15cd5b07",' + '"size":'+attrs.size() + ',' + '"createdAt":' + attrs.creationTime().toMillis() + ',' + From 82b1ccd140912abd47c1c3abb523acafa189ed26 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 14:21:56 +0100 Subject: [PATCH 08/15] First commit to M1 implementation Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 36 +- .../main/groovy/nextflow/cli/CmdCid.groovy | 134 ++++-- .../main/groovy/nextflow/cli/CmdLog.groovy | 1 - .../nextflow/data/cid/CidHistoryFile.groovy | 144 +++++++ .../nextflow/data/cid/CidObserver.groovy | 287 +++++++++---- .../groovy/nextflow/data/cid/CidStore.groovy | 1 + .../nextflow/data/cid/DefaultCidStore.groovy | 11 +- .../nextflow/data/cid/fs/CidFileSystem.groovy | 130 ++++++ .../data/cid/fs/CidFileSystemProvider.groovy | 320 +++++++++++++++ .../nextflow/data/cid/fs/CidPath.groovy | 381 ++++++++++++++++++ .../data/cid/fs/CidPathFactory.groovy | 61 +++ .../nextflow/data/cid/model/DataPath.groovy | 33 ++ .../nextflow/data/cid/model/DataType.groovy | 3 +- .../nextflow/data/cid/model/Parameter.groovy | 34 ++ .../nextflow/data/cid/model/TaskRun.groovy | 7 +- .../nextflow/data/cid/model/Workflow.groovy | 5 +- .../data/cid/model/WorkflowResults.groovy | 34 ++ .../data/cid/model/WorkflowRun.groovy | 3 +- .../nextflow/data/config/DataConfig.groovy | 8 +- .../nextflow/processor/PublishDir.groovy | 14 +- .../nextflow/script/ScriptRunner.groovy | 3 +- .../trace/DefaultObserverFactory.groovy | 2 +- .../groovy/nextflow/util/HistoryFile.groovy | 93 +---- .../groovy/nextflow/util/WithLockFile.groovy | 78 ++++ .../java.nio.file.spi.FileSystemProvider | 17 + .../groovy/nextflow/cli/CmdCidTest.groovy | 258 ++++++++++++ .../groovy/nextflow/cli/CmdLogTest.groovy | 4 +- .../data/cid/CidHistoryFileTest.groovy | 158 ++++++++ .../nextflow/data/cid/CidObserverTest.groovy | 263 +++++++++++- .../cid/fs/CidFileSystemProviderTest.groovy | 372 
+++++++++++++++++ .../nextflow/data/cid/fs/CidPathTest.groovy | 280 +++++++++++++ .../data/cid/fs/CifPathFactoryTest.groovy | 88 ++++ .../nextflow/util/HistoryFileTest.groovy | 76 ++-- .../src/main/nextflow/file/FileHelper.groovy | 2 + 34 files changed, 3036 insertions(+), 305 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy create mode 100644 modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider create mode 100644 modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 78a7e9293b..a7e7f79565 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -19,7 +19,6 @@ package nextflow import nextflow.data.cid.CidStore import nextflow.data.cid.DefaultCidStore import nextflow.data.config.DataConfig -import nextflow.util.CacheHelper import java.nio.file.Files import java.nio.file.Path @@ -263,14 +262,10 @@ class Session implements ISession { boolean getCidEnabled() { cidEnabled } - private HashCode executionHash - private CidStore cidStore CidStore getCidStore() { cidStore } - String getExecutionHash() { executionHash } - private WorkflowMetadata workflowMetadata private WorkflowStatsObserver statsObserver @@ -410,14 +405,19 @@ class Session implements ISession { // -- file porter config this.filePorter = new FilePorter(this) - if (config.cid) { + if(config.navigate('workflow.data')) { this.cidEnabled = true - this.cidStore = new DefaultCidStore() - this.cidStore.open(DataConfig.create(this)) + this.cidStore = createCidStore(this) } } + protected static CidStore createCidStore(Session session){ + final store = new DefaultCidStore() + store.open(DataConfig.create(session)) + return store + } + protected Path cloudCachePath(Map cloudcache, Path workDir) { if( !cloudcache?.enabled ) return null @@ -428,32 +428,12 @@ class Session implements ISession { } return result } - private HashCode generateExecutionHash(ScriptFile scriptFile){ - List keys = [generateScriptHash(scriptFile).toString(), scriptFile?.repository, scriptFile?.commitId, uniqueId, (Map)config.params] - return CacheHelper.hasher(keys).hash() - } - - private HashCode generateScriptHash(ScriptFile scriptFile){ - List keys = [ scriptFile?.scriptId ] - for( Path p : ScriptMeta.allScriptNames().values() ){ - keys << CacheHelper.hasher(p.text).hash().toString() - } - return CacheHelper.hasher(keys).hash() - } /** * Initialize the session workDir, libDir, baseDir and scriptName variables */ Session init( ScriptFile scriptFile, List args=null ) { - if(cidEnabled) { - this.executionHash = 
generateExecutionHash(scriptFile) - this.outputDir = cidStore.getPath().resolve(executionHash.toString()) - log.warn("CID store enabled. Defined output directory will be ignored and set to ${outputDir}.") - if( !HistoryFile.disabled() && HistoryFile.DEFAULT.exists() ) { - HistoryFile.DEFAULT.updateCidHash(runName,executionHash.toString()) - } - } if(!workDir.mkdirs()) throw new AbortOperationException("Cannot create work-dir: $workDir -- Make sure you have write permissions or specify a different directory by using the `-w` command line option") log.debug "Work-dir: ${workDir.toUriString()} [${FileHelper.getPathFsType(workDir)}]" diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index a27bfdfec7..3b17ca35b8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -24,17 +24,18 @@ import groovy.transform.CompileStatic import nextflow.Session import nextflow.config.ConfigBuilder import nextflow.dag.MermaidHtmlRenderer +import nextflow.data.cid.CidHistoryFile import nextflow.data.cid.CidStore -import nextflow.data.cid.DefaultCidStore import nextflow.data.cid.model.DataType -import nextflow.data.config.DataConfig import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins +import nextflow.ui.TableBuilder import java.nio.file.Path import java.nio.file.Paths -import static nextflow.data.cid.CidObserver.* +import static nextflow.data.cid.fs.CidPath.CID_PROT +import static nextflow.data.cid.fs.CidPath.METADATA_FILE /** * @@ -54,9 +55,11 @@ class CmdCid extends CmdBase { private List commands = new ArrayList<>() CmdCid() { + commands << new CmdLog() commands << new CmdShow() commands << new CmdLineage() + } @Parameter(hidden = true) @@ -92,6 +95,51 @@ class CmdCid extends CmdBase { throw new AbortOperationException(msg) } + class CmdLog implements SubCmd { + + @Override + String 
getName() { + return 'log' + } + + @Override + void apply(List args) { + if (args.size() != 0) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + final config = new ConfigBuilder() + .setOptions(getLauncher().getOptions()) + .setBaseDir(Paths.get('.')) + .build() + final session = new Session(config) + printHistory(session.cidStore) + + } + + private void printHistory(CidStore store) { + + + final historyFile = store.getHistoryFile() + if (historyFile.exists()) { + def table = new TableBuilder(cellSeparator: '\t') + .head('TIMESTAMP') + .head('RUN NAME') + .head('SESSION ID') + .head('RUN CID') + historyFile.eachLine { table.append(CidHistoryFile.CidRecord.parse(it).toList()) } + println table.toString() + } else { + println("No workflow runs CIDs found.") + } + } + + @Override + void usage() { + println 'Usage: nextflow cid log' + } + } class CmdShow implements SubCmd{ @Override @@ -106,13 +154,20 @@ class CmdCid extends CmdBase { usage() return } + if (!args[0].startsWith(CID_PROT)) + throw new Exception("Identifier is not a CID URL") + final key = args[0].substring(CID_PROT.size()) + "/$METADATA_FILE" final config = new ConfigBuilder() .setOptions(getLauncher().getOptions()) .setBaseDir(Paths.get('.')) .build() final session = new Session(config) final store = session.cidStore - println store.load("${args[0]}/$METADATA_FILE").toString() + try { + println store.load(key).toString() + }catch (Throwable e){ + println "Error loading ${args[0]}." + } } @Override @@ -154,7 +209,7 @@ class CmdCid extends CmdBase { file.text = template.replace('REPLACE_WITH_NETWORK_DATA', network) println("Linage graph for ${args[0]} rendered in ${args[1]}") } catch (Throwable e) { - println("ERROR: rendering lineage graph. ${e.getLocalizedMessage()}") + println("ERROR: rendering lineage graph. 
${e.message}") } } @@ -176,8 +231,11 @@ class CmdCid extends CmdBase { } private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { + if (!nodeToRender.startsWith(CID_PROT)) + throw new Exception("Identifier is not a CID URL") final slurper = new JsonSlurper() - final cidObject = slurper.parse(store.load("$nodeToRender/$METADATA_FILE").toString().toCharArray()) as Map + final key = nodeToRender.substring(CID_PROT.size()) + "/$METADATA_FILE" + final cidObject = slurper.parse(store.load(key).toString().toCharArray()) as Map switch (DataType.valueOf(cidObject.type as String)) { case DataType.TaskOutput: case DataType.WorkflowOutput: @@ -185,11 +243,11 @@ class CmdCid extends CmdBase { final source = cidObject.source as String if (source) { if (source.startsWith(CID_PROT)) { - final cid = source.substring(CID_PROT.size()) - nodes.add(cid) - edges.add(new Edge(cid, nodeToRender)) + nodes.add(source) + edges.add(new Edge(source, nodeToRender)) } else { - lines << " ${source}@{shape: document, label: \"${source}\"}".toString(); + final label = convertToLabel(source) + lines << " ${source}@{shape: document, label: \"${label}\"}".toString(); edges.add(new Edge(source, nodeToRender)) } } @@ -197,23 +255,23 @@ class CmdCid extends CmdBase { break; case DataType.WorkflowRun: lines << "${nodeToRender}@{shape: processes, label: \"${cidObject.runName}\"}".toString() - final parameters = cidObject.params as Map - parameters.values().each { - lines << " ${it}@{shape: document, label: \"${it}\"}".toString(); - edges.add(new Edge(it.toString(), nodeToRender)) + final parameters = cidObject.params as List + parameters.each { + final label = convertToLabel(it.value.toString()) + lines << " ${it.value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(it.value.toString(), nodeToRender)) } break; case DataType.TaskRun: lines << " ${nodeToRender}@{shape: process, label: 
\"${cidObject.name}\"}".toString() - final parameters = cidObject.inputs as List - parameters.each { String source -> - if (source.startsWith(CID_PROT)) { - final cid = source.substring(CID_PROT.size()) - nodes.add(cid) - edges.add(new Edge(cid, nodeToRender)) + final parameters = cidObject.inputs as List + for (nextflow.data.cid.model.Parameter source: parameters){ + if (source.type.equals(nextflow.script.params.FileInParam.simpleName)) { + manageFileInParam(lines, nodeToRender, nodes, edges, source.value) } else { - lines << " ${source}@{shape: document, label: \"${source}\"}".toString(); - edges.add(new Edge(source, nodeToRender)) + final label = convertToLabel(source.value.toString()) + lines << " ${source.value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(source.value.toString(), nodeToRender)) } } break; @@ -222,7 +280,37 @@ class CmdCid extends CmdBase { } } - private String readTemplate() { + private String convertToLabel(String label){ + return label.replace('http', 'h\u200Ettp') + } + + private void manageFileInParam(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, value){ + if (value instanceof Collection) { + value.each { manageFileInParam(lines, nodeToRender, nodes, edges, it) } + return + } + if (value instanceof CharSequence) { + final source = value.toString() + if (source.startsWith(CID_PROT)) { + nodes.add(source) + edges.add(new Edge(source, nodeToRender)) + return + } + } + if (value instanceof Map) { + if (value.path) { + final label = convertToLabel(value.path.toString()) + lines << " ${value.path}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(value.path.toString(), nodeToRender)) + return + } + } + final label = convertToLabel(value.toString()) + lines << " ${value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(value.toString(), nodeToRender)) + } + + protected static String readTemplate() { final writer = new 
StringWriter() final res = MermaidHtmlRenderer.class.getResourceAsStream('mermaid.dag.template.html') int ch diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy index 0c6d4356f3..66d88980d1 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy @@ -203,7 +203,6 @@ class CmdLog extends CmdBase implements CacheBase { .head('STATUS') .head('REVISION ID') .head('SESSION ID') - .head('CID HASH') .head('COMMAND') history.eachRow { List row -> diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy new file mode 100644 index 0000000000..07b4e24b16 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy @@ -0,0 +1,144 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package nextflow.data.cid + +import groovy.transform.EqualsAndHashCode +import groovy.util.logging.Slf4j +import nextflow.util.WithLockFile + +import java.nio.file.Path +import java.text.DateFormat +import java.text.SimpleDateFormat + +/** + * File to store a history of the workflow executions and their corresponding CIDs + * + * @author Jorge Ejarque + */ +@Slf4j +class CidHistoryFile extends WithLockFile { + private static final DateFormat TIMESTAMP_FMT = new SimpleDateFormat('yyyy-MM-dd HH:mm:ss') + + CidHistoryFile(Path file) { + super(file.toString()) + } + + void write(String name, UUID key, String runCid, Date date = null) { + assert key + + withFileLock { + def timestamp = date ?: new Date() + log.debug("Writting record for $key in CID history file $this") + this << new CidRecord(timestamp: timestamp, runName: name, sessionId: key, runCid: runCid).toString() << '\n' + } + } + + void update(UUID sessionId, String runCid) { + assert sessionId + + try { + withFileLock { update0(sessionId, runCid) } + } + catch (Throwable e) { + log.warn "Can't update cid history file: $this", e + } + } + + String getRunCid(UUID id){ + assert id + + for (String line: this.readLines()){ + def current = line ? CidRecord.parse(line) : null + if (current.sessionId == id) { + return current.runCid + } + } + log.warn("Can't find session $id in CID history file $this") + return null + } + + private void update0(UUID id, String runCid) { + assert id + def newHistory = new StringBuilder() + + this.readLines().each { line -> + try { + def current = line ? 
CidRecord.parse(line) : null + if (current.sessionId == id) { + log.debug("Updating record for $id in CID history file $this") + current.runCid = runCid + newHistory << current.toString() << '\n' + } else { + newHistory << line << '\n' + } + } + catch (IllegalArgumentException e) { + log.warn("Can't read CID history file: $this", e) + } + } + + // rewrite the history content + this.setText(newHistory.toString()) + } + + @EqualsAndHashCode(includes = 'runName,sessionId') + static class CidRecord { + Date timestamp + String runName + UUID sessionId + String runCid + + CidRecord(UUID sessionId, String name = null) { + this.runName = name + this.sessionId = sessionId + } + + protected CidRecord() {} + + List toList() { + def line = new ArrayList(4) + line << (timestamp ? TIMESTAMP_FMT.format(timestamp) : '-') + line << (runName ?: '-') + line << (sessionId.toString()) + line << (runCid ?: '-') + } + + @Override + String toString() { + toList().join('\t') + } + + static CidRecord parse(String line) { + def cols = line.tokenize('\t') + if (cols.size() == 2) + return new CidRecord(UUID.fromString(cols[0])) + + if (cols.size() == 4) { + + return new CidRecord( + timestamp: TIMESTAMP_FMT.parse(cols[0]), + runName: cols[1], + sessionId: UUID.fromString(cols[2]), + runCid: cols[3] + ) + } + + throw new IllegalArgumentException("Not a valid history entry: `$line`") + } + } + +} \ No newline at end of file diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index f59b582f60..fe33c95032 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -18,11 +18,19 @@ package nextflow.data.cid import groovy.util.logging.Slf4j +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.WorkflowResults import 
nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowRun import nextflow.file.FileHelper +import nextflow.file.FileHolder import nextflow.script.ScriptMeta +import nextflow.script.params.DefaultInParam +import nextflow.script.params.FileInParam +import nextflow.script.params.InParam import nextflow.util.PathNormalizer +import nextflow.util.TestOnly import java.nio.file.Files import java.nio.file.Path @@ -40,35 +48,69 @@ import nextflow.trace.TraceObserver import nextflow.trace.TraceRecord import nextflow.util.CacheHelper +import static nextflow.data.cid.fs.CidPath.CID_PROT +import static nextflow.data.cid.fs.CidPath.METADATA_FILE /** + * Observer to write the generated workflow metadata in a CID store. * * @author Paolo Di Tommaso */ @Slf4j @CompileStatic class CidObserver implements TraceObserver { - public static final String METADATA_FILE = '.data.json' - public static final String CID_PROT = 'cid://' + + private String executionHash private CidStore store private Session session + private WorkflowResults workflowResults + private Map outputsStoreDirCid = new HashMap(10) - @Override - void onFlowCreate(Session session) { + CidObserver(Session session){ this.session = session this.store = session.cidStore } + @Override + void onFlowCreate(Session session) { + this.store.getHistoryFile().write(session.runName, session.uniqueId, '-') + } + + @TestOnly + String getExecutionHash(){ executionHash } + + @Override void onFlowBegin() { - storeWorkflowRun() + this.executionHash = storeWorkflowRun() + workflowResults = new WorkflowResults( + DataType.WorkflowResults, + "$CID_PROT${executionHash}", + new ArrayList()) + this.store.getHistoryFile().update(session.uniqueId, "${CID_PROT}${this.executionHash}") } - protected void storeWorkflowRun() { + @Override + void onFlowComplete(){ + if (this.workflowResults){ + final content = JsonOutput.prettyPrint(JsonOutput.toJson(workflowResults)) + final wfResultsHash = CacheHelper.hasher(content).hash().toString() 
+ this.store.save("${wfResultsHash}/$METADATA_FILE", content) + this.store.getHistoryFile().update(session.uniqueId, "${CID_PROT}${wfResultsHash}") + } + } + + protected String storeWorkflowRun() { final normalizer = new PathNormalizer(session.workflowMetadata) - final mainScript = normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()) + final mainScript = new DataPath( + normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), + session.workflowMetadata.scriptId + ) final workflow = new Workflow( DataType.Workflow, mainScript, - ScriptMeta.allScriptNames().values().collect {normalizer.normalizePath(it.normalize())}, + ScriptMeta.allScriptNames().values().collect { new DataPath( + normalizer.normalizePath(it.normalize()), + CacheHelper.hasher(it.text).hash().toString()) + }, session.workflowMetadata.repository, session.workflowMetadata.commitId ) @@ -79,20 +121,22 @@ class CidObserver implements TraceObserver { session.runName, getNormalizedParams(session.params, normalizer) ) + final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) - store.save("${session.executionHash}/$METADATA_FILE", content) + final executionHash = CacheHelper.hasher(content).hash().toString() + store.save("${executionHash}/$METADATA_FILE", content) + return executionHash } - private static Map getNormalizedParams(Map params, PathNormalizer normalizer){ - final normalizedParams = new HashMap() + private static List getNormalizedParams(Map params, PathNormalizer normalizer){ + final normalizedParams = new LinkedList() params.each{String key, Object value -> - log.debug("Managing parameter $key , class ${value.class}") - if (value instanceof Path) - normalizedParams.put(key,normalizer.normalizePath(value as Path)) - else if (value instanceof String || value instanceof GString) - normalizedParams.put(key,normalizer.normalizePath(value.toString())) + if( value instanceof Path ) + normalizedParams.add( new Parameter( Path.class.simpleName, key, 
normalizer.normalizePath( value as Path ) ) ) + else if ( value instanceof CharSequence ) + normalizedParams.add( new Parameter( String.class.simpleName, key, normalizer.normalizePath( value.toString() ) ) ) else - normalizedParams.put(key, value) + normalizedParams.add( new Parameter( value.class.simpleName, key, value) ) } return normalizedParams } @@ -109,53 +153,92 @@ class CidObserver implements TraceObserver { storeTaskRun(task, pathNormalizer) // store all task outputs files final outputs = task.getOutputsByType(FileOutParam) - for( Map.Entry entry : outputs ) { - final value = entry.value - if( value instanceof Path ) { - storeTaskOutput(task, (Path)value) - } - else if( value instanceof Collection ) { - for( Path it : value ) - storeTaskOutput(task, (Path)it) + outputs.forEach { FileOutParam key, Object value -> manageFileOutParams(value, task)} + + } + + private void manageFileOutParams( Object value, TaskRun task) { + if (value instanceof Path) { + storeTaskOutput(task, (Path) value) + } else if (value instanceof Collection) { + for (Path it : value) { + storeTaskOutput(task, (Path) it) } } } - protected void storeTaskRun(TaskRun task, PathNormalizer normalizer) { + protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { final value = new nextflow.data.cid.model.TaskRun( DataType.TaskRun, session.uniqueId.toString(), task.getName(), - session.stubRun ? task.stubSource: task.source, - task.inputFilesMap ? convertToReferences(task.inputFilesMap, normalizer): null, + CacheHelper.hasher(session.stubRun ? task.stubSource: task.source).hash().toString(), + task.inputs ? manageInputs(task.inputs, normalizer): null, task.isContainerEnabled() ? 
task.getContainerFingerprint(): null, normalizer.normalizePath(task.getCondaEnv()), normalizer.normalizePath(task.getSpackEnv()), task.config?.getArchitecture()?.toString(), task.processor.getTaskGlobalVars(task), - task.processor.getTaskBinEntries(task.source).collect { Path p -> normalizer.normalizePath(p.normalize()) } - ) + task.processor.getTaskBinEntries(task.source).collect { Path p -> new DataPath(normalizer.normalizePath(p.normalize()), + CacheHelper.hasher(p).hash().toString() )} + ) + // store in the underlying persistence final key = "${task.hash}/$METADATA_FILE" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + return task.hash.toString() } protected void storeTaskOutput(TaskRun task, Path path) { - final attrs = readAttributes(path) - final rel = task.workDir.relativize(path).toString() - final cid = "${task.hash}/${rel}" - final key = "${cid}/$METADATA_FILE" - final hash = CacheHelper.hasher(path).hash().toString() - final value = new Output( - DataType.TaskOutput, - path.toString(), - hash, - "$CID_PROT$task.hash", - attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis() ) - // store in the underlying persistence - store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + try { + final attrs = readAttributes(path) + final rel = getTaskRelative(task, path) + final cid = "${task.hash}/${rel}" + final key = "${cid}/$METADATA_FILE" + final hash = CacheHelper.hasher(path).hash().toString() + final value = new Output( + DataType.TaskOutput, + path.toString(), + hash, + "$CID_PROT$task.hash", + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis()) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } catch (Throwable e) { + log.warn("Exception storing CID output $path for task ${task.name}. 
${e.getLocalizedMessage()}") + } + } + + protected String getTaskRelative(TaskRun task, Path path){ + if (path.isAbsolute()) { + final rel = getTaskRelative0(task, path) + if (rel) return rel + throw new Exception("Cannot asses the relative path for output $path of ${task.name}") + } else { + //Check if contains workdir or storeDir + final rel = getTaskRelative0(task, path.toAbsolutePath()) + if (rel) return rel + if (path.normalize().getName(0).toString() == "..") + throw new Exception("Cannot asses the relative path for output $path of ${task.name}" ) + return path.normalize().toString() + } + + } + + private String getTaskRelative0(TaskRun task, Path path){ + final workDirAbsolute = task.workDir.toAbsolutePath() + if (path.startsWith(workDirAbsolute)) { + return workDirAbsolute.relativize(path).toString() + } + //If task output is not in the workDir check if output is stored in the task's storeDir + final storeDir = task.getConfig().getStoreDir().toAbsolutePath() + if( storeDir && path.startsWith(storeDir)) { + final rel = storeDir.relativize(path) + //If output stored in storeDir, keep the path in case it is used as workflow output + this.outputsStoreDirCid.put(path.toString(), "$CID_PROT${task.hash}/$rel".toString()) + return rel + } } protected BasicFileAttributes readAttributes(Path path) { @@ -164,20 +247,25 @@ class CidObserver implements TraceObserver { @Override void onFilePublish(Path destination, Path source){ - final hash = CacheHelper.hasher(destination).hash().toString() - final rel = session.outputDir.relativize(destination).toString() - final key = "$session.executionHash/${rel}/$METADATA_FILE" - final sourceReference = getSourceReference(source) - final attrs = readAttributes(destination) - final value = new Output( - DataType.WorkflowOutput, - destination.toString(), - hash, - sourceReference, - attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis() ) - store.save(key, 
JsonOutput.prettyPrint(JsonOutput.toJson(value))) + try { + final hash = CacheHelper.hasher(destination).hash().toString() + final rel = getWorkflowRelative(destination) + final key = "$executionHash/${rel}/$METADATA_FILE" + final sourceReference = getSourceReference(source) + final attrs = readAttributes(destination) + final value = new Output( + DataType.WorkflowOutput, + destination.toString(), + hash, + sourceReference, + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis()) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + workflowResults.outputs.add("${CID_PROT}${executionHash}/${rel}") + } catch (Throwable e) { + log.warn("Exception storing CID output $destination for workflow ${executionHash}.", e) + } } String getSourceReference(Path source){ @@ -185,32 +273,75 @@ class CidObserver implements TraceObserver { if (hash) { final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() return "$CID_PROT$hash/$target" + } else { + final storeDirReference = outputsStoreDirCid.get(source.toString()) + if (storeDirReference) + return "$CID_PROT$storeDirReference" } return null } @Override void onFilePublish(Path destination){ - final hash = CacheHelper.hasher(destination).hash().toString() - final rel = session.outputDir.relativize(destination).toString() - final key = "$session.executionHash/${rel}/$METADATA_FILE" - final attrs = readAttributes(destination) - final value = new Output( - DataType.WorkflowOutput, - destination.toString(), - hash, - session.executionHash, - attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis() ) - store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + try { + final hash = CacheHelper.hasher(destination).hash().toString() + final rel = getWorkflowRelative(destination) + final key = "$executionHash/${rel}/$METADATA_FILE" + final attrs = readAttributes(destination) + final value = new Output( + 
DataType.WorkflowOutput, + destination.toString(), + hash, + "${CID_PROT}${executionHash}".toString(), + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis()) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + workflowResults.outputs.add("${CID_PROT}${executionHash}/${rel}") + }catch (Throwable e) { + log.warn("Exception storing CID output $destination for workflow ${executionHash}. ${e.getLocalizedMessage()}") + } } - protected List convertToReferences(Map inputs, PathNormalizer normalizer) { - List references = new LinkedList() - inputs.each { name, path -> - final ref = getSourceReference(path) - references.add(ref ? ref : normalizer.normalizePath(path))} - return references + protected String getWorkflowRelative(Path path){ + final outputDirAbs = session.outputDir.toAbsolutePath() + if (path.isAbsolute()) { + if (path.startsWith(outputDirAbs)) { + return outputDirAbs.relativize(path).toString() + } else { + throw new Exception("Cannot asses the relative path for workflow output $path") + } + } else { + final pathAbs = path.toAbsolutePath() + if (pathAbs.startsWith(outputDirAbs)) { + return outputDirAbs.relativize(pathAbs).toString() + } + if (path.normalize().getName(0).toString() == "..") + throw new Exception("Cannot asses the relative path for workflow output $path") + return path.normalize().toString() + } + + } + + protected List manageInputs(Map inputs, PathNormalizer normalizer) { + List managedInputs = new LinkedList() + inputs.forEach{ param, value -> + final type = param.class.simpleName + final name = param.name + if( param instanceof FileInParam ) + managedInputs.add( new Parameter( type, name, manageFileInParam( (List)value , normalizer) ) ) + else if( !(param instanceof DefaultInParam) ) + managedInputs.add( new Parameter( type, name, value) ) + } + return managedInputs + } + + private List manageFileInParam(List files, PathNormalizer normalizer){ + final paths = new LinkedList(); + for( 
FileHolder it : files ) { + final ref = getSourceReference(it.storePath) + paths.add(ref ? ref : new DataPath(normalizer.normalizePath(it.storePath), CacheHelper.hasher(it.storePath).hash().toString())) + } + return paths } } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy index 67017f0bf0..f012c8f130 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy @@ -39,5 +39,6 @@ interface CidStore { Path getPath() + CidHistoryFile getHistoryFile() } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index 958cc4ef49..9f35052861 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -27,6 +27,7 @@ import nextflow.data.config.DataConfig import nextflow.exception.AbortOperationException /** + * Default Implementation for the a CID store. 
* * @author Paolo Di Tommaso */ @@ -34,12 +35,13 @@ import nextflow.exception.AbortOperationException @CompileStatic class DefaultCidStore implements CidStore { + private static String HISTORY_FILE_NAME =".history" private Path metaLocation private Path location void open(DataConfig config) { location = config.store.location - metaLocation = location.resolve('.meta') + metaLocation = getMetadataPath(config) if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { throw new AbortOperationException("Unable to create CID store directory: $metaLocation") } @@ -69,5 +71,12 @@ class DefaultCidStore implements CidStore { @Override Path getPath(){ location } + @Override + CidHistoryFile getHistoryFile(){ + return new CidHistoryFile(metaLocation.resolve(HISTORY_FILE_NAME)) + } + + static Path getMetadataPath(DataConfig config){ config.store.location.resolve('.meta') } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy new file mode 100644 index 0000000000..d6105624f7 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy @@ -0,0 +1,130 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.fs + +import nextflow.data.cid.DefaultCidStore + +import java.nio.file.FileStore +import java.nio.file.FileSystem +import java.nio.file.Path +import java.nio.file.PathMatcher +import java.nio.file.WatchService +import java.nio.file.attribute.UserPrincipalLookupService +import java.nio.file.spi.FileSystemProvider + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig + +/** + * File system for CID Paths + * + * @author Jorge Ejarque + */ +@CompileStatic +class CidFileSystem extends FileSystem { + + private CidFileSystemProvider provider + + private Path basePath + + /* + * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 + */ + protected CidFileSystem(){} + + CidFileSystem(CidFileSystemProvider provider, DataConfig config) { + this.provider = provider + this.basePath = DefaultCidStore.getMetadataPath(config) + } + + Path getBasePath() { + return basePath + } + + @Override + boolean equals( Object other ) { + if( this.class != other.class ) return false + final that = (CidFileSystem)other + this.provider == that.provider && this.basePath == that.basePath + } + + @Override + int hashCode() { + Objects.hash(provider,basePath) + } + + @Override + FileSystemProvider provider() { + return provider + } + + @Override + void close() throws IOException { + + } + + @Override + boolean isOpen() { + return false + } + + @Override + boolean isReadOnly() { + return true + } + + @Override + String getSeparator() { + return CidPath.SEPARATOR + } + + @Override + Iterable getRootDirectories() { + return null + } + + @Override + Iterable getFileStores() { + return null + } + + @Override + Set supportedFileAttributeViews() { + return null + } + + @Override + Path getPath(String first, String... 
more) { + return new CidPath(this,first,more) + } + + @Override + PathMatcher getPathMatcher(String syntaxAndPattern) { + throw new UnsupportedOperationException(); + } + + @Override + UserPrincipalLookupService getUserPrincipalLookupService() { + throw new UnsupportedOperationException('User Principal Lookup Service not supported') + } + + @Override + WatchService newWatchService() throws IOException { + throw new UnsupportedOperationException('Watch Service not supported') + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy new file mode 100644 index 0000000000..a963c7dd61 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -0,0 +1,320 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.fs + +import java.nio.ByteBuffer +import java.nio.channels.SeekableByteChannel +import java.nio.file.AccessDeniedException +import java.nio.file.AccessMode +import java.nio.file.CopyOption +import java.nio.file.DirectoryStream +import java.nio.file.FileStore +import java.nio.file.FileSystem +import java.nio.file.FileSystemNotFoundException +import java.nio.file.LinkOption +import java.nio.file.OpenOption +import java.nio.file.Path +import java.nio.file.ProviderMismatchException +import java.nio.file.StandardOpenOption +import java.nio.file.attribute.BasicFileAttributes +import java.nio.file.attribute.FileAttribute +import java.nio.file.attribute.FileAttributeView +import java.nio.file.spi.FileSystemProvider + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig + +/** + * File System Provider for CID Paths + * + * @author Jorge Ejarque + */ +@CompileStatic +class CidFileSystemProvider extends FileSystemProvider { + + public static final String SCHEME = "cid" + + private CidFileSystem fileSystem + + @Override + String getScheme() { + return SCHEME + } + + protected CidPath toCidPath(Path path) { + if (path !instanceof CidPath) + throw new ProviderMismatchException() + return (CidPath) path + } + + private void checkScheme(URI uri) { + final scheme = uri.scheme.toLowerCase() + if( scheme != getScheme() ) + throw new IllegalArgumentException("Not a valid ${getScheme().toUpperCase()} scheme: $scheme") + } + + @Override + synchronized FileSystem newFileSystem(URI uri, Map config) throws IOException { + checkScheme(uri) + if( !fileSystem ) { + //Overwrite default values with provided configuration + final defaultConfig = DataConfig.asMap() + config.each {defaultConfig.put(it.key, it.value)} + fileSystem = new CidFileSystem(this, new DataConfig(defaultConfig)) + } + return fileSystem + } + + @Override + FileSystem getFileSystem(URI uri) throws FileSystemNotFoundException { + if (!fileSystem) + throw new 
FileSystemNotFoundException() + return fileSystem + } + + synchronized FileSystem getFileSystemOrCreate(URI uri) { + checkScheme(uri) + if( !fileSystem ) { + fileSystem = (CidFileSystem) newFileSystem(uri, DataConfig.asMap()) + } + return fileSystem + } + + @Override + CidPath getPath(URI uri) { + // the URI authority holds the base component of the CID path + final base = uri.authority + final path = uri.path + return (CidPath) getFileSystemOrCreate(uri).getPath(base, path) + } + + @Override + OutputStream newOutputStream(Path path, OpenOption... options) throws IOException { + throw new UnsupportedOperationException("Write not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + InputStream newInputStream(Path path, OpenOption... options) throws IOException { + final cid = toCidPath(path) + final realPath = cid.getTargetPath() + realPath.fileSystem.provider().newInputStream(realPath, options) + } + + @Override + SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... 
attrs) throws IOException { + final cid = toCidPath(path) + if (options.size() > 0) { + for (OpenOption opt: options) { + // All OpenOption values except for APPEND and WRITE are allowed + if (opt == StandardOpenOption.APPEND || opt == StandardOpenOption.WRITE) + throw new UnsupportedOperationException("'$opt' not allowed"); + } + } + final realPath = cid.getTargetPath() + final channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs) + + new SeekableByteChannel() { + + @Override + int read(ByteBuffer dst) throws IOException { + channel.read(dst) + } + + @Override + int write(ByteBuffer src) throws IOException { + throw new UnsupportedOperationException("Write operation not supported") + } + + @Override + long position() throws IOException { + channel.position() + } + + @Override + SeekableByteChannel position(long newPosition) throws IOException { + throw new UnsupportedOperationException("Position operation not supported") + } + + @Override + long size() throws IOException { + channel.size() + } + + @Override + SeekableByteChannel truncate(long unused) throws IOException { + throw new UnsupportedOperationException("Truncate operation not supported") + } + + @Override + boolean isOpen() { + channel.isOpen() + } + + @Override + void close() throws IOException { + channel.close() + } + } + } + + @Override + DirectoryStream newDirectoryStream(Path path, DirectoryStream.Filter filter) throws IOException { + final cid = toCidPath(path) + final real = cid.getTargetPath() + final stream = real + .getFileSystem() + .provider() + .newDirectoryStream(real, new CidFilter(fileSystem)) + + return new DirectoryStream() { + + @Override + Iterator iterator() { + return new CidIterator(fileSystem, stream.iterator(), cid, real) + } + + @Override + void close() throws IOException { + stream.close() + } + } + } + private class CidFilter implements DirectoryStream.Filter { + + private final CidFileSystem fs + + CidFilter(CidFileSystem fs){ + this.fs = fs + } + 
+ @Override + boolean accept(Path entry) throws IOException { + if( entry.startsWith(fs.getBasePath()) && entry.getFileName().toString() == CidPath.METADATA_FILE ) { + return false + } + return true + } + } + + private static CidPath fromRealToCidPath(Path toConvert, Path realBase, CidPath cidBase){ + final fs = cidBase.fileSystem as CidFileSystem + if (toConvert.startsWith(fs.basePath)) { + return new CidPath(fs, toConvert) + } else { + final relative = realBase.relativize(toConvert) + return (CidPath) cidBase.resolve(relative.toString()) + } + } + + private static class CidIterator implements Iterator { + + private final CidFileSystem fs + private final Iterator target + private final CidPath parent + private final Path parentReal + + CidIterator(CidFileSystem fs, Iterator itr, CidPath parent, Path real) { + this.fs = fs + this.target = itr + this.parent = parent + this.parentReal = real + } + + @Override + boolean hasNext() { + return target.hasNext() + } + + @Override + CidPath next() { + final path = target.next() + return path ? fromRealToCidPath(path, parentReal, parent) : null + } + } + + @Override + void createDirectory(Path dir, FileAttribute... attrs) throws IOException { + throw new UnsupportedOperationException("Create directory not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void delete(Path path) throws IOException { + throw new UnsupportedOperationException("Delete not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void copy(Path source, Path target, CopyOption... options) throws IOException { + throw new UnsupportedOperationException("Copy not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void move(Path source, Path target, CopyOption... 
options) throws IOException { + throw new UnsupportedOperationException("Move not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + boolean isSameFile(Path path, Path path2) throws IOException { + return path == path2 + } + + @Override + boolean isHidden(Path path) throws IOException { + return toCidPath(path).getTargetPath().isHidden() + } + + @Override + FileStore getFileStore(Path path) throws IOException { + throw new UnsupportedOperationException("File store not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void checkAccess(Path path, AccessMode... modes) throws IOException { + final cid = toCidPath(path) + for( AccessMode m : modes ) { + if( m == AccessMode.WRITE ) + throw new AccessDeniedException("Write mode not supported") + if( m == AccessMode.EXECUTE ) + throw new AccessDeniedException("Execute mode not supported") + } + final real = cid.getTargetPath() + real.fileSystem.provider().checkAccess(real, modes) + } + + @Override + V getFileAttributeView(Path path, Class type, LinkOption... options) { + return null + } + + @Override + A readAttributes(Path path, Class type, LinkOption... options) throws IOException { + final cid = toCidPath(path) + final real = cid.getTargetPath() + real.fileSystem.provider().readAttributes(real,type,options) + } + + @Override + Map readAttributes(Path path, String attributes, LinkOption... options) throws IOException { + throw new UnsupportedOperationException("Read file attributes not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void setAttribute(Path path, String attribute, Object value, LinkOption... 
options) throws IOException { + throw new UnsupportedOperationException("Set file attributes not supported by ${getScheme().toUpperCase()} file system provider") + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy new file mode 100644 index 0000000000..4d70f5252c --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy @@ -0,0 +1,381 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.fs + +import groovy.json.JsonSlurper +import groovy.util.logging.Slf4j +import nextflow.data.cid.model.DataType +import nextflow.util.CacheHelper +import nextflow.util.TestOnly + +import static nextflow.data.cid.fs.CidFileSystemProvider.* + +import java.nio.file.FileSystem +import java.nio.file.LinkOption +import java.nio.file.Path +import java.nio.file.ProviderMismatchException +import java.nio.file.WatchEvent +import java.nio.file.WatchKey +import java.nio.file.WatchService + +import groovy.transform.CompileStatic +import nextflow.file.FileHelper + +/** + * CID file system path + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class CidPath implements Path { + + static public String SEPARATOR = '/' + public static final String METADATA_FILE = '.data.json' + public static final String CID_PROT = "${SCHEME}://".toString() + + static private String[] EMPTY = new String[] {} + + private CidFileSystem fileSystem + + // Path of the file in the metadata cid store + private Path storePath + + // String with the cid file path + private String filePath + + /* + * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 + */ + protected CidPath(){} + + protected CidPath(CidFileSystem fs, Path target) { + this.fileSystem = fs + this.storePath = target + this.filePath = filePath0(fs, target) + } + + CidPath(CidFileSystem fs, String path) { + this(fs, path, EMPTY) + } + + CidPath(CidFileSystem fs, String path, String[] more) { + this.fileSystem = fs + this.storePath = resolve0(fs, norm0(path), norm0(more)) + this.filePath = filePath0(fs, storePath) + } + + @TestOnly + protected String getFilePath(){ this.filePath } + + @TestOnly + protected Path getStorePath(){ this.storePath } + + + /** + * Finds the target path of a CID path + **/ + protected static Path findTarget(Path cidStorePath, CidFileSystem fs, String[] childs=[]){ + assert fs + if( fs.basePath == cidStorePath ) + return null 
+ final metadata = cidStorePath.resolve(METADATA_FILE).toFile() + if ( metadata.exists() ){ + final slurper = new JsonSlurper() + final cidObject = slurper.parse(metadata.text.toCharArray()) as Map + final type = DataType.valueOf(cidObject.type as String) + if( type == DataType.TaskOutput || type == DataType.WorkflowOutput ) { + // return the real path stored in the metadata + final realPath = Path.of(cidObject.path as String, childs) + if( !realPath.exists() ) + throw new FileNotFoundException("Target path $realPath for $cidStorePath does not exists.") + if( cidObject.checksum && CacheHelper.hasher(realPath).hash().toString() != cidObject.checksum ) { + log.warn("Checksum of $cidStorePath does not match with the one stored in the metadata") + } + return realPath + } + } else { + // If there isn't metadata check the parent to check if it is a subfolder of a task/workflow output + final parent = cidStorePath.getParent() + if( parent) { + ArrayList newChilds = new ArrayList() + newChilds.add(cidStorePath.getFileName().toString()) + newChilds.addAll(childs) + return findTarget(parent, fs, newChilds as String[]) + } + } + return null + } + + private static String filePath0(CidFileSystem fs, Path target) { + if( !fs ) + return target.toString() + return fs.basePath != target + ? fs.basePath.relativize(target).toString() + : SEPARATOR + } + + private static Path resolve0(CidFileSystem fs, String base, String[] more) { + if( !base || base == SEPARATOR ) { + return resolveEmptyPathCase(fs, more as List) + } + if( base.contains(SEPARATOR) ) { + final parts = base.tokenize(SEPARATOR) + final remain = parts[1..-1] + more.toList() + return resolve0(fs, parts[0], remain as String[]) + } + final result = fs ? fs.basePath.resolve(base) : Path.of(base) + return more + ? result.resolve(more.join(SEPARATOR)) + : result + } + + private static Path resolveEmptyPathCase(CidFileSystem fs, List more ){ + switch(more.size()) { + case 0: + return fs ? 
fs.basePath : Path.of("/") + case 1: + return resolve0(fs, more[0], EMPTY) + default: + return resolve0(fs, more[0], more[1..-1] as String[]) + } + + } + + static private String norm0(String path) { + if( !path ) + return "" + if( path==SEPARATOR ) + return path + //Remove repeated elements + path = Path.of(path).normalize().toString() + //Remove initial and final separators + if( path.startsWith(SEPARATOR) ) + path = path.substring(1) + if( path.endsWith(SEPARATOR) ) + path = path.substring(0,path.size()-1) + return path + } + + static private String[] norm0(String... path) { + for( int i=0; i1 ) + return subpath(0,c-1) + if( c==1 ) + return new CidPath(fileSystem,"/") + return null + } + + @Override + int getNameCount() { + return fileSystem ? storePath.nameCount-fileSystem.basePath.nameCount : storePath.nameCount + } + + @Override + Path getName(int index) { + if( index<0 ) + throw new IllegalArgumentException("Path name index cannot be less than zero - offending value: $index") + final c= fileSystem.basePath.nameCount + return new CidPath(index==0 ? fileSystem : null, storePath.getName(c + index).toString()) + } + + @Override + Path subpath(int beginIndex, int endIndex) { + if( beginIndex<0 ) + throw new IllegalArgumentException("subpath begin index cannot be less than zero - offending value: $beginIndex") + final c= fileSystem.basePath.nameCount + return new CidPath(beginIndex==0 ? 
fileSystem : null, storePath.subpath(c+beginIndex, c+endIndex).toString()) + } + + @Override + Path normalize() { + return new CidPath(fileSystem, storePath.normalize()) + } + + @Override + boolean startsWith(Path other) { + return startsWith(other.toString()) + } + + @Override + boolean startsWith(String other) { + return storePath.startsWith(fileSystem.basePath.resolve(other)) + } + + @Override + boolean endsWith(Path other) { + return endsWith(other.toString()) + } + + @Override + boolean endsWith(String other) { + return storePath.endsWith(other) + } + + @Override + Path resolve(Path other) { + if( CidPath.class != other.class ) + throw new ProviderMismatchException() + + final that = (CidPath)other + + if( that.fileSystem && this.fileSystem != that.fileSystem ) + return other + if( that.isAbsolute() ) { + return that + } + if( that.storePath ) { + final newPath = this.storePath.resolve(that.storePath) + return new CidPath(fileSystem, newPath) + } + return this + } + + @Override + Path resolve(String path) { + if( !path ) + return this + final scheme = FileHelper.getUrlProtocol(path) + if( !scheme ) { + // consider the path as a cid relative path + return resolve(new CidPath(null,path)) + } + if( scheme != SCHEME ) { + throw new ProviderMismatchException() + } + final that = fileSystem.provider().getPath(asUri(path)) + return resolve(that) + } + + + @Override + Path relativize(Path other) { + if( CidPath.class != other.class ) { + throw new ProviderMismatchException() + } + final path = storePath.relativize(((CidPath) other).storePath) + return new CidPath(null , path.getNameCount()>0 ? path.toString(): SEPARATOR) + } + + @Override + URI toUri() { + asUri("${SCHEME}://${filePath}") + } + + String toUriString() { + return toUri().toString() + } + + @Override + Path toAbsolutePath() { + return this + } + + @Override + Path toRealPath(LinkOption... 
options) throws IOException { + return getTargetPath() + } + + protected Path getTargetPath(){ + final target = findTarget(storePath, fileSystem) + return target ? target : storePath + } + + @Override + File toFile() throws IOException { + throw new UnsupportedOperationException("toFile not supported by CidPath") + } + + @Override + WatchKey register(WatchService watcher, WatchEvent.Kind[] events, WatchEvent.Modifier... modifiers) throws IOException { + throw new UnsupportedOperationException("Register not supported by CidPath") + } + + @Override + int compareTo(Path other) { + if( CidPath.class != other.class ) + throw new ProviderMismatchException() + final that = other as CidPath + return this.storePath.compareTo(that.storePath) + } + + @Override + boolean equals(Object other) { + if( CidPath.class != other.class ) { + return false + } + final that = (CidPath)other + return this.fileSystem == that.fileSystem && this.storePath.equals(that.storePath) + } + + /** + * @return The unique hash code for this path + */ + @Override + int hashCode() { + return Objects.hash(fileSystem,storePath) + } + + static URI asUri(String path) { + if (!path) + throw new IllegalArgumentException("Missing 'path' argument") + if (!path.startsWith(CID_PROT)) + throw new IllegalArgumentException("Invalid CID file system path URI - it must start with '${CID_PROT}' prefix - offendinf value: $path") + if (path.startsWith(CID_PROT + SEPARATOR) && path.length() > 7) + throw new IllegalArgumentException("Invalid CID file system path URI - make sure the schema prefix does not container more than two slash characters - offending value: $path") + if (path == CID_PROT) //Empty path case + return new URI("") + return new URI(path) + } + + @Override + String toString() { + filePath + } + + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy new file mode 100644 index 
0000000000..a7a365a6f7 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy @@ -0,0 +1,61 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import java.nio.file.Path + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig +import nextflow.file.FileHelper +import nextflow.file.FileSystemPathFactory + +import static nextflow.data.cid.fs.CidPath.CID_PROT + +/** + * Implements a {@link FileSystemPathFactory} for CID file system + * + * @author Jorge Ejarque + */ +@CompileStatic +class CidPathFactory extends FileSystemPathFactory { + + @Override + protected Path parseUri(String uri) { + return uri.startsWith(CID_PROT) ? create(uri) : null + } + + @Override + protected String toUriString(Path path) { + return path instanceof CidPath ? 
((CidPath)path).toUriString() : null + } + + @Override + protected String getBashLib(Path target) { + return null + } + + @Override + protected String getUploadCmd(String source, Path target) { + return null + } + + static CidPath create(String path) { + final uri = CidPath.asUri(path) + return (CidPath) FileHelper.getOrCreateFileSystemFor(uri, DataConfig.asMap()).provider().getPath(uri) + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy new file mode 100644 index 0000000000..18d98f9747 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy @@ -0,0 +1,33 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Models a data path which includes the path and a checksum to validate the content of the path. 
+ * + * @author Jorge Ejarque */ enum DataType { - TaskRun, Workflow, WorkflowRun, TaskOutput, WorkflowOutput + TaskRun, Workflow, WorkflowRun, TaskOutput, WorkflowOutput, WorkflowResults } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy new file mode 100644 index 0000000000..11cbe4ee9d --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy @@ -0,0 +1,34 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Model Workflow and Task Parameters. 
+ * + * @author Jorge Ejarque */ @@ -30,13 +31,13 @@ class TaskRun { DataType type String sessionId String name - String source - List inputs + String code + List inputs String container String conda String spack String architecture Map globalVars - List binEntries + List binEntries List annotations } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy index 643af9ec7e..c4b8824db4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy @@ -22,6 +22,7 @@ import groovy.transform.CompileStatic /** + * Models a workflow definition. * * @author Jorge Ejarque otherScriptFiles + DataPath mainScriptFile + List otherScriptFiles String repository String commitId } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy new file mode 100644 index 0000000000..23d6ad179b --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy @@ -0,0 +1,34 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Models the results of a workflow execution. + * + * @author Jorge Ejarque params } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy index 7bd5512480..64564b3e96 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy @@ -32,12 +32,16 @@ class DataConfig { final DataStoreOpts store DataConfig(Map opts) { - this.store = new DataStoreOpts(opts.store as Map ?: Map.of()) + this.store = new DataStoreOpts(opts.store as Map ?: [:]) + } + + static Map asMap() { + session ? (Map)session.config.navigate('workflow.data') : [:] } static DataConfig create(Session session) { if( session ) { - return new DataConfig(session.config.navigate('cid') as Map ?: Map.of()) + return new DataConfig(session.config.navigate('workflow.data') as Map ?: [:]) } else throw new IllegalStateException("Missing Nextflow session") diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index fce784b543..6d0335f9be 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -146,19 +146,7 @@ class PublishDir { final resolved = value instanceof Closure ? value.call() : value if( resolved instanceof String || resolved instanceof GString ) nullPathWarn = checkNull(resolved.toString()) - if( session?.cidEnabled ){ - final resolvedPath = FileHelper.toPath(resolved) - if (resolvedPath.isAbsolute()){ - log.warn("CID store is enabled but 'publishDir' is set to an absolute path ($resolvedPath). 
Outputs in this path will not published in the CID store") - this.path = FileHelper.toCanonicalPath(resolved) - } - else{ - this.path = session.outputDir.resolve(resolvedPath) - } - } - else { - this.path = FileHelper.toCanonicalPath(resolved) - } + this.path = FileHelper.toCanonicalPath(resolved) } void setMode( String str ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy index 1ac700e44f..498d50f41f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy @@ -284,8 +284,7 @@ class ScriptRunner { } def revisionId = scriptFile.commitId ?: scriptFile.scriptId - def executionHash = session.executionHash ?: '-' - HistoryFile.DEFAULT.write( name, session.uniqueId, revisionId, executionHash, cli ) + HistoryFile.DEFAULT.write( name, session.uniqueId, revisionId, cli ) } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 97ccf8a6ff..dd57c4168d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -33,7 +33,7 @@ class DefaultObserverFactory implements TraceObserverFactory { } protected void createCidObserver(Collection result) { - result.add( new CidObserver() ) + result.add( new CidObserver(this.session) ) } protected void createAnsiLogObserver(Collection result) { diff --git a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy index 92a8e93cbe..8e3b8cb73c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy @@ -33,7 +33,7 @@ import 
nextflow.exception.AbortOperationException * @author Paolo Di Tommaso */ @Slf4j -class HistoryFile extends File { +class HistoryFile extends WithLockFile { static String defaultFileName() { Const.appCacheDir.resolve('history').toString() } @@ -61,14 +61,14 @@ class HistoryFile extends File { super(file.toString()) } - void write( String name, UUID key, String revisionId, String cidHash, args, Date date = null ) { + void write( String name, UUID key, String revisionId, args, Date date = null ) { assert key assert args != null withFileLock { def timestamp = date ?: new Date() def value = args instanceof Collection ? args.join(' ') : args - this << new Record(timestamp: timestamp, runName: name, revisionId: revisionId, sessionId: key, cidHash: cidHash, command: value).toString() << '\n' + this << new Record(timestamp: timestamp, runName: name, revisionId: revisionId, sessionId: key, command: value).toString() << '\n' } } @@ -350,41 +350,6 @@ class HistoryFile extends File { } - void updateCidHash(String name, String hashCode) { - assert name - assert hashCode - try { - withFileLock {updateCidHash0(name, hashCode) } - } - catch( Throwable e ) { - log.warn "Can't update history file: $this",e - } - } - - private void updateCidHash0(String name, String hashCode){ - def newHistory = new StringBuilder() - - this.readLines().each { line -> - try { - def current = line ? 
Record.parse(line) : null - if( current?.runName == name ) { - current.cidHash = hashCode - newHistory << current.toString() << '\n' - } - else { - newHistory << line << '\n' - } - } - catch( IllegalArgumentException e ) { - log.warn("Can't read history file: $this", e) - } - } - - // rewrite the history content - this.setText(newHistory.toString()) - } - - @EqualsAndHashCode(includes = 'runName,sessionId') static class Record { Date timestamp @@ -393,7 +358,6 @@ class HistoryFile extends File { String status String revisionId UUID sessionId - String cidHash String command Record(String sessionId, String name=null) { @@ -416,7 +380,6 @@ class HistoryFile extends File { line << (status ?: '-') line << (revisionId ?: '-') line << (sessionId.toString()) - line << (cidHash ?: '-') line << (command ?: '-') } @@ -430,7 +393,7 @@ class HistoryFile extends File { if( cols.size() == 2 ) return new Record(cols[0]) - if( cols.size()== 8 ) { + if( cols.size()==7 ) { return new Record( timestamp: TIMESTAMP_FMT.parse(cols[0]), @@ -439,8 +402,7 @@ class HistoryFile extends File { status: cols[3] && cols[3] != '-' ? 
cols[3] : null, revisionId: cols[4], sessionId: UUID.fromString(cols[5]), - cidHash: cols[6], - command: cols[7] + command: cols[6] ) } @@ -448,52 +410,7 @@ class HistoryFile extends File { } } - /** - * Apply the given action by using a file lock - * - * @param action The closure implementing the action to be executed with a file lock - * @return The value returned by the action closure - */ - private withFileLock(Closure action) { - - def rnd = new Random() - long ts = System.currentTimeMillis() - String parent = this.parent ?: new File('.').absolutePath - def file = new File(parent, "${this.name}.lock".toString()) - def fos = new FileOutputStream(file) - try { - Throwable error - FileLock lock = null - try { - while( true ) { - lock = fos.getChannel().tryLock() - if( lock ) break - if( System.currentTimeMillis() - ts < 1_000 ) - sleep rnd.nextInt(75) - else { - error = new IllegalStateException("Can't lock file: ${this.absolutePath} -- Nextflow needs to run in a file system that supports file locks") - break - } - } - if( lock ) { - return action.call() - } - } - catch( Exception e ) { - return action.call() - } - finally { - if( lock?.isValid() ) lock.release() - } - - if( error ) throw error - } - finally { - fos.closeQuietly() - file.delete() - } - } Set findAllRunNames() { findAll().findResults{ it.runName } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy b/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy new file mode 100644 index 0000000000..20f6553bb6 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy @@ -0,0 +1,78 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.util
+
+import java.nio.channels.FileLock
+
+/**
+ * File with a file lock.
+ *
+ * @author Jorge Ejarque
+ */
+class WithLockFile extends File {
+
+    WithLockFile(String filepath){
+        super(filepath)
+    }
+
+    /**
+     * Apply the given action while holding a lock on the companion file (this file's name plus `.lock`)
+     * If trying to take the lock raises an exception, the action is still run once, without the lock
+     *
+     * @param action The closure implementing the action to be executed with a file lock
+     * @return The value returned by the action closure
+     * @throws IllegalStateException when the lock is still unavailable after about one second of retries
+     */
+    protected withFileLock(Closure action) {
+        def rnd = new Random()
+        long ts = System.currentTimeMillis()
+        String parent = this.parent ?: new File('.').absolutePath
+        def file = new File(parent, "${this.name}.lock".toString())
+        def fos = new FileOutputStream(file)
+        try {
+            FileLock lock = null
+            boolean lockFailed = false
+            try {
+                while( true ) {
+                    lock = fos.getChannel().tryLock()
+                    if( lock ) break
+                    // stop retrying once a second has elapsed; `lock` stays null
+                    if( System.currentTimeMillis() - ts >= 1_000 ) break
+                    sleep rnd.nextInt(75)
+                }
+            }
+            catch( Exception e ) {
+                // locking itself failed: best effort, run the action without a lock
+                lockFailed = true
+            }
+            try {
+                // the action runs outside the catch above, so an exception it throws
+                // propagates to the caller instead of triggering a second execution
+                if( lock || lockFailed )
+                    return action.call()
+            }
+            finally {
+                if( lock?.isValid() ) lock.release()
+            }
+            throw new IllegalStateException("Can't lock file: ${this.absolutePath} -- Nextflow needs to run in a file system that supports file locks")
+        }
+        finally {
+            fos.closeQuietly()
+            file.delete()
+        }
+    }
+}
diff --git a/modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider b/modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider
new file mode 100644
index 0000000000..ba80b4b30a
--- 
/dev/null +++ b/modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider @@ -0,0 +1,17 @@ +# +# Copyright 2013-2024, Seqera Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +nextflow.data.cid.fs.CidFileSystemProvider diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy new file mode 100644 index 0000000000..774a5cd63a --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -0,0 +1,258 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.cli + +import groovy.json.JsonOutput + +import java.nio.file.Files + +import nextflow.data.cid.CidHistoryFile +import nextflow.plugin.Plugins + +import org.junit.Rule +import spock.lang.Specification +import test.OutputCapture + +/** + * CLI cid Tests + * + * @author Jorge Ejarque + */ +class CmdCidTest extends Specification { + + def cleanup() { /* Spock fixture method: stops the plugin system */ + Plugins.stop() + } + /* + * Read more http://mrhaki.blogspot.com.es/2015/02/spocklight-capture-and-assert-system.html + */ + @Rule + OutputCapture capture = new OutputCapture() /* its toString() is what every feature below reads as the command output */ + + def 'should print executions cids' (){ /* `cid log` must print the record found in the store's .meta/.history file; after dropping DEBUG/INFO/plugin lines two lines are expected and the second one is the record */ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def historyFile = folder.resolve(".meta/.history") + Files.createDirectories(historyFile.parent) + def uniqueId = UUID.randomUUID() + def date = new Date(); + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def recordEntry = "${CidHistoryFile.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://1234".toString() + historyFile.text = recordEntry + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 2 + stdout[1] == recordEntry + + cleanup: + folder?.deleteDir() + } + + def 'should print no history' (){ /* `cid log` on a store where no history file was written prints only the "No workflow runs CIDs found." message */ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def historyFile = folder.resolve(".meta/.history") + Files.createDirectories(historyFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "No workflow runs CIDs found." + + cleanup: + folder?.deleteDir() + } + + def 'should show cid content' (){ /* `cid show cid://12345` must print the JSON held in .meta/12345/.data.json line for line */ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def cidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + + def recordEntry = JsonOutput.prettyPrint('{"type":"WorkflowOutput",' + + '"path":"/path/to/file",' + + '"checksum":"45372qe",' + + '"source":"cid://123987/file.bam",' + + '"size": 1234,' + + '"createdAt": 123456789,' + + '"modifiedAt": 123456789,' + + '"annotations":null}') + cidFile.text = recordEntry + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid://12345"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? 
line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == recordEntry.readLines().size() + stdout.join('\n') == recordEntry + + cleanup: + folder?.deleteDir() + } + + def 'should warn if no cid content' (){ /* `cid show` for a CID that has no .data.json in the store prints the single line "Error loading cid://12345." */ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid://12345"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "Error loading cid://12345." 
+ + cleanup: + folder?.deleteDir() + } + + def 'should get lineage cid content' (){ /* stores five linked records (a workflow output, two task outputs and two task runs) and checks that `cid lineage` writes the HTML template with REPLACE_WITH_NETWORK_DATA replaced by the expected flowchart text */ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + def outputHtml = folder.resolve('lineage.html') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def cidFile = folder.resolve(".meta/12345/file.bam/.data.json") + def cidFile2 = folder.resolve(".meta/123987/file.bam/.data.json") + def cidFile3 = folder.resolve(".meta/123987/.data.json") + def cidFile4 = folder.resolve(".meta/45678/output.txt/.data.json") + def cidFile5 = folder.resolve(".meta/45678/.data.json") + Files.createDirectories(cidFile.parent) + Files.createDirectories(cidFile2.parent) + Files.createDirectories(cidFile3.parent) + Files.createDirectories(cidFile4.parent) + Files.createDirectories(cidFile5.parent) + + def recordEntry = JsonOutput.prettyPrint('{"type":"WorkflowOutput",' + + '"path":"/path/to/file","checksum":"45372qe","source":"cid://123987/file.bam",' + + '"size": 1234,"createdAt": 123456789, "modifiedAt": 123456789,"annotations":null}') + cidFile.text = recordEntry + recordEntry = JsonOutput.prettyPrint('{"type":"TaskOutput",' + + '"path":"/path/to/file","checksum":"45372qe","source":"cid://123987",' + + '"size": 1234,"createdAt": 123456789,"modifiedAt": 123456789,"annotations":null}') + cidFile2.text = recordEntry + recordEntry = JsonOutput.prettyPrint('{"type":"TaskRun",' + + '"sessionId":"u345-2346-1stw2", "name":"foo","code":"abcde2345",' + + '"inputs": [{"type": "ValueInParam","name": "sample_id","value": "ggal_gut"},' + + '{"type": "FileInParam","name": "reads","value": ["cid://45678/output.txt"]}],' + + '"container": null,"conda": null,"spack": null,"architecture": null,' + + '"globalVars": {},"binEntries": [],"annotations":null}') + cidFile3.text = recordEntry + recordEntry = 
JsonOutput.prettyPrint('{"type":"TaskOutput",' + + '"path":"/path/to/file","checksum":"45372qe","source":"cid://45678",' + + '"size": 1234,"createdAt": 123456789,"modifiedAt": 123456789,"annotations":null}') + cidFile4.text = recordEntry + recordEntry = JsonOutput.prettyPrint('{"type":"TaskRun",' + + '"sessionId":"u345-2346-1stw2", "name":"bar","code":"abfs2556",' + + '"inputs": null,"container": null,"conda": null,"spack": null,"architecture": null,' + + '"globalVars": {},"binEntries": [],"annotations":null}') + cidFile5.text = recordEntry + final network = """flowchart BT + cid://12345/file.bam@{shape: document, label: "cid://12345/file.bam"} + cid://123987/file.bam@{shape: document, label: "cid://123987/file.bam"} + cid://123987@{shape: process, label: "foo"} + ggal_gut@{shape: document, label: "ggal_gut"} + cid://45678/output.txt@{shape: document, label: "cid://45678/output.txt"} + cid://45678@{shape: process, label: "bar"} + + cid://123987/file.bam -->cid://12345/file.bam + cid://123987 -->cid://123987/file.bam + ggal_gut -->cid://123987 + cid://45678/output.txt -->cid://123987 + cid://45678 -->cid://45678/output.txt +""" + final template = CmdCid.CmdLineage.readTemplate() + def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) + + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["lineage", "cid://12345/file.bam", outputHtml.toString()]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 1 + stdout[0] == "Linage graph for cid://12345/file.bam rendered in ${outputHtml}" + outputHtml.exists() + outputHtml.text == expectedOutput + + + cleanup: + folder?.deleteDir() + } + + + + + + +} diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy index 56aced51b9..8deff84359 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy @@ -97,7 +97,7 @@ class CmdLogTest extends Specification { cache.close() def history = new HistoryFile(folder.resolve(HistoryFile.defaultFileName())) - history.write(runName,uuid,'b3d3aca8eb','-','run') + history.write(runName,uuid,'b3d3aca8eb','run') when: def log = new CmdLog(basePath: folder, args: [runName]) @@ -167,7 +167,7 @@ class CmdLogTest extends Specification { cache.close() def history = new HistoryFile(folder.resolve(HistoryFile.defaultFileName())) - history.write(runName,uuid,'b3d3aca8eb','-','run') + history.write(runName,uuid,'b3d3aca8eb','run') when: diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy new file mode 100644 index 0000000000..2b3412466d --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy @@ -0,0 +1,158 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package nextflow.data.cid + +import spock.lang.Specification +import spock.lang.TempDir + +import java.nio.file.Files +import java.nio.file.Path + +/** + * CID History file tests + * + * @author Jorge Ejarque + */ +class CidHistoryFileTest extends Specification { + + @TempDir + Path tempDir + + Path historyFile + CidHistoryFile cidHistoryFile + + def setup() { /* Spock fixture method: creates an empty history file under tempDir and wraps it in a CidHistoryFile */ + historyFile = tempDir.resolve("cid-history.txt") + Files.createFile(historyFile) + cidHistoryFile = new CidHistoryFile(historyFile) + } + + def "write should append a new record to the file"() { /* one write() must leave exactly one line that parses back to the same session id, run name and run CID */ + given: + UUID sessionId = UUID.randomUUID() + String runName = "TestRun" + String runCid = "cid://123" + + when: + cidHistoryFile.write(runName, sessionId, runCid) + + then: + def lines = Files.readAllLines(historyFile) + lines.size() == 1 + def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) + parsedRecord.sessionId == sessionId + parsedRecord.runName == runName + parsedRecord.runCid == runCid + } + + def "getRunCid should return correct runCid for existing session"() { /* after write(), getRunCid(sessionId) returns the CID that was written */ + given: + UUID sessionId = UUID.randomUUID() + String runName = "Run1" + String runCid = "cid://123" + + and: + cidHistoryFile.write(runName, sessionId, runCid) + + expect: + cidHistoryFile.getRunCid(sessionId) == runCid + } + + def "getRunCid should return null if session does not exist"() { /* a session id that was never written yields null */ + expect: + cidHistoryFile.getRunCid(UUID.randomUUID()) == null + } + + def "update should modify existing runCid for given session"() { /* update() replaces the run CID of the matching session and the file still holds a single line */ + given: + UUID sessionId = UUID.randomUUID() + String runName = "Run1" + String initialCid = "cid-abc" + String updatedCid = "cid-updated" + + and: + cidHistoryFile.write(runName, sessionId, initialCid) + + when: + cidHistoryFile.update(sessionId, updatedCid) + + then: + def lines = Files.readAllLines(historyFile) + lines.size() == 1 + def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) + 
parsedRecord.runCid == updatedCid + } + + def "update should do nothing if session does not exist"() { /* update() with a session id that was never written leaves the existing record's run CID unchanged */ + given: + UUID existingSessionId = UUID.randomUUID() + UUID nonExistingSessionId = UUID.randomUUID() + String runName = "Run1" + String runCid = "cid://123" + + and: + cidHistoryFile.write(runName, existingSessionId, runCid) + + when: + cidHistoryFile.update(nonExistingSessionId, "new-cid") + + then: + def lines = Files.readAllLines(historyFile) + lines.size() == 1 + def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) + parsedRecord.runCid == runCid + } + + def "CidRecord parse should throw for invalid record"() { /* a line without the expected columns makes parse() throw IllegalArgumentException */ + when: + CidHistoryFile.CidRecord.parse("invalid-record") + + then: + thrown(IllegalArgumentException) + } + + def "CidRecord parse should handle 4-column record"() { /* a tab-separated line of timestamp, run name, session id and run CID is parsed into the matching fields */ + given: + def timestamp = new Date() + def formattedTimestamp = CidHistoryFile.TIMESTAMP_FMT.format(timestamp) + def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tcid://123" + + when: + def record = CidHistoryFile.CidRecord.parse(line) + + then: + record.timestamp != null + record.runName == "run-1" + record.runCid == "cid://123" + } + + def "CidRecord toString should produce tab-separated format"() { /* toString() must emit four tab-separated columns */ + given: + UUID sessionId = UUID.randomUUID() + def record = new CidHistoryFile.CidRecord(sessionId, "TestRun") + record.timestamp = new Date() + record.runCid = "cid://123" + + when: + def line = record.toString() + + then: + line.contains("\t") + line.split("\t").size() == 4 + } +} + diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index a5c1a3c426..fee5957b76 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -19,41 +19,85 @@ package nextflow.data.cid import groovy.json.JsonOutput import nextflow.data.config.DataConfig +import 
nextflow.processor.TaskConfig import nextflow.processor.TaskProcessor +import nextflow.script.ScriptBinding +import nextflow.script.WorkflowMetadata import nextflow.util.CacheHelper import nextflow.util.PathNormalizer import java.nio.file.Files import java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes -import java.nio.file.attribute.FileTime -import java.time.Instant import com.google.common.hash.HashCode import nextflow.Session import nextflow.processor.TaskId import nextflow.processor.TaskRun import spock.lang.Specification + +import static nextflow.data.cid.fs.CidPath.CID_PROT + /** * * @author Paolo Di Tommaso */ class CidObserverTest extends Specification { + def 'should save workflow' (){ + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + def store = new DefaultCidStore(); + def uniqueId = UUID.randomUUID() + def scriptFile = folder.resolve("main.nf") + def metadata = Mock(WorkflowMetadata){ + getRepository() >> "https://nextflow.io/nf-test/" + getCommitId() >> "123456" + getScriptId() >> "78910" + getScriptFile() >> scriptFile + getProjectDir() >> folder.resolve("projectDir") + getWorkDir() >> folder.resolve("workDir") + } + def session = Mock(Session) { + getConfig() >> config + getCidStore() >> store + getUniqueId() >> uniqueId + getRunName() >> "test_run" + getWorkflowMetadata() >> metadata + getParams() >> new ScriptBinding.ParamsMap() + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + def expectedString = '{"type":"WorkflowRun","workflow":{"type": "Workflow",' + + '"mainScriptFile":{"path":"file://' + scriptFile.toString() + '", "checksum": "78910"},' + + '"otherScriptFiles": [], "repository": "https://nextflow.io/nf-test/",' + + '"commitId": "123456" },' + + '"sessionId": "' + uniqueId + '",' + + '"name": "test_run", "params": []}' + when: + observer.onFlowBegin() + then: + 
folder.resolve(".meta/${observer.executionHash}/.data.json").text == JsonOutput.prettyPrint(expectedString) + + cleanup: + folder?.deleteDir() + } + def 'should save task run' () { given: def folder = Files.createTempDirectory('test') - def config = [cid:[store:[location:folder.toString()]]] + def config = [workflow:[data:[store:[location:folder.toString()]]]] def store = new DefaultCidStore(); def uniqueId = UUID.randomUUID() def session = Mock(Session) { getConfig()>>config getCidStore()>>store getUniqueId()>>uniqueId + getRunName()>>"test_run" } store.open(DataConfig.create(session)) - def observer = new CidObserver() - observer.onFlowCreate(session) + def observer = new CidObserver(session) and: def hash = HashCode.fromInt(123456789) and: @@ -68,13 +112,14 @@ class CidObserverTest extends Specification { getProcessor() >> processor getSource() >> 'echo task source' } + def sourceHash =CacheHelper.hasher('echo task source').hash().toString() def normalizer = Mock(PathNormalizer.class) { normalizePath( _ as Path) >> {Path p -> p?.toString()} normalizePath( _ as String) >> {String p -> p} } def expectedString = '{"type":"TaskRun",' + '"sessionId":"'+uniqueId.toString() + '",' + - '"name":"foo","source":"echo task source",' + + '"name":"foo","code":"' + sourceHash + '",' + '"inputs": null,"container": null,"conda": null,' + '"spack": null,"architecture": null,' + '"globalVars": {},"binEntries": [],"annotations":null}' @@ -90,15 +135,14 @@ class CidObserverTest extends Specification { def 'should save task output' () { given: def folder = Files.createTempDirectory('test') - def config = [cid:[store:[location:folder.toString()]]] + def config = [workflow:[data:[store:[location:folder.toString()]]]] def store = new DefaultCidStore(); def session = Mock(Session) { getConfig()>>config getCidStore()>>store } store.open(DataConfig.create(session)) - def observer = Spy(new CidObserver()) - observer.onFlowCreate(session) + def observer = Spy(new CidObserver(session)) and: 
def workDir = folder.resolve('12/34567890') Files.createDirectories(workDir) @@ -140,4 +184,205 @@ class CidObserverTest extends Specification { folder?.deleteDir() } + def 'should relativise task output dirs' (){ + when: + def config = [workflow:[data:[store:[location:'cid']]]] + def store = new DefaultCidStore(); + def session = Mock(Session) { + getConfig()>>config + getCidStore()>>store + } + def hash = HashCode.fromInt(123456789) + def taskConfig = Mock(TaskConfig){ + getStoreDir() >> STORE_DIR + } + def task = Mock(TaskRun) { + getId() >> TaskId.of(100) + getName() >> 'foo' + getHash() >> hash + getWorkDir() >> WORK_DIR + getConfig() >> taskConfig + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + then: + observer.getTaskRelative(task, PATH) == EXPECTED + where: + WORK_DIR | STORE_DIR | PATH | EXPECTED + Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/path/to/work/12/3456789/relative') | "relative" + Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/path/to/storeDir/relative') | "relative" + Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('work/12/3456789/relative') | "relative" + Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('storeDir/relative') | "relative" + Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('results/relative') | "results/relative" + Path.of('/path/to/work/12/3456789') | Path.of('storeDir') | Path.of('./relative') | "relative" + } + + def 'should return exception when relativize task output dirs' (){ + when: + def config = [workflow:[data:[store:[location:'cid']]]] + def store = new DefaultCidStore(); + def session = Mock(Session) { + getConfig()>>config + getCidStore()>>store + } + def hash = HashCode.fromInt(123456789) + def taskConfig = Mock(TaskConfig){ + getStoreDir() >> STORE_DIR + } + def task = Mock(TaskRun) { + getId() >> TaskId.of(100) + getName() >> 'foo' + getHash() >> hash + getWorkDir() >> WORK_DIR 
+ getConfig() >> taskConfig + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + observer.getTaskRelative(task, PATH) + then: + def e = thrown(Exception) + e.message == "Cannot asses the relative path for output $PATH of ${task.name}".toString() + + where: + WORK_DIR | STORE_DIR | PATH + Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/another/path/relative') + Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('../path/to/storeDir/relative') + } + + def 'should relativise workflow output dirs' (){ + when: + def config = [workflow:[data:[store:[location:'cid']]]] + def store = new DefaultCidStore(); + def session = Mock(Session) { + getOutputDir()>>OUTPUT_DIR + getConfig()>>config + getCidStore()>>store + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + then: + observer.getWorkflowRelative(PATH) == EXPECTED + where: + OUTPUT_DIR | PATH | EXPECTED + Path.of('/path/to/outDir') | Path.of('/path/to/outDir/relative') | "relative" + Path.of('outDir') | Path.of('outDir/relative') | "relative" + Path.of('/path/to/outDir') | Path.of('results/relative') | "results/relative" + Path.of('/path/to/outDir') | Path.of('./relative') | "relative" + + + } + + def 'should return exception when relativise workflow output dirs' (){ + when: + def config = [workflow:[data:[store:[location:'cid']]]] + def store = new DefaultCidStore(); + def session = Mock(Session) { + getOutputDir()>>OUTPUT_DIR + getConfig()>>config + getCidStore()>>store + } + def observer = new CidObserver(session) + observer.getWorkflowRelative(PATH) + then: + def e = thrown(Exception) + e.message == "Cannot asses the relative path for workflow output $PATH" + where: + OUTPUT_DIR | PATH | EXPECTED + Path.of('/path/to/outDir') | Path.of('/another/path/') | "relative" + Path.of('/path/to/outDir') | Path.of('../relative') | "relative" + + + } + + def 'should save workflow output' (){ + given: + 
def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + def store = new DefaultCidStore(); + def outputDir = folder.resolve('results') + def uniqueId = UUID.randomUUID() + def scriptFile = folder.resolve("main.nf") + def workDir= folder.resolve("work") + def metadata = Mock(WorkflowMetadata){ + getRepository() >> "https://nextflow.io/nf-test/" + getCommitId() >> "123456" + getScriptId() >> "78910" + getScriptFile() >> scriptFile + getProjectDir() >> folder.resolve("projectDir") + getWorkDir() >> workDir + } + def session = Mock(Session) { + getConfig()>>config + getCidStore()>>store + getOutputDir()>>outputDir + getWorkDir() >> workDir + getWorkflowMetadata()>>metadata + getUniqueId()>>uniqueId + getRunName()>>"test_run" + getParams() >> new ScriptBinding.ParamsMap() + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + + when: 'Starting workflow' + observer.onFlowCreate(session) + observer.onFlowBegin() + then: 'History file should contain execution hash' + def cid = store.getHistoryFile().getRunCid(uniqueId).substring(CID_PROT.size()) + cid == observer.executionHash + + when: ' publish output with source file' + def outFile1 = outputDir.resolve('foo/file.bam') + Files.createDirectories(outFile1.parent) + outFile1.text = 'some data1' + def sourceFile1 = workDir.resolve('12/3987/file.bam') + Files.createDirectories(sourceFile1.parent) + sourceFile1.text = 'some data1' + observer.onFilePublish(outFile1, sourceFile1) + then: 'check file 1 output metadata in cid store' + def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) + def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() + def expectedString1 = '{"type":"WorkflowOutput",' + + '"path":"' + outFile1.toString() + '",' + + '"checksum":"'+ fileHash1 + '",' + + '"source":"cid://123987/file.bam",' + + '"size":'+attrs1.size() + ',' + + '"createdAt":' + attrs1.creationTime().toMillis() + ',' + + 
'"modifiedAt":'+ attrs1.lastModifiedTime().toMillis() + ',' + + '"annotations":null}' + folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == JsonOutput.prettyPrint(expectedString1) + + when: 'publish without source path' + def outFile2 = outputDir.resolve('foo/file2.bam') + Files.createDirectories(outFile2.parent) + outFile2.text = 'some data2' + def attrs2 = Files.readAttributes(outFile2, BasicFileAttributes) + def fileHash2 = CacheHelper.hasher(outFile2).hash().toString() + observer.onFilePublish(outFile2) + then: 'Check outFile2 metadata in cid store' + def expectedString2 = '{"type":"WorkflowOutput",' + + '"path":"' + outFile2.toString() + '",' + + '"checksum":"'+ fileHash2 + '",' + + '"source":"cid://' + observer.executionHash +'",' + + '"size":'+attrs2.size() + ',' + + '"createdAt":' + attrs2.creationTime().toMillis() + ',' + + '"modifiedAt":'+ attrs2.lastModifiedTime().toMillis() + ',' + + '"annotations":null}' + folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == JsonOutput.prettyPrint(expectedString2) + + when: 'Workflow complete' + observer.onFlowComplete() + then: 'Check history file is updated and Workflow Result is written in the cid store' + def expectedString3 = '{"type":"WorkflowResults",' + + '"run":"cid://' + observer.executionHash +'",' + + '"outputs": [ "cid://'+ observer.executionHash + '/foo/file.bam",' + + '"cid://'+ observer.executionHash + '/foo/file2.bam" ]}' + def finalCid = store.getHistoryFile().getRunCid(uniqueId).substring(CID_PROT.size()) + finalCid != observer.executionHash + folder.resolve(".meta/${finalCid}/.data.json").text == JsonOutput.prettyPrint(expectedString3) + + cleanup: + folder?.deleteDir() + } + } diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy new file mode 100644 index 0000000000..72979f580b --- /dev/null +++ 
b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy @@ -0,0 +1,372 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import spock.lang.Shared + +import java.nio.ByteBuffer +import java.nio.file.FileSystemNotFoundException +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.ProviderMismatchException +import java.nio.file.StandardOpenOption +import java.nio.file.attribute.BasicFileAttributes + +import nextflow.Global +import nextflow.Session +import spock.lang.Specification + +/** + * CID File system provider tests + * @author Jorge Ejarque + */ +class CidFileSystemProviderTest extends Specification { + + @Shared def wdir = Files.createTempDirectory('wdir') + @Shared def meta = wdir.resolve('.meta') + @Shared def data = wdir.resolve('work') + + def setupSpec(){ + meta.mkdirs() + data.mkdirs() + } + + def cleanupSpec(){ + wdir.deleteDir() + } + + def 'should return cid scheme' () { + given: + def provider = new CidFileSystemProvider() + expect: + provider.getScheme() == 'cid' + } + + def 'should get cid path' () { + given: + def cid = Mock(CidPath) + and: + def provider = new CidFileSystemProvider() + expect: + provider.toCidPath(cid) == cid + + when: + provider.toCidPath(Path.of('foo')) + then: + thrown(ProviderMismatchException) + } + + def 'should create new file system' () { + given: + def provider = new 
CidFileSystemProvider() + def config = [store:[location:'/data']] + def cid = CidPath.asUri('cid://12345') + when: + def fs = provider.newFileSystem(cid, config) as CidFileSystem + then: + fs.basePath == Path.of('/data/.meta') + } + + def 'should get a file system' () { + given: + def provider = new CidFileSystemProvider() + def config = [store:[location:'/data']] + def uri = CidPath.asUri('cid://12345') + when: + provider.getFileSystem(uri) + then: + thrown(FileSystemNotFoundException) + + when: + provider.newFileSystem(uri, config) as CidFileSystem + and: + def result = provider.getFileSystem(uri) as CidFileSystem + then: + result.basePath == Path.of('/data/.meta') + } + + def 'should get or create a file system' () { + given: + def config = [workflow:[data:[store:[location:'/this/that']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def uri = CidPath.asUri('cid://12345') + def provider = new CidFileSystemProvider() + + when: + def fs = provider.getFileSystemOrCreate(uri) as CidFileSystem + then: + fs.basePath == Path.of('/this/that/.meta') + + when: + def fs2 = provider.getFileSystemOrCreate(uri) as CidFileSystem + then: + fs2.is(fs) + } + + def 'should get a path' () { + given: + def config = [workflow:[data:[store:[location:'/data']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def uri1 = CidPath.asUri('cid://12345') + def uri2 = CidPath.asUri('cid://12345/foo/bar') + + when: + def cid1 = provider.getPath(uri1) + then: + cid1.getTargetPath() == Path.of('/data/.meta/12345') + + when: + def cid2 = provider.getPath(uri2) + then: + cid2.getTargetPath() == Path.of('/data/.meta/12345/foo/bar') + } + + def 'should create new byte channel' () { + given: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def outputMeta = meta.resolve("12345/output.txt") + def output = data.resolve("output.txt") + output.text = "Hello, World!" 
+ outputMeta.mkdirs() + outputMeta.resolve(".data.json").text = '{"type":"WorkflowOutput","path":"'+output.toString()+'"}' + + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345/output.txt')) + def opts = Set.of(StandardOpenOption.READ) + when: + def channel = provider.newByteChannel(cid, opts) + and: + def buffer = ByteBuffer.allocate(1000); + def read = channel.read(buffer) + channel.close() + def bytes = new byte[read] + buffer.get(0,bytes) + then: + bytes == "Hello, World!".getBytes() + + cleanup: + outputMeta.deleteDir() + output.delete() + } + + def 'should read cid' () { + given: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def outputMeta = meta.resolve("12345/output.txt") + def output = data.resolve("output.txt") + output.text = "Hello, World!" + outputMeta.mkdirs() + outputMeta.resolve(".data.json").text = '{"type":"WorkflowOutput","path":"'+output.toString()+'"}' + + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345/output.txt')) + def opts = Set.of(StandardOpenOption.READ) + + expect: + cid.text == "Hello, World!" 
+ + cleanup: + outputMeta.deleteDir() + output.delete() + } + + def 'should not create a directory' () { + given: + def config = [workflow:[data:[store:[location:'test']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345')) + + when: + provider.createDirectory(cid) + then: + thrown(UnsupportedOperationException) + + } + + def 'should create directory stream' () { + given: + def output1 = data.resolve('path') + output1.mkdir() + output1.resolve('file1.txt').text = 'file1' + output1.resolve('file2.txt').text = 'file2' + output1.resolve('file3.txt').text = 'file3' + meta.resolve('12345/output1').mkdirs() + meta.resolve('12345/output2').mkdirs() + meta.resolve('12345/.data.json').text = '{"type":"TaskRun"}' + meta.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + output1.toString() + '"}' + + and: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345/output1')) + def cid2 = provider.getPath(CidPath.asUri('cid://12345')) + + expect: + Files.exists(cid) + Files.exists(cid.resolve('file1.txt')) + Files.exists(cid.resolve('file2.txt')) + Files.exists(cid.resolve('file3.txt')) + + when: + def stream = provider.newDirectoryStream(cid2, (p) -> true) + and: + def result = stream.toList() + then: + result.toSet() == [ + cid2.resolve('output1'), + cid2.resolve('output2'), + ] as Set + + when: + def stream2 = provider.newDirectoryStream(cid, (p) -> true) + and: + def result2 = stream2.toList() + then: + result2.toSet() == [ + cid.resolve('file1.txt'), + cid.resolve('file2.txt'), + cid.resolve('file3.txt') + ] as Set + + } + + def 'should not delete a file' () { + given: + def config = [workflow:[data:[store:[location:'test']]]] + Global.session = Mock(Session) 
{ getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345')) + + when: + provider.delete(cid) + then: + thrown(UnsupportedOperationException) + + } + + def 'should not copy a file' () { + given: + def config = [workflow:[data:[store:[location:'test']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://54321/foo')) + + when: + provider.copy(cid1, cid2) + then: + thrown(UnsupportedOperationException) + } + + def 'should not move a file' () { + given: + def config = [workflow:[data:[store:[location:'test']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://54321/foo')) + + when: + provider.move(cid1, cid2) + then: + thrown(UnsupportedOperationException) + } + + def 'should check is same file' () { + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://54321/foo')) + def cid3 = provider.getPath(CidPath.asUri('cid://54321/foo')) + + expect: + !provider.isSameFile(cid1, cid2) + !provider.isSameFile(cid1, cid3) + and: + provider.isSameFile(cid2, cid3) + + cleanup: + folder?.deleteDir() + } + + def 'should check is hidden file' () { + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() 
+ def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://54321/.foo')) + + expect: + !provider.isHidden(cid1) + provider.isHidden(cid2) + + cleanup: + folder?.deleteDir() + } + + def 'should read file attributes' () { + given: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def file = data.resolve('abc') + file.text = 'Hello' + meta.resolve('12345/abc').mkdirs() + meta.resolve('12345/abc/.data.json').text = '{"type":"TaskOutput", "path": "' + file.toString() + '"}' + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + + when: + def attr1 = provider.readAttributes(cid1, BasicFileAttributes) + def real1= Files.readAttributes(file,BasicFileAttributes) + then: + !attr1.directory + attr1.isRegularFile() + attr1.size() == real1.size() + attr1.creationTime() == real1.creationTime() + attr1.lastModifiedTime() == real1.lastModifiedTime() + attr1.lastAccessTime() == real1.lastAccessTime() + + cleanup: + file?.delete() + meta.resolve('12345').deleteDir() + } + +} + diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy new file mode 100644 index 0000000000..fc2592d170 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy @@ -0,0 +1,280 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import java.nio.file.Files +import java.nio.file.Path + +import spock.lang.Shared +import spock.lang.Specification +import spock.lang.Unroll + +/** + * CID Path Tests + * @author Jorge Ejarque + */ +class CidPathTest extends Specification { + + @Shared def BASE = Path.of('/some/base/data') + @Shared def fs = Mock(CidFileSystem){ getBasePath() >> BASE } + @Shared def wdir = Files.createTempDirectory('wdir') + @Shared def cid = wdir.resolve('.meta') + @Shared def data = wdir.resolve('work') + + def cleanupSpec(){ + wdir.deleteDir() + } + + def 'should create correct cid Path' () { + when: + def cid = new CidPath(FS, PATH, MORE) + then: + cid.storePath == EXPECTED_STORE + cid.filePath == EXPECTED_FILE + where: + FS | PATH | MORE | EXPECTED_STORE | EXPECTED_FILE + fs | '/' | [] as String[] | BASE | '/' + null | '/' | [] as String[] | Path.of('/') | '/' + fs | '/' | ['a','b'] as String[] | BASE.resolve('a/b') | 'a/b' + null | '/' | ['a','b'] as String[] | Path.of('a/b') | 'a/b' + fs | '' | [] as String[] | BASE | '/' + null | '' | [] as String[] | Path.of('/') | '/' + fs | '' | ['a','b'] as String[] | BASE.resolve('a/b') | 'a/b' + null | '' | ['a','b'] as String[] | Path.of('a/b') | 'a/b' + fs | '1234' | [] as String[] | BASE.resolve('1234') | '1234' + null | '1234' | [] as String[] | Path.of('1234') | '1234' + fs | '1234' | ['a','b'] as String[] | BASE.resolve('1234/a/b') | '1234/a/b' + null | '1234' | ['a','b'] as String[] | Path.of('1234/a/b') | '1234/a/b' + fs | '1234/c' | [] as String[] | BASE.resolve('1234/c') | '1234/c' + null | '1234/c' | [] as String[] | Path.of('1234/c') | '1234/c' + fs | '1234/c' | ['a','b'] as String[] | BASE.resolve('1234/c/a/b') | '1234/c/a/b' + null | '1234/c' | ['a','b'] as String[] | Path.of('1234/c/a/b') | '1234/c/a/b' + fs | '/1234/c' | [] as String[] | BASE.resolve('1234/c') | '1234/c' 
+ null | '/1234/c' | [] as String[] | Path.of('1234/c') | '1234/c' + fs | '/1234/c' | ['a','b'] as String[] | BASE.resolve('1234/c/a/b') | '1234/c/a/b' + null | '/1234/c' | ['a','b'] as String[] | Path.of('1234/c/a/b') | '1234/c/a/b' + } + + def 'should get target path' () { + given: + def output1 = data.resolve('output') + output1.resolve('some/path').mkdirs() + output1.resolve('some/path/file1.txt').text = "this is file1" + def output2 = data.resolve('file2.txt') + output2.text = "this is file2" + def cidFs = Mock(CidFileSystem){ getBasePath() >> cid } + cid.resolve('12345/output1').mkdirs() + cid.resolve('12345/path/to/file2.txt').mkdirs() + cid.resolve('12345/.data.json').text = '{"type":"TaskRun"}' + cid.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + output1.toString() + '"}' + cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"TaskOutput", "path": "' + output2.toString() + '"}' + + expect: + new CidPath(cidFs, PATH).getTargetPath() == EXPECTED + where: + PATH | EXPECTED + '/' | cid + '12345' | cid.resolve('12345') + '12345/output1' | data.resolve('output') + '12345/output1/some/path' | data.resolve('output/some/path') + '12345/path/to/' | cid.resolve('12345/path/to/') + '12345/path/to/file2.txt/' | data.resolve('file2.txt') + } + + def 'should get file name' () { + when: + def cid1 = new CidPath(fs, '1234567890/this/file.bam') + then: + cid1.getFileName() == new CidPath(null, 'file.bam') + } + + def 'should get file parent' () { + when: + def cid1 = new CidPath(fs, '1234567890/this/file.bam') + then: + cid1.getParent() == new CidPath(fs, '1234567890/this') + cid1.getParent().getParent() == new CidPath(fs, '1234567890') + cid1.getParent().getParent().getParent() == new CidPath(fs, "/") + cid1.getParent().getParent().getParent().getParent() == null + } + + @Unroll + def 'should get name count' () { + expect: + new CidPath(fs, PATH).getNameCount() == EXPECTED + where: + PATH | EXPECTED + '/' | 0 + '123' | 1 + 
'123/a' | 2 + '123/a/' | 2 + '123/a/b' | 3 + '' | 0 + } + + @Unroll + def 'should get name by index' () { + expect: + new CidPath(fs, PATH).getName(INDEX) == EXPECTED + where: + PATH | INDEX | EXPECTED + '123' | 0 | new CidPath(fs, '123') + '123/a' | 1 | new CidPath(null, 'a') + '123/a/' | 1 | new CidPath(null, 'a') + '123/a/b' | 2 | new CidPath(null, 'b') + } + + @Unroll + def 'should get subpath' () { + expect: + new CidPath(fs, PATH).subpath(BEGIN,END) == EXPECTED + where: + PATH | BEGIN | END | EXPECTED + '123' | 0 | 1 | new CidPath(fs, '123') + '123/a' | 0 | 2 | new CidPath(fs, '123/a') + '123/a/' | 0 | 2 | new CidPath(fs, '123/a') + '123/a' | 1 | 2 | new CidPath(null, 'a') + '123/a/' | 1 | 2 | new CidPath(null, 'a') + '123/a/b' | 2 | 3 | new CidPath(null, 'b') + '123/a/b' | 1 | 3 | new CidPath(null, 'a/b') + } + + def 'should normalize a path' () { + expect: + new CidPath(fs, '123').normalize() == new CidPath(fs, '123') + new CidPath(fs, '123/a/b').normalize() == new CidPath(fs, '123/a/b') + new CidPath(fs, '123/./a/b').normalize() == new CidPath(fs, '123/a/b') + new CidPath(fs, '123/a/../a/b').normalize() == new CidPath(fs, '123/a/b') + } + + @Unroll + def 'should validate startWith' () { + expect: + new CidPath(fs,PATH).startsWith(OTHER) == EXPECTED + where: + PATH | OTHER | EXPECTED + '12345/a/b' | '12345' | true + '12345/a/b' | '12345/a' | true + '12345/a/b' | '12345/a/b' | true + and: + '12345/a/b' | '12345/b' | false + '12345/a/b' | 'xyz' | false + } + + @Unroll + def 'should validate endsWith' () { + expect: + new CidPath(fs,PATH).endsWith(OTHER) == EXPECTED + where: + PATH | OTHER | EXPECTED + '12345/a/b' | 'b' | true + '12345/a/b' | 'a/b' | true + '12345/a/b' | '12345/a/b' | true + and: + '12345/a/b' | '12345/b' | false + '12345/a/b' | 'xyz' | false + } + + def 'should validate isAbsolute' () { + expect: + new CidPath(fs,'1234/a/b/c').isAbsolute() + new CidPath(fs,'1234/a/b/c').getRoot().isAbsolute() + new 
CidPath(fs,'1234/a/b/c').getParent().isAbsolute() + new CidPath(fs,'1234/a/b/c').normalize().isAbsolute() + new CidPath(fs,'1234/a/b/c').getName(0).isAbsolute() + new CidPath(fs,'1234/a/b/c').subpath(0,2).isAbsolute() + and: + !new CidPath(fs,'1234/a/b/c').getFileName().isAbsolute() + !new CidPath(fs,'1234/a/b/c').getName(1).isAbsolute() + !new CidPath(fs,'1234/a/b/c').subpath(1,3).isAbsolute() + } + + @Unroll + def 'should get root path' () { + expect: + new CidPath(fs,PATH).getRoot() == new CidPath(fs,EXPECTED) + where: + PATH | EXPECTED + '12345' | '/' + '12345/a' | '/' + } + + def 'should resolve path' () { + when: + def cid1 = new CidPath(fs, '123/a/b/c') + def cid2 = new CidPath(fs, '321/x/y/z') + def rel1 = new CidPath(null, 'foo') + def rel2 = new CidPath(null, 'bar/') + + then: + cid1.resolve(cid2) == cid2 + cid2.resolve(cid1) == cid1 + and: + cid1.resolve(rel1) == new CidPath(fs,'123/a/b/c/foo') + cid1.resolve(rel2) == new CidPath(fs,'123/a/b/c/bar') + and: + rel1.resolve(rel2) == new CidPath(null, 'foo/bar') + rel2.resolve(rel1) == new CidPath(null, 'bar/foo') + } + + def 'should resolve path as string' () { + given: + def pr = Mock(CidFileSystemProvider) + def cidfs = Mock(CidFileSystem){ + getBasePath() >> BASE + provider() >> pr} + + + def cid1 = new CidPath(cidfs, '123/a/b/c') + + expect: + cid1.resolve('x/y') == new CidPath(cidfs, '123/a/b/c/x/y') + cid1.resolve('/x/y/') == new CidPath(cidfs, '123/a/b/c/x/y') + + when: + def result = cid1.resolve('cid://321') + then: + pr.getPath(CidPath.asUri('cid://321')) >> new CidPath(cidfs, '321') + and: + result == new CidPath(cidfs, '321') + } + + @Unroll + def 'should get to uri string' () { + expect: + new CidPath(fs, PATH).toUriString() == EXPECTED + where: + PATH | EXPECTED + '/' | 'cid:///' + '1234' | 'cid://1234' + '1234/a/b/c' | 'cid://1234/a/b/c' + '' | 'cid:///' + } + + @Unroll + def 'should get string' () { + expect: + new CidPath(fs, PATH).toString() == EXPECTED + where: + PATH | EXPECTED + '/' | 
'/' + '1234' | '1234' + '1234/a/b/c' | '1234/a/b/c' + '' | '/' + } +} diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy new file mode 100644 index 0000000000..800a60f637 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy @@ -0,0 +1,88 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import java.nio.file.Path + +import nextflow.Global +import nextflow.Session +import spock.lang.Specification +import spock.lang.Unroll + +/** + * CID Path Factory tests. 
+ * + * @author Jorge Ejarque + */ +class CifPathFactoryTest extends Specification { + + def setup() { + Global.session = Mock(Session) { getConfig()>> [workflow:[data:[store:[location: '/some/data']]]] } + } + + def cleanup() { + Global.session = null + } + + def 'should create cid path' () { + given: + def factory = new CidPathFactory() + + expect: + factory.parseUri('foo') == null + + when: + def p1 = factory.parseUri('cid://12345') + then: + p1.getTargetPath() == Path.of('/some/data/.meta/12345') + p1.toUriString() == 'cid://12345' + + when: + def p2 = factory.parseUri('cid://12345/x/y/z') + then: + p2.getTargetPath() == Path.of('/some/data/.meta/12345/x/y/z') + p2.toUriString() == 'cid://12345/x/y/z' + + when: + def p3 = factory.parseUri('cid://12345//x///y/z//') + then: + p3.getTargetPath() == Path.of('/some/data/.meta/12345/x/y/z') + p3.toUriString() == 'cid://12345/x/y/z' /* check p3, the URI parsed in this block */ + + when: + factory.parseUri('cid:///12345') + then: + thrown(IllegalArgumentException) + } + + @Unroll + def 'should convert get cid uri string' () { + given: + def factory = new CidPathFactory() + + when: + def cid = CidPathFactory.create(EXPECTED) + then: + factory.toUriString(cid) == EXPECTED + + where: + _ | EXPECTED + _ | 'cid://123' + _ | 'cid://123/a/b/c' + } +} diff --git a/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy b/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy index c867304d70..4233f744b1 100644 --- a/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy @@ -32,10 +32,10 @@ class HistoryFileTest extends Specification { b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sample.fa b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sample.fa -resume 58d8dd16-ce77-4507-ba1a-ec1ccc9bd2e8\tnextflow run examples/basic.nf --in data/sample.fa -2016-07-24
16:43:16\t-\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t-\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' def 'should support custom base dir' () { @@ -66,9 +66,9 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa def d1 = new Date(now - 50_000) def d2 = new Date(now - 30_000) def d3 = new Date(now - 10_000) - history.write( 'hello_world', id1, 'abc', '-', [1,2,3], d1 ) - history.write( 'super_star', id2, '123', '-', [1,2,3], d2 ) - history.write( 'slow_food', id3, 'xyz', '-', [1,2,3], d3 ) + history.write( 'hello_world', id1, 'abc', [1,2,3], d1 ) + history.write( 'super_star', id2, '123', [1,2,3], d2 ) + history.write( 'slow_food', id3, 'xyz', [1,2,3], d3 ) then: history.getLast() == new HistoryRecord(sessionId: id3, runName: 'slow_food', timestamp: d3, command: '1 2 3') @@ -243,9 +243,9 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa then: history.text == ''' 58d8dd16-ce77-4507-ba1a-ec1ccc9bd2e8\tnextflow run examples/basic.nf --in data/sample.fa - 2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello - 2016-07-24 
16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume - 2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello + 2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello + 2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume + 2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' .stripIndent() } @@ -306,38 +306,14 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.findAllRunNames() == ['evil_pike', 'gigantic_keller', 'small_cirum', 'modest_bartik'] as Set } - def 'should update cid hash ' () { - given: - def source = ''' -2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello -''' - def file = Files.createTempFile('test',null) - file.deleteOnExit() - file.text = source - def history = new HistoryFile(file) - - - when: - history.updateCidHash('evil_pike','cid_hash') - then: - history.text == ''' -2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\tcid_hash\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 
min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello -''' - } def 'should update the history entries ' () { given: def source = ''' -2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' def file = Files.createTempFile('test',null) file.deleteOnExit() @@ -350,10 +326,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('evil_pike',true,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 
16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' when: @@ -361,10 +337,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('small_cirum',false,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' when: @@ -372,10 +348,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('gigantic_keller',true,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t16s\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run 
hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 16:43:34\t16s\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' } diff --git a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy index 430222ade5..be885ed40e 100644 --- a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy @@ -1172,6 +1172,8 @@ class FileHelper { } public static HashCode getTaskHashFromPath(Path sourcePath, Path workPath) { + assert sourcePath + assert workPath if (sourcePath.startsWith(workPath)) { Path relativePath = workPath.relativize(sourcePath) if (relativePath.getNameCount() >= 2) { From edfaf5bc749c561245cca1047e8fe873c8325115 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 15:46:03 +0100 Subject: [PATCH 09/15] fix NPE in tests Signed-off-by: jorgee --- .../main/groovy/nextflow/data/cid/CidObserver.groovy | 12 ++++++++---- .../data/cid/fs/CidFileSystemProvider.groovy | 4 +++- .../groovy/nextflow/data/cid/CidObserverTest.groovy | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index fe33c95032..4a52b2ee75 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -104,13 +104,17 @@ class 
CidObserver implements TraceObserver { normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), session.workflowMetadata.scriptId ) + List otherScripts = new LinkedList<>() + for (Path p: ScriptMeta.allScriptNames().values()) { + if (p && p != session.workflowMetadata.scriptFile) { + otherScripts.add(new DataPath(normalizer.normalizePath(p.normalize()), + CacheHelper.hasher(p.text).hash().toString())) + } + } final workflow = new Workflow( DataType.Workflow, mainScript, - ScriptMeta.allScriptNames().values().collect { new DataPath( - normalizer.normalizePath(it.normalize()), - CacheHelper.hasher(it.text).hash().toString()) - }, + otherScripts, session.workflowMetadata.repository, session.workflowMetadata.commitId ) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy index a963c7dd61..d8a68f65c3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -74,7 +74,9 @@ class CidFileSystemProvider extends FileSystemProvider { if( !fileSystem ) { //Overwrite default values with provided configuration final defaultConfig = DataConfig.asMap() - config.each {defaultConfig.put(it.key, it.value)} + if (config) { + config.forEach {String key,value -> defaultConfig.put(key, value) } + } fileSystem = new CidFileSystem(this, new DataConfig(defaultConfig)) } return fileSystem diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index fee5957b76..3167350cca 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -76,6 +76,7 @@ class CidObserverTest extends Specification { '"sessionId": "' 
+ uniqueId + '",' + '"name": "test_run", "params": []}' when: + observer.onFlowCreate(session) observer.onFlowBegin() then: folder.resolve(".meta/${observer.executionHash}/.data.json").text == JsonOutput.prettyPrint(expectedString) From c207d9264876fc9cc60369ccb7bb3b7b5f9dc438 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 19:51:47 +0100 Subject: [PATCH 10/15] Fix NPE in tests Signed-off-by: jorgee --- .../nextflow/data/cid/fs/CidFileSystemProvider.groovy | 4 +++- .../src/main/groovy/nextflow/data/config/DataConfig.groovy | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy index d8a68f65c3..91f24984ec 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -75,7 +75,9 @@ class CidFileSystemProvider extends FileSystemProvider { //Overwrite default values with provided configuration final defaultConfig = DataConfig.asMap() if (config) { - config.forEach {String key,value -> defaultConfig.put(key, value) } + for (Map.Entry e : config.entrySet()) { + defaultConfig.put(e.key, e.value) + } } fileSystem = new CidFileSystem(this, new DataConfig(defaultConfig)) } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy index 64564b3e96..467598218e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy @@ -32,16 +32,16 @@ class DataConfig { final DataStoreOpts store DataConfig(Map opts) { - this.store = new DataStoreOpts(opts.store as Map ?: [:]) + this.store = new DataStoreOpts(opts.store as Map ?: Map.of()) } static Map asMap() { - 
session ? (Map)session.config.navigate('workflow.data') : [:] + session?.config?.navigate('workflow.data') as Map ?: new HashMap() } static DataConfig create(Session session) { if( session ) { - return new DataConfig(session.config.navigate('workflow.data') as Map ?: [:]) + return new DataConfig( session.config.navigate('workflow.data') as Map ?: Map.of()) } else throw new IllegalStateException("Missing Nextflow session") From f4b90318a38feb13f8cc3c70002bcf45931e8212 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 20:35:58 +0100 Subject: [PATCH 11/15] Add CidStore factory Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 9 +--- .../nextflow/data/cid/CidStoreFactory.groovy | 49 +++++++++++++++++++ .../data/cid/DefaultCidStoreFactory.groovy | 38 ++++++++++++++ .../main/resources/META-INF/extensions.idx | 1 + 4 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index a7e7f79565..9e5352520b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -17,6 +17,7 @@ package nextflow import nextflow.data.cid.CidStore +import nextflow.data.cid.CidStoreFactory import nextflow.data.cid.DefaultCidStore import nextflow.data.config.DataConfig @@ -407,17 +408,11 @@ class Session implements ISession { if(config.navigate('workflow.data')) { this.cidEnabled = true - this.cidStore = createCidStore(this) + this.cidStore = CidStoreFactory.create(DataConfig.create(this)) } } - protected static CidStore createCidStore(Session session){ - final store = new DefaultCidStore() - store.open(DataConfig.create(session)) - return store - } - protected Path 
cloudCachePath(Map cloudcache, Path workDir) { if( !cloudcache?.enabled ) return null diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy new file mode 100644 index 0000000000..f27e3f7602 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy @@ -0,0 +1,49 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.data.config.DataConfig +import nextflow.plugin.Plugins +import org.pf4j.ExtensionPoint + +/** + * Factory for CidStore + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +abstract class CidStoreFactory implements ExtensionPoint { + + protected abstract CidStore newInstance(DataConfig config) + + static CidStore create(DataConfig config){ + final all = Plugins.getPriorityExtensions(CidStoreFactory) + if( !all ) + throw new IllegalStateException("Unable to find Nextflow CID store factory") + final factory = all.first() + log.debug "Using Nextflow CID store factory: ${factory.getClass().getName()}" + return factory.newInstance(config) + + + } + + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy new file mode 100644 index 0000000000..df8e9243a4 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy @@ -0,0 +1,38 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package nextflow.data.cid + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig +import nextflow.plugin.Priority + +/** + * Default Factory for CidStore + * + * @author Jorge Ejarque + */ +@CompileStatic +@Priority(0) +class DefaultCidStoreFactory extends CidStoreFactory{ + + @Override + protected CidStore newInstance(DataConfig config) { + final cidStore = new DefaultCidStore() + cidStore.open(config) + return cidStore + } +} diff --git a/modules/nextflow/src/main/resources/META-INF/extensions.idx b/modules/nextflow/src/main/resources/META-INF/extensions.idx index 7fb037c37d..e7ba19b1ab 100644 --- a/modules/nextflow/src/main/resources/META-INF/extensions.idx +++ b/modules/nextflow/src/main/resources/META-INF/extensions.idx @@ -25,4 +25,5 @@ nextflow.mail.SimpleMailProvider nextflow.mail.JavaMailProvider nextflow.processor.tip.DefaultTaskTipProvider nextflow.fusion.FusionTokenDefault +nextflow.data.cid.DefaultCidStoreFactory From b89cdf147f5ba12807c3c655e635c64533813e59 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 20:49:54 +0100 Subject: [PATCH 12/15] fix cid paht hash validation Signed-off-by: jorgee --- .../main/groovy/nextflow/data/cid/fs/CidPath.groovy | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy index 4d70f5252c..98dadb65ec 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy @@ -80,6 +80,15 @@ class CidPath implements Path { this.filePath = filePath0(fs, storePath) } + private static void validateHash(Map cidObject) { + final hashedPath = Path.of(cidObject.path as String) + if( !hashedPath.exists() ) + throw new FileNotFoundException("Target path $cidObject.path does not exists.") + if( cidObject.checksum && 
CacheHelper.hasher(hashedPath).hash().toString() != cidObject.checksum ) { + log.warn("Checksum of $hashedPath does not match with the one stored in the metadata") + } + } + @TestOnly protected String getFilePath(){ this.filePath } @@ -101,12 +110,10 @@ class CidPath implements Path { final type = DataType.valueOf(cidObject.type as String) if( type == DataType.TaskOutput || type == DataType.WorkflowOutput ) { // return the real path stored in the metadata + validateHash(cidObject) final realPath = Path.of(cidObject.path as String, childs) if( !realPath.exists() ) throw new FileNotFoundException("Target path $realPath for $cidStorePath does not exists.") - if( cidObject.checksum && CacheHelper.hasher(realPath).hash().toString() != cidObject.checksum ) { - log.warn("Checksum of $cidStorePath does not match with the one stored in the metadata") - } return realPath } } else { From cfb27d0d59a5d187bed5ca09f41d8d64f6d7844a Mon Sep 17 00:00:00 2001 From: "Rintze M. Zelle, PhD" Date: Fri, 28 Feb 2025 05:24:39 -0500 Subject: [PATCH 13/15] Typo fix in container.md (#5825) [ci skip] "paths are configure" > "paths are configured" Signed-off-by: Rintze M. Zelle, PhD --- docs/container.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container.md b/docs/container.md index ba8a563828..e9129a5beb 100644 --- a/docs/container.md +++ b/docs/container.md @@ -59,7 +59,7 @@ In the above example replace `/path/to/apptainer.img` with any Apptainer image o Read the {ref}`config-page` page to learn more about the `nextflow.config` file and how to use it to configure your pipeline execution. :::{note} -Unlike Docker, Nextflow does not automatically mount host paths in the container when using Apptainer. It expects that the paths are configure and mounted system wide by the Apptainer runtime. If your Apptainer installation allows user defined bind points, read the {ref}`Apptainer configuration ` section to learn how to enable Nextflow auto mounts. 
+Unlike Docker, Nextflow does not automatically mount host paths in the container when using Apptainer. It expects that the paths are configured and mounted system wide by the Apptainer runtime. If your Apptainer installation allows user defined bind points, read the {ref}`Apptainer configuration ` section to learn how to enable Nextflow auto mounts. ::: :::{warning} From cfeedd6f93271c1484716f5e068a7b42b38ba493 Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Fri, 28 Feb 2025 18:30:01 +0100 Subject: [PATCH 14/15] Fix Google Batch autoRetryExitCodes bug (#5828) Signed-off-by: jorgee Signed-off-by: Ben Sherman Co-authored-by: Ben Sherman --- .../nextflow/cloud/google/batch/GoogleBatchTaskHandler.groovy | 2 +- .../cloud/google/batch/GoogleBatchTaskHandlerTest.groovy | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/nf-google/src/main/nextflow/cloud/google/batch/GoogleBatchTaskHandler.groovy b/plugins/nf-google/src/main/nextflow/cloud/google/batch/GoogleBatchTaskHandler.groovy index 442fea9a6a..adbdb7f4c9 100644 --- a/plugins/nf-google/src/main/nextflow/cloud/google/batch/GoogleBatchTaskHandler.groovy +++ b/plugins/nf-google/src/main/nextflow/cloud/google/batch/GoogleBatchTaskHandler.groovy @@ -268,7 +268,7 @@ class GoogleBatchTaskHandler extends TaskHandler implements FusionAwareTask { LifecyclePolicy.newBuilder() .setActionCondition( LifecyclePolicy.ActionCondition.newBuilder() - .addExitCodes(50001) + .addAllExitCodes(executor.config.autoRetryExitCodes) ) .setAction(LifecyclePolicy.Action.RETRY_TASK) ) diff --git a/plugins/nf-google/src/test/nextflow/cloud/google/batch/GoogleBatchTaskHandlerTest.groovy b/plugins/nf-google/src/test/nextflow/cloud/google/batch/GoogleBatchTaskHandlerTest.groovy index 37b27e0b5a..be9a6b0bb0 100644 --- a/plugins/nf-google/src/test/nextflow/cloud/google/batch/GoogleBatchTaskHandlerTest.groovy +++ b/plugins/nf-google/src/test/nextflow/cloud/google/batch/GoogleBatchTaskHandlerTest.groovy @@ -146,6 +146,7 @@ 
class GoogleBatchTaskHandlerTest extends Specification { getBootDiskImage() >> BOOT_IMAGE getCpuPlatform() >> CPU_PLATFORM getMaxSpotAttempts() >> 5 + getAutoRetryExitCodes() >> [50001,50002] getSpot() >> true getNetwork() >> 'net-1' getServiceAccountEmail() >> 'foo@bar.baz' @@ -198,7 +199,9 @@ class GoogleBatchTaskHandlerTest extends Specification { taskSpec.getMaxRunDuration().getSeconds() == TIMEOUT.seconds taskSpec.getVolumes(0).getMountPath() == '/tmp' taskSpec.getMaxRetryCount() == 5 + taskSpec.getLifecyclePolicies(0).getActionCondition().getExitCodesCount() == 2 taskSpec.getLifecyclePolicies(0).getActionCondition().getExitCodes(0) == 50001 + taskSpec.getLifecyclePolicies(0).getActionCondition().getExitCodes(1) == 50002 taskSpec.getLifecyclePolicies(0).getAction().toString() == 'RETRY_TASK' and: runnable.getContainer().getCommandsList().join(' ') == '/bin/bash -o pipefail -c bash .command.run' From c02fe91f4f48dc77d22936437810e4a21eebd1c8 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 1 Mar 2025 04:18:23 -0600 Subject: [PATCH 15/15] Use parallelization in Gradle build (#5830) [ci fast] Signed-off-by: Ben Sherman --- gradle.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/gradle.properties b/gradle.properties index 1e989cc8ed..26137bfbae 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1,3 @@ org.gradle.caching=true org.gradle.jvmargs=-Xmx4g +org.gradle.parallel=true