diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000000..e05919e12b --- /dev/null +++ b/.codespellrc @@ -0,0 +1,6 @@ +[codespell] +skip = .git,*.pdf,*.svg,timeline-expected.html,*.fq,*.min.js,ScriptDslTest.groovy +# some cases where we need to catch using regex +ignore-regex = \bhel\*|fo\\ +# some variables, names, etc to ignore +ignore-words-list = splitted,ois,tre,marge,smoot,afile,bams,bais,pre-pending,re-use diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000..8ed3888e2d --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +[*] +charset = utf-8 +indent_size = 4 +indent_style = space +tab_width = 4 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c27a11929e..04d8fef5ae 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,6 +12,7 @@ on: - 'master' - 'test*' - 'dev*' + - 'STABLE-*' pull_request: types: [opened, reopened, synchronize] workflow_dispatch: @@ -24,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - java_version: [11, 17, 19] + java_version: [11, 17, 21] steps: - name: Environment @@ -38,7 +39,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v35 + uses: tj-actions/changed-files@v41 with: files_ignore: docs @@ -92,14 +93,14 @@ jobs: any_changed: ${{ steps.changed-files.outputs.any_changed }} test: - if: ${{ !contains(github.event.head_commit.message, '[ci fast]') }} + if: ${{ !contains(github.event.head_commit.message, '[ci fast]') && needs.build.outputs.any_changed == 'true' }} needs: build runs-on: ubuntu-latest strategy: fail-fast: false matrix: - java_version: [11, 19] - test_mode: ["test_integration", "test_aws", "test_azure", "test_google", "test_wave"] + java_version: [11, 21] + test_mode: ["test_integration", "test_docs", "test_aws", "test_azure", "test_google", "test_wave"] steps: - name: Checkout uses: actions/checkout@v3 diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 0000000000..7373affc38 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,22 @@ +--- +name: Codespell + +on: + push: + branches: [master] + pull_request: + branches: [master] + +permissions: + contents: read + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Codespell + uses: codespell-project/actions-codespell@v2 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7cb26e6cc7..4259cff330 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,73 +1,40 @@ -# CONTRIBUTING TO NEXTFLOW +# Contributing to Nextflow -This guide documents the best way to make various types of contributions to Nextflow, -including what is required before submitting a code change. +This guide documents the various ways to contribute to Nextflow, including what is required before submitting a code change. -Contributing to Nextflow doesn't just mean writing code. Helping new users on the mailing list, -testing releases and bug fixes, and improving documentation are all essential and valuable contributions. In fact, proposing -significant code changes usually first requires gaining experience and credibility within the -community by helping in other ways. This is also a guide to becoming an effective contributor. +Contributing to Nextflow doesn't just mean writing code. Helping new users in the community, testing releases and bug fixes, and improving documentation are all essential and valuable contributions. 
Helping in these ways is an excellent way to become an effective contributor and gain credibility within the community, which makes it easier to make larger contributions like code changes and new features. +## Helping Other Users -## Contributing by Helping Other Users +A great way to contribute to Nextflow is to answer user questions on the [community forum](https://community.seqera.io) and the [Nextflow Slack](https://www.nextflow.io/slack-invite.html). Contributors should ideally be active members here and keep up with the latest developments in the Nextflow community. There are always many new Nextflow users, so taking a few minutes to help answer a question is a valuable community service and a great way to demonstrate your expertise. -A great way to contribute to Nextflow is to help answer user questions on the [discussion forum](https://github.com/nextflow-io/nextflow/discussions), -or the [Slack channel](https://www.nextflow.io/slack-invite.html). -There are always many new Nextflow users; taking a few minutes to help answer a question is a very valuable community service. +## Documentation Changes -Contributors should ideally subscribe to these channels and follow them in order to keep up to date -on what's happening in Nextflow. Answering questions is an excellent and visible way to help the -community and also demonstrates your expertise. +Propose changes to the [Nextflow documentation](https://nextflow.io/docs/latest/) by editing the source files in the [docs](https://github.com/nextflow-io/nextflow/tree/master/docs) directory. The `README.md` in that directory describes how to build and preview the docs locally. Finally, open a pull request with the proposed changes. +## Bug Reports -## Contributing Documentation Changes +Submitting a bug report is one of the simplest and most useful ways to contribute, as it helps us to quickly identify and fix issues and thereby make Nextflow more stable. -To propose a change to release documentation (that is, the docs that appear under http://docs.nextflow.io), -edit the documentation source files in Nextflow's [docs/](https://github.com/nextflow-io/nextflow/tree/master/docs) -directory, whose README file shows how to build the documentation locally to test your changes. +Report a bug using the **New issue** button on the [issues page](https://github.com/nextflow-io/nextflow/issues). A good bug report should include a minimal test case that can replicate the reported bug. Please follow the instructions in the issue template when submitting a bug report. -Then open a pull request with the proposed changes. +## Bug Fixes +Contributing bug fixes is the best way to gain experience with the Nextflow codebase and credibility within the community as a project contributor. -## Contributing Bug Reports +If you are new to the Nextflow codebase and want to get involved, check out issues marked as [`help wanted`](https://github.com/nextflow-io/nextflow/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) or [`good first issue`](https://github.com/nextflow-io/nextflow/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+). Feel free to ask for help if you get stuck while trying to implement a fix! -Filling a bug report is likely the simplest and most useful way to contribute to the project. -It helps us to identify issues and provide patches and therefore to make Nextflow more stable -and useful. +## New Features -Report a bug using the "New issue" button in the -[issues page](https://github.com/nextflow-io/nextflow/issues) of this project. 
+Before contributing a new feature, please submit a new feature proposal on the [issues page](https://github.com/nextflow-io/nextflow/issues) and discuss it with the community. -A good bug report should include a minimal executable test case able to replicate the reported bug. +Submitting a proposal helps identify possible overlaps with other planned features and avoid potential misunderstandings, conflicts, and wasted effort. -Follow the instructions in the bug [report template](https://github.com/nextflow-io/nextflow/blob/master/.github/issue_template.md) that is shown when filling the bug report out. +## Code Changes -## Contributing Bug Fixes +When submitting a contribution, you will be required to sign a [Developer Certificate of Origin (DCO)](https://developercertificate.org/) to certify that you are the author of the source code or otherwise have the right to submit it to the project. -Contributing bug fixes is the best way to gain experience and credibility within the community -and also to become an effective project contributor. - -If you are a novice with the Nextflow code base, start by looking at issues marked -with the [help wanted](https://github.com/nextflow-io/nextflow/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) -label. - -If you have doubts on how to fix an issue, ask for help from senior contributors commenting -in the issue page. - -## Contributing New Features - -Before contributing a new feature, submit a new feature proposal in the -[issues page](https://github.com/nextflow-io/nextflow/issues) of the project and discuss it -with the community. - -This is important to identify possible overlaps with other planned features and avoid misunderstandings and conflicts. - -## Contributing Code Changes - -When submitting a contribution, you will be required to sign a [Developer Certificate of Origin (DCO)](https://developercertificate.org/) to certify that you are the author of the source code or otherwise you have the right to submit it to the project. - -Contributor signatures are provided by adding a `Signed-off-by` line to the commit message -as shown below, or by using the `-s` option for the [git commit command](https://help.github.com/articles/signing-commits/). +Contributor signatures are provided by adding a `Signed-off-by` line to the commit message as shown below, or by using the `-s` option with [`git commit`](https://help.github.com/articles/signing-commits/). For example: ``` This is my commit message @@ -77,29 +44,4 @@ Signed-off-by: Random J Developer The process is automatically managed by the [Probot](https://probot.github.io/apps/dco/) app for GitHub. - -## IDE settings - -The suggested development environment is [IntelliJ IDEA](https://www.jetbrains.com/idea/download/). See the [README](https://github.com/nextflow-io/nextflow/#intellij-idea) for a short primer on how to import -and configure Nextflow to work with it. 
-
-Nextflow does not impose a strict code formatting style, however the following settings should be applied:
-
-* Use spaces for indentation
-* Tab size: 4
-* Indent: 4
-* Use single class import
-* Class count to use import with `*`: 99
-* Names count to use static import with `*`: 99
-* Imports layout:
-    * \
-    * `import org.junit.*`
-    * `import spock.lang.*`
-    * \
-    * `import java.*`
-    * `import javax.*`
-    * \
-    * *all other imports*
-    * *all other static imports*
-
-New files must include the appropriate license header boilerplate and the author name(s) and contact email(s) ([see for example](https://github.com/nextflow-io/nextflow/blob/e8945e8b6fc355d3f2eec793d8f288515db2f409/modules/nextflow/src/main/groovy/nextflow/Const.groovy#L1-L15)).
+For more information about working on the Nextflow source code, visit the [Nextflow docs](https://nextflow.io/docs/latest/developer/).
diff --git a/Makefile b/Makefile
index 6d21ea1548..bf55192bd3 100644
--- a/Makefile
+++ b/Makefile
@@ -33,6 +33,7 @@ clean:
 	rm -rf modules/nextflow/.nextflow*
 	rm -rf modules/nextflow/work
 	rm -rf build
+	rm -rf buildSrc/build
 	rm -rf modules/*/build
 	rm -rf plugins/*/build
 	./gradlew clean
@@ -42,7 +43,7 @@ compile:
 	@echo "DONE `date`"

 assemble:
-	./gradlew compile assemble
+	./gradlew buildInfo compile assemble

 check:
 	./gradlew check
diff --git a/README.md b/README.md
index 4f17f1c934..ed754aa081 100644
--- a/README.md
+++ b/README.md
@@ -1,288 +1,66 @@
-![Nextflow logo](https://github.com/nextflow-io/trademark/blob/master/nextflow2014_no-bg.png)
+<!-- Nextflow Logo -->
+
 *"Dataflow variables are spectacularly expressive in concurrent programming"*
[Henri E. Bal , Jennifer G. Steiner , Andrew S. Tanenbaum](https://dl.acm.org/doi/abs/10.1145/72551.72552) +[![Nextflow CI](https://github.com/nextflow-io/nextflow/workflows/Nextflow%20CI/badge.svg)](https://github.com/nextflow-io/nextflow/actions/workflows/build.yml?query=branch%3Amaster+event%3Apush) +[![Nextflow version](https://img.shields.io/github/release/nextflow-io/nextflow.svg?colorB=58bd9f&style=popout)](https://github.com/nextflow-io/nextflow/releases/latest) +[![Nextflow Twitter](https://img.shields.io/twitter/url/https/nextflowio.svg?colorB=58bd9f&&label=%40nextflow&style=popout)](https://twitter.com/nextflowio) +[![Nextflow Publication](https://img.shields.io/badge/Published-Nature%20Biotechnology-26af64.svg?colorB=58bd9f&style=popout)](https://www.nature.com/articles/nbt.3820) +[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?colorB=58bd9f&style=popout)](http://bioconda.github.io/recipes/nextflow/README.html) +[![Nextflow license](https://img.shields.io/github/license/nextflow-io/nextflow.svg?colorB=58bd9f&style=popout)](https://github.com/nextflow-io/nextflow/blob/master/COPYING) -![Nextflow CI](https://github.com/nextflow-io/nextflow/workflows/Nextflow%20CI/badge.svg) -[![Nextflow version](https://img.shields.io/github/release/nextflow-io/nextflow.svg?colorB=26af64&style=popout)](https://github.com/nextflow-io/nextflow/releases/latest) -[![Nextflow Twitter](https://img.shields.io/twitter/url/https/nextflowio.svg?colorB=26af64&&label=%40nextflow&style=popout)](https://twitter.com/nextflowio) -[![Nextflow Publication](https://img.shields.io/badge/Published-Nature%20Biotechnology-26af64.svg?colorB=26af64&style=popout)](https://www.nature.com/articles/nbt.3820) -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?colorB=26af64&style=popout)](http://bioconda.github.io/recipes/nextflow/README.html) -[![Nextflow license](https://img.shields.io/github/license/nextflow-io/nextflow.svg?colorB=26af64&style=popout)](https://github.com/nextflow-io/nextflow/blob/master/COPYING) +Nextflow is a workflow system for creating scalable, portable, and reproducible workflows. It is based on the dataflow programming model, which greatly simplifies the writing of parallel and distributed pipelines, allowing you to focus on the flow of data and computation. Nextflow can deploy workflows on a variety of execution platforms, including your local machine, HPC schedulers, AWS Batch, Azure Batch, Google Cloud Batch, and Kubernetes. Additionally, it supports many ways to manage your software dependencies, including Conda, Spack, Docker, Podman, Singularity, and more. -Quick overview -============== -Nextflow is a bioinformatics workflow manager that enables the development of portable and reproducible workflows. -It supports deploying workflows on a variety of execution platforms including local, HPC schedulers, AWS Batch, -Google Cloud Life Sciences, and Kubernetes. Additionally, it provides support for manage your workflow dependencies -through built-in support for Conda, Spack, Docker, Podman, Singularity, Modules, and more. 
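To make the dataflow model described above concrete, here is a minimal sketch of a pipeline (the `sayHello` process is illustrative only, not part of this change): processes declare their inputs and outputs, and channels wire them together so that each task runs as soon as its inputs are available.

```nextflow
// Each value arriving on the input channel triggers one task
process sayHello {
    input:
    val name

    output:
    stdout

    script:
    """
    echo 'Hello, $name!'
    """
}

workflow {
    // Two values flow through the process concurrently; view() prints each result
    Channel.of('world', 'Nextflow') | sayHello | view
}
```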
+## Quick start -## Contents -- [Rationale](#rationale) -- [Quick start](#quick-start) -- [Documentation](#documentation) -- [Tool Management](#tool-management) - - [Conda environments](#conda-environments) - - [Spack environments](#spack-environments) - - [Docker, Podman and Singularity](#containers) - - [Environment Modules](#environment-modules) -- [HPC Schedulers](#hpc-schedulers) - - [SGE](#hpc-schedulers) - - [Univa Grid Engine](#hpc-schedulers) - - [LSF](#hpc-schedulers) - - [SLURM](#hpc-schedulers) - - [PBS/Torque](#hpc-schedulers) - - [HyperQueue (experimental)](#hpc-schedulers) - - [HTCondor (experimental)](#hpc-schedulers) - - [Moab](#hpc-schedulers) -- [Cloud Support](#cloud-support) - - [AWS Batch](#cloud-support) - - [Google Cloud Batch](#cloud-support) - - [Google Life Sciences](#cloud-support) - - [Kubernetes](#cloud-support) -- [Community](#community) -- [Build from source](#build-from-source) -- [Contributing](#contributing) -- [License](#license) -- [Citations](#citations) -- [Credits](#credits) - - -Rationale -========= - -With the rise of big data, techniques to analyse and run experiments on large datasets are increasingly necessary. - -Parallelization and distributed computing are the best ways to tackle this problem, but the tools commonly available to the bioinformatics community often lack good support for these techniques, or provide a model that fits badly with the specific requirements in the bioinformatics domain and, most of the time, require the knowledge of complex tools or low-level APIs. - -Nextflow framework is based on the dataflow programming model, which greatly simplifies writing parallel and distributed pipelines without adding unnecessary complexity and letting you concentrate on the flow of data, i.e. the functional logic of the application/algorithm. - -It doesn't aim to be another pipeline scripting language yet, but it is built around the idea that the Linux platform is the *lingua franca* of data science, since it provides many simple command line and scripting tools, which by themselves are powerful, but when chained together facilitate complex data manipulations. - -In practice, this means that a Nextflow script is defined by composing many different processes. Each process can execute a given bioinformatics tool or scripting language, to which is added the ability to coordinate and synchronize the processes execution by simply specifying their inputs and outputs. - - - -Quick start -============ - -Download the package --------------------- - -Nextflow does not require any installation procedure, just download the distribution package by copying and pasting -this command in your terminal: - -``` -curl -fsSL https://get.nextflow.io | bash -``` - -It creates the ``nextflow`` executable file in the current directory. You may want to move it to a folder accessible from your ``$PATH``. - -Download from Conda -------------------- - -Nextflow can also be installed from Bioconda - -``` -conda install -c bioconda nextflow -``` - -Documentation -============= - -Nextflow documentation is available at this link http://docs.nextflow.io - - -HPC Schedulers -============== - -*Nextflow* supports common HPC schedulers, abstracting the submission of jobs from the user. 
- -Currently the following clusters are supported: - - + [SGE](https://www.nextflow.io/docs/latest/executor.html#sge) - + [Univa Grid Engine](https://www.nextflow.io/docs/latest/executor.html#sge) - + [LSF](https://www.nextflow.io/docs/latest/executor.html#lsf) - + [SLURM](https://www.nextflow.io/docs/latest/executor.html#slurm) - + [PBS/Torque](https://www.nextflow.io/docs/latest/executor.html#pbs-torque) - + [Flux](https://www.nextflow.io/docs/latest/executor.html#flux) - + [HyperQueue (beta)](https://www.nextflow.io/docs/latest/executor.html#hyperqueue) - + [HTCondor (beta)](https://www.nextflow.io/docs/latest/executor.html#htcondor) - + [Moab](https://www.nextflow.io/docs/latest/executor.html#moab) - -For example to submit the execution to a SGE cluster create a file named `nextflow.config`, in the directory -where the pipeline is going to be launched, with the following content: - -```nextflow -process { - executor='sge' - queue='' -} -``` - -In doing that, processes will be executed by Nextflow as SGE jobs using the `qsub` command. Your -pipeline will behave like any other SGE job script, with the benefit that *Nextflow* will -automatically and transparently manage the processes synchronisation, file(s) staging/un-staging, etc. - - -Cloud support -============= -*Nextflow* also supports running workflows across various clouds and cloud technologies. Managed solutions from major -cloud providers are also supported through AWS Batch, Azure Batch and Google Cloud compute services. -Additionally, *Nextflow* can run workflows on either on-prem or managed cloud Kubernetes clusters. - -Currently supported cloud platforms: - + [AWS Batch](https://www.nextflow.io/docs/latest/awscloud.html#aws-batch) - + [Azure Batch](https://azure.microsoft.com/en-us/services/batch/) - + [Google Cloud Batch](https://cloud.google.com/batch) - + [Google Cloud Life Sciences](https://cloud.google.com/life-sciences) - + [Kubernetes](https://www.nextflow.io/docs/latest/kubernetes.html) - - - -Tool management -================ - -Containers ----------------- - -*Nextflow* has first class support for containerization. It supports both [Docker](https://www.nextflow.io/docs/latest/docker.html) and [Singularity](https://www.nextflow.io/docs/latest/singularity.html) container engines. Additionally, *Nextflow* can easily switch between container engines enabling workflow portability. - -```nextflow -process samtools { - container 'biocontainers/samtools:1.3.1' - - """ - samtools --version - """ - -} -``` - -Conda environments ------------------- - -[Conda environments](https://www.nextflow.io/docs/latest/conda.html) provide another option for managing software packages in your workflow. - - -Spack environments ------------------- - -[Spack environments](https://www.nextflow.io/docs/latest/spack.html) provide an option to build software packages from source using Spack, a popular package manager within the HPC community. - - -Environment Modules -------- - -[Environment modules](https://www.nextflow.io/docs/latest/process.html#module) commonly found in HPC environments can also be used to manage the tools used in a *Nextflow* workflow. - - -Community -========= - -You can post questions, or report problems by using the Nextflow [discussions](https://github.com/nextflow-io/nextflow/discussions) -or the Nextflow [Slack community chat](https://www.nextflow.io/slack-invite.html). - -*Nextflow* also hosts a yearly workshop showcasing researcher's workflows and advancements in the langauge. 
Talks from the past workshops are available on the [Nextflow YouTube Channel](https://www.youtube.com/channel/UCB-5LCKLdTKVn2F4V4KlPbQ) - -The [nf-core](https://nf-co.re/) project is a community effort aggregating high quality *Nextflow* workflows which can be used by the community. - - -Build from source -================= - -Required dependencies ---------------------- - -* Compiler Java 11 or later -* Runtime Java 11 or later - -Build from source ------------------ - -*Nextflow* is written in [Groovy](http://groovy-lang.org) (a scripting language for the JVM). A pre-compiled, -ready-to-run, package is available at the [Github releases page](https://github.com/nextflow-io/nextflow/releases), -thus it is not necessary to compile it in order to use it. - -If you are interested in modifying the source code, or contributing to the project, it worth knowing that -the build process is based on the [Gradle](http://www.gradle.org/) build automation system. - -You can compile *Nextflow* by typing the following command in the project home directory on your computer: +Install Nextflow with a single command: ```bash -make compile +curl -fsSL https://get.nextflow.io | bash ``` -The very first time you run it, it will automatically download all the libraries required by the build process. -It may take some minutes to complete. - -When complete, execute the program by using the `launch.sh` script in the project directory. +It creates the `nextflow` executable file in the current directory. You can then move it to a directory in your `$PATH` to run it from anywhere. -The self-contained runnable Nextflow packages can be created by using the following command: +Nextflow can also be installed from Bioconda: ```bash -make pack -``` - -Once compiled use the script `./launch.sh` as a replacement for the usual `nextflow` command. - -The compiled packages can be locally installed using the following command: - -```bash -make install +conda install -c bioconda nextflow ``` -A self-contained distribution can be created with the command: `make pack`. To include support of GA4GH and its dependencies in the binary, use `make packGA4GH` instead. - - -IntelliJ IDEA ---------------- - -Nextflow development with [IntelliJ IDEA](https://www.jetbrains.com/idea/) requires a recent version of the IDE (2019.1.2 or later). - -If you have it installed in your computer, follow the steps below in order to use it with Nextflow: - -1. Clone the Nextflow repository to a directory in your computer. -2. Open IntelliJ IDEA and choose "New > Project from Existing Sources..." in the "File" menu bar. -3. Select the Nextflow project root directory in your computer and click "OK". -4. Then, choose the "Gradle" item in the "Import project from external model" list and click on "Finish" button to finalize the import. -5. When the import process completes, select the "Project structure" command in the "File" menu bar. -6. In the showed dialog click on the "Project" item in the list of the left, and make sure that - the "Project SDK" choice on the right contains Java 11 (or later, up to 18). -7. Set the code formatting options with settings provided [here](https://github.com/nextflow-io/nextflow/blob/master/CONTRIBUTING.md#ide-settings). +## Documentation +The Nextflow documentation is available for the latest [stable](https://nextflow.io/docs/latest/) and [edge](https://nextflow.io/docs/edge/) releases. 
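As a quick sanity check after installation, the canonical hello-world pipeline can be run straight from GitHub (Nextflow resolves `hello` to the [nextflow-io/hello](https://github.com/nextflow-io/hello) repository):

```bash
# Print the installed Nextflow version
nextflow -version

# Pull and run the hello-world pipeline from GitHub
nextflow run hello
```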
+## Community

You can post questions and get help in the [Nextflow community forum](https://community.seqera.io) or the [Nextflow Slack](https://www.nextflow.io/slack-invite.html). Bugs and feature requests should be reported as [GitHub issues](https://github.com/nextflow-io/nextflow/issues/new/choose).

-Contributing
-============
+The Nextflow community is highly active with regular community meetings, events, a podcast and more. You can view much of this material on the [Nextflow](https://www.youtube.com/@Nextflow) and [nf-core](https://www.youtube.com/@nf-core) YouTube channels.

-Project contribution are more than welcome. See the [CONTRIBUTING](CONTRIBUTING.md) file for details.
+The [nf-core](https://nf-co.re/) project is a community effort aggregating high quality Nextflow workflows which can be used by everyone.

-Build servers
-=============
+## Contributing

- * [Travis-CI](https://travis-ci.org/nextflow-io/nextflow)
- * [GitHub Actions](https://github.com/nextflow-io/nextflow/actions)
+Contributions are more than welcome. See the [CONTRIBUTING](CONTRIBUTING.md) file for details.

-License
-=======
+## License

-The *Nextflow* framework is released under the Apache 2.0 license.
+Nextflow is released under the Apache 2.0 license. Nextflow is a [registered trademark](https://github.com/nextflow-io/trademark).

-Citations
-=========
+## Citations

-If you use Nextflow in your research, please cite:
+If you use Nextflow in your work, please cite:

P. Di Tommaso, et al. Nextflow enables reproducible computational workflows. Nature Biotechnology 35, 316–319 (2017) doi:[10.1038/nbt.3820](http://www.nature.com/nbt/journal/v35/n4/full/nbt.3820.html)

-Credits
-=======
+## Credits

-Nextflow is built on two great pieces of open source software, namely Groovy
-and Gpars.
+Nextflow is built on two great open-source software projects, Groovy
+and GPars.

-YourKit is kindly supporting this open source project with its full-featured Java Profiler.
-Read more http://www.yourkit.com
+YourKit is kindly supporting Nextflow with its fully-featured Java Profiler.
diff --git a/VERSION b/VERSION
index 28d5238c54..b83786f853 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-23.06.0-edge
\ No newline at end of file
+23.12.0-edge
diff --git a/build.gradle b/build.gradle
index 82c8422e3d..0c13c17f78 100644
--- a/build.gradle
+++ b/build.gradle
@@ -24,7 +24,7 @@ plugins {
 def groovyVer = System.getenv('CI_GROOVY_VERSION')
 if (groovyVer) {
     def repo = groovyVer.startsWith('com.github.apache:') ? 'https://jitpack.io' : 'https://oss.jfrog.org/oss-snapshot-local/'
-    logger.lifecycle "Overriden Groovy dependency to use $groovyVer - repository: $repo"
+    logger.lifecycle "Overridden Groovy dependency to use $groovyVer - repository: $repo"
     allprojects {
         repositories {
             maven { url repo }
@@ -48,8 +48,15 @@ def projects(String...args) {
     args.collect {project(it)}
 }

+String gitVersion() {
+    def p = new ProcessBuilder() .command('sh','-c','git rev-parse --short HEAD') .start()
+    def r = p.waitFor()
+    return r==0 ?
p.text.trim() : '(unknown)' +} + group = 'io.nextflow' version = rootProject.file('VERSION').text.trim() +ext.commitId = gitVersion() allprojects { apply plugin: 'java' @@ -82,6 +89,8 @@ allprojects { mavenCentral() maven { url 'https://repo.eclipse.org/content/groups/releases' } maven { url 'https://oss.sonatype.org/content/repositories/snapshots' } + maven { url = "https://s3-eu-west-1.amazonaws.com/maven.seqera.io/releases" } + maven { url = "https://s3-eu-west-1.amazonaws.com/maven.seqera.io/snapshots" } } configurations { @@ -100,8 +109,8 @@ allprojects { // Documentation required libraries groovyDoc 'org.fusesource.jansi:jansi:1.11' - groovyDoc "org.codehaus.groovy:groovy-groovydoc:3.0.18" - groovyDoc "org.codehaus.groovy:groovy-ant:3.0.18" + groovyDoc "org.codehaus.groovy:groovy-groovydoc:3.0.19" + groovyDoc "org.codehaus.groovy:groovy-ant:3.0.19" } test { @@ -188,20 +197,23 @@ jar.enabled = false */ task buildInfo { doLast { - def file0 = file('./modules/nf-commons/src/main/nextflow/Const.groovy') + def file0 = file('modules/nextflow/src/main/resources/META-INF/build-info.properties') def buildNum = 0 def src = file0.text - src.find(/APP_BUILDNUM *= *([0-9]*)/) { buildNum = it[1]?.toInteger()+1 } - src = src.replaceAll('APP_VER *= *"[0-9a-zA-Z_\\-\\.]+"', "APP_VER = \"${version}\"" as String) - src = src.replaceAll('APP_TIMESTAMP *= *[0-9]*', "APP_TIMESTAMP = ${System.currentTimeMillis()}" as String) - if( buildNum ) { - src = src.replaceAll('APP_BUILDNUM *= *[0-9]*', "APP_BUILDNUM = ${buildNum}" as String) - } - else { + src.find(/build *= *([0-9]*)/) { buildNum = it[1]?.toInteger()+1 } + if( !buildNum ) { println "WARN: Unable to find current build number" } file0.text = src + // -- update build-info file + file0.text = """\ + build=${buildNum} + version=${version} + timestamp=${System.currentTimeMillis()} + commitId=${project.property('commitId')} + """.stripIndent() + // -- update 'nextflow' wrapper file0 = file('nextflow') src = file0.text @@ -236,7 +248,6 @@ task buildInfo { doLast { * Compile sources and copies all libs to target directory */ task compile { - dependsOn buildInfo dependsOn allprojects.classes } @@ -297,12 +308,12 @@ configure(coreProjects) { apply plugin: 'signing' task javadocJar(type: Jar) { - classifier = 'javadoc' + archiveClassifier = 'javadoc' from configurations.groovyDoc } task sourcesJar(type: Jar) { - classifier = 'sources' + archiveClassifier = 'sources' from sourceSets.main.allSource } diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index c6e9e2491e..fa35d4112b 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -1,6 +1,6 @@ plugins { id 'groovy-gradle-plugin' - id "com.gradle.plugin-publish" version "0.12.0" + id "com.gradle.plugin-publish" version "1.2.1" } repositories { @@ -16,24 +16,17 @@ dependencies { } gradlePlugin { - plugins { - nextflowPlugin { - id = 'io.nextflow.nf-build-plugin' - implementationClass = 'io.nextflow.gradle.NextflowBuildPlugin' - } - } -} - -pluginBundle { website = 'https://www.nextflow.io/' vcsUrl = 'https://github.com/nextflow-io/nextflow' description = 'Helper plugin for Nextflow project build' - tags = ['nextflow'] - + plugins { - nextflowPlugin { + create("nextflowPlugin") { + id = 'io.nextflow.nf-build-plugin' + implementationClass = 'io.nextflow.gradle.NextflowBuildPlugin' // id is captured from java-gradle-plugin configuration displayName = 'Nextflow Build plugin' } } } + diff --git a/changelog.txt b/changelog.txt index 2518538584..5eafa10eff 100644 --- a/changelog.txt +++ 
b/changelog.txt
@@ -1,11 +1,362 @@
 NEXTFLOW CHANGE-LOG
 ===================
+23.10.1 - 12 Jan 2024
+- Fix bug with Fusion symlink resolution (#4593) [f28c9e48]
+- Fix Fusion symlinks when publishing files (#4348) [1fa5878a]
+- Fix Inspect command fails with Singularity [25883df3]
+- Fix Allow the use of error built-in function in onComplete handler (#4458) [ci fast] [4be10cd3]
+- Fix Harden regular expression used to strip secrets in logs (#4563) [ci fast] [0102d4d6]
+- Fix custom notification template [40980bcb]
+- Fix container environment with special chars (#4594) [f4e00601]
+- Fix AZURE_STORAGE_SAS_TOKEN environment variable (#4627) [2e1cb413]
+- Fix azure retry policy (#4638) [2bc3cf0e]
+- Fix BitBucket get source API with custom branch [5ef75e32]
+- Improve error details for AbortOperationException [9e795a62]
+- Bump nf-wave@1.0.1 [206f2614]
+- Bump nf-azure@1.3.3 [50bcad59]
+
+23.04.5 - 12 Jan 2024
+- Fix container environment with special chars (#4594) [663b2936]
+
+23.12.0-edge - 20 Dec 2023
+- Add AWS_SESSION_TOKEN to Fusion environment (#4581) [552f29b0]
+- Add ability to disable Cloudinfo service (#4606) [f7251895]
+- Add experimental support for Fargate compute type for AWS Batch (#3474) [47cf335b]
+- Add support for Instance template to Google Batch [df7ed294]
+- Add support for Singularity/Apptainer auto pull mode for OCI containers [b7f1a192]
+- Fix BitBucket get source API with custom branch [58937831]
+- Fix Fusion tags documentation (#4551) [687e2e96]
+- Fix Harden regular expression used to strip secrets in logs (#4563) [832bff24]
+- Fix bug with Fusion symlink resolution (#4593) [09e85582]
+- Fix container environment with special chars (#4594) [e0fe952f]
+- Fix custom notification template [ccf4f59e]
+- Fix fusion symlink test (#4604) [681ace86]
+- Fix smoke tests [d3c2f330]
+- Improve GLS tests [58590b1c]
+- Load nf-amazon when AWS SES is enabled [887f06f4]
+- Move build num & timestamp to BuildInfo class [ec8083d4]
+- Move app version to BuildInfo class [c7d749e8]
+- Remove deprecated Wave observer [0e009ef7]
+- Remove undocumented userEmulation mode (#4596) [f6c79788]
+- Remove unused DSL2 check [e9ee3b2c]
+- Replace each iterator with class for [f7662e68]
+- Bump nf-amazon@2.3.0 [0b40b7b9]
+- Bump nf-google@1.10.0 [bcb20fcf]
+- Bump nf-wave@1.2.0 [aa981814]
+- Bump wave-utils@0.8.1 and micromamba@1.5.5 [9cb50035]
+- Build optimizations (#4579) [5ad41e44]
+
+23.11.0-edge - 24 Nov 2023
+- Add `fusion.cacheSize` config option (#4518) [2faadc22]
+- Add Topic channel type (experimental) (#4459) [921313d1]
+- Add Google Batch native retry on spot termination (#4500) [ea1c1b70]
+- Add Retry policy to Google Storage (#4524) [c271bb18]
+- Add ability to detect Google Batch spot interruption (#4462) [d49f02ae]
+- Add doc tests, move some snippets to separate files (#3959) [0ff3b305]
+- Add docs section on container requirements (#4501) [3fb29f78]
+- Add labels field in Job request for Google Batch (#4538) [627c595e]
+- Add note about limitations of glacier auto retrieval (#4514) [82e56799]
+- Add note about local executor and enforcing resource limits (#4468) [6a0626f7]
+- Add section about sharing modules (#4482) [3e66fba4]
+- Add section on process directives to plugin docs (#4477) [d9ee9870]
+- Add support for Azure low-priority pool (#4527) [8320ea10]
+- Add support for FUSION_AWS_REGION (#4481) [8f8b09fa]
+- Add support for Fusion when using Singularity OCI mode (#4508) [4f3aa631]
+- Add support for K8s schedulerName pod spec (#4485) [dfc7b7c8]
+- Add support for
Singularity OCI mode (#4440) [f5362a7b] +- Allow the use of error built-in function in onComplete handler (#4458) [35a4424b] +- Fix Bug in JsonSplitter ordering [8ec14dd2] +- Fix Bypass Google Batch Price query if task cpus and memory are defined (#4521) [7f8f20d3] +- Fix Checkout remote tag if checkout remote branch fails (#4247) [b8907ccb] +- Fix Fusion symlinks when publishing files (#4348) [89f09fe0] +- Fix Inspect command fails with Singularity [f5bb829f] +- Fix ParamsMap copyWith param aliases (#4188) [b480ee0e] +- Fix Singularity docs [e952299f] +- Fix container hashing for Singularity + Wave containers [4c6f2e85] +- Fix detection of Conda local path made by Wave client (#4532) [4d5bc216] +- Fix doc tests to fail on test failure (#4505) [4d326551] +- Fix errors when NXF_HOME contains spaces (#4456) [fe5bea99] +- Fix Google Batch network/subnetwork docs (#4475) [27d132f3] +- Fix rounding error with long durations (#4496) [0356178b] +- Fix security vulnerabilities (#4513) [a310c777] +- Fix Use consistently NXF_TASK_WORKDIR (#4484) [48ee3c64] +- Improve error details for AbortOperationException [35609cb0] +- Improve operator docs (#4502) [38210e11] +- Makefile clean to also remove buildSrc/build (#4517) [2ccb05d0] +- Minor test improvements [171831ea] +- Minor types improvement for mix operator [91c1ab15] +- Normalise channel docs [b641d677] +- Remove deprecated TowerArchiver feature [ff8e06a3] +- Remove dsl1 deprecated code (part 2) [159effb1] +- Remove dsl1 deprecated code [2b433a52] +- Remove incorrect note about workflow inputs (#4509) [54bc0b7d] +- Return error if plugin version is not specified in offline mode (#4487) [f5d7246e] +- Update README.md with new branding color for Nextflow (#4412) [7a13b18b] +- Update background color of docs status badges (#4411) [3cb1c53c] +- Update logging filter for Google Batch provider. 
(#4488) [66a3ed19]
+- Bump Gradle 8.4 and test vs Java 21 (#4450) [8cb2702c]
+- Bump nf-amazon@2.2.0 [8e2d7879]
+- Bump nf-azure@1.4.0 [7c47d090]
+- Bump nf-cloudcache@0.3.1 [65240b75]
+- Bump nf-codecommit@0.1.6 [725f0510]
+- Bump nf-console@1.0.7 [a307686c]
+- Bump nf-ga4gh@1.1.1 [e54ea007]
+- Bump nf-google@1.9.0 [033ec92c]
+- Bump nf-tower@1.7.0 [836a44a5]
+- Bump nf-wave@1.1.0 [620523ef]
+
+23.10.0 - 15 Oct 2023
+- Add support for K8s hostPath [10c32325]
+- Add AWS SES docs [b83e7148]
+- Add -with-cloudcache instead of -cloudcache [ef530263]
+- Add inspect command validation tests [882f369e]
+- Improve warning complaining about tuple but mentioning set (#4400) [0ef48735]
+- Improve `-dump-hashes` output adding json format (#4369) [5bdaac94]
+- Improve error message on invalid file url prefix [82a3f405]
+- Fix conda channels order [6672c6d7]
+- Docs: Incorporate DSL2 features (#3793) [efd041cb]
+- Docs: aws.batch.retrymode = 'built-in' (#4229) [d5a8098f]
+- Docs: Update logos, fonts, and colors (#4407) [7814822c]
+- Changelog minor changes [dfb5bf09]
+- Bump nf-wave@1.0.0 [795849d7]
+
+23.09.3-edge - 10 Oct 2023
+- Add -cloudcache CLI option (#4385) [73fda582]
+- Add bioconda and seqera Conda default channels (#4359) [ff012dcd]
+- Add codespell: config, workflow and have typos fixed (#4324) [e044b7a5]
+- Add docs page on caching and resuming (#4371) [21190a3c]
+- Add example of using the transpose operator with multiple items in element (#4364) [0bce2be0]
+- Add setting to enable the use of sync command [f0d5cc5c]
+- Improve S3 endpoint validation [2b9ae6aa]
+- Improve Wave config validation [7d5a21b0]
+- Improve Wave error handling [d47e8b07]
+- Fix Google Batch do not stop running jobs (#4381) [3d6b7358]
+- Incorporate README content into Nextflow docs (#4263) [74a0f998]
+- Never say (nearly) Oops again (#4356) [4a39542b]
+- Remove unsupported K8s device capability [be0cf0c2]
+- Remove unused -dsl2 option [3cb91a15]
+- Rename Microsoft AAD to Microsoft Entra (#4362) [5e3f2c0a]
+- Return -1 when exitcode file is empty (#4354) [d26c42be]
+- Support for Fusion unprivileged execution (#4387) [035e6e7b]
+- Use HistoryFile.Record instead of Record type conflict [11c3aac5]
+- Bump nf-amazon@2.1.4 [a84208a8]
+- Bump nf-cloudcache@0.3.0 [b37b4014]
+- Bump nf-google@1.8.3 [b4eb8b96]
+- Bump nf-tower@1.6.3 [5681ab13]
+- Bump nf-wave@0.14.0 [de1de6c7]
+
+23.09.2-edge - 28 Sep 2023
+- Add conda.enabled to conda config scope docs (#4320) [0b74c4e2]
+- Add developer docs (#4065) [06843d87]
+- Add fs stat command [a79056b7]
+- Add more tests to container name [da7a1942]
+- Add procps by default to Conda-based Wave builds [66b2d2d2]
+- Add support for Java 21 (#4338) [ac1fc9ee]
+- Add Mermaid diagram in HTML DAG (#4337) [0f3e263f]
+- Default Conda basePackages to "conda-forge::procps-ng" [367af52f]
+- Disable Tower container field for multiple images [b53936b0]
+- Document `NXF_DISABLE_CHECK_LATEST` environment var (#4327) [4e3e9aca]
+- Document `fs` CLI command (#4328) [1a680495]
+- Document source of `vol_ctxt` and `inv_ctxt` trace metrics (#4333) [6525b0a1]
+- Document when process directives are evaluated (#4217) [731a29a2]
+- Fix List of S3 bucket for custom endpoint [4327fa58]
+- Fix Prevent false positive resumable task [144e0a8a]
+- Fix Prevent multi attempts to retrieve AWS creds [b30efe36]
+- Fix allow_other vulnerability preventing google-batch submissions (#4332) [9b3741e3]
+- Fix fs list command [29ab2f24]
+- Fix minor typos in changelogs/source code (#4319) [4ce9f1df]
+- Fix support for
S3 custom endpoint with anonymous access [03752815]
+- Fix use of GITHUB_TOKEN variable to access GitHub repo [afe3dc4f]
+- Fix Wave does not support 'null' container engine [f3eba3d7]
+- Fix Retry TimeoutException in azure file system (#4295) [79248355]
+- Improve K8s unit tests (#4196) [5a43a32b]
+- Improve Mermaid DAG rendering (#4070) [19587f40]
+- Improve error report when exception is thrown in operator ctx [16f54a9c]
+- Improve wave container name validation [73eb5a02]
+- Minor improvements [fff3d7bd]
+- Prevent cache invalidation for task directives (#4339) [aabb6c19]
+- Remove line endings from container box id (#4334) [df41e54a]
+- Bump Apache 2.0 license to tower client [e4a878b6]
+- Bump nf-amazon@2.1.3 [3311172f]
+- Bump nf-azure@1.3.2 [a9b735ce]
+- Bump nf-google@1.8.2 [7219d7f2]
+- Bump nf-tower@1.6.2 [91a935f9]
+- Bump nf-wave@0.13.0 [0cf2476c]
+- Bump wave-utils@0.7.8 [d0c47d49]
+
+23.04.4 - 25 Sep 2023
+- Fix use of GITHUB_TOKEN variable to access GitHub repo [108c6b55]
+- Fix allow_other vulnerability preventing google-batch submissions (#4332) [4895d547]
+- Fix Prevent false positive resumable task [aae87715]
+- Fix Always emit publish event for cached task outputs (#4227) [c4cd53c2]
+- Fix Too long Http connection pool timeout [ce5e9930]
+- Bump nf-google@1.7.3-patch1 [d881728c]
+
+23.09.1-edge - 11 Sep 2023
+- Revert "Allow setting shell directive when using the trace file (#4210)" [9f9edcdc]
+
+23.09.0-edge - 10 Sep 2023
+- Add check for latest version (#4194) [3e8cd488]
+- Add inspect command (#4069) [090c31ce]
+- Add maxSubmitAwait (#3736) [5686bf1d]
+- Add scripts logging for troubleshooting [c056a74e]
+- Add support for Spack to Singularity builds [23c4ec1d]
+- Add support for Wave native build for Singularity [8a434893]
+- Add support for inputs and outputs arity [42504d3c]
+- Add support for remote debug (#4266) [87e0648a]
+- Add warning about using clusterOptions with process directives (#4248) [a68c1a3e]
+- Add which to dockerfile build [817aa05b]
+- Align Singularity experience with Docker (#4230) [a1e1d3ca] [c1cc60dd]
+- Allow setting shell directive when using the trace file (#4210) [7d6ad624]
+- Always emit publish event for cached task outputs (#4227) [62686ce8]
+- Deprecated Wave report feature [80c5cb27]
+- Disable staging script for remote work dir (#4282) [80f7cd46]
+- Disable version check on CI tests [db79e13f]
+- Docs improvement to clarify the usage of the bin dir (#4121) [2daa4172]
+- Document API differences of process path inputs (#4189) [c37e00bc]
+- Document use of local variables in closures (#4251) [05ff784a]
+- Fix IOException should be thrown when failing to create Azure directory [b0bdfd79]
+- Fix Parallel execution of Conda corrupts data and packages (#4253) [976c282c]
+- Fix Parse negative CLI params as numbers (#4238) [1ae70d5d]
+- Fix Too long Http connection pool timeout [fa961e7f]
+- Fix Wave build for Singularity files [a60ef72b]
+- Fix Wave build when Conda package name is quoted [d19cb0b7]
+- Fix failing test [2785ffe9]
+- Fix fs cp command with remote file [366eedec]
+- Fix printf command with negative exit code (#4213) [465468b0]
+- Fix security deps in nf-azure plugin [c30d5211]
+- Fix setting `executor.queueSize = 0` is ignored (#4228) [6664b578]
+- Improve Wave handling of Conda envs [736ab9bb]
+- Improve pod options documentation (#4274) [c3aa26e1]
+- Make TraceRecord@store field public accessible [07582f0b]
+- Remove -dsl1 and -dsl2 run cli options [b6721b71]
+- Remove experimental -dockerize option /2 [7def5542]
+-
Remove experimental -dockerize option [937c8fb7]
+- Report an error on duplicate workflow name definitions (#4088) [fce9702e]
+- Undocument internal NXF_DEBUG variable [7955db8d]
+- Update AZ File share doc (#4235) [69d317b6]
+- Update docs about splitCsv() operator (#4163) [1dfb621c]
+- Update documentation of NXF_DEBUG (#4187) [a88a4245]
+- Use sipHash24 in place of deprecated murmur32 for script aliasing [bb96763f]
+- Bump groovy 3.0.19 [cb411208]
+- Bump nf-amazon@2.1.2 [7e5d414e]
+- Bump nf-azure@1.3.1 [83410f39]
+- Bump nf-wave@0.12.0 [a9f6dd65]
+
+23.08.1-edge - 17 Aug 2023
+- Add 429 http status code to Wave retriable errors [8eb5f305]
+- Add resource labels support for Azure Batch (#4178) [7b5e50a1]
+- Apply K8s Pod metadata to Job (#4057) [4d918627]
+- Document error about trailing backslash with space (#4180) [245afa5d]
+- Enable cloud cache based on environment variable (#4160) [a66b0e63]
+- Escape semicolons in paths (#4193) [552501ce]
+- Fix nested InvocationTargetException (#4192) [67980f19]
+- Fix Execution should fail if report or timeline file already exists [b238d7e2]
+- Fix Process hangs when using flatten and finish errorStrategy [d99b3432]
+- Fix `workflow.container` map resolution (#4190) [96ab8a69]
+- Fix checkpoint thread termination (#4166) [2b449daa]
+- Fix env output when changing task workdir [8e4d7fed]
+- Fix if-guard on log.trace in task processor inner class [50f6f6d5]
+- Fix typos in source code comments (#4173) [e78bc37e]
+- Improve Conda build error report [7b19fb03]
+- Improve handling of name-only container env variables [3051cd13]
+- Minor changes [7e58c945]
+- Remove dockerize launcher classpath file (#4191) [2bae5198]
+- Remove lock file from cloudcache (#4167) [6e6ea579]
+- Update AWS instructions for creating a custom AMI (#4174) [563bff13]
+- Update changelog [98f88a50]
+- Update tip about modifying maps (#4153) [30036dbf]
+- Use root user in Wave container based on micromamba (#4038) [a3a75ea2]
+- Bump nf-azure@1.3.0 [6670bb06]
+- Bump nf-cloudcache@0.2.0 [fb8f6681]
+- Bump nf-tower@1.6.1 [d06b8365]
+- Bump nf-wave@0.11.2 [7555b17d]
+
+23.04.3 - 11 Aug 2023
+- Increase Wave client max attempts [8c67610a]
+- Fix log typo [03e19ea2]
+- Add 429 http status code to Wave retriable errors [a8b8c6c5]
+- Improve handling Wave server errors [621c9665]
+- Bump nf-wave@0.8.4 [d7fa3f26]
+- Bump corretto 17.0.8 [7a73a78f]
+
+23.08.0-edge - 5 Aug 2023
+- Add `-value` option to `config` command (#4142) [57e3100b]
+- Add `deleteTasksOnCompletion` to Azure Batch configuration (#4114) [b14674dc]
+- Add Tower logs checkpoint (#4132) [71dfecc2]
+- Allow use virtual threads in Wave client [dd32f80a]
+- Allow workflow entry from module import (#4128) [51f5c842]
+- Disable cache backup/restore if cloudcache is used (#4125) [46e828e1]
+- Document behavior of withName selector with included aliases (#4129) [8b7e3d48]
+- Fix Option fixOwnership traverse parent directories [f2a2ea35]
+- Fix Redirection http redirection across different hosts [fcdeec02]
+- Fix Wave disable flag [8579e7a4]
+- Fix bug with K8s resource labels (#4147) [3f4b8557]
+- Fix glob resolution for remote files [19a72c40]
+- Fix incorrect error message on missing comma (#4085) [a59af39f]
+- Fix missing changelog for version 23.07.0-edge [9a33e936]
+- Fix strict mode docs (#4150) [6b46b507]
+- Improve plugin docs (#3957) [22638d46]
+- Improve Wave config logging [547fad62]
+- Improve TaskPollingMonitor logging [077ed5dd]
+- Improve Wave and Fusion docs (#4149) [d2229bde]
+-
Increase Wave client max attempts [fe5dd497] +- Remove module all components import [a6d08c04] +- Restore Tower CacheManager for backward compatibility [6d269070] +- Bump amazoncorretto:17.0.8 [00eb145c] +- Bump nf-wave@0.11.1 [78e4b278] +- Bump nf-tower@1.6.0 [41c8c164] +- Bump nf-azure@1.2.0 [5f33ac17] +- Bump nf-amazon@2.1.1 [981315ad] + +23.07.0-edge - 23 Jul 2023 +- Add CPU model name to trace files and traceRecord (#3946) [e0d91bf7] +- Add ability to disable CLI params type detection [9a1c584d] +- Add cloudcache plugin (#4097) [ac90cc26] +- Add missing header to Wave container await [d39866e6] +- Add remote bin support for TES in a workdir (#3990) [8a22168a] +- Add retry logic to wave image await [9fc1d3bd] +- Add rule to build Nextflow docker image for ARM (#4020) [705d55f5] +- Add support for AWS SSO credentials provider (#4045) [53e33cde] +- Add support for Wave container freeze [9a5903e6] +- Add support legacy Wave retry [73a1e7d4] +- Allow SLURM executor option `--mem-per-cpu` (#4023) [96c04e3b] +- Allow disabling the Wave requirement when Fusion is enabled [9180d633] +- Disable Singularity and Apptainer home mount by default (#4056) [a0ee4657] +- Document `NXF_WRAPPER_STAGE_FILE_THRESHOLD` environment variable (#4113) [bda47567] +- Fix AzFileSystem retry policy [ba9b6d18] [c2f3cc96] +- Fix Improve error message for invalid Azure URI [0f4d8867] +- Fix Treat HTTP headers as case insensitive (#4116) [97fd3358] +- Fix invalid detection of hierarchical namespace stub blobs as files (#4046) [ce06c877] +- Fix stage script in Fusion script launcher (#4109) [0933f47e] +- Ignore accelerator type for AWS Batch (#4043) [263ecca8] +- Implement Weblog feature as an external plugin [f9f2c338] +- Improve "Unexpected input: '{'" error message (#4122) [ef9d3cf0] +- Improve Azure retry logging [de58697a] +- Improve description of channels and channel types (#4120) [8975734d] +- Improve handling Wave server errors [84f7a61a] +- Increase Azure min retry delay to 250ms [2e77e5e4] +- Remove default arch from wave request [f0e5c0c1] +- Remove logging of report JSON data (#4098) [099e5039] +- Wait for all child processes in nxf_parallel (#4050) [60a5f1a7] +- Bump Groovy 3.0.18 [207eb535] +- Bump micromamba 1.4.9 [6307f9b5] +- Bump nf-amazon@2.1.0 [57464746] +- Bump nf-azure@1.1.2 [e1512f6a] +- Bump nf-azure@1.1.3 [27e7f663] +- Bump nf-azure@1.1.4 [1895efc4] +- Bump nf-cloudcache@0.1.0 [cb6242c4] +- Bump nf-ga4gh@1.1.0 [f98feb77] +- Bump nf-google@1.8.1 [6aede7c0] +- Bump nf-tower@1.5.15 [3278d798] +- Bump nf-wave@0.11.0 [2998db5d] + 23.06.0-edge - 14 Jun 2023 - Add AWS Kms integration test [19449bf4] - Add Wave containers reports (preview) [9d9e2758] - Add disk resource with type option for google batch (#3861) [166b3638] - Add httpConnectTimeout and httpReadTimeout to Google options (#3974) [49fa15f7] -- Add plugin cmd help descrption [d3788f9f] +- Add plugin cmd help description [d3788f9f] - Add retry policy on Az blob operations [295bc1ff] - Add retry policy to Wave http client [1daebeef] - Add support for AWS SES as mail sending provider [df85d443] @@ -26,18 +377,28 @@ NEXTFLOW CHANGE-LOG - Fix packing all including Wave [735b8d9c] - Fix static compiler errors [f48a473c] - Improve documentation on Nextflow scripts (#3953) [80050c03] +- Improve description of channels and channel types (#4120) [8975734d] +- Improve "Unexpected input: '{'" error message (#4122) [ef9d3cf0] - Minor change in Wave config [4da0442a] -- Prevent null exit code when Google batch is unable to acces exit status [f68a39ec] +- 
Prevent null exit code when Google batch is unable to access exit status [f68a39ec] - Refactor Conda and Spack support for Wave to Java [36b9e226] - Remove `--no-home` default option from charliecloud builder (#3956) [915074a4] - Update workflow.revision max length to match the one in Tower (#4010) [1433a903] - Bump amazocorretto:17.0.7 [c8aa1214] - Bump azure-storage-blob:12.22.1 [2a36fa77] -- Bump nf-wave@0.10.0 [0872fba5] -- Bump nf-tower@1.5.14 [64bf3115] -- Bump nf-google@1.8.0 [7ca7d808] -- Bump nf-azure@1.1.1 [afd368c4] -- Bump nf-amazon@2.0.1 [58c332a1] +- Bump nf-wave@0.11.0 [2998db5d] +- Bump nf-tower@1.5.15 [3278d798] +- Bump nf-google@1.8.1 [6aede7c0] +- Bump nf-ga4gh@1.1.0 [f98feb77] +- Bump nf-azure@1.1.4 [1895efc4] +- Bump nf-amazon@2.1.0 [57464746] +- Bump nf-cloudcache@0.1.0 [cb6242c4] + +23.04.2 - 8 Jun 2023 +- Fix non-deterministic null container engine error [f93221ab] +- Add retry policy to Wave client [2f1532f6] +- Fix wave build docs [34a73022] +- Bump nf-wave@0.8.3 [350201b5] 23.05.0-edge - 15 May 2023 - Add support for custom custom root directory to resolve relative paths (#3942) [f06bb1f7] @@ -56,7 +417,7 @@ NEXTFLOW CHANGE-LOG - Enable static compilation to missing classes (#3906) [c3c4b2f0] - Fix -with-conda option in the docs (#3867) [9b350ba8] - Fix AWS SSE env propagation to Fusion [e24608c3] -- Fix Apptainer rendering by removing superflous space [36607ed1] +- Fix Apptainer rendering by removing superfluous space [36607ed1] - Fix Azure jobs correctly deleted after completion (#3927) [b173a983] - Fix Azure pool creation when using scaling formula (#3868) [79984a87] - Fix DSL2 support in nextflow console (#3864) [0253f8d7] @@ -237,7 +598,7 @@ NEXTFLOW CHANGE-LOG - Fix serialization of S3 paths with spaces (#3565) [ce487624] - Fix string literal method names with parens (#3604) [8278078a] - Fix submit command in error message for grid executors that pipe wrapper script (#3548) [40ebd308] -- Fix typos in the documentation [skip ci] (#3640) [eccc1130] +- Fix typos in the documentation (#3640) [eccc1130] - Fix unique fingerprint for Wave bundle resources [b59fa728] - Log exception when an unexpected occurs (#3603) [9aeca99f] - Minor change on container resolution [23d47ded] @@ -618,7 +979,7 @@ NEXTFLOW CHANGE-LOG - Fix unit test setting explicit permissions for test files [1c821139] - Fix Default plugins are overridden by config plugins [46cf3bfa] - Fix S3 transfer download directory [b7bf9fe5] -- Fix NPE while setting S3 ObjectMetada #3031 [d6163431] +- Fix NPE while setting S3 ObjectMetadata #3031 [d6163431] - Fix Unable to retrieve AWS batch instance type #1658 [3c4d4d3b] - Fix AWS Batch job definition conflict (#3048) [e5084418] - Fix JAVA_TOOL_OPTIONS breaking launch #1716 [0e7b416d] @@ -678,7 +1039,7 @@ NEXTFLOW CHANGE-LOG - Add DirWatcher v2 [209c82cd] - Add Moriondo in the list of random names [e0abca58] - Add preview CLI option (#2914) [aa8f1aa4] -- Fix Git config resultion error [64436697] +- Fix Git config resolution error [64436697] - Fix StackOverflowError when dump all profiles (#2922) [28cd11a2] - Fix gradle warning message in nf-sqldb (#2921) [b09ceabe] - Fix log for LsfExecutror perTaskReserve attribute [7c3ec874] @@ -693,14 +1054,14 @@ NEXTFLOW CHANGE-LOG - Add Hyperqueue executor (#2896) [ffa5712e] - Add support for K8s Job resource [c70eb12d] - Add support for time process directive in GLS executor (#2880) [1402e183] -- Add support for priviledge option for K8s containers [7ffe3a02] +- Add support for privilege option for K8s containers 
[7ffe3a02] - Add DSL1 option to docs (#2836) [d30841a5] - Add support for container options to Azure Batch [3f4f00f9] - Add support for move operation to AWS S3 [8c0ddfd5] - Add K8s execution hostname in the trace file (#2828) [ebaef92a] -- Add support for AWS S3 encyption using a custom KMS key [c1e45aa9] +- Add support for AWS S3 encryption using a custom KMS key [c1e45aa9] - Add support for Micromamba [383e023f] -- Add jaxb-api dependecy to nf-amazon [c1a09f87] +- Add jaxb-api dependency to nf-amazon [c1a09f87] - Add strict mode config setting [696e70b5] - Add -head-prescript option to kuberun (#2830) [9e387055] - Fix missing err message on submit failure [233e67f0] (#2899) @@ -805,7 +1166,7 @@ NEXTFLOW CHANGE-LOG - Prevent invalid use of large maxForks values [5de0c200] - Prevent race condition on thread pool creation [3e707a1d] - Refactor AWS batch job unique token generation [d64f5a21] -- Decrese log verbosity [9237d587] [d1a5e5a8] +- Decrease log verbosity [9237d587] [d1a5e5a8] - Throw an error if a required secret is missing [8e5129d6] - Wait for pod to stop running before shutdown (#2667) [505c5ad6]) - Restore AZ tests [858a4568] @@ -816,7 +1177,7 @@ NEXTFLOW CHANGE-LOG 22.02.0-edge - 25 Feb 2022 - Fix Prevent S3 tagging with empty list [ae7db466] - Fix Azure repos tests [856aacf0] -- Fix isse on includeConfig resolution #2675 [6591a9e1] +- Fix issue on includeConfig resolution #2675 [6591a9e1] - Fix NPE when accessing not existing Git file [6c79f8fd] - Fix Batch Job name exceed max allowed size [e8b70ca3] - Fix detect and skip bom mark in files (#2645) [02b06ba3] @@ -976,7 +1337,7 @@ NEXTFLOW CHANGE-LOG - Fix verbose logging [92b113134] - Fix docker image url [ed24a475d] - Fix Temporary directories not deleted in on_exit #2406 [181bdf528] -- Fix implict authentication for resources served by Tower [e30310b4f] +- Fix implicit authentication for resources served by Tower [e30310b4f] - Fix No more of 255 channels can be defined into a workflow context #2417 [7f91ecb39] - Add tip about escaping variables in env scope #2366 - Add K8s env fieldPath option to pod directive #2363 @@ -1064,7 +1425,7 @@ NEXTFLOW CHANGE-LOG 21.06.0-edge - 3 Jul 2021 - Add Keep alive option to Google LS executor [994222074] - Add support for FTP proxy and proxy servers authentication [8ec33b04d] -- Add support for plugins groupping [aa5bc7d34] +- Add support for plugins grouping [aa5bc7d34] - Add More details about the reason why a AWS Batch job has failed #2146 [27a4e88b2] - Add support for extensions priority [ae083f1aa] - Add stubRun attribute to workflow metadata #2164 [86cfc66fd] @@ -1129,7 +1490,7 @@ NEXTFLOW CHANGE-LOG - Add readOnly flag to k8s volume mounts (#2013) [33bd24ac] - Add Start default plugins on-demand #1964 [cf2a9d73] - Add support for tower workspaceId [e2977688] -- Improve suppor for BitBucket server repos [eb946e27] +- Improve support for BitBucket server repos [eb946e27] - Fix toInt/Long/Float/Double operator hangs with value channel [7c35d985] - Fix regression caused by nested params handling #1923 [b25af49b] - Fix Cache invalidation when repo is cloned between runs #1989 [ac526f38] @@ -1138,7 +1499,7 @@ NEXTFLOW CHANGE-LOG - Fix az path handling curly brackets globs #1969 [c78a1bbf] - Fix Missing git credentials when checking last commit id [f101308e] - Fix plugins version check against local installation [535c2bb5] -- Fix local dependant formatting #1962 [2a5cbac1] +- Fix local dependent formatting #1962 [2a5cbac1] - Fix Encode path parameter at GitLab getContentUrl [6f80f651] - 
Fix Only use sudo rm with docker #1956 [f9490423] - Prevent AZ NPE when missing vmtype + better logging #1992 [711362ca] @@ -1206,7 +1567,7 @@ NEXTFLOW CHANGE-LOG - Add support for basic auth for http/s file provider [6027fcba] - Add User-Agent info to httpfs requests [222637de] - Update CPU and RAM for containers (#1833) [ccf5c8ba] -- Move test containes to quay.io [7db053eb] [773be707] [c10a7f53] +- Move test containers to quay.io [7db053eb] [773be707] [c10a7f53] 20.12.0-edge - 12 Dec 2020 - Add support for charliecloud container engine #1809 @@ -1275,7 +1636,7 @@ NEXTFLOW CHANGE-LOG - Remove unneeded parentheses from stdout declaration [dc029c04] - Merge execution logs command docs [577ec744] - Passing exact memory to TES executor in GiB #1696 [c8a15545] -- Improve docs replacing suscribe{println it} with view() [f84ccb6f] [cb2d607d] +- Improve docs replacing subscribe{println it} with view() [f84ccb6f] [cb2d607d] - Use current environment Bash #1598 #1614 [e70ef87a] - Include GA4GH support into the standalone binary (make pack) #1666 [e2175d45] - Update Bitbucket docs [b9b57f83] @@ -1286,7 +1647,7 @@ NEXTFLOW CHANGE-LOG - see 20.07.0 and 20.07.0-RC1 20.07.0 - (skipped) -- Allow unqualified stdin/stdout defintions with DSL2 [bcdcaab6] +- Allow unqualified stdin/stdout definitions with DSL2 [bcdcaab6] 20.07.0-RC1 - 21 Jul 2020 - Add Dsl2 enable flag [08238109] @@ -1580,7 +1941,7 @@ NEXTFLOW CHANGE-LOG - Add Pbs Pro execution documentation [1d39ab31] - Add code of conduct document - Fix missing LFS executor register invocation #1234 [b1b7ab1] -- Fix "invalid pod status" when running scrips with the kubernetes executor #1242 [11caac68] +- Fix "invalid pod status" when running scripts with the kubernetes executor #1242 [11caac68] - Revert Fix Environment activation fails with recent versions of Conda #1195 [98c0d1f8] - (includes changes from `19.07.0-edge`, `19.05.0-edge` and `19.05.0-edge`) @@ -1715,7 +2076,7 @@ NEXTFLOW CHANGE-LOG - Fixed remote bin directory when using bucketDir option [6d796783] - Fixed foreign file system detection [1489cc72] - Fixed Ansi logger division by zero [ae2b1380] -- Fixed Unparseable Data format on http file #962 +- Fixed Unparsable Data format on http file #962 - Fixed Log command history #405 - Fixed Too long process tag name can cause SLURM failure #941 - Fixed LSF can return an inconsistent jobs status #927 @@ -1767,7 +2128,7 @@ NEXTFLOW CHANGE-LOG - Fixed remote bin directory when using buckerDir option [6d796783] - Fixed foreign file system detection [1489cc72] - Fixed Ansi logger division by zero [ae2b1380] -- Fixed Unparseable Data format on http file #962 +- Fixed Unparsable Data format on http file #962 - Fixed Log command history #405 - Fixed Too long process tag name can cause SLURM failure #941 - Fixed LSF can return an inconsistent jobs status #927 @@ -2269,7 +2630,7 @@ NEXTFLOW CHANGE-LOG 0.23.3 - 9 Feb 2017 - Fixed Job termination is not recognised if the starter file is not created #268 - Fixed Allow NF console to resume process executions (second take) #280 -- Fixed PublishDir may throw a FileAlredyExistsException #283 +- Fixed PublishDir may throw a FileAlreadyExistsException #283 - Increased logger file appender buffer size to 64kb - Updated Logback to version 1.1.10 - Updated copyright info diff --git a/docker/Dockerfile b/docker/Dockerfile index 26e49b8ed5..e016583e98 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,5 +1,5 @@ -FROM amazoncorretto:17.0.7 -RUN yum install -y procps-ng shadow-utils +FROM amazoncorretto:17.0.9 +RUN 
yum install -y procps-ng shadow-utils which ENV NXF_HOME=/.nextflow ARG TARGETPLATFORM=linux/amd64 @@ -11,7 +11,6 @@ COPY nextflow /usr/local/bin/nextflow # download runtime RUN mkdir /.nextflow \ - && touch /.nextflow/dockerized \ && chmod 755 /usr/local/bin/nextflow \ && chmod 755 /usr/local/bin/entry.sh \ && nextflow info diff --git a/docs/.dockerignore b/docs/.dockerignore index 550793affd..8896a7a43b 100644 --- a/docs/.dockerignore +++ b/docs/.dockerignore @@ -1 +1,2 @@ +** !requirements.txt \ No newline at end of file diff --git a/docs/_static/dag.mmd b/docs/_static/dag.mmd new file mode 100644 index 0000000000..0963e2c106 --- /dev/null +++ b/docs/_static/dag.mmd @@ -0,0 +1,25 @@ +flowchart TB + subgraph " " + v0["Channel.fromFilePairs"] + v1["transcriptome"] + v7["config"] + end + subgraph RNASEQ + v2([INDEX]) + v3([FASTQC]) + v4([QUANT]) + end + v8([MULTIQC]) + subgraph " " + v9[" "] + end + v5(( )) + v0 --> v3 + v0 --> v4 + v1 --> v2 + v2 --> v4 + v3 --> v5 + v4 --> v5 + v7 --> v8 + v5 --> v8 + v8 --> v9 diff --git a/docs/_static/degular/Degular-Bold.woff b/docs/_static/degular/Degular-Bold.woff new file mode 100644 index 0000000000..189e5c40ef Binary files /dev/null and b/docs/_static/degular/Degular-Bold.woff differ diff --git a/docs/_static/degular/Degular-Bold.woff2 b/docs/_static/degular/Degular-Bold.woff2 new file mode 100644 index 0000000000..5f49fd7355 Binary files /dev/null and b/docs/_static/degular/Degular-Bold.woff2 differ diff --git a/docs/_static/degular/Degular-BoldItalic.woff b/docs/_static/degular/Degular-BoldItalic.woff new file mode 100644 index 0000000000..164d8a58ce Binary files /dev/null and b/docs/_static/degular/Degular-BoldItalic.woff differ diff --git a/docs/_static/degular/Degular-BoldItalic.woff2 b/docs/_static/degular/Degular-BoldItalic.woff2 new file mode 100644 index 0000000000..803beafde3 Binary files /dev/null and b/docs/_static/degular/Degular-BoldItalic.woff2 differ diff --git a/docs/_static/degular/Degular-Italic.woff b/docs/_static/degular/Degular-Italic.woff new file mode 100644 index 0000000000..201357edbc Binary files /dev/null and b/docs/_static/degular/Degular-Italic.woff differ diff --git a/docs/_static/degular/Degular-Italic.woff2 b/docs/_static/degular/Degular-Italic.woff2 new file mode 100644 index 0000000000..7455e42dc5 Binary files /dev/null and b/docs/_static/degular/Degular-Italic.woff2 differ diff --git a/docs/_static/degular/Degular-Regular.woff b/docs/_static/degular/Degular-Regular.woff new file mode 100644 index 0000000000..18f88acff7 Binary files /dev/null and b/docs/_static/degular/Degular-Regular.woff differ diff --git a/docs/_static/degular/Degular-Regular.woff2 b/docs/_static/degular/Degular-Regular.woff2 new file mode 100644 index 0000000000..1fb959582c Binary files /dev/null and b/docs/_static/degular/Degular-Regular.woff2 differ diff --git a/docs/_static/favicon.ico b/docs/_static/favicon.ico new file mode 100644 index 0000000000..4b1d11608e Binary files /dev/null and b/docs/_static/favicon.ico differ diff --git a/docs/images/nextflow-k8s-min.png b/docs/_static/nextflow-k8s-min.png similarity index 100% rename from docs/images/nextflow-k8s-min.png rename to docs/_static/nextflow-k8s-min.png diff --git a/docs/_static/nextflow-logo-bg-dark.png b/docs/_static/nextflow-logo-bg-dark.png new file mode 100644 index 0000000000..172969ef46 Binary files /dev/null and b/docs/_static/nextflow-logo-bg-dark.png differ diff --git a/docs/_static/nextflow-logo-bg-light.png b/docs/_static/nextflow-logo-bg-light.png new file mode 100644 
index 0000000000..ece1912ef0 Binary files /dev/null and b/docs/_static/nextflow-logo-bg-light.png differ diff --git a/docs/_static/nextflow-logo.png b/docs/_static/nextflow-logo.png new file mode 100644 index 0000000000..1097ba0f19 Binary files /dev/null and b/docs/_static/nextflow-logo.png differ diff --git a/docs/images/report-resource-cpu.png b/docs/_static/report-resource-cpu.png similarity index 100% rename from docs/images/report-resource-cpu.png rename to docs/_static/report-resource-cpu.png diff --git a/docs/images/report-resource-io-read.png b/docs/_static/report-resource-io-read.png similarity index 100% rename from docs/images/report-resource-io-read.png rename to docs/_static/report-resource-io-read.png diff --git a/docs/images/report-resource-io-write.png b/docs/_static/report-resource-io-write.png similarity index 100% rename from docs/images/report-resource-io-write.png rename to docs/_static/report-resource-io-write.png diff --git a/docs/images/report-resource-job-duration.png b/docs/_static/report-resource-job-duration.png similarity index 100% rename from docs/images/report-resource-job-duration.png rename to docs/_static/report-resource-job-duration.png diff --git a/docs/images/report-resource-memory-pctram.png b/docs/_static/report-resource-memory-pctram.png similarity index 100% rename from docs/images/report-resource-memory-pctram.png rename to docs/_static/report-resource-memory-pctram.png diff --git a/docs/images/report-resource-memory-ram.png b/docs/_static/report-resource-memory-ram.png similarity index 100% rename from docs/images/report-resource-memory-ram.png rename to docs/_static/report-resource-memory-ram.png diff --git a/docs/images/report-resource-memory-vmem.png b/docs/_static/report-resource-memory-vmem.png similarity index 100% rename from docs/images/report-resource-memory-vmem.png rename to docs/_static/report-resource-memory-vmem.png diff --git a/docs/images/report-resource-memory.png b/docs/_static/report-resource-memory.png similarity index 100% rename from docs/images/report-resource-memory.png rename to docs/_static/report-resource-memory.png diff --git a/docs/images/report-resources-min.png b/docs/_static/report-resources-min.png similarity index 100% rename from docs/images/report-resources-min.png rename to docs/_static/report-resources-min.png diff --git a/docs/images/report-summary-min.png b/docs/_static/report-summary-min.png similarity index 100% rename from docs/images/report-summary-min.png rename to docs/_static/report-summary-min.png diff --git a/docs/images/report-tasks-min.png b/docs/_static/report-tasks-min.png similarity index 100% rename from docs/images/report-tasks-min.png rename to docs/_static/report-tasks-min.png diff --git a/docs/_static/seqera-logo.png b/docs/_static/seqera-logo.png deleted file mode 100644 index 7854bd3f7a..0000000000 Binary files a/docs/_static/seqera-logo.png and /dev/null differ diff --git a/docs/_static/seqera-logo.svg b/docs/_static/seqera-logo.svg new file mode 100644 index 0000000000..99a758d21a --- /dev/null +++ b/docs/_static/seqera-logo.svg @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/docs/_static/theme.css b/docs/_static/theme.css index fc8ef01f3b..f4fc4528b3 100644 --- a/docs/_static/theme.css +++ b/docs/_static/theme.css @@ -1,10 +1,172 @@ +/* Inter font */ +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap'); -/* navbar -- Seqera logo */ +/* Degular font */ +@font-face { + font-family: "Degular"; + src: 
url('degular/Degular-Regular.woff2'); + src: url('degular/Degular-Regular.woff2') format('woff2'), + url('degular/Degular-Regular.woff') format('woff'); + font-weight: normal; + font-style: normal; +} +@font-face { + font-family: "Degular"; + src: url('degular/Degular-Italic.woff2'); + src: url('degular/Degular-Italic.woff2') format('woff2'), + url('degular/Degular-Italic.woff') format('woff'); + font-weight: normal; + font-style: italic; +} +@font-face { + font-family: "Degular"; + src: url('degular/Degular-Bold.woff2'); + src: url('degular/Degular-Bold.woff2') format('woff2'), + url('degular/Degular-Bold.woff') format('woff'); + font-weight: bold; + font-style: normal; +} +@font-face { + font-family: "Degular"; + src: url('degular/Degular-BoldItalic.woff2'); + src: url('degular/Degular-BoldItalic.woff2') format('woff2'), + url('degular/Degular-BoldItalic.woff') format('woff'); + font-weight: bold; + font-style: italic; +} + +/* fonts */ +body { + font-family: 'Inter', sans-serif; +} + +.btn { + font-family: 'Inter', sans-serif; +} + +.rst-content .toctree-wrapper>p.caption, +h1, +h2, +h3, +h4, +h5, +h6, +legend { + font-family: 'Degular', sans-serif; +} + +/* colors */ +.rst-content .danger { + background: #FEEDEC; +} + +.rst-content .danger .admonition-title { + background: #C3514D; +} + +.rst-content .warning { + background: #FDF0E9; +} + +.rst-content .warning .admonition-title { + background: #BC6437; +} + +.rst-content .note { + background: #E8F2FF; +} + +.rst-content .note .admonition-title { + background: #3787E5; +} + +.rst-content .tip { + background: #E2F7F3; +} + +.rst-content .tip .admonition-title { + background: #0CAE8E; +} + +a, a:visited { + color: #3787E5; +} + +a:hover { + color: #77B5FE; +} + +.rst-content code, +.rst-content tt, +code { + color: #C3514D; +} + +.wy-menu-vertical header, +.wy-menu-vertical p.caption { + color: #0DC09D; +} + +.wy-menu-vertical a { + color: #EAEBEB; +} + +.wy-menu-vertical a:hover { + background-color: #7B7B7B; +} + +.wy-menu-vertical a:active { + background-color: #0DC09D; +} + +.wy-side-nav-search { + background-color: #0DC09D; +} + +.wy-side-nav-search input[type=text] { + border-color: #0DC09D; +} + +.wy-side-nav-search img { + background-color: #0DC09D; +} + +.wy-nav-side { + background: #160F26; +} + +.wy-nav-top { + background: #0DC09D; +} + +.wy-nav-top a { + color: #fff; +} + +.wy-nav-top img { + width: auto; + background-color: inherit; + border-radius: unset; +} + +footer { + color: #7B7B7B; +} + +#search-results .context { + color: #7B7B7B; +} + +.rst-content code.literal, +.rst-content tt.literal { + color: #C3514D; +} +/* Custom footer with logo */ .nav-footer-logo { width: 300px; margin-top: 2rem; - border-top: 1px solid #666666; + border-top: 1px solid #7B7B7B; text-align: center; font-size: 80%; } @@ -12,7 +174,7 @@ .nav-footer-logo a { display: block; padding: 2rem 0 0.5rem; - color: #666666; + color: #7B7B7B; } .nav-footer-logo a img { @@ -21,7 +183,9 @@ } /* Version change admonitions */ -div.deprecated, div.versionadded, div.versionchanged { +div.deprecated, +div.versionadded, +div.versionchanged { border-left: 0.2rem solid; border-radius: 0.25rem; box-shadow: 0 0.2rem 0.5rem var(--pst-color-shadow),0 0 0.0625rem var(--pst-color-shadow)!important; @@ -33,11 +197,15 @@ div.deprecated, div.versionadded, div.versionchanged { transition: color .25s,background-color .25s,border-color .25s; vertical-align: middle; } -div.deprecated>p, div.versionadded>p, div.versionchanged>p { +div.deprecated>p, +div.versionadded>p, 
+div.versionchanged>p { margin-top: 0.5rem; margin-bottom: 0.5rem; } -div.deprecated>p:first-child::before, div.versionadded>p:first-child::before, div.versionchanged>p:first-child::before { +div.deprecated>p:first-child::before, +div.versionadded>p:first-child::before, +div.versionchanged>p:first-child::before { font-family: FontAwesome; display: inline-block; font-style: normal; @@ -50,9 +218,9 @@ div.deprecated>p:first-child::before, div.versionadded>p:first-child::before, di padding: 0.4rem 0.6rem; margin: -0.2rem 0rem -0.2rem -0.6rem; } -div.versionadded { border-color: #1abc9c; background-color: #dbfaf4; } -div.versionadded p:first-child::before { color: #1abc9c; } -div.versionchanged { border-color: #f0b37e; background-color: #ffedcc; } -div.versionchanged p:first-child::before { color: #f0b37e; } -div.deprecated { border-color: #f29f97; background-color: #fdf3f2; } -div.deprecated p:first-child::before { color: #f29f97; } +div.versionadded { border-color: #0CAE8E; background-color: #E2F7F3; } +div.versionadded p:first-child::before { color: #0CAE8E; } +div.versionchanged { border-color: #BC6437; background-color: #FDF0E9; } +div.versionchanged p:first-child::before { color: #BC6437; } +div.deprecated { border-color: #C3514D; background-color: #FEEDEC; } +div.deprecated p:first-child::before { color: #C3514D; } diff --git a/docs/images/timeline-min.png b/docs/_static/timeline-min.png similarity index 100% rename from docs/images/timeline-min.png rename to docs/_static/timeline-min.png diff --git a/docs/images/workflow-notification-min.png b/docs/_static/workflow-notification-min.png similarity index 100% rename from docs/images/workflow-notification-min.png rename to docs/_static/workflow-notification-min.png diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html index b08dd9ff4e..af1d87ad40 100644 --- a/docs/_templates/layout.html +++ b/docs/_templates/layout.html @@ -5,7 +5,7 @@ {% endblock %} diff --git a/docs/amazons3.md b/docs/amazons3.md index bfcabb23c9..41bcb407fa 100644 --- a/docs/amazons3.md +++ b/docs/amazons3.md @@ -1,8 +1,8 @@ (amazons3-page)= -# Amazon S3 storage +# AWS S3 storage -Nextflow includes support for Amazon S3 storage. Files stored in an S3 bucket can be accessed transparently in your pipeline script like any other file in the local file system. +Nextflow includes support for AWS S3 storage. Files stored in an S3 bucket can be accessed transparently in your pipeline script like any other file in the local file system. ## S3 path @@ -24,10 +24,10 @@ See the {ref}`script-file-io` section to learn more about available file operati ## Security credentials -Amazon access credentials can be provided in two ways: +AWS access credentials can be provided in two ways: 1. Using AWS access and secret keys in your pipeline configuration. -2. Using IAM roles to grant access to S3 storage on Amazon EC2 instances. +2. Using IAM roles to grant access to S3 storage on AWS EC2 instances. ### AWS access and secret keys @@ -52,13 +52,13 @@ If the access credentials are not found in the above file, Nextflow looks for AW More information regarding [AWS Security Credentials](http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html) are available in the AWS documentation. -### IAM roles with Amazon EC2 instances +### IAM roles with AWS EC2 instances When running your pipeline in an EC2 instance, IAM roles can be used to grant access to AWS resources. 
In this scenario, you only need to launch the EC2 instance with an IAM role which includes the `AmazonS3FullAccess` policy. Nextflow will detect and automatically acquire the permission to access S3 storage, without any further configuration. -Learn more about [Using IAM Roles to Delegate Permissions to Applications that Run on Amazon EC2](http://docs.aws.amazon.com/IAM/latest/UserGuide/roles-usingrole-ec2instance.html) in the Amazon documentation. +Learn more about [Using IAM Roles to Delegate Permissions to Applications that Run on AWS EC2](http://docs.aws.amazon.com/IAM/latest/UserGuide/roles-usingrole-ec2instance.html) in the AWS documentation. ## China regions diff --git a/docs/aws.md b/docs/aws.md index edc1853595..acd6bbd01b 100644 --- a/docs/aws.md +++ b/docs/aws.md @@ -1,26 +1,35 @@ (aws-page)= -# Amazon Web Services +# AWS Cloud ## AWS security credentials Nextflow uses the [AWS security credentials](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html) to make programmatic calls to AWS services. -You can provide your AWS access keys using the standard AWS variables shown below: +The AWS credentials are selected from the following sources, in order of descending priority: -- `AWS_ACCESS_KEY_ID` -- `AWS_SECRET_ACCESS_KEY` -- `AWS_DEFAULT_REGION` +1. Nextflow configuration file - `aws.accessKey` and `aws.secretKey`. See {ref}`AWS configuration` for more details. -If `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` are not defined in the environment, Nextflow will attempt to -retrieve credentials from your `~/.aws/credentials` and `~/.aws/config` files. The `default` profile can be -overridden via the environmental variable `AWS_PROFILE` (or `AWS_DEFAULT_PROFILE`). +2. A custom profile in `$HOME/.aws/credentials` and/or `$HOME/.aws/config`. The profile can be supplied from the `aws.profile` config option, or the `AWS_PROFILE` or `AWS_DEFAULT_PROFILE` environmental variables. -Alternatively AWS credentials and profile can be specified in the Nextflow configuration file. See {ref}`AWS configuration` for more details. +3. Environment variables - `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. -:::{note} -Credentials can also be provided by using an IAM Instance Role. The benefit of this approach is that it spares you from managing/distributing AWS keys explicitly. Read the [IAM Roles](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html) documentation and [this blog post](https://aws.amazon.com/blogs/security/granting-permission-to-launch-ec2-instances-with-iam-roles-passrole-permission/) for more details. -::: +4. The `default` profile in `~/.aws/credentials` and/or `~/.aws/config`. + +5. Single Sign-On (SSO) credentials. See the [AWS documentation](https://docs.aws.amazon.com/cli/latest/userguide/sso-configure-profile-token.html) for more details. + + :::{versionadded} 23.07.0-edge + ::: + +6. EC2 instance profile credentials. See the [AWS documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html) and [this blog post](https://aws.amazon.com/blogs/security/granting-permission-to-launch-ec2-instances-with-iam-roles-passrole-permission/) for more details. + +The AWS region is selected from the following sources, in order of descending priority: + +1. Nextflow configuration file - `aws.region` +2. Environment variables - `AWS_REGION` or `AWS_DEFAULT_REGION` +3. 
EC2 instance metadata (if Nextflow is running in an EC2 instance) + +SSO credentials and instance profile credentials are recommended because they don't require you to manage and distribute AWS keys explicitly. SSO credentials are ideal for launching pipelines from outside of AWS (e.g. your laptop), while instance profile credentials are ideal for launching pipelines within AWS (e.g. an EC2 instance). ## AWS IAM policies @@ -30,7 +39,7 @@ Minimal permissions policies to be attached to the AWS account used by Nextflow - To use AWS Batch: - ``` + ```json "batch:DescribeJobQueues" "batch:CancelJob" "batch:SubmitJob" @@ -44,7 +53,7 @@ Minimal permissions policies to be attached to the AWS account used by Nextflow - To view [EC2](https://aws.amazon.com/ec2/) instances: - ``` + ```json "ecs:DescribeTasks" "ec2:DescribeInstances" "ec2:DescribeInstanceTypes" @@ -55,7 +64,7 @@ Minimal permissions policies to be attached to the AWS account used by Nextflow - To pull container images from [ECR](https://aws.amazon.com/ecr/) repositories: - ``` + ```json "ecr:GetAuthorizationToken" "ecr:BatchCheckLayerAvailability" "ecr:GetDownloadUrlForLayer" @@ -123,30 +132,24 @@ See the [bucket policy documentation](https://docs.aws.amazon.com/config/latest/ ## AWS Batch -[AWS Batch](https://aws.amazon.com/batch/) is a managed computing service that allows the execution of containerised workloads in the Amazon cloud infrastructure. It dynamically provisions the optimal quantity and type of compute resources (e.g., CPU or memory optimized compute resources) based on the volume and specific resource requirements of the jobs submitted. +[AWS Batch](https://aws.amazon.com/batch/) is a managed computing service that allows the execution of containerised workloads in the AWS cloud infrastructure. It dynamically provisions the optimal quantity and type of compute resources (e.g., CPU or memory optimized compute resources) based on the volume and specific resource requirements of the jobs submitted. Nextflow provides built-in support for AWS Batch, allowing the seamless deployment of Nextflow pipelines in the cloud, in which tasks are offloaded as Batch jobs. Read the {ref}`AWS Batch executor ` section to learn more about the `awsbatch` executor in Nextflow. -(aws-batch-config)= +(aws-batch-cli)= ### AWS CLI -Nextflow needs the [AWS command line tool](https://aws.amazon.com/cli/) (`aws`) to be available in the container in which tasks are executed, in order to stage input files and output files to and from S3 storage. - :::{tip} -When using {ref}`wave-page` and {ref}`fusion-page`, the AWS command line tool is not needed for task containers or the underlying EC2 instances when running Nextflow on AWS Batch. See the {ref}`fusion-page` documentation for more details. +The need for the AWS CLI is considered a legacy requirement for the deployment of Nextflow pipelines with AWS Batch. +Instead, consider using {ref}`wave-page` and {ref}`fusion-page` to facilitate access to S3 without using the AWS CLI. ::: -The `aws` command can be made available in the container in two ways: +Nextflow uses the [AWS command line tool](https://aws.amazon.com/cli/) (`aws`) to stage input files and output files between S3 and the task containers. -1. Installed in the Docker image(s) used during the pipeline execution, -2. Installed in a custom [AMI (Amazon Machine Image)](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) to use in place of the default AMI when configuring AWS Batch (see next section). 
- -The latter approach is preferred because it allows the use of existing Docker images without having to add the AWS CLI to each one. - -See the sections below to learn how to create a custom AMI and install the AWS CLI tool in it. +The `aws` command can be made available by either (1) installing it in the container image(s) or (2) installing it in a {ref}`custom AMI ` to be used instead of the default AMI when configuring AWS Batch. ### Get started @@ -185,7 +188,7 @@ process { aws { batch { - // NOTE: this setting is only required if the AWS CLI tool is installed in a custom AMI + // NOTE: this setting is only required if the AWS CLI is installed in a custom AMI cliPath = '/home/ec2-user/miniconda/bin/aws' } region = 'us-east-1' @@ -240,6 +243,8 @@ containerOptions '--ulimit nofile=1280:2560 --ulimit nproc=16:32 --privileged' Check the [AWS documentation](https://docs.aws.amazon.com/batch/latest/APIReference/API_ContainerProperties.html) for further details. +(aws-custom-ami)= + ## Custom AMI There are several reasons why you might need to create your own [AMI (Amazon Machine Image)](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) to use in your Compute Environments: @@ -251,10 +256,11 @@ There are several reasons why you might need to create your own [AMI (Amazon Mac ### Create your custom AMI -From the EC2 Dashboard, select **Launch Instance**, then select **AWS Marketplace** in the left-hand pane and search for "ECS". In the result list, select **Amazon ECS-Optimized Amazon Linux 2 AMI**, then continue as usual to configure and launch the instance. +From the EC2 Dashboard, select **Launch Instance**, then select **Browse more AMIs**. In the new page, select +**AWS Marketplace AMIs**, and then search for **Amazon ECS-Optimized Amazon Linux 2 (AL2) x86_64 AMI**. Select the AMI and continue as usual to configure and launch the instance. :::{note} -The selected instance has a bootstrap volume of 8GB and a second EBS volume of 30GB for scratch storage, which is not enough for real genomic workloads. Make sure to specify an additional volume with enough storage for your pipeline execution. +The selected instance has a root volume of 30GB. Make sure to increase its size or add a second EBS volume with enough storage for real genomic workloads. ::: When the instance is running, SSH into it (or connect with the Session Manager service), install the AWS CLI, and install any other tool that may be required (see following sections). @@ -272,10 +278,11 @@ Any additional software must be installed on the EC2 instance *before* creating ### AWS CLI installation :::{tip} -When using {ref}`wave-page` and {ref}`fusion-page`, the AWS command line tool is not needed for task containers or the underlying EC2 instances when running Nextflow on AWS Batch. See the {ref}`fusion-page` documentation for more details. +The need for the AWS CLI is considered a legacy requirement for the deployment of Nextflow pipelines with AWS Batch. +Instead, consider using {ref}`wave-page` and {ref}`fusion-page` to facilitate access to S3 without using the AWS CLI. ::: -The [AWS CLI tool](https://aws.amazon.com/cli) should be installed in your custom AMI using a self-contained package manager such as [Conda](https://conda.io). That way, you can control which version of Python is used by the AWS CLI (which is written in Python). +The [AWS CLI](https://aws.amazon.com/cli) should be installed in your custom AMI using a self-contained package manager such as [Conda](https://conda.io). 
That way, you can control which version of Python is used by the AWS CLI (which is written in Python). If you don't use Conda, the `aws` command will attempt to use the version of Python that is installed in the container, and it won't be able to find the necessary dependencies. @@ -294,7 +301,7 @@ Afterwards, verify that the AWS CLI package works correctly: ```console $ ./miniconda/bin/aws --version -aws-cli/1.19.79 Python/3.8.5 Linux/4.14.231-173.361.amzn2.x86_64 botocore/1.20.79 +aws-cli/1.29.20 Python/3.11.4 Linux/4.14.318-241.531.amzn2.x86_64 botocore/1.31.20 ``` :::{note} @@ -319,7 +326,7 @@ The grandparent directory of the `aws` tool will be mounted into the container a ### Docker installation -Docker is required by Nextflow to execute tasks on AWS Batch. The **Amazon ECS-Optimized Amazon Linux 2** AMI has Docker installed, however, if you create your AMI from a different AMI that does not have Docker installed, you will need to install it manually. +Docker is required by Nextflow to execute tasks on AWS Batch. The **Amazon ECS-Optimized Amazon Linux 2 (AL2) x86_64 AMI** has Docker installed, however, if you create your AMI from a different AMI that does not have Docker installed, you will need to install it manually. The following snippet shows how to install Docker on an Amazon EC2 instance: @@ -344,7 +351,7 @@ These steps must be done *before* creating the AMI from the current EC2 instance The [ECS container agent](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ECS_agent.html) is a component of Amazon Elastic Container Service (Amazon ECS) and is responsible for managing containers on behalf of ECS. AWS Batch uses ECS to execute containerized jobs, therefore it requires the agent to be installed on EC2 instances within your Compute Environments. -The ECS agent is included in the **Amazon ECS-Optimized Amazon Linux 2** AMI. If you use a different AMI, you can also install the agent on any EC2 instance that supports the Amazon ECS specification. +The ECS agent is included in the **Amazon ECS-Optimized Amazon Linux 2 (AL2) x86_64 AMI** . If you use a different base AMI, you can also install the agent on any EC2 instance that supports the Amazon ECS specification. To install the agent, follow these steps: @@ -364,6 +371,10 @@ curl -s http://localhost:51678/v1/metadata | python -mjson.tool (test) The `AmazonEC2ContainerServiceforEC2Role` policy must be attached to the instance role in order to be able to connect the EC2 instance created by the Compute Environment to the ECS container. ::: +:::{note} +The `AmazonEC2ContainerRegistryReadOnly` policy should be attached to the instance role in order to get read-only access to Amazon EC2 Container Registry repositories. +::: + ## Jobs & Execution ### Custom job definition @@ -396,7 +407,7 @@ The bucket path should include at least a top level directory name, e.g. `s3://m Nextflow allows the use of multiple executors in the same workflow application. This feature enables the deployment of hybrid workloads in which some jobs are executed in the local computer or local computing cluster and some jobs are offloaded to AWS Batch. -To enable this feature, use one or more {ref}`config-process-selectors` in your Nextflow configuration to apply the AWS Batch {ref}`configuration ` to the subset of processes that you want to offload. For example: +To enable this feature, use one or more {ref}`config-process-selectors` in your Nextflow configuration to apply the AWS Batch configuration to the subset of processes that you want to offload. 
For example: ```groovy aws { @@ -485,6 +496,35 @@ There are multiple reasons why this can happen. They are mainly related to the C This [AWS page](https://aws.amazon.com/premiumsupport/knowledge-center/batch-job-stuck-runnable-status/) provides several resolutions and tips to investigate and work around the issue. +## AWS Fargate + +:::{versionadded} 23.12.0-edge +::: + +Nextflow provides experimental support for the execution of [AWS Batch jobs with Fargate resources](https://docs.aws.amazon.com/batch/latest/userguide/fargate.html). + +AWS Fargate is a technology that you can use with AWS Batch to run containers without having to manage servers or EC2 instances. +With AWS Fargate, you no longer have to provision, configure, or scale clusters of virtual machines to run containers. + +To enable the use of AWS Fargate in your pipeline, use the following settings in your `nextflow.config` file: + +```groovy +process.executor = 'awsbatch' +process.queue = '' +aws.region = '' +aws.batch.platformType = 'fargate' +aws.batch.jobRole = 'JOB ROLE ARN' +aws.batch.executionRole = 'EXECUTION ROLE ARN' +wave.enabled = true +``` + +See the AWS documentation for details on how to create the required AWS Batch queue for Fargate, the Batch Job Role +and the Batch Execution Role. + +:::{note} +This feature requires the use of the {ref}`Wave ` container provisioning service. +::: + ## Advanced configuration Read the {ref}`AWS configuration` section to learn more about advanced configuration options. diff --git a/docs/azure.md b/docs/azure.md index aebf7bb9c1..406aa2c751 100644 --- a/docs/azure.md +++ b/docs/azure.md @@ -63,7 +63,7 @@ azure { } ``` -The files in the File share are available to the task in the directory: `/`. +The files in the File share are available to the task in the directory: ``. For instance, given the following configuration: @@ -73,15 +73,20 @@ azure { // ... fileShares { - dir1 { - mountPath = "/mnt/mydata/" + rnaseqResources { + mountPath = "/mnt/mydata/myresources" } } } } ``` -The task can access the File share in `/mnt/mydata/dir1`. +The task can access the File share in `/mnt/mydata/myresources`. Note: The string `rnaseqResources` in the above config can be any name of your choice, and it does not affect the underlying mount. + +:::{warning} +Azure File shares do not support authentication and management with Active Directory. The storage account key must be +set in the configuration if a share is mounted. +::: (azure-batch)= @@ -216,7 +221,7 @@ The pool name can only contain alphanumeric, hyphen and underscore characters. ::: :::{warning} -If the pool name includes a hyphen, make sure to wrap it with single quotes. For example:: +If the pool name includes a hyphen, make sure to wrap it with single quotes. For example: ```groovy azure { @@ -369,7 +374,7 @@ The value of the setting must be the identifier of a subnet available in the virtual network Batch Authentication with Shared Keys does not allow to link external resources (like Virtual Networks) to the pool. Therefore, Active Directory Authentication must be used in conjunction with the `virtualNetwork` setting. 
::: -## Active Directory Authentication +## Microsoft Entra (formerly Active Directory Authentication) :::{versionadded} 22.11.0-edge ::: diff --git a/docs/cache-and-resume.md b/docs/cache-and-resume.md new file mode 100644 index 0000000000..55b890ef43 --- /dev/null +++ b/docs/cache-and-resume.md @@ -0,0 +1,225 @@ +(cache-resume-page)= + +# Caching and resuming + +One of the core features of Nextflow is the ability to cache task executions and re-use them in subsequent runs to minimize duplicate work. Resumability is useful both for recovering from errors and for iteratively developing a pipeline. It is similar to [checkpointing](https://en.wikipedia.org/wiki/Application_checkpointing), a common practice used by HPC applications. + +You can enable resumability in Nextflow with the `-resume` flag when launching a pipeline with `nextflow run`. In most cases, that is all you need to do and resumability will "just work". This page describes Nextflow's caching behavior in more detail in order to help advanced users understand how the cache works and troubleshoot it when it doesn't work. + +## Task cache + +All task executions are automatically saved to the task cache, regardless of the `-resume` option (so that you always have the option to resume later). The task cache is a key-value store, where each key-value pair corresponds to a previously-executed task. + +The task cache is used in conjunction with the [work directory](#work-directory) to recover cached tasks in a resumed run. It is also used by the {ref}`cli-log` sub-command to query task metadata. + +### Task hash + +The task hash is computed from the following metadata: + +- Session ID (see `workflow.sessionId` in {ref}`metadata-workflow`) +- Task name (see `name` in {ref}`trace-report`) +- Task container image (if applicable) +- Task {ref}`environment modules ` (if applicable) +- Task {ref}`Conda environment ` (if applicable) +- Task {ref}`Spack environment ` and {ref}`CPU architecture ` (if applicable) +- Task {ref}`process-ext` directive (if applicable) +- Task {ref}`inputs ` +- Task {ref}`script ` +- Any global variables referenced in the task script +- Any {ref}`bundled scripts ` used in the task script +- Whether the task is a {ref}`stub run ` +- Task attempt + +:::{versionchanged} 23.09.2-edge +The {ref}`process-ext` directive was added to the task hash. +::: + +Nextflow computes this hash for every task when it is created but before it is executed. If resumability is enabled and there is an entry in the task cache with the same hash, Nextflow tries to recover the previous task execution. A cache hit does not guarantee that the task will be resumed, because it must also recover the task outputs from the [work directory](#work-directory). + +Note that files are hashed differently depending on the caching mode. See the {ref}`process-cache` directive for more details. + +### Task entry + +The task entry is a serialized blob of the task metadata required to resume a task, including the fields used by the {ref}`trace-report` and the task input variables. + +### Cache stores + +The default cache store uses the `.nextflow/cache` directory, relative to the launch directory (i.e. `workflow.launchDir`), to store the task cache, with a separate subdirectory for each session ID backed by [LevelDB](https://github.com/dain/leveldb). + +Due to the limitations of LevelDB, the database for a given session ID can only be accessed by one reader/writer at a time. 
This means, for example, that you cannot use `nextflow log` to query the task metadata for a pipeline run while it is still running. + +:::{versionadded} 23.07.0-edge +::: + +The cloud cache is an alternative cache store that uses cloud storage instead of the local cache directory. You can use it by setting the `NXF_CLOUDCACHE_PATH` environment variable to the desired cache path (e.g. `s3://my-bucket/cache`) and providing the necessary credentials. + +The cloud cache is particularly useful when launching Nextflow from within the cloud, where the default cache would be lost once the pipeline completes and the VM instance is terminated. Furthermore, because it is backed by cloud storage, it can support multiple readers and writers. + +## Work directory + +While the [task cache](#task-cache) stores the task metadata for subsequent runs, the work directory stores various files used during a pipeline run. + +Each task uses a unique directory based on its hash. When a task is created, Nextflow stages the task input files, script, and other helper files into the task directory. The task writes any output files to this directory during its execution, and Nextflow uses these output files for downstream tasks and/or publishing. + +When a previous task is retrieved from the task cache on a resumed run, Nextflow then checks the corresponding task directory in the work directory. If all the required outputs are present and the exit code is valid, then the task is successfully cached; otherwise, the task is re-executed. + +For this reason, it is important to preserve both the task cache (`.nextflow/cache`) and work directories in order to resume runs successfully. You can use the {ref}`cli-clean` command to delete specific runs from the cache. + +## Troubleshooting + +Cache failures happen when either (1) a task that was supposed to be cached was re-executed, or (2) a task that was supposed to be re-executed was cached. + +When this happens, consider the following questions: + +- Is resume enabled via `-resume`? +- Is the {ref}`process-cache` directive set to a non-default value? +- Is the task still present in the task cache and work directory? +- Were any of the task inputs changed? + +Changing any of the inputs included in the [task hash](#task-hash) will invalidate the cache, for example: + +- Resuming from a different session ID +- Changing the process name +- Changing the task container image or Conda environment +- Changing the task script +- Changing an input file or bundled script used by the task + +While the following examples would not invalidate the cache: + +- Changing the value of a directive (other than {ref}`process-ext`), even if that directive is used in the task script + +In many cases, cache failures happen because of a change to the pipeline script or configuration, or because the pipeline itself has some non-deterministic behavior. + +Here are some common reasons for cache failures: + +### Modified input files + +Make sure that your input files have not been changed. Keep in mind that the default caching mode uses the complete file path, the last modified timestamp, and the file size. If any of these attributes change, the task will be re-executed, even if the file content is unchanged. + +### Process that modifies its inputs + +If a process modifies its own input files, it cannot be resumed for the reasons described in the previous point. As a result, processes that modify their own input files are considered an anti-pattern and should be avoided. 
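As a minimal sketch of this anti-pattern (the process name and script here are hypothetical), consider a task that appends to its own input. Since Nextflow stages local input files into the task directory as symlinks by default, the write goes through the link to the original file, changing its size and timestamp:

```groovy
process appendColumn {
    input:
    path samplesheet

    script:
    """
    # This write follows the staged symlink back to the original file,
    # so the next resumed run sees a modified input and re-executes.
    echo "extra,column" >> $samplesheet
    """
}
```

A safer pattern is to write any derived data to a new file and leave the input untouched.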
+ +### Inconsistent file attributes + +Some shared file systems, such as NFS, may report inconsistent file timestamps, which can invalidate the cache. If you encounter this problem, you can avoid it by using the `'lenient'` {ref}`caching mode `, which ignores the last modified timestamp and uses only the file path and size. + +(cache-global-var-race-condition)= + +### Race condition on a global variable + +While Nextflow tries to make it easy to write safe concurrent code, it is still possible to create race conditions, which can in turn impact the caching behavior of your pipeline. + +Consider the following example: + +```groovy +Channel.of(1,2,3) | map { it -> X=it; X+=2 } | view { "ch1 = $it" } +Channel.of(1,2,3) | map { it -> X=it; X*=2 } | view { "ch2 = $it" } +``` + +The problem here is that `X` is declared in each `map` closure without the `def` keyword (or other type qualifier). Using the `def` keyword makes the variable local to the enclosing scope; omitting the `def` keyword makes the variable global to the entire script. + +Because `X` is global, and operators are executed concurrently, there is a *race condition* on `X`, which means that the emitted values will vary depending on the particular order of the concurrent operations. If the values were passed as inputs into a process, the process would execute different tasks on each run due to the race condition. + +The solution is to not use a global variable where a local variable is enough (or in this simple example, avoid the variable altogether): + +```groovy +// local variable +Channel.of(1,2,3) | map { it -> def X=it; X+=2 } | view { "ch1 = $it" } + +// no variable +Channel.of(1,2,3) | map { it -> it * 2 } | view { "ch2 = $it" } +``` + +(cache-nondeterministic-inputs)= + +### Non-deterministic process inputs + +Sometimes a process needs to merge inputs from different sources. Consider the following example: + +```groovy +workflow { + ch_foo = Channel.of( ['1', '1.foo'], ['2', '2.foo'] ) + ch_bar = Channel.of( ['2', '2.bar'], ['1', '1.bar'] ) + gather(ch_foo, ch_bar) +} + +process gather { + input: + tuple val(id), file(foo) + tuple val(id), file(bar) + """ + merge_command $foo $bar + """ +} +``` + +It is tempting to assume that the process inputs will be matched by `id` like the {ref}`operator-join` operator. But in reality, they are simply merged like the {ref}`operator-merge` operator. As a result, not only will the process inputs be incorrect, they will also be non-deterministic, thus invalidating the cache. + +The solution is to explicitly join the two channels before the process invocation: + +```groovy +workflow { + ch_foo = Channel.of( ['1', '1.foo'], ['2', '2.foo'] ) + ch_bar = Channel.of( ['2', '2.bar'], ['1', '1.bar'] ) + gather(ch_foo.join(ch_bar)) +} + +process gather { + input: + tuple val(id), file(foo), file(bar) + """ + merge_command $foo $bar + """ +} +``` + +## Tips + +### Resuming from a specific run + +Nextflow resumes from the previous run by default. If you want to resume from an earlier run, simply specify the session ID for that run with the `-resume` option: + +```bash +nextflow run rnaseq-nf -resume 4dc656d2-c410-44c8-bc32-7dd0ea87bebf +``` + +You can use the {ref}`cli-log` command to view all previous runs as well as the task executions for each run. + +(cache-compare-hashes)= + +### Comparing the hashes of two runs + +One way to debug a resumed run is to compare the task hashes of each run using the `-dump-hashes` option. + +1. 
Perform an initial run: `nextflow -log run_initial.log run <pipeline> -dump-hashes` +2. Perform a resumed run: `nextflow -log run_resumed.log run <pipeline> -dump-hashes -resume` +3. Extract the task hash lines from each log (search for `cache hash:`) +4. Compare the runs with a diff viewer + +While some manual effort is required, the final diff can often reveal the exact change that caused a task to be re-executed. + +:::{versionadded} 23.10.0 +::: + +When using `-dump-hashes json`, the task hashes can be more easily extracted into a diff. Here is an example Bash script to perform two runs and produce a diff: + +```bash +nextflow -log run_1.log run $pipeline -dump-hashes json +nextflow -log run_2.log run $pipeline -dump-hashes json -resume + +get_hashes() { + cat $1 \ + | grep 'cache hash:' \ + | cut -d ' ' -f 10- \ + | sort \ + | awk '{ print; print ""; }' +} + +get_hashes run_1.log > run_1.tasks.log +get_hashes run_2.log > run_2.tasks.log + +diff run_1.tasks.log run_2.tasks.log +``` + +You can then view the `diff` output or use a graphical diff viewer to compare `run_1.tasks.log` and `run_2.tasks.log`. diff --git a/docs/channel.md b/docs/channel.md index fcab491dca..157ae08534 100644 --- a/docs/channel.md +++ b/docs/channel.md @@ -2,12 +2,12 @@ # Channels -Nextflow is based on the Dataflow programming model in which processes communicate through channels. +Nextflow is based on the dataflow programming model in which processes communicate through channels. A channel has two major properties: -1. Sending a message is an *asynchronous* operation which completes immediately, without having to wait for the receiving process. -2. Receiving data is a blocking operation which stops the receiving process until the message has arrived. +1. Sending a message is an *asynchronous* (i.e. non-blocking) operation, which means the sender doesn't have to wait for the receiving process. +2. Receiving a message is a *synchronous* (i.e. blocking) operation, which means the receiving process must wait until a message has arrived. (channel-types)= @@ -19,7 +19,8 @@ In Nextflow there are two kinds of channels: *queue channels* and *value channel ### Queue channel -A *queue channel* is a non-blocking unidirectional FIFO queue which connects two processes, channel factories, or operators. +A *queue channel* is a non-blocking unidirectional FIFO queue connecting a *producer* process (i.e. outputting a value) +to a consumer process or an operator. A queue channel can be created by factory methods ([of](#of), [fromPath](#frompath), etc), operators ({ref}`operator-map`, {ref}`operator-flatmap`, etc), and processes (see {ref}`Process outputs `). (channel-type-value)= ### Value channel -A *value channel* a.k.a. *singleton channel* is bound to a single value and can be read any number of times without being consumed. +A *value channel* can be bound (i.e. assigned) with one and only one value, and can be consumed any number of times by +a process or an operator. -A value channel can be created with the [value](#value) factory method or by any operator that produces a single value ({ref}`operator-first`, {ref}`operator-collect`, {ref}`operator-reduce`, etc). Additionally, a process will emit value channels if it is invoked with all value channels, including simple values which are implicitly wrapped in a value channel. - -A value channel is implicitly created by a process when it is invoked with a simple value. 
Furthermore, a value channel is also implicitly created as output for a process whose inputs are all value channels. +A value channel can be created with the [value](#value) factory method or by any operator that produces a single value +({ref}`operator-first`, {ref}`operator-collect`, {ref}`operator-reduce`, etc). Additionally, a process will emit value +channels if it is invoked with all value channels, including simple values which are implicitly wrapped in a value channel. For example: @@ -54,7 +56,8 @@ workflow { } ``` -In the above example, since the `foo` process is invoked with a simple value instead of a channel, the input is implicitly converted to a value channel, and the output is also emitted as a value channel. +In the above example, since the `foo` process is invoked with a simple value instead of a channel, the input is implicitly +wrapped in a value channel, and the output is also emitted as a value channel. See also: {ref}`process-multiple-input-channels`. @@ -65,14 +68,15 @@ See also: {ref}`process-multiple-input-channels`. Channels may be created explicitly using the following channel factory methods. :::{versionadded} 20.07.0 -`channel` was introduced as an alias of `Channel`, allowing factory methods to be specified as `channel.of()` or `Channel.of()`, and so on. +`channel` was introduced as an alias of `Channel`, allowing factory methods to be specified as `channel.of()` or +`Channel.of()`, and so on. ::: (channel-empty)= ### empty -The `empty` factory method, by definition, creates a channel that doesn't emit any value. +The `channel.empty` factory method, by definition, creates a channel that doesn't emit any value. See also: {ref}`operator-ifempty`. @@ -81,13 +85,13 @@ See also: {ref}`operator-ifempty`. ### from :::{deprecated} 19.09.0-edge -Use [of](#of) or [fromList](#fromlist) instead. +Use [channel.of](#of) or [channel.fromList](#fromlist) instead. ::: -The `from` method allows you to create a channel emitting any sequence of values that are specified as the method argument, for example: +The `channel.from` method allows you to create a channel emitting any sequence of values that are specified as the method argument, for example: ```groovy -ch = Channel.from( 1, 3, 5, 7 ) +ch = channel.from( 1, 3, 5, 7 ) ch.subscribe { println "value: $it" } ``` @@ -103,25 +107,25 @@ value: 7 The following example shows how to create a channel from a *range* of numbers or strings: ```groovy -zeroToNine = Channel.from( 0..9 ) -strings = Channel.from( 'A'..'Z' ) +zeroToNine = channel.from( 0..9 ) +strings = channel.from( 'A'..'Z' ) ``` :::{note} -When the `from` argument is an object implementing the (Java) [Collection](http://docs.oracle.com/javase/7/docs/api/java/util/Collection.html) interface, the resulting channel emits the collection entries as individual items. +When the `channel.from` argument is an object implementing the (Java) [Collection](http://docs.oracle.com/javase/7/docs/api/java/util/Collection.html) interface, the resulting channel emits the collection entries as individual items. ::: Thus the following two declarations produce an identical result even though in the first case the items are specified as multiple arguments while in the second case as a single list object argument: ```groovy -Channel.from( 1, 3, 5, 7, 9 ) -Channel.from( [1, 3, 5, 7, 9] ) +channel.from( 1, 3, 5, 7, 9 ) +channel.from( [1, 3, 5, 7, 9] ) ``` But when more than one argument is provided, they are always managed as *single* emissions. 
Thus, the following example creates a channel emitting three entries each of which is a list containing two elements: ```groovy -Channel.from( [1, 2], [5,6], [7,9] ) +channel.from( [1, 2], [5,6], [7,9] ) ``` (channel-fromlist)= @@ -131,10 +135,10 @@ Channel.from( [1, 2], [5,6], [7,9] ) :::{versionadded} 19.10.0 ::: -The `fromList` method allows you to create a channel emitting the values provided as a list of elements, for example: +The `channel.fromList` method allows you to create a channel emitting the values provided as a list of elements, for example: ```groovy -Channel +channel .fromList( ['a', 'b', 'c', 'd'] ) .view { "value: $it" } ``` @@ -148,28 +152,31 @@ value: c value: d ``` -See also: [of](#of) factory method. +See also: [channel.of](#of) factory method. (channel-path)= ### fromPath -You can create a channel emitting one or more file paths by using the `fromPath` method and specifying a path string as an argument. For example: +You can create a channel emitting one or more file paths by using the `channel.fromPath` method and specifying a path +string as an argument. For example: ```groovy -myFileChannel = Channel.fromPath( '/data/some/bigfile.txt' ) +myFileChannel = channel.fromPath( '/data/some/bigfile.txt' ) ``` -The above line creates a channel and binds it to a [Path](http://docs.oracle.com/javase/7/docs/api/java/nio/file/Path.html) object for the specified file. +The above line creates a channel and binds it to a [Path](http://docs.oracle.com/javase/7/docs/api/java/nio/file/Path.html) +object for the specified file. :::{note} -`fromPath` does not check whether the file exists. +`channel.fromPath` does not check whether the file exists. ::: -Whenever the `fromPath` argument contains a `*` or `?` wildcard character it is interpreted as a [glob][glob] path matcher. For example: +Whenever the `channel.fromPath` argument contains a `*` or `?` wildcard character it is interpreted as a [glob][glob] path matcher. +For example: ```groovy -myFileChannel = Channel.fromPath( '/data/big/*.txt' ) +myFileChannel = channel.fromPath( '/data/big/*.txt' ) ``` This example creates a channel and emits as many `Path` items as there are files with `txt` extension in the `/data/big` folder. @@ -181,9 +188,9 @@ Two asterisks, i.e. `**`, works like `*` but crosses directory boundaries. This For example: ```groovy -files = Channel.fromPath( 'data/**.fa' ) -moreFiles = Channel.fromPath( 'data/**/*.fa' ) -pairFiles = Channel.fromPath( 'data/file_{1,2}.fq' ) +files = channel.fromPath( 'data/**.fa' ) +moreFiles = channel.fromPath( 'data/**/*.fa' ) +pairFiles = channel.fromPath( 'data/file_{1,2}.fq' ) ``` The first line returns a channel emitting the files ending with the suffix `.fa` in the `data` folder *and* recursively in all its sub-folders. While the second one only emits the files which have the same suffix in *any* sub-folder in the `data` path. Finally the last example emits two files: `data/file_1.fq` and `data/file_2.fq`. @@ -195,15 +202,15 @@ As in Linux Bash, the `*` wildcard does not catch hidden files (i.e. files whose Multiple paths or glob patterns can be specified using a list: ```groovy -Channel.fromPath( ['/some/path/*.fq', '/other/path/*.fastq'] ) +channel.fromPath( ['/some/path/*.fq', '/other/path/*.fastq'] ) ``` In order to include hidden files, you need to start your pattern with a period character or specify the `hidden: true` option. 
For example: ```groovy -expl1 = Channel.fromPath( '/path/.*' ) -expl2 = Channel.fromPath( '/path/.*.fa' ) -expl3 = Channel.fromPath( '/path/*', hidden: true ) +expl1 = channel.fromPath( '/path/.*' ) +expl2 = channel.fromPath( '/path/.*.fa' ) +expl3 = channel.fromPath( '/path/*', hidden: true ) ``` The first example returns all hidden files in the specified path. The second one returns all hidden files ending with the `.fa` suffix. Finally the last example returns all files (hidden and non-hidden) in that path. @@ -213,8 +220,8 @@ By default a [glob][glob] pattern only looks for regular file paths that match t You can use the `type` option specifying the value `file`, `dir` or `any` in order to define what kind of paths you want. For example: ```groovy -myFileChannel = Channel.fromPath( '/path/*b', type: 'dir' ) -myFileChannel = Channel.fromPath( '/path/a*', type: 'any' ) +myFileChannel = channel.fromPath( '/path/*b', type: 'dir' ) +myFileChannel = channel.fromPath( '/path/a*', type: 'any' ) ``` The first example will return all *directory* paths ending with the `b` suffix, while the second will return any file or directory starting with a `a` prefix. @@ -246,10 +253,11 @@ Available options: ### fromFilePairs -The `fromFilePairs` method creates a channel emitting the file pairs matching a [glob][glob] pattern provided by the user. The matching files are emitted as tuples in which the first element is the grouping key of the matching pair and the second element is the list of files (sorted in lexicographical order). For example: +The `channel.fromFilePairs` method creates a channel emitting the file pairs matching a [glob][glob] pattern provided +by the user. The matching files are emitted as tuples in which the first element is the grouping key of the matching pair and the second element is the list of files (sorted in lexicographical order). For example: ```groovy -Channel +channel .fromFilePairs('/my/data/SRR*_{1,2}.fastq') .view() ``` @@ -272,13 +280,13 @@ The glob pattern must contain at least one `*` wildcard character. Multiple glob patterns can be specified using a list: ```groovy -Channel.fromFilePairs( ['/some/data/SRR*_{1,2}.fastq', '/other/data/QFF*_{1,2}.fastq'] ) +channel.fromFilePairs( ['/some/data/SRR*_{1,2}.fastq', '/other/data/QFF*_{1,2}.fastq'] ) ``` Alternatively, it is possible to implement a custom file pair grouping strategy providing a closure which, given the current file as parameter, returns the grouping key. For example: ```groovy -Channel +channel .fromFilePairs('/some/data/*', size: -1) { file -> file.extension } .view { ext, files -> "Files with the extension $ext are $files" } ``` @@ -313,10 +321,10 @@ Available options: ### fromSRA :::{versionadded} 19.04.0 ::: -The `fromSRA` method queries the [NCBI SRA](https://www.ncbi.nlm.nih.gov/sra) database and returns a channel emitting the FASTQ files matching the specified criteria i.e project or accession number(s). For example: +The `channel.fromSRA` method queries the [NCBI SRA](https://www.ncbi.nlm.nih.gov/sra) database and returns a channel emitting the FASTQ files matching the specified criteria, i.e. project or accession number(s). 
For example:

```groovy
-Channel
+channel
    .fromSRA('SRP043510')
    .view()
```

@@ -337,7 +345,7 @@ Multiple accession IDs can be specified using a list object:

```groovy
ids = ['ERR908507', 'ERR908506', 'ERR908505']
-Channel
+channel
    .fromSRA(ids)
    .view()
```

@@ -358,7 +366,7 @@ To access the ESearch API, you must provide your [NCBI API keys](https://ncbiins

- The `apiKey` option:

  ```groovy
-  Channel.fromSRA(ids, apiKey:'0123456789abcdef')
+  channel.fromSRA(ids, apiKey:'0123456789abcdef')
  ```

- The `NCBI_API_KEY` variable in your environment:

@@ -387,14 +395,15 @@ Available options:

:::{versionadded} 19.10.0
:::

-The `of` method allows you to create a channel that emits the arguments provided to it, for example:
+The `channel.of` method allows you to create a channel that emits the arguments provided to it, for example:

```groovy
-ch = Channel.of( 1, 3, 5, 7 )
+ch = channel.of( 1, 3, 5, 7 )
ch.view { "value: $it" }
```

-The first line in this example creates a variable `ch` which holds a channel object. This channel emits the arguments supplied to the `of` method. Thus the second line prints the following:
+The first line in this example creates a variable `ch` which holds a channel object. This channel emits the arguments
+supplied to the `of` method. Thus the second line prints the following:

```
value: 1
@@ -406,7 +415,7 @@ value: 7

Ranges of values are expanded accordingly:

```groovy
-Channel
+channel
    .of(1..23, 'X', 'Y')
    .view()
```

@@ -424,37 +433,144 @@ X
Y
```

-See also: [fromList](#fromlist) factory method.
+See also: [channel.fromList](#fromlist) factory method.
+
+(channel-topic)=
+
+### topic
+
+:::{versionadded} 23.11.0-edge
+:::
+
+:::{note}
+This feature requires the `nextflow.preview.topic` feature flag to be enabled.
+:::
+
+A *topic* is a channel type introduced in Nextflow 23.11.0-edge, alongside {ref}`channel-type-value` and
+{ref}`channel-type-queue`.
+
+A *topic channel*, similarly to a *queue channel*, is a non-blocking unidirectional FIFO queue; however, it connects
+multiple *producer* processes with multiple *consumer* processes or operators.
+
+:::{tip}
+You can think of it as a channel that is shared across many different processes using the same *topic name*.
+:::
+
+A process output can be assigned to a topic using the `topic` option on an output, for example:
+
+```groovy
+process foo {
+    output:
+    val('foo'), topic: my_topic
+}
+
+process bar {
+    output:
+    val('bar'), topic: my_topic
+}
+```
+
+The `channel.topic` method allows referencing the topic channel with the specified name, which can be used as a process
+input or in an operator composition like any other Nextflow channel:
+
+```groovy
+channel.topic('my_topic').view()
+```
+
+This approach is a convenient way to collect related items from many different sources without having to explicitly
+connect many different queue channels, which is commonly done with the `mix` operator.
+
+:::{warning}
+Any process that consumes a channel topic should not send any outputs to that topic, or else the pipeline will hang forever.
+:::
+
+See also: {ref}`process-additional-options` for process outputs.
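As a minimal sketch of how these pieces fit together (assuming the `foo` and `bar` processes above, each given a trivial script section so they can actually run, and the `nextflow.preview.topic` flag enabled), a workflow can consume the topic like any other queue channel:

```groovy
// feature flag, may also be set in the Nextflow configuration
nextflow.preview.topic = true

workflow {
    foo()
    bar()
    // receives 'foo' and 'bar' (in arrival order) without any explicit mix() wiring
    channel.topic('my_topic').view()
}
```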
(channel-value)=

### value

-The `value` method is used to create a value channel. An optional (not `null`) argument can be specified to bind the channel to a specific value. For example:
+The `channel.value` method is used to create a value channel. An optional (not `null`) argument can be specified to bind
+the channel to a specific value. For example:

```groovy
-expl1 = Channel.value()
-expl2 = Channel.value( 'Hello there' )
-expl3 = Channel.value( [1,2,3,4,5] )
+expl1 = channel.value()
+expl2 = channel.value( 'Hello there' )
+expl3 = channel.value( [1,2,3,4,5] )
```

-The first line in the example creates an 'empty' variable. The second line creates a channel and binds a string to it. The third line creates a channel and binds a list object to it that will be emitted as a single value.
+The first line in the example creates an 'empty' variable. The second line creates a channel and binds a string to it.
+The third line creates a channel and binds a list object to it that will be emitted as a single value.

(channel-watchpath)=

### watchPath

-The `watchPath` method watches a folder for one or more files matching a specified pattern. As soon as there is a file that meets the specified condition, it is emitted over the channel that is returned by the `watchPath` method. The condition on files to watch can be specified by using `*` or `?` wildcard characters i.e. by specifying a [glob][glob] path matching criteria.
+The `channel.watchPath` method watches a folder for one or more files matching a specified pattern. As soon as there
+is a file that meets the specified condition, it is emitted over the channel that is returned by the `watchPath` method.
+The condition on files to watch can be specified by using `*` or `?` wildcard characters i.e. by specifying a [glob][glob] path matching criteria.

For example:

```groovy
-Channel
+channel
    .watchPath( '/path/*.fa' )
    .subscribe { println "Fasta file: $it" }
```

-By default it watches only for new files created in the specified folder. Optionally, it is possible to provide a second argument that specifies what event(s) to watch.
+By default it watches only for new files created in the specified folder. Optionally, it is possible to provide a second
+argument that specifies what event(s) to watch.
The supported events are:

- `create`: A new file is created (default)
- `modify`: A file is modified
- `delete`: A file is deleted

You can specify more than one of these events by using a comma-separated string as shown below:

```groovy
-Channel
+channel
    .watchPath( '/path/*.fa', 'create,modify' )
    .subscribe { println "File created or modified: $it" }
```

:::{warning}
-The `watchPath` factory waits endlessly for files that match the specified pattern and event(s), which means that it will cause your pipeline to run forever. Consider using the `take` or `until` operator to close the channel when a certain condition is met (e.g. after receiving 10 files, receiving a file named `DONE`).
+The `channel.watchPath` factory waits endlessly for files that match the specified pattern and event(s), which means
+that it will cause your pipeline to run forever. Consider using the `take` or `until` operator to close the channel when
+a certain condition is met (e.g. after receiving 10 files, receiving a file named `DONE`).
:::

-See also: [fromPath](#frompath) factory method.
+See also: [channel.fromPath](#frompath) factory method.

[glob]: http://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob

diff --git a/docs/cli.md b/docs/cli.md
index 164ca94a04..988a1d5586 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -31,6 +31,8 @@ Available options:
: Add the specified file to configuration set.

`-d, -dockerize`
+: :::{deprecated} 23.09.0-edge
+  :::
: Launch nextflow via Docker (experimental).

`-h`
@@ -42,6 +44,9 @@
`-q, -quiet`
: Do not print information messages.

+`-remote-debug`
+: Enable JVM interactive remote debugging (experimental).
+
`-syslog`
: Send logs to syslog server (e.g. localhost:514).

@@ -225,6 +230,8 @@ The `-v` option prints out information about Nextflow, such as the version and b

## Commands

+(cli-clean)=
+
### clean

Clean up *cache* and *work* directories.

@@ -411,6 +418,11 @@ The `config` command is used for printing the project's configuration i.e. the `

`-sort`
: Sort config attributes.

+`-value`
+: :::{versionadded} 23.08.0-edge
+  :::
+: Print the value of a config option, or fail if the option is not defined.
+
**Examples**

Print out the inferred config using the default group key-value notation.

@@ -445,6 +457,13 @@
docker.enabled = true
process.executor = local
```

+Print out the value of a specific configuration property.
+
+```console
+$ nextflow config -value process.executor
+local
+```
+
Print out all profiles from the project's configuration.

```console

@@ -469,6 +488,8 @@ profiles {
}
```

+(cli-console)=
+
### console

Launch the Nextflow interactive console.

@@ -531,6 +552,66 @@ Forcefully drop the `nextflow-io/hello` pipeline, ignoring any local changes.
$ nextflow drop nextflow-io/hello -f
```

+### fs
+
+Perform basic filesystem operations.
+
+**Usage**
+
+```console
+$ nextflow fs [subcommands]
+```
+
+**Description**
+
+The `fs` command is used to perform filesystem operations like copy, move, delete, list directory, etc. Like the `file()` method, it can work with local files, remote URLs, and remote object storage. Storage credentials can be provided in the same manner as when launching a pipeline (Nextflow config, environment vars, etc).
+
+**Options**
+
+`-h, -help`
+: Print the command usage.
+
+**Examples**
+
+List a directory.
+
+```console
+$ nextflow fs list <directory>
+```
+
+Print the contents of a file to standard output.
+
+```console
+$ nextflow fs cat <file>
+```
+
+Copy a file or directory.
+
+```console
+$ nextflow fs cp <source> <target>
+```
+
+Move a file or directory.
+
+```console
+$ nextflow fs mv <source> <target>
+```
+
+Delete a file or directory.
+
+```console
+$ nextflow fs rm <path>
+```
+
+:::{versionadded} 23.10.0
+:::
+
+Print file or directory attributes.
+
+```console
+$ nextflow fs stat <path>
+```
+
### help

Print the top-level help or specific help for a command.

@@ -629,6 +710,57 @@ $ nextflow info nextflow-io/hello
v1.2 [t]
```

+### inspect
+
+:::{versionadded} 23.09.0-edge
+:::
+
+Inspect process settings in a pipeline project. Currently only supports the `container` directive.
+
+**Usage**
+
+```console
+$ nextflow inspect [options] [project]
+```
+
+**Description**
+
+The `inspect` command allows you to determine the container for each process in a pipeline without running the pipeline. It prints to stdout a listing of containers for each process, formatted either as JSON or Nextflow configuration.
+
+**Options**
+
+`-concretize`
+: Build the container images resolved by the inspect command.
+
+`-format` (`json`)
+: Inspect output format. Can be `json` or `config`.
+
+`-i, -ignore-errors`
+: Ignore errors while inspecting the pipeline.
+
+`-params-file`
+: Load script parameters from a JSON/YAML file.
+
+`-profile`
+: Use the given configuration profile(s).
+
+`-r, -revision`
+: Revision of the project to inspect (either a git branch, tag or commit SHA number).
+
+**Examples**
+
+Get the list of containers used by a pipeline.
+
+```console
+$ nextflow inspect nextflow-io/hello
+```
+
+Specify parameters as with the `run` command:
+
+```console
+$ nextflow inspect main.nf --alpha 1 --beta foo
+```
+
### kuberun

Launch a Nextflow pipeline on a Kubernetes cluster.

@@ -757,6 +889,8 @@ nextflow-io/hello
nextflow-hub/fastqc
```

+(cli-log)=
+
### log

Print the execution history and log information.

@@ -886,6 +1020,26 @@ $ nextflow log tiny_leavitt -F 'process =~ /splitLetters/'
work/1f/f1ea9158fb23b53d5083953121d6b6
```

+(cli-plugin)=
+
+### plugin
+
+Manage plugins and run plugin-specific commands.
+
+```console
+$ nextflow plugin <command> [options]
+```
+
+The `plugin` command provides several subcommands for managing and using plugins:
+
+`install <pluginId,..>`
+
+: Install a plugin. Multiple plugins can be specified as a comma-separated list. Each plugin id consists of a name and optional version separated by a `@`.
+
+`<pluginId>:<command> [options]`
+
+: Execute a plugin-specific command.
+
### pull

Download or update a project.

@@ -987,16 +1141,23 @@ The `run` command is used to execute a local pipeline script or remote pipeline

: Prevent the cancellation of child jobs on execution termination

`-dsl1`
+: :::{deprecated} 23.09.0-edge
+  :::
: Execute the workflow using DSL1 syntax.

`-dsl2`
+: :::{deprecated} 23.09.0-edge
+  :::
: Execute the workflow using DSL2 syntax.

`-dump-channels`
: Dump channels for debugging purposes.

`-dump-hashes`
-: Dump task hash keys for debugging purpose.
+: Dump task hash keys for debugging purposes.
+: :::{versionadded} 23.10.0
+  You can use `-dump-hashes json` to dump the task hash keys as JSON for easier post-processing. See the {ref}`caching and resuming tips ` for more details.
+  :::

`-e.<key>=<value>`
: Add the specified variable to execution environment.

@@ -1036,7 +1197,7 @@ The `run` command is used to execute a local pipeline script or remote pipeline

`-preview`
: :::{versionadded} 22.06.0-edge
  :::
-: Run the workflow script skipping the execution of all processes
+: Run the workflow script skipping the execution of all processes.
`-process.<key>=<value>`
: Set process config options.

@@ -1069,11 +1230,17 @@ The `run` command is used to execute a local pipeline script or remote pipeline

`-with-charliecloud`
: Enable process execution in a Charliecloud container.

+`-with-cloudcache`
+: Enable the use of the Cloud cache plugin for storing cache metadata to an object storage bucket.
+
`-with-conda`
: Use the specified Conda environment package or file (must end with `.yml` or `.yaml`)

-`-with-dag` (`dag.dot`)
+`-with-dag` (`dag-<timestamp>.html`)
: Create pipeline DAG file.
+: :::{versionchanged} 23.10.0
+  The default format was changed from `dot` to `html`.
+  :::

`-with-docker`
: Enable process execution in a Docker container.

@@ -1084,7 +1251,7 @@ The `run` command is used to execute a local pipeline script or remote pipeline

`-with-podman`
: Enable process execution in a Podman container.

-`-with-report` (`report.html`)
+`-with-report` (`report-<timestamp>.html`)
: Create workflow execution HTML report.

`-with-singularity`
@@ -1093,19 +1260,19 @@ The `run` command is used to execute a local pipeline script or remote pipeline

`-with-spack`
: Use the specified Spack environment package or file (must end with `.yaml`)

-`-with-timeline` (`timeline.html`)
+`-with-timeline` (`timeline-<timestamp>.html`)
: Create workflow execution timeline.

-`-with-tower`
+`-with-tower` (`https://api.tower.nf`)
: Monitor workflow execution with [Tower](https://cloud.tower.nf/).

-`-with-trace` (`trace.txt`)
+`-with-trace` (`trace-<timestamp>.txt`)
: Create workflow execution trace file.

-`-with-wave`
+`-with-wave` (`https://wave.seqera.io`)
: Enable the use of Wave containers.

-`-with-weblog`
+`-with-weblog` (`http://localhost`)
: Send workflow status messages via HTTP to target URL.

`-without-conda`
@@ -1155,13 +1322,7 @@ The `run` command is used to execute a local pipeline script or remote pipeline

  $ nextflow run nextflow-io/hello -qs 4
  ```

-- Execute the pipeline with DSL-2 syntax.
-
-  ```console
-  $ nextflow run nextflow-io/hello -dsl2
-  ```
-
-- Execute a pipeline with a specific workflow as the entry-point, this option is meant to be used with DSL-2. For more information on DSL-2, please refer to {ref}`dsl2-page`
+- Invoke the pipeline with a specific workflow as the entry-point.

  ```console
  $ nextflow run main.nf -entry workflow_A

diff --git a/docs/conda.md b/docs/conda.md
index b4f05092ef..d3ea9ee56c 100644
--- a/docs/conda.md
+++ b/docs/conda.md
@@ -76,6 +76,20 @@ dependencies:
  - bwa=0.7.15
```

+This second example shows how to leverage a Conda environment file to install Python packages from the [PyPI repository](https://pypi.org/), through the `pip` package manager (which must also be explicitly listed as a required package):
+
+```yaml
+name: my-env-2
+channels:
+  - defaults
+dependencies:
+  - pip
+  - pip:
+    - numpy
+    - pandas
+    - matplotlib
+```
+
Read the Conda documentation for more details about how to create [environment files](https://conda.io/docs/user-guide/tasks/manage-environments.html#creating-an-environment-file-manually).

The path of an environment file can be specified using the `conda` directive:

diff --git a/docs/conf.py b/docs/conf.py
index 1104ea6aac..08a0aff2a1 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -29,6 +29,8 @@
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [ 'sphinx.ext.mathjax', + 'sphinxcontrib.mermaid', + 'sphinxext.rediraffe', 'sphinx_rtd_theme', 'myst_parser' ] @@ -37,6 +39,10 @@ myst_heading_anchors = 3 +rediraffe_redirects = { + 'dsl2.md': 'dsl1.md' +} + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -58,9 +64,9 @@ # built documents. # # The short X.Y version. -version = '23.06' +version = '23.12' # The full version, including alpha/beta/rc tags. -release = '23.06.0-edge' +release = '23.12.0-edge' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -74,7 +80,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ['_build', '**README.md'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None @@ -91,7 +97,7 @@ #show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = 'default' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] @@ -135,12 +141,12 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = 'images/nextflow-logo.png' +html_logo = '_static/nextflow-logo.png' # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = 'images/favicon.ico' +html_favicon = '_static/favicon.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -216,7 +222,7 @@ # The name of an image file (relative to this directory) to place at the top of # the title page. -latex_logo = 'images/nextflow-logo.png' +latex_logo = '_static/nextflow-logo.png' # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. @@ -298,7 +304,7 @@ # The format is a list of tuples containing the path and title. #epub_pre_files = [] -# HTML files shat should be inserted after the pages created by sphinx. +# HTML files that should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. #epub_post_files = [] diff --git a/docs/config.md b/docs/config.md index 5f43dd876f..26db447620 100644 --- a/docs/config.md +++ b/docs/config.md @@ -87,7 +87,7 @@ The `apptainer` scope controls how [Apptainer](https://apptainer.org) containers The following settings are available: `apptainer.autoMounts` -: When `true` Nextflow automatically mounts host paths in the executed container. It requires the `user bind control` feature to be enabled in your Apptainer installation (default: `false`). +: When `true` Nextflow automatically mounts host paths in the executed container. It requires the `user bind control` feature to be enabled in your Apptainer installation (default: `true`). `apptainer.cacheDir` : The directory where remote Apptainer images are stored. When using a computing cluster it must be a shared folder accessible to all compute nodes. @@ -104,6 +104,11 @@ The following settings are available: `apptainer.noHttps` : Pull the Apptainer image with http protocol (default: `false`). 
+`apptainer.ociAutoPull`
+: :::{versionadded} 23.12.0-edge
+  :::
+: When enabled, OCI (and Docker) container images are pulled and converted to the SIF format by the Apptainer run command, instead of by Nextflow (default: `false`).
+
`apptainer.pullTimeout`
: The amount of time the Apptainer pull can last, exceeding which the process is terminated (default: `20 min`).

@@ -169,8 +174,13 @@ The following settings are available:

`aws.batch.delayBetweenAttempts`
: Delay between download attempts from S3 (default: `10 sec`).

+`aws.batch.executionRole`
+: :::{versionadded} 23.12.0-edge
+  :::
+: The AWS Batch Execution Role ARN that needs to be used to execute the Batch Job. This is mandatory when using the AWS Fargate platform type. See the [AWS documentation](https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html) for more details.
+
`aws.batch.jobRole`
-: The AWS Job Role ARN that needs to be used to execute the Batch Job.
+: The AWS Batch Job Role ARN that needs to be used to execute the Batch Job.

`aws.batch.logsGroup`
: :::{versionadded} 22.09.0-edge
@@ -188,8 +198,13 @@ The following settings are available:

`aws.batch.maxTransferAttempts`
: Max number of download attempts from S3 (default: `1`).

+`aws.batch.platformType`
+: :::{versionadded} 23.12.0-edge
+  :::
+: Allow specifying the compute platform type used by AWS Batch, which can be either `ec2` or `fargate`. See the AWS documentation to learn more about the [AWS Fargate platform type](https://docs.aws.amazon.com/batch/latest/userguide/fargate.html) for AWS Batch.
+
`aws.batch.retryMode`
-: The retry mode configuration setting, to accommodate rate-limiting on [AWS services](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-retries.html) (default: `standard`)
+: The retry mode configuration setting, to accommodate rate-limiting on [AWS services](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-retries.html) (default: `standard`, other options: `legacy`, `adaptive`); this handling is delegated to AWS. To have Nextflow handle retries instead, use `built-in`.

`aws.batch.schedulingPriority`
: :::{versionadded} 23.01.0-edge
@@ -220,7 +235,10 @@ The following settings are available:

: :::{versionadded} 22.12.0-edge
  :::
: *Experimental: may change in a future release.*
-: Enable auto retrieval of S3 objects stored with Glacier class store (default: `false`).
+: Enable auto retrieval of S3 objects with a Glacier storage class (default: `false`).
+: :::{note}
+  This feature only works for S3 objects that are downloaded by Nextflow directly. It is not supported for tasks (e.g. when using the AWS Batch executor), since that would lead to many tasks sitting idle for several hours and wasting resources. If you need to restore many objects from Glacier, consider restoring them in a script prior to launching the pipeline.
+  :::

`aws.client.glacierExpirationDays`
: :::{versionadded} 22.12.0-edge
@@ -276,7 +294,7 @@ The following settings are available:

`aws.client.storageKmsKeyId`
: :::{versionadded} 22.05.0-edge
  :::
-: The AWS KMS key Id to be used to encrypt files stored in the target S3 bucket ().
+: The AWS KMS key Id to be used to encrypt files stored in the target S3 bucket.

`aws.client.userAgent`
: The HTTP user agent header passed with all HTTP requests.

@@ -330,14 +348,20 @@ The following settings are available:

`azure.batch.copyToolInstallMode`
: Specify where the `azcopy` tool used by Nextflow is installed. When `node` is specified, it's copied once during the pool creation.
When `task` is specified, it's installed for each task execution (default: `node`).

-`azure.batch.terminateJobsOnCompletion`
-: Enables the Batch Job to automatically terminate a job once all tasks have completed (default: `true`).
-
`azure.batch.deleteJobsOnCompletion`
-: Enable the automatic deletion of jobs created by the pipeline execution (default: `true`).
+: Delete all jobs when the workflow completes (default: `false`).
+: :::{versionchanged} 23.08.0-edge
+  Default value was changed from `true` to `false`.
+  :::

`azure.batch.deletePoolsOnCompletion`
-: Enable the automatic deletion of compute node pools upon pipeline completion (default: `false`).
+: Delete all compute node pools when the workflow completes (default: `false`).
+
+`azure.batch.deleteTasksOnCompletion`
+: :::{versionadded} 23.08.0-edge
+  :::
+: Delete each task when it completes (default: `true`).
+: Although this setting is enabled by default, failed tasks will not be deleted unless it is explicitly enabled. This way, the default behavior is that successful tasks are deleted while failed tasks are preserved for debugging purposes.

`azure.batch.endpoint`
: The batch service endpoint e.g. `https://nfbatch1.westeurope.batch.azure.com`.

@@ -345,6 +369,11 @@ The following settings are available:

`azure.batch.location`
: The name of the batch service region, e.g. `westeurope` or `eastus2`. This is not needed when the endpoint is specified.

+`azure.batch.terminateJobsOnCompletion`
+: :::{versionadded} 23.05.0-edge
+  :::
+: When the workflow completes, set all jobs to terminate on task completion (default: `true`).
+
`azure.batch.pools.<name>.autoScale`
: Enable autoscaling feature for the pool identified with `<name>`.

@@ -352,6 +381,10 @@

`azure.batch.pools.<name>.fileShareRootPath`
: *New in `nf-azure` version `0.11.0`*
: If mounting File Shares, this is the internal root mounting point. Must be `/mnt/resource/batch/tasks/fsmounts` for CentOS nodes or `/mnt/batch/tasks/fsmounts` for Ubuntu nodes (default is for CentOS).

+`azure.batch.pools.<name>.lowPriority`
+: *New in `nf-azure` version `1.4.0`*
+: Enable the use of low-priority VMs (default: `false`).
+
`azure.batch.pools.<name>.maxVmCount`
: Specify the max number of virtual machines when using the autoscale option.

@@ -469,6 +502,9 @@ The `conda` scope controls the creation of a Conda environment by the Conda pack

The following settings are available:

+`conda.enabled`
+: Enable Conda execution (default: `false`).
+
`conda.cacheDir`
: Defines the path where Conda environments are stored. When using a compute cluster make sure to provide a shared file system path accessible from all compute nodes.

@@ -492,20 +528,38 @@ Read the {ref}`conda-page` page to learn more about how to use Conda environment

### Scope `dag`

-The `dag` scope controls the layout of the execution graph diagram generated by Nextflow.
+The `dag` scope controls the workflow diagram generated by Nextflow.

The following settings are available:

`dag.enabled`
-: When `true` turns on the generation of the DAG file (default: `false`).
+: When `true` enables the generation of the DAG file (default: `false`).

+`dag.depth`
+: :::{versionadded} 23.10.0
+  :::
+: *Only supported by the HTML and Mermaid renderers.*
+: Controls the maximum depth at which to render sub-workflows (default: no limit).
+
+`dag.direction`
+: :::{versionadded} 23.10.0
+  :::
+: *Only supported by the HTML and Mermaid renderers.*
+: Controls the direction of the DAG, which can be `'LR'` (left-to-right) or `'TB'` (top-to-bottom) (default: `'TB'`).
`dag.file`
-: Graph file name (default: `dag-<timestamp>.dot`).
+: Graph file name (default: `dag-<timestamp>.html`).

`dag.overwrite`
-: When `true` overwrites any existing DAG file with the same name.
+: When `true` overwrites any existing DAG file with the same name (default: `false`).

-Read the {ref}`dag-visualisation` page to learn more about the execution graph that can be generated by Nextflow.
+`dag.verbose`
+: :::{versionadded} 23.10.0
+  :::
+: *Only supported by the HTML and Mermaid renderers.*
+: When `false`, channel names are omitted, operators are collapsed, and empty workflow inputs are removed (default: `false`).
+
+Read the {ref}`dag-visualisation` page to learn more about the workflow graph that can be generated by Nextflow.

(config-docker)=

@@ -608,6 +662,12 @@ The following settings are available:

`executor.name`
: The name of the executor to be used (default: `local`).

+`executor.perCpuMemAllocation`
+: :::{versionadded} 23.07.0-edge
+  :::
+: *Used only by the {ref}`slurm-executor` executor.*
+: When `true`, specifies memory allocations for SLURM jobs as `--mem-per-cpu <task.memory / task.cpus>` instead of `--mem <task.memory>`.
+
`executor.perJobMemLimit`
: Specifies Platform LSF *per-job* memory limit mode. See {ref}`lsf-executor`.

@@ -623,7 +683,7 @@ The following settings are available:

: Determines how job status is retrieved. When `false` only the queue associated with the job execution is queried. When `true` the job status is queried globally i.e. irrespective of the submission queue (default: `false`).

`executor.queueSize`
-: The number of tasks the executor will handle in a parallel manner. Default varies for each executor (see below).
+: The number of tasks the executor will handle in a parallel manner. A queue size of zero corresponds to no limit. Default varies for each executor (see below).

`executor.queueStatInterval`
: Determines how often to fetch the queue status from the scheduler (default: `1min`). Used only by grid executors.

@@ -710,7 +770,90 @@ The `google` scope allows you to configure the interactions with Google Cloud, i

Read the {ref}`google-page` page for more information.

-The following settings are available:
+#### Cloud Batch
+
+The following settings are available for Google Cloud Batch:
+
+`google.enableRequesterPaysBuckets`
+: When `true` uses the given Google Cloud project ID as the billing project for storage access. This is required when accessing data from *requester pays enabled* buckets. See [Requester Pays on Google Cloud Storage documentation](https://cloud.google.com/storage/docs/requester-pays) (default: `false`).
+
+`google.httpConnectTimeout`
+: :::{versionadded} 23.06.0-edge
+  :::
+: Defines the HTTP connection timeout for Cloud Storage API requests (default: `'60s'`).
+
+`google.httpReadTimeout`
+: :::{versionadded} 23.06.0-edge
+  :::
+: Defines the HTTP read timeout for Cloud Storage API requests (default: `'60s'`).
+
+`google.location`
+: The Google Cloud location where jobs are executed (default: `us-central1`).
+
+`google.batch.maxSpotAttempts`
+: :::{versionadded} 23.11.0-edge
+  :::
+: Max number of execution attempts of a job interrupted by a Compute Engine spot reclaim event (default: `5`).
+
+`google.project`
+: The Google Cloud project ID to use for pipeline execution.
+
+`google.batch.allowedLocations`
+: :::{versionadded} 22.12.0-edge
+  :::
+: Define the set of allowed locations for VMs to be provisioned. See [Google documentation](https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#locationpolicy) for details (default: no restriction).
+ +`google.batch.bootDiskSize` +: Set the size of the virtual machine boot disk, e.g `50.GB` (default: none). + +`google.batch.cpuPlatform` +: Set the minimum CPU Platform, e.g. `'Intel Skylake'`. See [Specifying a minimum CPU Platform for VM instances](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform#specifications) (default: none). + +`google.batch.network` +: The URL of an existing network resource to which the VM will be attached. + + You can specify the network as a full or partial URL. For example, the following are all valid URLs: + + - https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network} + - projects/{project}/global/networks/{network} + - global/networks/{network} + +`google.batch.serviceAccountEmail` +: Define the Google service account email to use for the pipeline execution. If not specified, the default Compute Engine service account for the project will be used. + +`google.batch.spot` +: When `true` enables the usage of *spot* virtual machines or `false` otherwise (default: `false`). + +`google.batch.subnetwork` +: The URL of an existing subnetwork resource in the network to which the VM will be attached. + + You can specify the subnetwork as a full or partial URL. For example, the following are all valid URLs: + + - https://www.googleapis.com/compute/v1/projects/{project}/regions/{region}/subnetworks/{subnetwork} + - projects/{project}/regions/{region}/subnetworks/{subnetwork} + - regions/{region}/subnetworks/{subnetwork} + +`google.batch.usePrivateAddress` +: When `true` the VM will NOT be provided with a public IP address, and only contain an internal IP. If this option is enabled, the associated job can only load docker images from Google Container Registry, and the job executable cannot use external services other than Google APIs (default: `false`). + +`google.storage.maxAttempts` +: :::{versionadded} 23.11.0-edge + ::: +: Max attempts when retrying failed API requests to Cloud Storage (default: `10`). + +`google.storage.maxDelay` +: :::{versionadded} 23.11.0-edge + ::: +: Max delay when retrying failed API requests to Cloud Storage (default: `'90s'`). + +`google.storage.multiplier` +: :::{versionadded} 23.11.0-edge + ::: +: Delay multiplier when retrying failed API requests to Cloud Storage (default: `2.0`). + +#### Cloud Life Sciences + +The following settings are available for Cloud Life Sciences: `google.enableRequesterPaysBuckets` : When `true` uses the given Google Cloud project ID as the billing project for storage access. This is required when accessing data from *requester pays enabled* buckets. See [Requester Pays on Google Cloud Storage documentation](https://cloud.google.com/storage/docs/requester-pays) (default: `false`). @@ -732,11 +875,9 @@ The following settings are available: : The Google Cloud project ID to use for pipeline execution `google.region` -: *Available only for Google Life Sciences* : The Google Cloud region where jobs are executed. Multiple regions can be provided as a comma-separated list. Cannot be used with the `google.zone` option. See the [Google Cloud documentation](https://cloud.google.com/compute/docs/regions-zones/) for a list of available regions and zones. `google.zone` -: *Available only for Google Life Sciences* : The Google Cloud zone where jobs are executed. Multiple zones can be provided as a comma-separated list. Cannot be used with the `google.region` option. 
See the [Google Cloud documentation](https://cloud.google.com/compute/docs/regions-zones/) for a list of available regions and zones. `google.batch.allowedLocations` @@ -750,6 +891,11 @@ The following settings are available: `google.batch.cpuPlatform` : Set the minimum CPU Platform, e.g. `'Intel Skylake'`. See [Specifying a minimum CPU Platform for VM instances](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform#specifications) (default: none). +`google.batch.installGpuDrivers` +: :::{versionadded} 23.08.0-edge + ::: +: When `true` automatically installs the appropriate GPU drivers to the VM when a GPU is requested (default: `false`). Only needed when using an instance template. + `google.batch.network` : Set network name to attach the VM's network interface to. The value will be prefixed with `global/networks/` unless it contains a `/`, in which case it is assumed to be a fully specified network resource URL. If unspecified, the global default network is used. @@ -860,6 +1006,11 @@ The following settings are available: ::: : If you trace the hostname, activate this option (default: `false`). +`k8s.fuseDevicePlugin` +: :::{versionadded} 24.01.0-edge + ::: +: The FUSE device plugin to be used when enabling Fusion in unprivileged mode (default: `['nextflow.io/fuse': 1]`). + `k8s.httpConnectTimeout` : :::{versionadded} 22.10.0 ::: @@ -1039,8 +1190,8 @@ Read the {ref}`sharing-page` page to learn how to publish your pipeline to GitHu The `notification` scope allows you to define the automatic sending of a notification email message when the workflow execution terminates. -`notification.binding` -: An associative array modelling the variables in the template file. +`notification.attributes` +: A map object modelling the variables that can be used in the template file. `notification.enabled` : Enables the sending of a notification message when the workflow execution completes. @@ -1156,6 +1307,10 @@ process { } ``` +:::{note} +The `withName` selector applies to a process even when it is included from a module under an alias. For example, `withName: hello` will apply to any process originally defined as `hello`, regardless of whether it is included under an alias. Similarly, it will not apply to any process not originally defined as `hello`, even if it is included under the alias `hello`. +::: + :::{tip} Label and process names do not need to be enclosed with quotes, provided the name does not include special characters (`-`, `!`, etc) and is not a keyword or a built-in type identifier. When in doubt, you can enclose the label name or process name with single or double quotes. ::: @@ -1273,7 +1428,10 @@ The `singularity` scope controls how [Singularity](https://sylabs.io/singularity The following settings are available: `singularity.autoMounts` -: When `true` Nextflow automatically mounts host paths in the executed container. It requires the `user bind control` feature to be enabled in your Singularity installation (default: `false`). +: When `true` Nextflow automatically mounts host paths in the executed container. It requires the `user bind control` feature to be enabled in your Singularity installation (default: `true`). +: :::{versionchanged} 23.09.0-edge + Default value was changed from `false` to `true`. + ::: `singularity.cacheDir` : The directory where remote Singularity images are stored. When using a computing cluster it must be a shared folder accessible to all compute nodes. 
@@ -1290,6 +1448,16 @@ The following settings are available:

`singularity.noHttps`
: Pull the Singularity image with http protocol (default: `false`).

+`singularity.ociAutoPull`
+: :::{versionadded} 23.12.0-edge
+  :::
+: When enabled, OCI (and Docker) container images are pulled and converted to a SIF image file format implicitly by the Singularity run command, instead of by Nextflow. Requires Singularity 3.11 or later (default: `false`).
+
+`singularity.ociMode`
+: :::{versionadded} 23.12.0-edge
+  :::
+: Enable OCI-mode, which allows running native OCI-compliant container images with Singularity using `crun` or `runc` as the low-level runtime. Note: it requires Singularity 4 or later. See the `--oci` flag in the [Singularity documentation](https://docs.sylabs.io/guides/4.0/user-guide/oci_runtime.html#oci-mode) for more details and requirements (default: `false`).
+
`singularity.pullTimeout`
: The amount of time the Singularity pull can last, exceeding which the process is terminated (default: `20 min`).

@@ -1404,20 +1572,6 @@ trace {
}

Read the {ref}`trace-report` page to learn more about the execution report that can be generated by Nextflow.

-(config-weblog)=
-
-### Scope `weblog`
-
-The `weblog` scope allows you to send detailed {ref}`trace ` information as HTTP POST requests to a webserver, shipped as a JSON object.
-
-Detailed information about the JSON fields can be found in the {ref}`weblog description`.
-
-`weblog.enabled`
-: If `true` it will send HTTP POST requests to a given url.
-
-`weblog.url`
-: The url where to send HTTP POST requests (default: `http:localhost`).
-
(config-miscellaneous)=

### Miscellaneous

@@ -1510,6 +1664,11 @@ The following environment variables control the configuration of the Nextflow ru

`NXF_ASSETS`
: Defines the directory where downloaded pipeline repositories are stored (default: `$NXF_HOME/assets`)

+`NXF_CLOUDCACHE_PATH`
+: :::{versionadded} 23.07.0-edge
+  :::
+: Defines the base cache path when using the cloud cache store.
+
`NXF_CHARLIECLOUD_CACHEDIR`
: Directory where remote Charliecloud images are stored. When using a computing cluster it must be a shared folder accessible from all compute nodes.

@@ -1527,19 +1686,42 @@ The following environment variables control the configuration of the Nextflow ru

  :::
: Enable the use of Conda recipes defined by using the {ref}`process-conda` directive (default: `false`).

-`NXF_DEBUG`
-: Defines scripts debugging level: `1` dump task environment variables in the task log file; `2` enables command script execution tracing; `3` enables command wrapper execution tracing.
-
`NXF_DEFAULT_DSL`
: :::{versionadded} 22.03.0-edge
  :::
: Defines the DSL version that should be used if not specified otherwise in the script or config file (default: `2`)

+`NXF_DISABLE_CHECK_LATEST`
+: :::{versionadded} 23.09.0-edge
+  :::
+: Nextflow automatically checks for a newer version of itself unless this option is enabled (default: `false`).
+
`NXF_DISABLE_JOBS_CANCELLATION`
: :::{versionadded} 21.12.0-edge
  :::
: Disables the cancellation of child jobs on workflow execution termination.

+`NXF_DISABLE_PARAMS_TYPE_DETECTION`
+: :::{versionadded} 23.07.0-edge
+  :::
+: Disables the automatic type detection of command line parameters.
+
+`NXF_DISABLE_WAVE_SERVICE`
+: :::{versionadded} 23.08.0-edge
+  :::
+: Disables the requirement for the Wave service when enabling the Fusion file system.
+
+`NXF_ENABLE_AWS_SES`
+: :::{versionadded} 23.06.0-edge
+  :::
+: Enable the use of the AWS SES native API for sending emails in place of the legacy SMTP settings (default: `false`)
+
+`NXF_ENABLE_FS_SYNC`
+: :::{versionadded} 23.10.0
+  :::
+: When enabled, the job script will execute the Linux `sync` command on job completion. This may be useful to synchronize the job state over shared file systems (default: `false`)
+
`NXF_ENABLE_SECRETS`
: :::{versionadded} 21.09.0-edge
  :::
@@ -1553,6 +1735,12 @@ The following environment variables control the configuration of the Nextflow ru

`NXF_EXECUTOR`
: Defines the default process executor e.g. `sge`

+`NXF_FILE_ROOT`
+: :::{versionadded} 23.05.0-edge
+  :::
+: The file storage path against which relative file paths are resolved.
+: For example, with `NXF_FILE_ROOT=/some/root/path`, the use of `file('foo')` will be resolved to the absolute path `/some/root/path/foo`. A remote root path can be specified using the usual protocol prefix, e.g. `NXF_FILE_ROOT=s3://my-bucket/data`. Files defined using an absolute path are not affected by this setting.
+
`NXF_HOME`
: Nextflow home directory (default: `$HOME/.nextflow`).

@@ -1565,7 +1753,13 @@ The following environment variables control the configuration of the Nextflow ru

: Allows setting Java VM options. This is similar to `NXF_OPTS`, however it's only applied to the JVM running Nextflow and not to any Java pre-launching commands.

`NXF_OFFLINE`
-: When `true` disables the project automatic download and update from remote repositories (default: `false`).
+: When `true` prevents Nextflow from automatically downloading and updating remote project repositories (default: `false`).
+: :::{versionchanged} 23.09.0-edge
+  This option also disables the automatic version check (see `NXF_DISABLE_CHECK_LATEST`).
+  :::
+: :::{versionchanged} 23.11.0-edge
+  This option also prevents plugins from being downloaded. Plugin versions must be specified in offline mode, or else Nextflow will fail.
+  :::

`NXF_OPTS`
: Provides extra options for the Java and Nextflow runtime. It must be a blank separated list of `-Dkey[=value]` properties.

@@ -1581,6 +1775,17 @@ The following environment variables control the configuration of the Nextflow ru

`NXF_PID_FILE`
: Name of the file where the process PID is saved when Nextflow is launched in background.

+`NXF_PLUGINS_DEFAULT`
+: Whether to use the default plugins when no plugins are specified in the Nextflow configuration (default: `true`).
+
+`NXF_PLUGINS_DIR`
+: The path where the plugin archives are loaded and stored (default: `$NXF_HOME/plugins`).
+
+`NXF_PLUGINS_TEST_REPOSITORY`
+: :::{versionadded} 23.04.0
+  :::
+: Defines a custom plugin registry or plugin release URL for testing plugins outside of the main registry. See {ref}`testing-plugins` for more information.
+
`NXF_SCM_FILE`
: :::{versionadded} 20.10.0
  :::
@@ -1611,11 +1816,11 @@ The following environment variables control the configuration of the Nextflow ru

`NXF_WORK`
: Directory where working files are stored (usually your *scratch* directory)

-`NXF_FILE_ROOT`
+`NXF_WRAPPER_STAGE_FILE_THRESHOLD`
: :::{versionadded} 23.05.0-edge
  :::
-: The file storage path against which relative file paths are resolved.
-: For example, with `NXF_FILE_ROOT=/some/root/path`, the use of `file('foo')` will be resolved to the absolute path `/some/root/path/foo`. A remote root path can be specified using the usual protocol prefix, e.g. `NXF_FILE_ROOT=s3://my-bucket/data`. Files defined using an absolute path are not affected by this setting.
+: Defines the minimum size of the `.command.run` staging script for it to be written to a separate `.command.stage` file (default: `'1 MB'`).
+: This setting is useful for executors that impose a size limit on job scripts.

`JAVA_HOME`
: Defines the path location of the Java VM installation used to run Nextflow.

@@ -1680,11 +1885,11 @@ Some features can be enabled using the `nextflow.enable` and `nextflow.preview`

  - When merging params from a config file with params from the command line, Nextflow will fail if a param is specified from both sources but with different types

-  - When using the `join` operator, the `failOnDuplicate` option is `true` by default
+  - When using the `join` operator, the `failOnDuplicate` option is `true` regardless of any user setting

-  - When using the `join` operator, the `failOnMismatch` option is `true` by default (unless `remainder` is also `true`)
+  - When using the `join` operator, the `failOnMismatch` option is `true` (unless `remainder` is also `true`) regardless of any user setting

-  - When using the `publishDir` process directive, the `failOnError` option is `true` by default
+  - When using the `publishDir` process directive, the `failOnError` option is `true` regardless of any user setting

  - In a process definition, Nextflow will fail if an input or output tuple has only one element

@@ -1706,3 +1911,12 @@ Some features can be enabled using the `nextflow.enable` and `nextflow.preview`

: When `true`, enables process and workflow recursion. See [this GitHub discussion](https://github.com/nextflow-io/nextflow/discussions/2521) for more information.
+
+`nextflow.preview.topic`
+
+: :::{versionadded} 23.11.0-edge
+  :::
+
+: *Experimental: may change in a future release.*
+
+: When `true`, enables the {ref}`topic channels <channel-topic>` feature.

diff --git a/docs/container.md b/docs/container.md
index da44cf565a..cb5d50547d 100644
--- a/docs/container.md
+++ b/docs/container.md
@@ -5,7 +5,7 @@ Nextflow supports a variety of container runtimes. Containerization allows you to write self-contained and truly reproducible computational pipelines, by packaging the binary dependencies of a script into a standard and portable format that can be executed on any platform that supports a container runtime. Furthermore, the same pipeline can be transparently executed with any of the supported container runtimes, depending on which runtimes are available in the target compute environment.

:::{note}
-When creating your container image to use with Nextflow, make sure that Bash (3.x or later) and `ps` are installed in your image, along with other tools required for collecting metrics (See {ref}`this section `). Also, Bash should be available on the path `/bin/bash` and it should be the container entrypoint.
+When creating a container image to use with Nextflow, make sure that Bash (3.x or later) and `ps` are installed in the image, along with other tools required for collecting metrics (See {ref}`this section `). Bash should be available on the path `/bin/bash` and it should be the container entrypoint.
:::

(container-apptainer)=

@@ -35,7 +35,7 @@ If your Apptainer installation support the "user bind control" feature, enable t

### How it works

-The integration for Apptainer follows the same execution model implemented for Docker. You won't need to modify your Nextflow script in order to run it with Apptainer. Simply specify the Apptainer image file from where the containers are started by using the `-with-apptainer` command line option.
For example:: +The integration for Apptainer follows the same execution model implemented for Docker. You won't need to modify your Nextflow script in order to run it with Apptainer. Simply specify the Apptainer image file from where the containers are started by using the `-with-apptainer` command line option. For example: ```bash nextflow run -with-apptainer [apptainer image file] @@ -66,13 +66,13 @@ Unlike Docker, Nextflow does not automatically mount host paths in the container When a process input is a *symbolic link* file, make sure the linked file is stored in a host folder that is accessible from a bind path defined in your Apptainer installation. Otherwise the process execution will fail because the launched container won't be able to access the linked file. ::: -:::{versionchanged} 22.07.0-edge +:::{versionchanged} 23.07.0-edge Nextflow no longer mounts the home directory when launching an Apptainer container. To re-enable the old behavior, set the environment variable `NXF_APPTAINER_HOME_MOUNT` to `true`. ::: ### Multiple containers -It is possible to specify a different Apptainer image for each process definition in your pipeline script. For example, let's suppose you have two processes named `foo` and `bar`. You can specify two different Apptainer images specifying them in the `nextflow.config` file as shown below:: +It is possible to specify a different Apptainer image for each process definition in your pipeline script. For example, let's suppose you have two processes named `foo` and `bar`. You can specify two different Apptainer images specifying them in the `nextflow.config` file as shown below: ```groovy process { @@ -579,10 +579,19 @@ Unlike Docker, Nextflow does not automatically mount host paths in the container When a process input is a *symbolic link* file, make sure the linked file is stored in a host folder that is accessible from a bind path defined in your Singularity installation. Otherwise the process execution will fail because the launched container won't be able to access the linked file. ::: -:::{versionchanged} 22.07.0-edge +:::{versionchanged} 23.07.0-edge Nextflow no longer mounts the home directory when launching a Singularity container. To re-enable the old behavior, set the environment variable `NXF_SINGULARITY_HOME_MOUNT` to `true`. ::: +:::{versionchanged} 23.09.0-edge +Nextflow automatically mounts the required host paths in the container. To re-enable the old behavior, set the environment variable `NXF_SINGULARITY_AUTO_MOUNTS` to `false` or set `singularity.autoMounts=false` in the Nextflow configuration file. +::: + +:::{versionchanged} 23.09.0-edge +Nextflow uses the command `run` to carry out the execution of Singularity containers instead of the `exec` command. +To re-enable the old behavior, set the environment variable `NXF_SINGULARITY_RUN_COMMAND` to `exec`. +::: + ### Multiple containers It is possible to specify a different Singularity image for each process definition in your pipeline script. For example, let's suppose you have two processes named `foo` and `bar`. 
You can specify two different Singularity images specifying them in the `nextflow.config` file as shown below:

diff --git a/docs/developer/diagrams/.gitignore b/docs/developer/diagrams/.gitignore
new file mode 100644
index 0000000000..657f47cd79
--- /dev/null
+++ b/docs/developer/diagrams/.gitignore
@@ -0,0 +1 @@
+nextflow-merged.mmd

diff --git a/docs/developer/diagrams/README.md b/docs/developer/diagrams/README.md
new file mode 100644
index 0000000000..392029fbe3
--- /dev/null
+++ b/docs/developer/diagrams/README.md
@@ -0,0 +1,17 @@
+# Class Diagrams
+
+This directory contains class diagrams of the Nextflow source code, abridged and annotated for relevance and ease of use.
+
+Each node is a class. Fields are selectively documented in order to show only core data structures and the classes that "own" them. Methods are not explicitly documented, but they are mentioned in certain links where appropriate.
+
+Links between classes denote one of the following relationships:
+
+- Inheritance (`A <|-- B`): `B` is a subclass of `A`
+- Composition (`A --* B`): `A` contains `B`
+- Instantiation (`A --> B : f`): `A` creates instance(s) of `B` at runtime via `A::f()`
+
+Some links are commented out or not included at all, in order to focus on the most important classes and relationships. You can view these "hidden" links by simply uncommenting them, but I have found that their significance is sufficiently clear from the description files.
+
+A separate diagram description is provided for each package. These files are interoperable, which means that you can combine any subset of files into a larger diagram description. The `merge-diagrams.sh` script can create a merged file for you automatically, and it includes a sensible default set of packages.
+
+You can use the [Mermaid Live Editor](https://mermaid-js.github.io/mermaid-live-editor/edit) or the [Mermaid CLI](https://github.com/mermaid-js/mermaid-cli) to render the diagram in a variety of image formats.
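For instance, the merged diagram can be rendered to an SVG with the Mermaid CLI; the following is a sketch assuming the CLI is fetched on the fly via `npx` (the output file name is arbitrary):

```console
$ ./merge-diagrams.sh
$ npx -p @mermaid-js/mermaid-cli mmdc -i nextflow-merged.mmd -o nextflow-merged.svg
```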
diff --git a/docs/developer/diagrams/merge-diagrams.sh b/docs/developer/diagrams/merge-diagrams.sh
new file mode 100755
index 0000000000..a76321ca92
--- /dev/null
+++ b/docs/developer/diagrams/merge-diagrams.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Merge the per-package class diagrams into a single Mermaid file.
+# Uncomment a package below to include it in the merged diagram.
+
+packages=()
+packages+=("nextflow")
+# packages+=("nextflow.ast")
+packages+=("nextflow.cache")
+packages+=("nextflow.cli")
+# packages+=("nextflow.cloud.aws")
+# packages+=("nextflow.cloud.aws.nio")
+# packages+=("nextflow.cloud.azure")
+# packages+=("nextflow.cloud.google")
+# packages+=("nextflow.config")
+# packages+=("nextflow.container")
+packages+=("nextflow.dag")
+# packages+=("nextflow.executor")
+# packages+=("nextflow.extension")
+# packages+=("nextflow.ga4gh")
+# packages+=("nextflow.k8s")
+# packages+=("nextflow.plugin")
+packages+=("nextflow.processor")
+# packages+=("nextflow.scm")
+packages+=("nextflow.script")
+# packages+=("nextflow.secret")
+# packages+=("nextflow.trace")
+
+outfile="nextflow-merged.mmd"
+
+# Every .mmd file begins with a 'classDiagram' header; write it once.
+echo "classDiagram" > ${outfile}
+
+for package in "${packages[@]}"; do
+    echo "${package}"
+
+    # Append the package diagram, skipping its own 'classDiagram' header line.
+    tail -n +2 "${package}.mmd" >> ${outfile}
+    echo >> ${outfile}
+done

diff --git a/docs/developer/diagrams/nextflow.ast.mmd b/docs/developer/diagrams/nextflow.ast.mmd
new file mode 100644
index 0000000000..fab36a2c21
--- /dev/null
+++ b/docs/developer/diagrams/nextflow.ast.mmd
@@ -0,0 +1,7 @@
+classDiagram
+    %%
+    %% nextflow.ast
+    %%
+    ScriptParser --> NextflowDSLImpl : parse
+    ScriptParser --> NextflowXformImpl : parse
+    ScriptParser --> OpXformImpl : parse

diff --git a/docs/developer/diagrams/nextflow.cache.mmd b/docs/developer/diagrams/nextflow.cache.mmd
new file mode 100644
index 0000000000..105fd444f8
--- /dev/null
+++ b/docs/developer/diagrams/nextflow.cache.mmd
@@ -0,0 +1,23 @@
+classDiagram
+    %%
+    %% nextflow.cache
+    %%
+    Session --* CacheDB
+    %% CacheFactory --> CacheDB : createInstance
+
+    CacheDB --* CacheStore
+
+    CacheStore <|-- DefaultCacheStore
+    CacheStore <|-- CloudCacheStore
+
+    class DefaultCacheStore {
+        uniqueId : UUID
+        runName : String
+        baseDir : Path
+    }
+
+    class CloudCacheStore {
+        uniqueId : UUID
+        runName : String
+        basePath : Path
+    }
\ No newline at end of file

diff --git a/docs/developer/diagrams/nextflow.cli.mmd b/docs/developer/diagrams/nextflow.cli.mmd
new file mode 100644
index 0000000000..228acacd37
--- /dev/null
+++ b/docs/developer/diagrams/nextflow.cli.mmd
@@ -0,0 +1,29 @@
+classDiagram
+    %%
+    %% nextflow.cli
+    %%
+    class Launcher {
+        cliOptions : CliOptions
+        command : CmdBase
+    }
+    Launcher --* CliOptions
+    Launcher --* CmdBase
+
+    %% CmdBase <|-- CmdClean
+    %% CmdBase <|-- CmdClone
+    %% CmdBase <|-- CmdConfig
+    CmdBase <|-- CmdConsole
+    %% CmdBase <|-- CmdDrop
+    %% CmdBase <|-- CmdFs
+    CmdBase <|-- CmdHelp
+    CmdBase <|-- CmdInfo
+    %% CmdBase <|-- CmdKubeRun
+    %% CmdBase <|-- CmdList
+    %% CmdBase <|-- CmdLog
+    %% CmdBase <|-- CmdNode
+    %% CmdBase <|-- CmdPlugin
+    %% CmdBase <|-- CmdPull
+    CmdBase <|-- CmdRun
+    %% CmdBase <|-- CmdSecret
+    %% CmdBase <|-- CmdSelfUpdate
+    %% CmdBase <|-- CmdView

diff --git a/docs/developer/diagrams/nextflow.cloud.aws.mmd b/docs/developer/diagrams/nextflow.cloud.aws.mmd
new file mode 100644
index 0000000000..d3adbc00f4
--- /dev/null
+++ b/docs/developer/diagrams/nextflow.cloud.aws.mmd
@@ -0,0 +1,21 @@
+classDiagram
+    %%
+    %% nextflow.cloud.aws
+    %%
+    Executor <|-- AwsBatchExecutor
+    TaskHandler <|-- AwsBatchTaskHandler
+    BashWrapperBuilder <|-- AwsBatchScriptLauncher
+
+    AwsBatchExecutor --* AwsOptions
+    AwsOptions --* AwsConfig
+    AwsConfig --* AwsBatchConfig
+
AwsConfig --* AwsS3Config + + AwsBatchExecutor --> ParallelPollingMonitor : init + AwsBatchExecutor --> AwsBatchTaskHandler : submit + AwsBatchTaskHandler --> AwsBatchScriptLauncher : submit + + %% TaskPollingMonitor <|-- ParallelPollingMonitor + + SimpleFileCopyStrategy <|-- AwsBatchFileCopyStrategy + AwsBatchScriptLauncher --* AwsBatchFileCopyStrategy diff --git a/docs/developer/diagrams/nextflow.cloud.aws.nio.mmd b/docs/developer/diagrams/nextflow.cloud.aws.nio.mmd new file mode 100644 index 0000000000..523c5f971a --- /dev/null +++ b/docs/developer/diagrams/nextflow.cloud.aws.nio.mmd @@ -0,0 +1,31 @@ +classDiagram + %% + %% nextflow.cloud.aws.nio + %% + S3FileSystemProvider --> S3FileSystem : newFileSystem + + class S3FileSystem { + client : S3Client + endpoint : String + } + S3FileSystem --* S3Client + + class S3Client { + client : AmazonS3 + cannedAcl : CannedAccessControlList + kmsKeyId : String + storageEncryption : SSEAlgorithm + transferManager : TransferManager + transferPool : ExecutorService + uploadChunkSize : Long + uploadMaxThreads : Integer + } + + Path <|-- S3Path + + class S3Path { + bucket : String + parts : List~String~ + fileSystem : S3FileSystem + } + S3Path --* S3FileSystem diff --git a/docs/developer/diagrams/nextflow.cloud.azure.mmd b/docs/developer/diagrams/nextflow.cloud.azure.mmd new file mode 100644 index 0000000000..6993609b33 --- /dev/null +++ b/docs/developer/diagrams/nextflow.cloud.azure.mmd @@ -0,0 +1,17 @@ +classDiagram + %% + %% nextflow.cloud.azure + %% + Executor <|-- AzBatchExecutor + TaskHandler <|-- AzBatchTaskHandler + BashWrapperBuilder <|-- AzBatchScriptLauncher + + AzBatchExecutor --* AzConfig + AzBatchExecutor --> AzBatchService : register + + AzBatchExecutor --> TaskPollingMonitor : init + AzBatchExecutor --> AzBatchTaskHandler : submit + AzBatchTaskHandler --> AzBatchScriptLauncher : submit + + SimpleFileCopyStrategy <|-- AzFileCopyStrategy + AzBatchScriptLauncher --* AzFileCopyStrategy diff --git a/docs/developer/diagrams/nextflow.cloud.google.mmd b/docs/developer/diagrams/nextflow.cloud.google.mmd new file mode 100644 index 0000000000..bd428f7586 --- /dev/null +++ b/docs/developer/diagrams/nextflow.cloud.google.mmd @@ -0,0 +1,13 @@ +classDiagram + %% + %% nextflow.cloud.google + %% + Executor <|-- GoogleBatchExecutor + TaskHandler <|-- GoogleBatchTaskHandler + BashWrapperBuilder <|-- GoogleBatchScriptLauncher + + GoogleBatchExecutor --* BatchConfig + + GoogleBatchExecutor --> TaskPollingMonitor : init + GoogleBatchExecutor --> GoogleBatchTaskHandler : submit + GoogleBatchTaskHandler --> GoogleBatchScriptLauncher : submit diff --git a/docs/developer/diagrams/nextflow.config.mmd b/docs/developer/diagrams/nextflow.config.mmd new file mode 100644 index 0000000000..611c19a413 --- /dev/null +++ b/docs/developer/diagrams/nextflow.config.mmd @@ -0,0 +1,10 @@ +classDiagram + %% + %% nextflow.config + %% + CmdRun --> ConfigMap : run + Session --* ConfigMap + + ConfigBuilder --> ConfigParser : build + ConfigBuilder --> ConfigMap : build + ConfigParser --> ConfigBase : parse diff --git a/docs/developer/diagrams/nextflow.container.mmd b/docs/developer/diagrams/nextflow.container.mmd new file mode 100644 index 0000000000..f2ce814750 --- /dev/null +++ b/docs/developer/diagrams/nextflow.container.mmd @@ -0,0 +1,16 @@ +classDiagram + %% + %% nextflow.container + %% + direction LR + + BashWrapperBuilder --> ContainerBuilder : build + + ContainerBuilder <|-- CharliecloudBuilder + ContainerBuilder <|-- DockerBuilder + ContainerBuilder <|-- PodmanBuilder + 
ContainerBuilder <|-- ShifterBuilder + ContainerBuilder <|-- SingularityBuilder + ContainerBuilder <|-- UdockerBuilder + + SingularityBuilder <|-- ApptainerBuilder diff --git a/docs/developer/diagrams/nextflow.dag.mmd b/docs/developer/diagrams/nextflow.dag.mmd new file mode 100644 index 0000000000..6858074e33 --- /dev/null +++ b/docs/developer/diagrams/nextflow.dag.mmd @@ -0,0 +1,33 @@ +classDiagram + %% + %% nextflow.dag + %% + Session --* DAG + + class DAG { + vertices : List~Vertex~ + edges : List~Edge~ + } + DAG "1" --* "*" Vertex + DAG "1" --* "*" Edge + + class Vertex { + label : String + type : Type + operators : List~DataflowProcessor~ + process : TaskProcessor + } + + class Edge { + channel : Object + from : Vertex + to : Vertex + label : String + } + + %% DagRenderer <|-- CytoscapeHtmlRenderer + %% DagRenderer <|-- CytoscapeJsRenderer + %% DagRenderer <|-- DotRenderer + %% DagRenderer <|-- GexfRenderer + %% DagRenderer <|-- GraphvizRenderer + %% DagRenderer <|-- MermaidRenderer diff --git a/docs/developer/diagrams/nextflow.executor.mmd b/docs/developer/diagrams/nextflow.executor.mmd new file mode 100644 index 0000000000..4044bbced4 --- /dev/null +++ b/docs/developer/diagrams/nextflow.executor.mmd @@ -0,0 +1,82 @@ +classDiagram + %% + %% nextflow.executor + %% + ProcessDef --> Executor : run + %% ExecutorFactory --> Executor : getExecutor + + TaskProcessor --* Executor + + %% class Executor { + %% name : String + %% monitor : TaskMonitor + %% } + %% Executor --* TaskMonitor + %% Executor --> TaskHandler : submit + + %% TaskMonitor <|-- TaskPollingMonitor + + class TaskPollingMonitor { + capacity : int + submitRateLimit : RateLimiter + pollIntervalMillis : long + dumpInterval : Duration + } + + %% TaskPollingMonitor <|-- LocalPollingMonitor + + class LocalPollingMonitor { + maxCpus : int + maxMemory : long + } + + %% class TaskHandler { + %% task : TaskRun + %% } + + Executor <|-- AbstractGridExecutor + Executor <|-- LocalExecutor + %% Executor <|-- NopeExecutor + %% AbstractGridExecutor <|-- CondorExecutor + %% AbstractGridExecutor <|-- HyperQueueExecutor + %% AbstractGridExecutor <|-- LsfExecutor + %% AbstractGridExecutor <|-- MoabExecutor + %% AbstractGridExecutor <|-- NqsiiExecutor + %% AbstractGridExecutor <|-- OarExecutor + %% AbstractGridExecutor <|-- PbsExecutor + %% AbstractGridExecutor <|-- SgeExecutor + %% AbstractGridExecutor <|-- SlurmExecutor + %% PbsExecutor <|-- PbsProExecutor + %% SgeExecutor <|-- CrgExecutor + + LocalExecutor --> LocalPollingMonitor : init + LocalExecutor --> LocalTaskHandler : submit + LocalExecutor --> NativeTaskHandler : submit + LocalTaskHandler --> BashWrapperBuilder : submit + + AbstractGridExecutor --> TaskPollingMonitor : init + AbstractGridExecutor --> GridTaskHandler : submit + GridTaskHandler --> BashWrapperBuilder : submit + + %% TaskHandler <|-- CachedTaskHandler + %% TaskHandler <|-- GridTaskHandler + %% TaskHandler <|-- LocalTaskHandler + %% TaskHandler <|-- NativeTaskHandler + %% TaskHandler <|-- NopeTaskHandler + %% TaskHandler <|-- StoredTaskHandler + + class BashWrapperBuilder { + bean : TaskBean + copyStrategy : ScriptFileCopyStrategy + } + BashWrapperBuilder --* TaskBean + BashWrapperBuilder --* ScriptFileCopyStrategy + + ScriptFileCopyStrategy <|-- SimpleFileCopyStrategy + + class SimpleFileCopyStrategy { + stageinMode : String + stageoutMode : String + targetDir : Path + workDir : Path + } diff --git a/docs/developer/diagrams/nextflow.extension.mmd b/docs/developer/diagrams/nextflow.extension.mmd new file mode 100644 index 
0000000000..4cb817c2e7 --- /dev/null +++ b/docs/developer/diagrams/nextflow.extension.mmd @@ -0,0 +1,28 @@ +classDiagram + %% + %% nextflow.extension + %% + direction LR + + ChannelEx --> DumpOp : dump + Nextflow --> GroupKey : groupKey + + OperatorImpl --> BranchOp : branch + OperatorImpl --> BufferOp : buffer + OperatorImpl --> CollectFileOp : collectFile + OperatorImpl --> CollectOp : collect + OperatorImpl --> CombineOp : combine + OperatorImpl --> ConcatOp : concat + OperatorImpl --> CrossOp : cross + OperatorImpl --> GroupTupleOp : groupTuple + OperatorImpl --> JoinOp : join + OperatorImpl --> MapOp : map + OperatorImpl --> MergeOp : merge + OperatorImpl --> MixOp : mix + OperatorImpl --> MultiMapOp : multiMap + OperatorImpl --> RandomSampleOp : randomSample + OperatorImpl --> SplitOp : splitCsv, splitFasta, splitFastq, splitText + OperatorImpl --> TakeOp : take + OperatorImpl --> ToListOp : toList, toSortedList + OperatorImpl --> TransposeOp : transpose + OperatorImpl --> UntilOp : until diff --git a/docs/developer/diagrams/nextflow.ga4gh.mmd b/docs/developer/diagrams/nextflow.ga4gh.mmd new file mode 100644 index 0000000000..7f2c508b3d --- /dev/null +++ b/docs/developer/diagrams/nextflow.ga4gh.mmd @@ -0,0 +1,14 @@ +classDiagram + %% + %% nextflow.ga4gh + %% + Executor <|-- TesExecutor + %% TaskHandler <|-- TesTaskHandler + %% BashWrapperBuilder <|-- TesBashBuilder + + TesExecutor --> TaskPollingMonitor : init + TesExecutor --> TesTaskHandler : submit + TesTaskHandler --> TesBashBuilder : submit + + %% ScriptFileCopyStrategy <|-- TesFileCopyStrategy + TesBashBuilder --* TesFileCopyStrategy diff --git a/docs/developer/diagrams/nextflow.k8s.mmd b/docs/developer/diagrams/nextflow.k8s.mmd new file mode 100644 index 0000000000..e329942fb3 --- /dev/null +++ b/docs/developer/diagrams/nextflow.k8s.mmd @@ -0,0 +1,57 @@ +classDiagram + %% + %% nextflow.k8s + %% + Executor <|-- K8sExecutor + TaskHandler <|-- K8sTaskHandler + BashWrapperBuilder <|-- K8sWrapperBuilder + + K8sExecutor --> TaskPollingMonitor : init + K8sExecutor --> K8sTaskHandler : submit + K8sExecutor --* K8sClient + K8sTaskHandler --> K8sWrapperBuilder : submit + + CmdKubeRun --> K8sDriverLauncher : run + + class K8sDriverLauncher { + args : List~String~ + cmd : CmdKubeRun + config : ConfigObject + configMapName : String + headCpus : int + headImage : String + headMemory : String + headPreScript : String + paramsFile : String + pipelineName : String + runName : String + } + K8sDriverLauncher --* K8sClient + K8sDriverLauncher --* K8sConfig + + K8sClient --* ClientConfig + %% ConfigDiscovery --> ClientConfig : discover + + class K8sConfig { + target : Map + podOptions : PodOptions + } + K8sConfig --* PodOptions + + class PodOptions { + affinity : Map + annotations : Map + automountServiceAccountToken : boolean + configMaps : Collection~PodMountConfig~ + envVars : Collection~PodEnv~ + imagePullPolicy : String + imagePullSecret : String + labels : Map + nodeSelector : PodNodeSelector + priorityClassName : String + privileged : Boolean + secrets : Collection~PodMountSecret~ + securityContext : PodSecurityContext + tolerations : List~Map~ + volumeClaims : Collection~PodVolumeClaim~ + } diff --git a/docs/developer/diagrams/nextflow.mmd b/docs/developer/diagrams/nextflow.mmd new file mode 100644 index 0000000000..c683746488 --- /dev/null +++ b/docs/developer/diagrams/nextflow.mmd @@ -0,0 +1,21 @@ +classDiagram + %% + %% nextflow + %% + class Nextflow + class Channel + class Session { + baseDir : Path + binding : ScriptBinding + cache : 
CacheDB + commandLine : String + commitId : String + config : Map + configFiles : List~Path~ + dag : DAG + profile : String + runName : String + script : BaseScript + uniqueId : UUID + workDir : Path + } diff --git a/docs/developer/diagrams/nextflow.plugin.mmd b/docs/developer/diagrams/nextflow.plugin.mmd new file mode 100644 index 0000000000..83926f39f7 --- /dev/null +++ b/docs/developer/diagrams/nextflow.plugin.mmd @@ -0,0 +1,14 @@ +classDiagram + %% + %% nextflow.plugin + %% + CmdRun --> Plugins : run + + Plugins --> PluginsFacade : init + + PluginsFacade "1" --> "*" PluginSpec : load + + class PluginSpec { + id : String + version : String + } diff --git a/docs/developer/diagrams/nextflow.processor.mmd b/docs/developer/diagrams/nextflow.processor.mmd new file mode 100644 index 0000000000..196433ff69 --- /dev/null +++ b/docs/developer/diagrams/nextflow.processor.mmd @@ -0,0 +1,44 @@ +classDiagram + %% + %% nextflow.processor + %% + %% ProcessDef --> TaskProcessor : run + + class TaskProcessor { + config : ProcessConfig + executor : Executor + id : int + name : String + operator : DataflowProcessor + taskBody : BodyDef + } + TaskProcessor --> TaskRun : invokeTask + TaskProcessor --> PublishDir : finalizeTask + + class TaskRun { + config : TaskConfig + context : TaskContext + hash : HashCode + id : TaskId + index : int + inputs : Map + name : String + outputs : Map + runType : RunType + type : ScriptType + workDir : Path + } + TaskRun --* TaskConfig + TaskRun --* TaskContext + TaskRun --> TaskBean : toTaskBean + + class TaskConfig { + target : Map + binding : Map + } + + class TaskContext { + holder : Map + script : Script + name : String + } diff --git a/docs/developer/diagrams/nextflow.scm.mmd b/docs/developer/diagrams/nextflow.scm.mmd new file mode 100644 index 0000000000..1c52075372 --- /dev/null +++ b/docs/developer/diagrams/nextflow.scm.mmd @@ -0,0 +1,26 @@ +classDiagram + %% + %% nextflow.scm + %% + direction LR + + CmdRun --> AssetManager : run + + class AssetManager { + project : String + localPath : File + mainScript : String + repositoryProvider : RepositoryProvider + hub : String + providerConfigs : List~ProviderConfig~ + } + AssetManager --* RepositoryProvider + AssetManager "1" --* "*" ProviderConfig + + RepositoryProvider <|-- AzureRepositoryProvider + RepositoryProvider <|-- BitbucketRepositoryProvider + RepositoryProvider <|-- BitbucketServerRepositoryProvider + RepositoryProvider <|-- GiteaRepositoryProvider + RepositoryProvider <|-- GithubRepositoryProvider + RepositoryProvider <|-- GitlabRepositoryProvider + RepositoryProvider <|-- LocalRepositoryProvider diff --git a/docs/developer/diagrams/nextflow.script.mmd b/docs/developer/diagrams/nextflow.script.mmd new file mode 100644 index 0000000000..aa6869d0cd --- /dev/null +++ b/docs/developer/diagrams/nextflow.script.mmd @@ -0,0 +1,136 @@ +classDiagram + %% + %% nextflow.script + %% + CmdRun --> ScriptRunner : run + + class ScriptRunner { + scriptFile : ScriptFile + session : Session + } + ScriptRunner --* ScriptFile + ScriptRunner --* Session + ScriptRunner --> ScriptParser : execute + ScriptParser --> BaseScript : parse + + class ScriptFile { + source : Path + main : Path + repository : String + revisionInfo : AssetManager.RevisionInfo + localPath : Path + projectName : String + } + + Session --* BaseScript + Session --* ScriptBinding + + class ScriptBinding { + scriptPath : Path + args : List~String~ + params : ParamsMap + configEnv : Map + entryName : String + } + + IncludeDef --> BaseScript : load0 + + class 
BaseScript { + meta : ScriptMeta + entryFlow : WorkflowDef + } + BaseScript --* ScriptMeta + %% BaseScript --> ProcessDef : process + %% BaseScript --> WorkflowDef : workflow + + class ScriptMeta { + scriptPath : Path + definitions : Map + imports : Map + module : boolean + } + ScriptMeta "1" --* "*" ComponentDef : definitions + ScriptMeta "1" --* "*" ComponentDef : imports + + ComponentDef <|-- FunctionDef + ComponentDef <|-- ProcessDef + ComponentDef <|-- WorkflowDef + + class FunctionDef { + target : Object + name : String + alias : String + } + + class ProcessDef { + processName : String + simpleName : String + baseName : String + rawBody : Closure~BodyDef~ + } + ProcessDef --> ProcessConfig : run + ProcessDef --> BodyDef : run + ProcessDef --> Executor : run + ProcessDef --> TaskProcessor : run + ProcessDef --> ChannelOut : run + + class WorkflowDef { + name : String + body : BodyDef + declaredInputs : List~String~ + declaredOutputs : List~String~ + variableNames : Set~String~ + } + WorkflowDef --* BodyDef + WorkflowDef --> WorkflowBinding : run + WorkflowDef --> ChannelOut : run + + class ProcessConfig { + configProperties : Map + inputs : InputsList + outputs : OutputsList + } + ProcessConfig --* InputsList + ProcessConfig --* OutputsList + + class BodyDef { + closure : Closure + source : String + type : ScriptType + isShell : boolean + } + + class ChannelOut { + target : List~DataflowWriteChannel~ + channels : Map + } + + class WorkflowBinding { + vars : Map + } + + class InputsList { + target : List~InParam~ + } + InputsList "1" --* "*" InParam + + class OutputsList { + target : List~OutParam~ + } + OutputsList "1" --* "*" OutParam + + %% InParam <|-- BaseInParam + %% BaseInParam <|-- EachInParam + %% BaseInParam <|-- EnvInParam + %% BaseInParam <|-- FileInParam + %% BaseInParam <|-- StdInParam + %% BaseInParam <|-- TupleInParam + %% BaseInParam <|-- ValueInParam + + %% OutParam <|-- BaseOutParam + %% BaseOutParam <|-- EachOutParam + %% BaseOutParam <|-- EnvOutParam + %% BaseOutParam <|-- FileOutParam + %% BaseOutParam <|-- StdOutParam + %% BaseOutParam <|-- TupleOutParam + %% BaseOutParam <|-- ValueOutParam diff --git a/docs/developer/diagrams/nextflow.secret.mmd b/docs/developer/diagrams/nextflow.secret.mmd new file mode 100644 index 0000000000..547ade7b93 --- /dev/null +++ b/docs/developer/diagrams/nextflow.secret.mmd @@ -0,0 +1,10 @@ +classDiagram + %% + %% nextflow.secret + %% + CmdRun --> SecretsProvider : run + + SecretsLoader --> SecretsProvider : load + SecretsProvider --> Secret : getSecret + SecretsProvider <|-- LocalSecretsProvider + Secret <|-- SecretImpl diff --git a/docs/developer/diagrams/nextflow.trace.mmd b/docs/developer/diagrams/nextflow.trace.mmd new file mode 100644 index 0000000000..891cfcc698 --- /dev/null +++ b/docs/developer/diagrams/nextflow.trace.mmd @@ -0,0 +1,22 @@ +classDiagram + %% + %% nextflow.trace + %% + direction LR + + %% TraceObserverFactory "1" --> "*" TraceObserver : create + %% TraceObserver <|-- AnsiLogObserver + %% TraceObserver <|-- GraphObserver + %% TraceObserver <|-- ReportObserver + %% TraceObserver <|-- TimelineObserver + %% TraceObserver <|-- TraceFileObserver + %% TraceObserver <|-- WebLogObserver + %% TraceObserver <|-- WorkflowStatsObserver + + Session --> AnsiLogObserver : init + Session --> GraphObserver : init + Session --> ReportObserver : init + Session --> TimelineObserver : init + Session --> TraceFileObserver : init + Session --> WebLogObserver : init + Session --> WorkflowStatsObserver : init diff --git 
a/docs/developer/index.md b/docs/developer/index.md new file mode 100644 index 0000000000..8d2c7d6979 --- /dev/null +++ b/docs/developer/index.md @@ -0,0 +1,166 @@ +# Overview + +This section provides a high-level overview of the Nextflow source code for users who want to understand or contribute to it. Rather than comprehensive API documentation, these docs provide a conceptual map to help you understand the key concepts of the Nextflow implementation and quickly find code sections of interest for further investigation. + +Before you dive into the code, be sure to check out the [CONTRIBUTING.md](https://github.com/nextflow-io/nextflow/blob/master/CONTRIBUTING.md) for Nextflow to learn about the many ways to contribute to the project. + +## IntelliJ IDEA + +The suggested development environment is [IntelliJ IDEA](https://www.jetbrains.com/idea/download/). Nextflow development with IntelliJ IDEA requires a recent version of the IDE (2019.1.2 or later). + +After installing IntelliJ IDEA, follow these steps to set it up for Nextflow: + +1. Clone the Nextflow repository to a directory on your computer. + +2. Open IntelliJ IDEA and go to **File > New > Project from Existing Sources...**. + +3. Select the Nextflow project root directory on your computer and click **OK**. + +4. Select **Import project from external model > Gradle** and click **Finish**. + +5. After the import process completes, select **File > Project Structure...**. + +6. Select **Project**, and make sure that the **SDK** field contains Java 11 (or later). + +7. Go to **File > Settings > Editor > Code Style > Groovy > Imports** and apply the following settings: + + * Use single class import + * Class count to use import with '*': `99` + * Names count to use static import with '*': `99` + * Imports layout: + * `import java.*` + * `import javax.*` + * *blank line* + * all other imports + * all other static imports + +New files must include the appropriate license header boilerplate and the author name(s) and contact email(s) (see [this file](https://github.com/nextflow-io/nextflow/blob/e8945e8b6fc355d3f2eec793d8f288515db2f409/modules/nextflow/src/main/groovy/nextflow/Const.groovy#L1-L15) for an example). + +## Groovy + +Nextflow is written in [Groovy](http://groovy-lang.org/), a programming language based on [Java](https://www.java.com/). Groovy is designed to be highly interoperable with Java -- Groovy programs compile to Java bytecode, and nearly any Java program is also a valid Groovy program. However, Groovy adds several language features (e.g. closures, list and map literals, optional typing, optional semicolons, meta-programming) and standard libraries (e.g. JSON and XML parsing) that greatly improve the overall experience of developing for the Java virtual machine.
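To give a flavor of those features, here is a small, self-contained Groovy script (an invented example for illustration, not code from the Nextflow codebase) that exercises closures, list and map literals, optional typing, and optional semicolons:

```groovy
// list and map literals -- no explicit collection constructors needed
def tools = ['nextflow', 'gradle', 'spock']
def purpose = [nextflow: 'workflow engine', spock: 'testing']

// a closure with an optionally typed parameter, assigned to a variable
def describe = { String name ->
    // GString interpolation plus the Elvis operator for missing keys
    "${name}: ${purpose[name] ?: 'unknown'}"
}

// closures passed directly to standard collection methods
tools.each { println describe(it) }
```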
+ +Recommended resources for Groovy, from most reference-complete to most user-friendly, are listed below: + +- [Groovy documentation](http://groovy-lang.org/documentation.html) +- [Groovy in Action](https://www.manning.com/books/groovy-in-action-second-edition) +- [Groovy: The Awesome Parts](https://www.slideshare.net/paulk_asert/awesome-groovy) +- [Groovy cheat sheet](http://www.cheat-sheets.org/saved-copy/rc015-groovy_online.pdf) + +## Software Dependencies + +Nextflow depends on a variety of libraries and frameworks, the most prominent of which are listed below: + +- [AWS SDK for Java 1.x](https://aws.amazon.com/sdk-for-java/): AWS integration +- [Azure SDK for Java](https://learn.microsoft.com/en-us/azure/developer/java/sdk/): Azure integration +- [Google Cloud Client Libraries for Java](https://cloud.google.com/java/docs/reference): Google Cloud integration +- [GPars](http://gpars.org/1.2.1/guide/guide/dataflow.html): dataflow concurrency +- [Gradle](https://gradle.org/): build automation +- [JCommander](https://jcommander.org/): command line interface +- [JGit](https://www.eclipse.org/jgit/): Git integration +- [Kryo](https://github.com/EsotericSoftware/kryo): serialization +- [LevelDB](https://mvnrepository.com/artifact/org.iq80.leveldb/leveldb): key-value store for the cache database +- [Logback](https://logback.qos.ch/): application logging +- [PF4J](https://pf4j.org/): plugin extensions +- [Spock](https://spockframework.org/): unit testing framework + +Most other integrations are implemented via a CLI (e.g. Conda, Docker, HPC schedulers) or a REST API (e.g. Kubernetes). + +## Class Diagrams + +Each package has a class diagram, abridged and annotated for relevance and ease of use. + +Each node is a class. Fields are selectively documented in order to show only the core data structures and the classes that "own" them. Methods are not explicitly documented, but they are mentioned in certain links where appropriate. Links are selectively documented in order to show only the most important classes and relationships. + +Links between classes denote one of the following relationships: + +- Inheritance (`A <|-- B`): `B` is a subclass of `A` +- Composition (`A --* B`): `A` contains `B` +- Instantiation (`A --> B : f`): `A` creates instance(s) of `B` at runtime via `A::f()` + +See {ref}`packages-page` for the list of Nextflow packages. + +```{warning} +Class diagrams are manually curated, so they might not always reflect the latest version of the source code. +``` + +## Building from Source + +If you are interested in modifying the source code, you only need Java 11 or later to build Nextflow from source. Nextflow uses the [Gradle](http://www.gradle.org/) build automation system, but you do not need to install Gradle to build Nextflow, because the Gradle wrapper is bundled with the source code. In other words, if you can run Nextflow, then you can probably build it too! + +To build locally from a branch (useful for testing PRs): + +```bash +git clone -b <branch-name> git@github.com:nextflow-io/nextflow.git +cd nextflow +make compile +``` + +The build system will automatically download all of the necessary dependencies on the first run, which may take several minutes. + +Once complete, you can run your local build of Nextflow using the `launch.sh` script in place of the `nextflow` command: + +```bash +./launch.sh run <pipeline> +```
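As a quick smoke test of a local build, a hypothetical session might look like the following (`launch.sh` accepts the same arguments as the `nextflow` command, and `nextflow-io/hello` is the minimal public example pipeline):

```bash
# print the version banner of the freshly built runtime
./launch.sh -version

# run a trivial remote pipeline end-to-end to verify the build
./launch.sh run nextflow-io/hello
```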

[HTML template diff unrecoverable after extraction — a modified DAG HTML template titled "Nextflow Cytoscape.js with Dagre", with several lines removed; the file path and markup were lost]
diff --git a/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html b/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html new file mode 100644 index 0000000000..2c2a22cc7b --- /dev/null +++ b/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html @@ -0,0 +1,29 @@ [HTML markup stripped in extraction — the new 29-line template embeds the network data via the REPLACE_WITH_NETWORK_DATA placeholder]
diff --git a/modules/nextflow/src/main/resources/nextflow/executor/command-run.txt b/modules/nextflow/src/main/resources/nextflow/executor/command-run.txt index f98408d195..d1f695a022 100644 --- a/modules/nextflow/src/main/resources/nextflow/executor/command-run.txt +++ b/modules/nextflow/src/main/resources/nextflow/executor/command-run.txt @@ -100,7 +100,7 @@ nxf_fs_fcp() { on_exit() { exit_status=${nxf_main_ret:=$?} - printf $exit_status {{exit_file}} + printf -- $exit_status {{exit_file}} set +u {{cleanup_cmd}} {{sync_cmd}} @@ -147,6 +147,7 @@ nxf_main() { {{task_env}} {{secrets_env}} [[ $NXF_SCRATCH ]] && cd $NXF_SCRATCH + export NXF_TASK_WORKDIR="$PWD" {{stage_cmd}} set +e diff --git a/modules/nextflow/src/main/resources/nextflow/executor/command-trace.txt b/modules/nextflow/src/main/resources/nextflow/executor/command-trace.txt index 9cf1292962..62da5a46ce 100644 --- a/modules/nextflow/src/main/resources/nextflow/executor/command-trace.txt +++ b/modules/nextflow/src/main/resources/nextflow/executor/command-trace.txt @@ -127,6 +127,7 @@ nxf_write_trace() { echo "nextflow.trace/v2" > $trace_file echo "realtime=$wall_time" >> $trace_file echo "%cpu=$ucpu" >> $trace_file + echo "cpu_model=$cpu_model" >> $trace_file echo "rchar=${io_stat1[0]}" >> $trace_file echo "wchar=${io_stat1[1]}" >> $trace_file echo "syscr=${io_stat1[2]}" >> $trace_file @@ -144,6 +145,7 @@ nxf_trace_mac() { local end_millis=$(nxf_date) local wall_time=$((end_millis-start_millis)) local ucpu='' + local cpu_model='' local io_stat1=('' '' '' '' '' '') nxf_write_trace } @@ -163,6 +165,7 @@ nxf_trace_linux() { ## https://stackoverflow.com/questions/27508531/calculate-cpu-per-process/27514562##27514562 ## https://stackoverflow.com/questions/16726779/how-do-i-get-the-total-cpu-usage-of-an-application-from-proc-pid-stat local num_cpus=$(< /proc/cpuinfo grep '^processor' -c) + local cpu_model=$(< /proc/cpuinfo grep '^model name' | head -n 1 | awk 'BEGIN{FS="\t: "} { print $2 }') local tot_time0=$(grep '^cpu ' /proc/stat | awk '{sum=$2+$3+$4+$5+$6+$7+$8+$9; printf "%.0f",sum}') local cpu_time0=$(2> /dev/null < /proc/$pid/stat awk '{printf "%.0f", ($16+$17)*10 }' || echo -n 'X') local io_stat0=($(2> /dev/null < /proc/$pid/io sed 's/^.*:\s*//' | head -n 6 | tr '\n' ' ' || echo -n '0 0 0 0 0 0')) @@ -199,6 +202,7 @@ nxf_trace_linux() { echo "nextflow.trace/v2" > $trace_file echo "realtime=$wall_time" >> $trace_file echo "%cpu=$ucpu" >> $trace_file + echo "cpu_model=$cpu_model" >> $trace_file echo "rchar=${io_stat1[0]}" >> $trace_file echo "wchar=${io_stat1[1]}" >> $trace_file echo "syscr=${io_stat1[2]}" >> $trace_file diff --git a/modules/nextflow/src/main/resources/nextflow/mail/nextflow-logo-v2-min.png b/modules/nextflow/src/main/resources/nextflow/mail/nextflow-logo-v2-min.png new file mode 100644 index 0000000000..20ca91afb0 Binary files /dev/null and b/modules/nextflow/src/main/resources/nextflow/mail/nextflow-logo-v2-min.png differ diff --git a/modules/nextflow/src/main/resources/nextflow/mail/nextflow200x40.png b/modules/nextflow/src/main/resources/nextflow/mail/nextflow200x40.png deleted file mode 100644 index f91037bbd9..0000000000 Binary files a/modules/nextflow/src/main/resources/nextflow/mail/nextflow200x40.png and /dev/null differ diff --git a/modules/nextflow/src/main/resources/nextflow/mail/notification.html b/modules/nextflow/src/main/resources/nextflow/mail/notification.html index 96eadfea4b..736630e248 100644 --- a/modules/nextflow/src/main/resources/nextflow/mail/notification.html +++
b/modules/nextflow/src/main/resources/nextflow/mail/notification.html @@ -139,7 +139,7 @@

[HTML markup stripped from this hunk — context: the "Execution summary" section; the changed footer line contains "This email was sent by Nextflow / cite doi:10.1038/nbt.3820 / http://nextflow.io", and the edit likely swaps the embedded logo to the new nextflow-logo-v2-min.png added above]
diff --git a/modules/nextflow/src/main/resources/nextflow/trace/ReportTemplate.html b/modules/nextflow/src/main/resources/nextflow/trace/ReportTemplate.html index 7232373fdd..9d3fe8cf96 100644 --- a/modules/nextflow/src/main/resources/nextflow/trace/ReportTemplate.html +++ b/modules/nextflow/src/main/resources/nextflow/trace/ReportTemplate.html @@ -93,7 +93,7 @@