Merge branch 'master' into charliecloud_storage
nschan authored Aug 28, 2024
2 parents 5ead543 + 5a37e61 commit 525216e
Showing 9 changed files with 77 additions and 19 deletions.
1 change: 1 addition & 0 deletions changelog.txt
@@ -5,6 +5,7 @@ NEXTFLOW CHANGE-LOG
- Apply k8s.cpuLimits to kuberun driver pod (#5160) [4300adf1]
- Await build completion for all Wave containers [2b8117e9]
- Deprecate module addParams() and params() (#5200) [ci fast] [82c97f8c]
+ - Remove capsule launcher dependencies (#3395) [f15e4246]
- Fix AWS Cloudwatch access when using custom log group name [30195838]
- Fix Invalid AWS Fargate CPUs usage error reporting [d9c50e59]
- Fix Prevent AwS Batch retry the job execution when the container does not exist [4e218f22]
2 changes: 1 addition & 1 deletion docs/_templates/layout.html
@@ -21,7 +21,7 @@
<div class="nav-footer-logo">
<a href="https://seqera.io/" target="_blank" title="Developed by Seqera Labs">
Nextflow is developed by:<br>
- <img src="_static/seqera-logo.svg" alt="Seqera Labs">
+ <img src="{{ pathto('_static/seqera-logo.svg', 1) }}" alt="Seqera Labs">
</a>
</div>
{% endblock %}
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -79,7 +79,7 @@

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
- html_title = f"Nextflow v{release} documentation"
+ html_title = f"Nextflow {release} documentation"

# Get the current sha if not checked out at a specific version
if len(release) == 0:
10 changes: 8 additions & 2 deletions docs/config.md
@@ -197,7 +197,10 @@ The following settings are available:
`aws.batch.maxSpotAttempts`
: :::{versionadded} 22.04.0
:::
- : Max number of execution attempts of a job interrupted by a EC2 spot reclaim event (default: `5`)
+ : :::{versionchanged} 24.08.0-edge
+ The default value was changed from `5` to `0`.
+ :::
+ : Max number of execution attempts of a job interrupted by an EC2 spot reclaim event (default: `0`)

`aws.batch.maxTransferAttempts`
: Max number of download attempts from S3 (default: `1`).
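With `aws.batch.maxSpotAttempts` now defaulting to `0`, jobs interrupted by an EC2 spot reclaim are no longer retried automatically. A minimal sketch of restoring the previous behavior in your configuration:

```groovy
// Retry jobs interrupted by EC2 spot reclaims up to 5 times,
// matching the default used before 24.08.0-edge
aws.batch.maxSpotAttempts = 5
```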
@@ -872,7 +875,10 @@ The following settings are available for Google Cloud Batch:
`google.batch.maxSpotAttempts`
: :::{versionadded} 23.11.0-edge
:::
- : Max number of execution attempts of a job interrupted by a Compute Engine spot reclaim event (default: `5`).
+ : :::{versionchanged} 24.08.0-edge
+ The default value was changed from `5` to `0`.
+ :::
+ : Max number of execution attempts of a job interrupted by a Compute Engine spot reclaim event (default: `0`).
: See also: `google.batch.autoRetryExitCodes`

`google.project`
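Likewise, with `google.batch.maxSpotAttempts` now defaulting to `0`, a minimal sketch of restoring the previous retry behavior:

```groovy
// Retry jobs interrupted by Compute Engine spot reclaims up to 5 times,
// matching the default used before 24.08.0-edge
google.batch.maxSpotAttempts = 5
// Exit codes treated as spot-reclaim interruptions (50001 is the default)
google.batch.autoRetryExitCodes = [50001]
```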
65 changes: 58 additions & 7 deletions docs/fusion.md
@@ -44,7 +44,7 @@ configuration. For example:
fusion.enabled = true
wave.enabled = true
process.executor = 'azure-batch'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then run your pipeline using the usual command:
@@ -71,7 +71,7 @@ wave.enabled = true
process.executor = 'awsbatch'
process.queue = '<YOUR BATCH QUEUE>'
aws.region = '<YOUR AWS REGION>'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then you can run your pipeline using the following command:
@@ -146,7 +146,7 @@ configuration. For example:
fusion.enabled = true
wave.enabled = true
process.executor = 'google-batch'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then run your pipeline using the usual command:
@@ -172,10 +172,10 @@ process.executor = 'k8s'
k8s.context = '<YOUR K8S CONFIGURATION CONTEXT>'
k8s.namespace = '<YOUR K8S NAMESPACE>'
k8s.serviceAccount = '<YOUR K8S SERVICE ACCOUNT>'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

- The `k8s.context` represents the Kubernetes configuration context to be used for the pipeline execution. This setting can be omitted if Nextflow itself is run as a pod in the Kubernetes clusters.
+ The `k8s.context` represents the Kubernetes configuration context to be used for the pipeline execution. This setting can be omitted if Nextflow itself is running as a pod in the Kubernetes cluster.

The `k8s.namespace` represents the Kubernetes namespace where the jobs submitted by the pipeline execution should be executed.
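To verify which context your local Kubernetes configuration currently points to, you can use, for example:

```bash
# List the contexts defined in your kubeconfig; the active one is starred
kubectl config get-contexts
```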

@@ -191,9 +191,10 @@ nextflow run <YOUR PIPELINE> -work-dir s3://<YOUR BUCKET>/scratch
You can also use Fusion and Kubernetes with Azure Blob Storage and Google Storage using the same deployment approach.
:::

- ### Local execution
+ ### Local execution with AWS S3

- Fusion file system allows the use of an S3 bucket as a pipeline work directory with the Nextflow local executor. This configuration requires the use of Docker (or similar container engine) for the execution of your pipeline tasks.
+ Fusion file system allows the use of an S3 bucket as a pipeline work directory with the Nextflow local executor. This
+ configuration requires the use of Docker (or similar container engine) for the execution of your pipeline tasks.

The AWS S3 bucket credentials should be made accessible via standard `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.
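For example, assuming placeholder values, the credentials can be exported in the shell session that launches Nextflow:

```bash
export AWS_ACCESS_KEY_ID=<YOUR ACCESS KEY>
export AWS_SECRET_ACCESS_KEY=<YOUR SECRET KEY>
```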

@@ -204,6 +204,7 @@ docker.enabled = true
fusion.enabled = true
fusion.exportStorageCredentials = true
wave.enabled = true
+ tower.accessToken = '<your platform access token>' // optional
```

Then you can run your pipeline using the following command:
@@ -223,6 +225,55 @@ The option `fusion.exportStorageCredentials` leaks the AWS credentials on the task launcher script created by Nextflow.
This option should only be used for development purposes.
:::

+ ### Local execution with Minio
+ 
+ [Minio](https://min.io/) is an open source, enterprise-grade object storage service compatible with AWS S3. Nextflow and Fusion
+ can use Minio (or other S3-compatible object stores) as an alternative to AWS S3 in some deployment scenarios.
+ 
+ This configuration requires the use of the Nextflow local executor and Docker (or similar container engine) for the
+ execution of your pipeline tasks.
+ 
+ For the sake of this example, run a local instance of Minio using this command:
+ 
+ ```
+ docker run -p 9000:9000 \
+ --rm -d -p 9001:9001 \
+ -e "MINIO_ROOT_USER=admin" \
+ -e "MINIO_ROOT_PASSWORD=secret" \
+ quay.io/minio/minio server /data --console-address ":9001"
+ ```
+ 
+ Open the Minio console by opening `http://localhost:9001` in your browser, then create a credentials pair
+ and a bucket (see the `mc` sketch at the end of this section). For the sake of this example, the bucket name `foobar` will be used.
+ 
+ The following configuration should be added to your Nextflow configuration file:
+ 
+ ```groovy
+ aws.accessKey = '<YOUR MINIO ACCESS KEY>'
+ aws.secretKey = '<YOUR MINIO SECRET KEY>'
+ aws.client.endpoint = 'http://localhost:9000'
+ aws.client.s3PathStyleAccess = true
+ wave.enabled = true
+ fusion.enabled = true
+ fusion.exportStorageCredentials = true
+ docker.enabled = true
+ tower.accessToken = '<your platform access token>' // optional
+ ```
+ 
+ Then you can run your pipeline using the following command:
+ 
+ ```bash
+ nextflow run <YOUR PIPELINE> -work-dir s3://foobar/scratch
+ ```

+ Replace `<YOUR PIPELINE>` with a pipeline script and bucket of your choice.
+ 
+ :::{warning}
+ The option `fusion.exportStorageCredentials` leaks the AWS credentials on the task launcher script created by Nextflow.
+ This option should only be used for development purposes.
+ :::
## Advanced settings

Fusion advanced configuration settings are described in the {ref}`Fusion <config-fusion>` section on the Nextflow configuration page.
@@ -36,7 +36,7 @@ import nextflow.util.Duration
@CompileStatic
class AwsBatchConfig implements CloudTransferOptions {

- public static final int DEFAULT_MAX_SPOT_ATTEMPTS = 5
+ public static final int DEFAULT_MAX_SPOT_ATTEMPTS = 0

public static final int DEFAULT_AWS_MAX_ATTEMPTS = 5

@@ -16,13 +16,11 @@

package nextflow.cloud.google.batch.client

- 
import com.google.auth.oauth2.GoogleCredentials
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.Session
import nextflow.cloud.google.GoogleOpts
- import nextflow.exception.ProcessUnrecoverableException
import nextflow.util.MemoryUnit
/**
* Model Google Batch config settings
@@ -33,7 +31,9 @@ import nextflow.util.MemoryUnit
@CompileStatic
class BatchConfig {

- static private List<Integer> DEFAULT_RETRY_LIST = List.of(50001)
+ static final private int DEFAULT_MAX_SPOT_ATTEMPTS = 0
+ 
+ static final private List<Integer> DEFAULT_RETRY_LIST = List.of(50001)

private GoogleOpts googleOpts
private GoogleCredentials credentials
@@ -74,7 +74,7 @@ class BatchConfig {
result.allowedLocations = session.config.navigate('google.batch.allowedLocations', List.of()) as List<String>
result.bootDiskSize = session.config.navigate('google.batch.bootDiskSize') as MemoryUnit
result.cpuPlatform = session.config.navigate('google.batch.cpuPlatform')
- result.maxSpotAttempts = session.config.navigate('google.batch.maxSpotAttempts',5) as int
+ result.maxSpotAttempts = session.config.navigate('google.batch.maxSpotAttempts', DEFAULT_MAX_SPOT_ATTEMPTS) as int
result.installGpuDrivers = session.config.navigate('google.batch.installGpuDrivers',false)
result.preemptible = session.config.navigate('google.batch.preemptible',false)
result.spot = session.config.navigate('google.batch.spot',false)
@@ -83,7 +83,7 @@
result.subnetwork = session.config.navigate('google.batch.subnetwork')
result.serviceAccountEmail = session.config.navigate('google.batch.serviceAccountEmail')
result.retryConfig = new BatchRetryConfig( session.config.navigate('google.batch.retryPolicy') as Map ?: Map.of() )
- result.autoRetryExitCodes = session.config.navigate('google.batch.autoRetryExitCodes',DEFAULT_RETRY_LIST) as List<Integer>
+ result.autoRetryExitCodes = session.config.navigate('google.batch.autoRetryExitCodes', DEFAULT_RETRY_LIST) as List<Integer>
return result
}

@@ -42,7 +42,7 @@ class BatchConfigTest extends Specification {
config.getSpot()
and:
config.retryConfig.maxAttempts == 5
- config.maxSpotAttempts == 5
+ config.maxSpotAttempts == 0
config.autoRetryExitCodes == [50001]
}

@@ -265,7 +265,7 @@ class WaveClient {
throw new UnauthorizedException("Unauthorized [401] - Verify you have provided a valid access token")
}
else
- throw new BadResponseException("Wave invalid response: [${resp.statusCode()}] ${resp.body()}")
+ throw new BadResponseException("Wave invalid response: POST ${uri} [${resp.statusCode()}] ${resp.body()}")
}
catch (IOException e) {
throw new IllegalStateException("Unable to connect Wave service: $endpoint")
