Merge branch 'master' into charliecloud_storage
nschan authored Aug 28, 2024
2 parents 5ead543 + 5a37e61 commit 525216e
Showing 9 changed files with 77 additions and 19 deletions.
1 change: 1 addition & 0 deletions changelog.txt
@@ -5,6 +5,7 @@ NEXTFLOW CHANGE-LOG
- Apply k8s.cpuLimits to kuberun driver pod (#5160) [4300adf1]
- Await build completion for all Wave containers [2b8117e9]
- Deprecate module addParams() and params() (#5200) [ci fast] [82c97f8c]
+ - Remove capsule launcher dependencies (#3395) [f15e4246]
- Fix AWS Cloudwatch access when using custom log group name [30195838]
- Fix Invalid AWS Fargate CPUs usage error reporting [d9c50e59]
- Fix Prevent AwS Batch retry the job execution when the container does not exist [4e218f22]
2 changes: 1 addition & 1 deletion docs/_templates/layout.html
@@ -21,7 +21,7 @@
<div class="nav-footer-logo">
<a href="https://seqera.io/" target="_blank" title="Developed by Seqera Labs">
Nextflow is developed by:<br>
- <img src="_static/seqera-logo.svg" alt="Seqera Labs">
+ <img src="{{ pathto('_static/seqera-logo.svg', 1) }}" alt="Seqera Labs">
</a>
</div>
{% endblock %}
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -79,7 +79,7 @@

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
- html_title = f"Nextflow v{release} documentation"
+ html_title = f"Nextflow {release} documentation"

# Get the current sha if not checked out at a specific version
if len(release) == 0:
10 changes: 8 additions & 2 deletions docs/config.md
@@ -197,7 +197,10 @@ The following settings are available:
`aws.batch.maxSpotAttempts`
: :::{versionadded} 22.04.0
:::
- : Max number of execution attempts of a job interrupted by a EC2 spot reclaim event (default: `5`)
+ : :::{versionchanged} 24.08.0-edge
+ The default value was changed from `5` to `0`.
+ :::
+ : Max number of execution attempts of a job interrupted by an EC2 spot reclaim event (default: `0`)

`aws.batch.maxTransferAttempts`
: Max number of download attempts from S3 (default: `1`).
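With `aws.batch.maxSpotAttempts` now defaulting to `0`, jobs interrupted by an EC2 spot reclaim are no longer retried automatically. A minimal sketch of restoring the previous behavior in your configuration:

```groovy
// Retry jobs interrupted by EC2 spot reclaims up to 5 times,
// matching the default used before 24.08.0-edge
aws.batch.maxSpotAttempts = 5
```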
@@ -872,7 +875,10 @@ The following settings are available for Google Cloud Batch:
`google.batch.maxSpotAttempts`
: :::{versionadded} 23.11.0-edge
:::
- : Max number of execution attempts of a job interrupted by a Compute Engine spot reclaim event (default: `5`).
+ : :::{versionchanged} 24.08.0-edge
+ The default value was changed from `5` to `0`.
+ :::
+ : Max number of execution attempts of a job interrupted by a Compute Engine spot reclaim event (default: `0`).
: See also: `google.batch.autoRetryExitCodes`

`google.project`
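Likewise, with `google.batch.maxSpotAttempts` now defaulting to `0`, a minimal sketch of restoring the previous retry behavior:

```groovy
// Retry jobs interrupted by Compute Engine spot reclaims up to 5 times,
// matching the default used before 24.08.0-edge
google.batch.maxSpotAttempts = 5
// Exit codes treated as spot-reclaim interruptions (50001 is the default)
google.batch.autoRetryExitCodes = [50001]
```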
65 changes: 58 additions & 7 deletions docs/fusion.md
@@ -44,7 +44,7 @@ configuration. For example:
fusion.enabled = true
wave.enabled = true
process.executor = 'azure-batch'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then run your pipeline using the usual command:
@@ -71,7 +71,7 @@ wave.enabled = true
process.executor = 'awsbatch'
process.queue = '<YOUR BATCH QUEUE>'
aws.region = '<YOUR AWS REGION>'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then you can run your pipeline using the following command:
@@ -146,7 +146,7 @@ configuration. For example:
fusion.enabled = true
wave.enabled = true
process.executor = 'google-batch'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

Then run your pipeline using the usual command:
@@ -172,10 +172,10 @@ process.executor = 'k8s'
k8s.context = '<YOUR K8S CONFIGURATION CONTEXT>'
k8s.namespace = '<YOUR K8S NAMESPACE>'
k8s.serviceAccount = '<YOUR K8S SERVICE ACCOUNT>'
- tower.accessToken = '<your platform access token>'
+ tower.accessToken = '<your platform access token>' // optional
```

- The `k8s.context` represents the Kubernetes configuration context to be used for the pipeline execution. This setting can be omitted if Nextflow itself is run as a pod in the Kubernetes clusters.
+ The `k8s.context` represents the Kubernetes configuration context to be used for the pipeline execution. This setting can be omitted if Nextflow itself is running as a pod in the Kubernetes cluster.

The `k8s.namespace` represents the Kubernetes namespace where the jobs submitted by the pipeline execution should be executed.
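To verify which context your local Kubernetes configuration currently points to, you can use, for example:

```bash
# List the contexts defined in your kubeconfig; the active one is starred
kubectl config get-contexts
```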

@@ -191,9 +191,10 @@ nextflow run <YOUR PIPELINE> -work-dir s3://<YOUR BUCKET>/scratch
You can also use Fusion and Kubernetes with Azure Blob Storage and Google Storage using the same deployment approach.
:::

- ### Local execution
+ ### Local execution with AWS S3

- Fusion file system allows the use of an S3 bucket as a pipeline work directory with the Nextflow local executor. This configuration requires the use of Docker (or similar container engine) for the execution of your pipeline tasks.
+ Fusion file system allows the use of an S3 bucket as a pipeline work directory with the Nextflow local executor. This
+ configuration requires the use of Docker (or similar container engine) for the execution of your pipeline tasks.

The AWS S3 bucket credentials should be made accessible via standard `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.
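For example, assuming placeholder values, the credentials can be exported in the shell session that launches Nextflow:

```bash
export AWS_ACCESS_KEY_ID=<YOUR ACCESS KEY>
export AWS_SECRET_ACCESS_KEY=<YOUR SECRET KEY>
```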

@@ -204,6 +204,7 @@ docker.enabled = true
fusion.enabled = true
fusion.exportStorageCredentials = true
wave.enabled = true
+ tower.accessToken = '<your platform access token>' // optional
```

Then you can run your pipeline using the following command:
@@ -223,6 +225,55 @@ The option `fusion.exportStorageCredentials` leaks the AWS credentials on the task launcher script created by Nextflow.
This option should only be used for development purposes.
:::

+ ### Local execution with Minio
+ 
+ [Minio](https://min.io/) is an open source, enterprise-grade object storage service compatible with AWS S3. Nextflow and Fusion
+ can use Minio (or other S3-compatible object stores) as an alternative to AWS S3 in some deployment scenarios.
+ 
+ This configuration requires the use of the Nextflow local executor and Docker (or similar container engine) for the
+ execution of your pipeline tasks.
+ 
+ For the sake of this example, run a local instance of Minio using this command:
+ 
+ ```
+ docker run -p 9000:9000 \
+ --rm -d -p 9001:9001 \
+ -e "MINIO_ROOT_USER=admin" \
+ -e "MINIO_ROOT_PASSWORD=secret" \
+ quay.io/minio/minio server /data --console-address ":9001"
+ ```
+ 
+ Open the Minio console by opening `http://localhost:9001` in your browser, then create a credentials pair
+ and a bucket (see the `mc` sketch at the end of this section). For the sake of this example, the bucket name `foobar` will be used.
+ 
+ The following configuration should be added to your Nextflow configuration file:
+ 
+ ```groovy
+ aws.accessKey = '<YOUR MINIO ACCESS KEY>'
+ aws.secretKey = '<YOUR MINIO SECRET KEY>'
+ aws.client.endpoint = 'http://localhost:9000'
+ aws.client.s3PathStyleAccess = true
+ wave.enabled = true
+ fusion.enabled = true
+ fusion.exportStorageCredentials = true
+ docker.enabled = true
+ tower.accessToken = '<your platform access token>' // optional
+ ```
+ 
+ Then you can run your pipeline using the following command:
+ 
+ ```bash
+ nextflow run <YOUR PIPELINE> -work-dir s3://foobar/scratch
+ ```

+ Replace `<YOUR PIPELINE>` with a pipeline script and bucket of your choice.
+ 
+ :::{warning}
+ The option `fusion.exportStorageCredentials` leaks the AWS credentials on the task launcher script created by Nextflow.
+ This option should only be used for development purposes.
+ :::
## Advanced settings

Fusion advanced configuration settings are described in the {ref}`Fusion <config-fusion>` section on the Nextflow configuration page.
@@ -36,7 +36,7 @@ import nextflow.util.Duration
@CompileStatic
class AwsBatchConfig implements CloudTransferOptions {

- public static final int DEFAULT_MAX_SPOT_ATTEMPTS = 5
+ public static final int DEFAULT_MAX_SPOT_ATTEMPTS = 0

public static final int DEFAULT_AWS_MAX_ATTEMPTS = 5

@@ -16,13 +16,11 @@

package nextflow.cloud.google.batch.client

- 
import com.google.auth.oauth2.GoogleCredentials
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.Session
import nextflow.cloud.google.GoogleOpts
- import nextflow.exception.ProcessUnrecoverableException
import nextflow.util.MemoryUnit
/**
* Model Google Batch config settings
@@ -33,7 +31,9 @@ import nextflow.util.MemoryUnit
@CompileStatic
class BatchConfig {

- static private List<Integer> DEFAULT_RETRY_LIST = List.of(50001)
+ static final private int DEFAULT_MAX_SPOT_ATTEMPTS = 0
+ 
+ static final private List<Integer> DEFAULT_RETRY_LIST = List.of(50001)

private GoogleOpts googleOpts
private GoogleCredentials credentials
@@ -74,7 +74,7 @@ class BatchConfig {
result.allowedLocations = session.config.navigate('google.batch.allowedLocations', List.of()) as List<String>
result.bootDiskSize = session.config.navigate('google.batch.bootDiskSize') as MemoryUnit
result.cpuPlatform = session.config.navigate('google.batch.cpuPlatform')
- result.maxSpotAttempts = session.config.navigate('google.batch.maxSpotAttempts',5) as int
+ result.maxSpotAttempts = session.config.navigate('google.batch.maxSpotAttempts', DEFAULT_MAX_SPOT_ATTEMPTS) as int
result.installGpuDrivers = session.config.navigate('google.batch.installGpuDrivers',false)
result.preemptible = session.config.navigate('google.batch.preemptible',false)
result.spot = session.config.navigate('google.batch.spot',false)
@@ -83,7 +83,7 @@
result.subnetwork = session.config.navigate('google.batch.subnetwork')
result.serviceAccountEmail = session.config.navigate('google.batch.serviceAccountEmail')
result.retryConfig = new BatchRetryConfig( session.config.navigate('google.batch.retryPolicy') as Map ?: Map.of() )
- result.autoRetryExitCodes = session.config.navigate('google.batch.autoRetryExitCodes',DEFAULT_RETRY_LIST) as List<Integer>
+ result.autoRetryExitCodes = session.config.navigate('google.batch.autoRetryExitCodes', DEFAULT_RETRY_LIST) as List<Integer>
return result
}

@@ -42,7 +42,7 @@ class BatchConfigTest extends Specification {
config.getSpot()
and:
config.retryConfig.maxAttempts == 5
- config.maxSpotAttempts == 5
+ config.maxSpotAttempts == 0
config.autoRetryExitCodes == [50001]
}

@@ -265,7 +265,7 @@ class WaveClient {
throw new UnauthorizedException("Unauthorized [401] - Verify you have provided a valid access token")
}
else
- throw new BadResponseException("Wave invalid response: [${resp.statusCode()}] ${resp.body()}")
+ throw new BadResponseException("Wave invalid response: POST ${uri} [${resp.statusCode()}] ${resp.body()}")
}
catch (IOException e) {
throw new IllegalStateException("Unable to connect Wave service: $endpoint")
