From efc6875f6cb42735bb9cd277d7137c1333d5053e Mon Sep 17 00:00:00 2001
From: phoenixAja
Date: Mon, 17 Jun 2024 12:58:42 -0700
Subject: [PATCH] subsample with seqtk instead of using head

---
Reviewer notes (text between the "---" line above and the diffstat below is
ignored by `git am` and does not become part of the commit):

NOTE(review): `head -N` keeps the first N *lines* of the FASTQ, whereas
`seqtk sample <in> N` with an integer N keeps N *reads*. If callers pass
subsample_depth as a line count (4 lines per FASTQ record), this change
samples 4x as many reads as before. The declaration and call site of
subsample_depth are outside this hunk -- confirm the intended unit.

NOTE(review): assumes `seqtk` is available in the task's docker image; that
is not visible in this hunk -- confirm.

NOTE(review): this patch had been collapsed onto a single line. Line breaks
were restored here; the two blank context lines follow from the 7/9 hunk
counts, but the leading indentation of the hunk lines (4/8 spaces) is
presumed from the surrounding file's style -- verify against run.wdl, or
apply with `git apply --ignore-whitespace`.

 workflows/long-read-mngs/run.wdl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/workflows/long-read-mngs/run.wdl b/workflows/long-read-mngs/run.wdl
index 410e9e394..e1ffb2a16 100644
--- a/workflows/long-read-mngs/run.wdl
+++ b/workflows/long-read-mngs/run.wdl
@@ -268,7 +268,9 @@ task RunSubsampling {
 
     command <<<
         set -euxo pipefail
-        head -"~{subsample_depth}" "~{input_fastq}" > sample.subsampled.fastq
+
+        # set seed to 42 for reproducibility
+        seqtk sample -s42 "~{input_fastq}" "~{subsample_depth}" > sample.subsampled.fastq
         # We should always have reads after subsampling, but adding for consistency with other steps
         filter_count sample.subsampled.fastq subsampled "No reads remaining after subsampling"
 