Skip to content

Commit

Permalink
Expose cutadapt --cut options
Browse files Browse the repository at this point in the history
  • Loading branch information
vaamb authored Oct 23, 2023
1 parent 88b8b2a commit ba4bff0
Show file tree
Hide file tree
Showing 3 changed files with 245 additions and 10 deletions.
35 changes: 27 additions & 8 deletions q2_cutadapt/_demux.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def run_command(cmd, verbose=True):

def _build_demux_command(seqs_dir_fmt, barcode_fhs, per_sample_dir_fmt,
untrimmed_dir_fmt, error_rate, minimum_length,
cores=1):
forward_cut=0, reverse_cut=0, cores=1):
cmd = ['cutadapt',
'--front', 'file:%s' % barcode_fhs['fwd'].name,
'--error-rate', str(error_rate),
Expand All @@ -65,12 +65,16 @@ def _build_demux_command(seqs_dir_fmt, barcode_fhs, per_sample_dir_fmt,
os.path.join(str(untrimmed_dir_fmt), 'reverse.fastq.gz'),
str(seqs_dir_fmt.forward_sequences.view(FastqGzFormat)),
str(seqs_dir_fmt.reverse_sequences.view(FastqGzFormat)),
'-U', str(reverse_cut),
]
else:
# SINGLE-END
cmd += [str(seqs_dir_fmt.file.view(FastqGzFormat))]

cmd += ['-j', str(cores)]
cmd += [
'-u', str(forward_cut),
'-j', str(cores)
]
return cmd


Expand Down Expand Up @@ -191,7 +195,8 @@ def _check_barcodes_uniqueness(


def _demux(seqs, per_sample_sequences, forward_barcodes, reverse_barcodes,
error_tolerance, mux_fmt, batch_size, minimum_length, cores):
error_tolerance, mux_fmt, batch_size, minimum_length, forward_cut,
reverse_cut, cores):
fwd_barcode_name = forward_barcodes.name
forward_barcodes = forward_barcodes.drop_missing_values()
barcodes = forward_barcodes.to_series().to_frame()
Expand Down Expand Up @@ -220,7 +225,8 @@ def _demux(seqs, per_sample_sequences, forward_barcodes, reverse_barcodes,
cmd = _build_demux_command(previous_untrimmed, open_fhs,
per_sample_sequences,
current_untrimmed, error_tolerance,
minimum_length, cores)
minimum_length, forward_cut, reverse_cut,
cores)
run_command(cmd)
open_fhs['fwd'].close()
if reverse_barcodes is not None:
Expand All @@ -240,6 +246,7 @@ def demux_single(seqs: MultiplexedSingleEndBarcodeInSequenceDirFmt,
error_rate: float = 0.1,
batch_size: int = 0,
minimum_length: int = 1,
cut: int = 0,
cores: int = 1) -> \
(CasavaOneEightSingleLanePerSampleDirFmt,
MultiplexedSingleEndBarcodeInSequenceDirFmt):
Expand All @@ -249,14 +256,16 @@ def demux_single(seqs: MultiplexedSingleEndBarcodeInSequenceDirFmt,

untrimmed = _demux(
seqs, per_sample_sequences, barcodes, None, error_rate, mux_fmt,
batch_size, minimum_length, cores)
batch_size, minimum_length, cut, 0, cores)

return per_sample_sequences, untrimmed


def demux_paired(seqs: MultiplexedPairedEndBarcodeInSequenceDirFmt,
forward_barcodes: qiime2.CategoricalMetadataColumn,
reverse_barcodes: qiime2.CategoricalMetadataColumn = None,
forward_cut: int = 0,
reverse_cut: int = 0,
error_rate: float = 0.1,
batch_size: int = 0,
minimum_length: int = 1,
Expand All @@ -267,12 +276,21 @@ def demux_paired(seqs: MultiplexedPairedEndBarcodeInSequenceDirFmt,
_check_barcodes_uniqueness(
forward_barcodes, reverse_barcodes, mixed_orientation)

if (
mixed_orientation
and forward_cut != reverse_cut
):
raise ValueError("'forward_cut' and 'reverse_cut' need to be set to "
"the same number when using the 'mixed_orientation' "
"mode")

per_sample_sequences = CasavaOneEightSingleLanePerSampleDirFmt()
mux_fmt = MultiplexedPairedEndBarcodeInSequenceDirFmt

untrimmed = _demux(
seqs, per_sample_sequences, forward_barcodes, reverse_barcodes,
error_rate, mux_fmt, batch_size, minimum_length, cores)
error_rate, mux_fmt, batch_size, minimum_length, forward_cut,
reverse_cut, cores)

if mixed_orientation:
fwd = untrimmed.forward_sequences.view(FastqGzFormat)
Expand All @@ -282,10 +300,11 @@ def demux_paired(seqs: MultiplexedPairedEndBarcodeInSequenceDirFmt,
# fwd -> rev && rev -> fwd
remaining_seqs.forward_sequences.write_data(rev, FastqGzFormat)
remaining_seqs.reverse_sequences.write_data(fwd, FastqGzFormat)

# The cuts were already applied during the first demux pass, so the
# forward and reverse cuts are set to 0 for this second pass.
untrimmed = _demux(
remaining_seqs, per_sample_sequences, forward_barcodes,
reverse_barcodes, error_rate, mux_fmt, batch_size,
minimum_length, cores)
minimum_length, 0, 0, cores)

return per_sample_sequences, untrimmed
26 changes: 25 additions & 1 deletion q2_cutadapt/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@
inclusive_end=True),
'batch_size': Int % Range(0, None),
'minimum_length': Int % Range(1, None),
'cut': Int,
'cores': Int % Range(1, None),
},
outputs=[
Expand All @@ -291,6 +292,11 @@
'the cutadapt default of 0 has been overridden, '
'because that value produces empty sequence '
'records.',
'cut': 'Remove the specified number of bases from the sequences. Bases '
'are removed before demultiplexing. If a positive value is '
'provided, bases are removed from the beginning of the '
'sequences. If a negative value is provided, bases are removed '
'from the end of the sequences.',
},
output_descriptions={
'per_sample_sequences': 'The resulting demultiplexed sequences.',
Expand All @@ -314,6 +320,8 @@
parameters={
'forward_barcodes': MetadataColumn[Categorical],
'reverse_barcodes': MetadataColumn[Categorical],
'forward_cut': Int,
'reverse_cut': Int,
'error_rate': Float % Range(0, 1, inclusive_start=True,
inclusive_end=True),
'batch_size': Int % Range(0, None),
Expand All @@ -334,6 +342,22 @@
'per-sample barcodes for the forward reads.',
'reverse_barcodes': 'The sample metadata column listing the '
'per-sample barcodes for the reverse reads.',
'forward_cut': 'Remove the specified number of bases from the forward '
'sequences. Bases are removed before demultiplexing. '
'If a positive value is provided, bases are removed '
'from the beginning of the sequences. If a negative '
'value is provided, bases are removed from the end of '
'the sequences. If --p-mixed-orientation is set, then '
'both --p-forward-cut and --p-reverse-cut must be '
'set to the same value.',
'reverse_cut': 'Remove the specified number of bases from the reverse '
'sequences. Bases are removed before demultiplexing. '
'If a positive value is provided, bases are removed '
'from the beginning of the sequences. If a negative '
'value is provided, bases are removed from the end of '
'the sequences. If --p-mixed-orientation is set, then '
'both --p-forward-cut and --p-reverse-cut must be '
'set to the same value.',
'error_rate': 'The level of error tolerance, specified as the maximum '
'allowable error rate.',
'batch_size': 'The number of samples cutadapt demultiplexes '
Expand All @@ -347,7 +371,7 @@
'records.',
'mixed_orientation': 'Handle demultiplexing of mixed orientation '
'reads (i.e. when forward and reverse reads '
'coexist in the same file).'
'coexist in the same file).',
},
output_descriptions={
'per_sample_sequences': 'The resulting demultiplexed sequences.',
Expand Down
Loading

0 comments on commit ba4bff0

Please sign in to comment.