From c78541c9c908e508c7e92b6ad3a91f4ef9d03aae Mon Sep 17 00:00:00 2001
From: Valentin Ambroise <113367796+vaamb@users.noreply.github.com>
Date: Tue, 5 Dec 2023 22:38:20 +0100
Subject: [PATCH] IMP: Anchor barcodes during demultiplexing (#58)

---
 q2_cutadapt/_demux.py           |  81 +++++++----
 q2_cutadapt/plugin_setup.py     |  25 +++-
 q2_cutadapt/tests/test_demux.py | 230 +++++++++++++++++++++++++++++++-
 3 files changed, 306 insertions(+), 30 deletions(-)

diff --git a/q2_cutadapt/_demux.py b/q2_cutadapt/_demux.py
index 02ee411..93a5a65 100644
--- a/q2_cutadapt/_demux.py
+++ b/q2_cutadapt/_demux.py
@@ -40,9 +40,12 @@ def run_command(cmd, verbose=True):
 
 def _build_demux_command(seqs_dir_fmt, barcode_fhs, per_sample_dir_fmt,
                          untrimmed_dir_fmt, error_rate, minimum_length,
-                         forward_cut=0, reverse_cut=0, cores=1):
+                         forward_cut=0, reverse_cut=0,
+                         anchor_forward=False, anchor_reverse=False,
+                         cores=1):
     cmd = ['cutadapt',
-           '--front', 'file:%s' % barcode_fhs['fwd'].name,
+           '-g',
+           f'{"^" if anchor_forward else ""}file:{barcode_fhs["fwd"].name}',
            '--error-rate', str(error_rate),
            '--minimum-length', str(minimum_length),
            # {name} is a cutadapt convention for interpolating the sample id
@@ -57,7 +60,8 @@ def _build_demux_command(seqs_dir_fmt, barcode_fhs, per_sample_dir_fmt,
             # Dual indices
             cmd += [
                 '--pair-adapters',
-                '-G', 'file:%s' % barcode_fhs['rev'].name,
+                '-G',
+                f'{"^" if anchor_reverse else ""}file:{barcode_fhs["rev"].name}',  # noqa: E501
             ]
         cmd += [
             '-p', os.path.join(str(per_sample_dir_fmt), '{name}.2.fastq.gz'),
@@ -66,7 +70,7 @@ def _build_demux_command(seqs_dir_fmt, barcode_fhs, per_sample_dir_fmt,
             str(seqs_dir_fmt.forward_sequences.view(FastqGzFormat)),
             str(seqs_dir_fmt.reverse_sequences.view(FastqGzFormat)),
             '-U', str(reverse_cut),
-            ]
+        ]
     else:
         # SINGLE-END
         cmd += [str(seqs_dir_fmt.file.view(FastqGzFormat))]
@@ -196,7 +200,7 @@ def _check_barcodes_uniqueness(
 
 def _demux(seqs, per_sample_sequences, forward_barcodes, reverse_barcodes,
            error_tolerance, mux_fmt, batch_size, minimum_length, forward_cut,
-           reverse_cut, cores):
+           reverse_cut, anchor_forward, anchor_reverse, cores):
     fwd_barcode_name = forward_barcodes.name
     forward_barcodes = forward_barcodes.drop_missing_values()
     barcodes = forward_barcodes.to_series().to_frame()
@@ -222,11 +226,11 @@ def _demux(seqs, per_sample_sequences, forward_barcodes, reverse_barcodes,
             open_fhs['rev'] = tempfile.NamedTemporaryFile()
             _write_barcode_fasta(barcode_batch[rev_barcode_name],
                                  open_fhs['rev'])
-        cmd = _build_demux_command(previous_untrimmed, open_fhs,
-                                   per_sample_sequences,
-                                   current_untrimmed, error_tolerance,
-                                   minimum_length, forward_cut, reverse_cut,
-                                   cores)
+        cmd = _build_demux_command(
+            previous_untrimmed, open_fhs, per_sample_sequences,
+            current_untrimmed, error_tolerance, minimum_length, forward_cut,
+            reverse_cut, anchor_forward, anchor_reverse, cores
+        )
         run_command(cmd)
         open_fhs['fwd'].close()
         if reverse_barcodes is not None:
@@ -243,10 +247,11 @@ def _demux(seqs, per_sample_sequences, forward_barcodes, reverse_barcodes,
 
 def demux_single(seqs: MultiplexedSingleEndBarcodeInSequenceDirFmt,
                  barcodes: qiime2.CategoricalMetadataColumn,
+                 cut: int = 0,
+                 anchor_barcode: bool = False,
                  error_rate: float = 0.1,
                  batch_size: int = 0,
                  minimum_length: int = 1,
-                 cut: int = 0,
                  cores: int = 1) -> \
                  (CasavaOneEightSingleLanePerSampleDirFmt,
                   MultiplexedSingleEndBarcodeInSequenceDirFmt):
@@ -255,17 +260,53 @@ def demux_single(seqs: MultiplexedSingleEndBarcodeInSequenceDirFmt,
     mux_fmt = MultiplexedSingleEndBarcodeInSequenceDirFmt
 
     untrimmed = _demux(
-        seqs, per_sample_sequences, barcodes, None, error_rate, mux_fmt,
-        batch_size, minimum_length, cut, 0, cores)
+        seqs, per_sample_sequences, barcodes, None, error_rate,
+        mux_fmt, batch_size, minimum_length, cut, 0, anchor_barcode, False,
+        cores)
 
     return per_sample_sequences, untrimmed
 
 
+def _check_paired_requirements(loc):
+    mixed_orientation = loc.get("mixed_orientation", None)
+    forward_cut = loc.get("forward_cut", 0)
+    reverse_cut = loc.get("reverse_cut", 0)
+    reverse_barcodes = loc.get("reverse_barcodes", None)
+    anchor_forward_barcode = loc.get("anchor_forward_barcode", False)
+    anchor_reverse_barcode = loc.get("anchor_reverse_barcode", False)
+
+    if (
+        not mixed_orientation
+        and anchor_reverse_barcode and (reverse_barcodes is None)
+    ):
+        raise ValueError("A reverse barcode needs to be provided in order to "
+                         "anchor the reverse barcode.")
+
+    if (
+        mixed_orientation
+        and forward_cut != reverse_cut
+    ):
+        raise ValueError("'forward_cut' and 'reverse_cut' need to be set to "
+                         "the same number when using the 'mixed_orientation' "
+                         "mode.")
+
+    if (
+        mixed_orientation
+        and anchor_forward_barcode != anchor_reverse_barcode
+    ):
+        raise ValueError(
+            "'anchor_forward_barcode' and 'anchor_reverse_barcode' need to be "
+            "set to the same value when using the 'mixed_orientation' mode."
+        )
+
+
 def demux_paired(seqs: MultiplexedPairedEndBarcodeInSequenceDirFmt,
                  forward_barcodes: qiime2.CategoricalMetadataColumn,
                  reverse_barcodes: qiime2.CategoricalMetadataColumn = None,
                  forward_cut: int = 0,
                  reverse_cut: int = 0,
+                 anchor_forward_barcode: bool = False,
+                 anchor_reverse_barcode: bool = False,
                  error_rate: float = 0.1,
                  batch_size: int = 0,
                  minimum_length: int = 1,
@@ -275,14 +316,7 @@ def demux_paired(seqs: MultiplexedPairedEndBarcodeInSequenceDirFmt,
                      MultiplexedPairedEndBarcodeInSequenceDirFmt):
     _check_barcodes_uniqueness(
         forward_barcodes, reverse_barcodes, mixed_orientation)
-
-    if (
-        mixed_orientation
-        and forward_cut != reverse_cut
-    ):
-        raise ValueError("'forward_cut' and 'reverse_cut' need to be set to "
-                         "the same number when using the 'mixed_orientation' "
-                         "mode")
+    _check_paired_requirements(locals())
 
     per_sample_sequences = CasavaOneEightSingleLanePerSampleDirFmt()
     mux_fmt = MultiplexedPairedEndBarcodeInSequenceDirFmt
@@ -290,7 +324,7 @@ def demux_paired(seqs: MultiplexedPairedEndBarcodeInSequenceDirFmt,
     untrimmed = _demux(
         seqs, per_sample_sequences, forward_barcodes, reverse_barcodes,
         error_rate, mux_fmt, batch_size, minimum_length, forward_cut,
-        reverse_cut, cores)
+        reverse_cut, anchor_forward_barcode, anchor_reverse_barcode, cores)
 
     if mixed_orientation:
         fwd = untrimmed.forward_sequences.view(FastqGzFormat)
@@ -305,6 +339,7 @@ def demux_paired(seqs: MultiplexedPairedEndBarcodeInSequenceDirFmt,
         untrimmed = _demux(
             remaining_seqs, per_sample_sequences, forward_barcodes,
             reverse_barcodes, error_rate, mux_fmt, batch_size,
-            minimum_length, 0, 0, cores)
+            minimum_length, 0, 0, anchor_reverse_barcode,
+            anchor_forward_barcode, cores)
 
     return per_sample_sequences, untrimmed
diff --git a/q2_cutadapt/plugin_setup.py b/q2_cutadapt/plugin_setup.py
index 0eebc7a..4f8e224 100644
--- a/q2_cutadapt/plugin_setup.py
+++ b/q2_cutadapt/plugin_setup.py
@@ -263,6 +263,7 @@
         'barcodes': MetadataColumn[Categorical],
         'error_rate': Float % Range(0, 1, inclusive_start=True,
                                     inclusive_end=True),
+        'anchor_barcode': Bool,
         'batch_size': Int % Range(0, None),
         'minimum_length': Int % Range(1, None),
         'cut': Int,
@@ -283,6 +284,10 @@
                       'allowable error rate. The default value specified by '
                       'cutadapt is 0.1 (=10%), which is greater than '
                       '`demux emp-*`, which is 0.0 (=0%).',
+        'anchor_barcode': 'Anchor the barcode. The barcode is then '
+                          'expected to occur in full length at the beginning '
+                          '(5\' end) of the sequence. Can speed up '
+                          'demultiplexing if used.',
         'batch_size': 'The number of samples cutadapt demultiplexes '
                       'concurrently. Demultiplexing in smaller batches will '
                       'yield the same result with marginal speed loss, and '
@@ -292,11 +297,11 @@
                           'the cutadapt default of 0 has been overridden, '
                           'because that value produces empty sequence '
                           'records.',
-        'cut': 'Remove the specified number of bases from the sequences. Bases'
-               'are removed before demultiplexing. If a positive value is'
-               'provided, bases are removed from the beginning of the '
+        'cut': 'Remove the specified number of bases from the sequences. '
+               'Bases are removed before demultiplexing. If a positive value '
+               'is provided, bases are removed from the beginning of the '
                'sequences. If a negative value is provided, bases are removed '
-               'from the end of the sequences',
+               'from the end of the sequences.',
     },
     output_descriptions={
         'per_sample_sequences': 'The resulting demultiplexed sequences.',
@@ -324,6 +329,8 @@
         'reverse_cut': Int,
         'error_rate': Float % Range(0, 1, inclusive_start=True,
                                     inclusive_end=True),
+        'anchor_forward_barcode': Bool,
+        'anchor_reverse_barcode': Bool,
         'batch_size': Int % Range(0, None),
         'minimum_length': Int % Range(1, None),
         'mixed_orientation': Bool,
@@ -358,6 +365,16 @@
                        'the sequences. If --p-mixed-orientation is set, then '
                        'both --p-forward-cut and --p-reverse-cut must be '
                        'set to the same value.',
+        'anchor_forward_barcode': 'Anchor the forward barcode. The '
+                                  'barcode is then expected to occur in full '
+                                  'length at the beginning (5\' end) of the '
+                                  'forward sequence. Can speed up '
+                                  'demultiplexing if used.',
+        'anchor_reverse_barcode': 'Anchor the reverse barcode. The '
+                                  'barcode is then expected to occur in full '
+                                  'length at the beginning (5\' end) of the '
+                                  'reverse sequence. Can speed up '
+                                  'demultiplexing if used.',
         'error_rate': 'The level of error tolerance, specified as the maximum '
                       'allowable error rate.',
         'batch_size': 'The number of samples cutadapt demultiplexes '
diff --git a/q2_cutadapt/tests/test_demux.py b/q2_cutadapt/tests/test_demux.py
index 03aa8c0..5d8df13 100644
--- a/q2_cutadapt/tests/test_demux.py
+++ b/q2_cutadapt/tests/test_demux.py
@@ -396,6 +396,55 @@ def test_cut_negative(self):
         self.assert_demux_results(metadata.to_series(), exp, obs_demuxed_art)
         self.assert_untrimmed_results(exp_untrimmed, obs_untrimmed_art)
 
+    def test_anchored(self):
+        metadata = CategoricalMetadataColumn(
+            pd.Series(['AAAA', 'CCCA'], name='Barcode',
+                      index=pd.Index(['sample_a', 'sample_b'], name='id')))
+        exp = [
+            # sample a
+            '@id1\nACGTACGT\n+\nzzzzzzzz\n'
+            '@id3\nACGTACGT\n+\nzzzzzzzz\n',
+            # sample b is empty because the anchored barcode 'CCCA' does not
+            #  match the start of the sequence ('CCCCACGTACGT')
+            '',
+        ]
+
+        with redirected_stdio(stderr=os.devnull):
+            obs_demuxed_art, obs_untrimmed_art = self.demux_single_fn(
+                self.muxed_sequences, metadata, anchor_barcode=True)
+        self.assert_demux_results(metadata.to_series(), exp, obs_demuxed_art)
+        self.assert_untrimmed_results('@id2\nCCCCACGTACGT\n+\nzzzzzzzzzzzz\n'
+                                      '@id4\nCCCCACGTACGT\n+\nzzzzzzzzzzzz\n'
+                                      '@id5\nCCCCACGTACGT\n+\nzzzzzzzzzzzz\n'
+                                      '@id6\nGGGGACGTACGT\n+\nzzzzzzzzzzzz\n',
+                                      obs_untrimmed_art)
+
+    def test_anchored_cut(self):
+        metadata = CategoricalMetadataColumn(
+            pd.Series(['AAAA', 'CCCC'], name='Barcode',
+                      index=pd.Index(['sample_a', 'sample_b'], name='id')))
+        exp = [
+            # sample a passed. However, as the first 'A' was removed, there was
+            #  a shift in the extracted sequence.
+            '@id1\nCGTACGT\n+\nzzzzzzz\n'  # vs ACGTACGT in other tests
+            '@id3\nCGTACGT\n+\nzzzzzzz\n',
+            # sample b is empty because the removal of the first base only left
+            #  'CCC' from the original barcode.
+            '',
+        ]
+
+        with redirected_stdio(stderr=os.devnull):
+            obs_demuxed_art, obs_untrimmed_art = self.demux_single_fn(
+                self.muxed_sequences, metadata, cut=1, anchor_barcode=True)
+
+        self.assert_demux_results(metadata.to_series(), exp, obs_demuxed_art)
+        # Note: the first base was removed from all the sequences
+        self.assert_untrimmed_results('@id2\nCCCACGTACGT\n+\nzzzzzzzzzzz\n'
+                                      '@id4\nCCCACGTACGT\n+\nzzzzzzzzzzz\n'
+                                      '@id5\nCCCACGTACGT\n+\nzzzzzzzzzzz\n'
+                                      '@id6\nGGGACGTACGT\n+\nzzzzzzzzzzz\n',
+                                      obs_untrimmed_art)
+
 
 class TestDemuxPaired(TestPluginBase):
     package = 'q2_cutadapt.tests'
@@ -512,6 +561,45 @@ def test_cut(self):
         self.assert_demux_results(metadata.to_series(), exp, obs_demuxed_art)
         self.assert_untrimmed_results(exp_untrimmed, obs_untrimmed_art)
 
+    def test_anchored(self):
+        metadata = CategoricalMetadataColumn(
+            pd.Series(['AAAA', 'CCCA'], name='Barcode',
+                      index=pd.Index(['sample_a', 'sample_b'], name='id')))
+        exp = [
+            # sample a, fwd
+            '@id1\nACGTACGT\n+\nzzzzzzzz\n'
+            '@id3\nACGTACGT\n+\nzzzzzzzz\n',
+            # sample a, rev
+            '@id1\nGGGGTGCATGCA\n+\nzzzzzzzzzzzz\n'
+            '@id3\nGGGGTGCATGCA\n+\nzzzzzzzzzzzz\n',
+            # sample b, fwd is empty because the anchored barcode 'CCCA' does
+            #  not match the start of the sequence ('CCCCACGTACGT')
+            '',
+            # sample b, rev is empty for the same reason
+            '', ]
+
+        exp_untrimmed = [
+                '@id2\nCCCCACGTACGT\n+\nzzzzzzzzzzzz\n'
+                '@id4\nCCCCACGTACGT\n+\nzzzzzzzzzzzz\n'
+                '@id5\nCCCCACGTACGT\n+\nzzzzzzzzzzzz\n'
+                '@id6\nGGGGACGTACGT\n+\nzzzzzzzzzzzz\n',
+                '@id2\nTTTTTGCATGCA\n+\nzzzzzzzzzzzz\n'
+                '@id4\nTTTTTGCATGCA\n+\nzzzzzzzzzzzz\n'
+                '@id5\nTTTTTGCATGCA\n+\nzzzzzzzzzzzz\n'
+                '@id6\nTTTTTGCATGCA\n+\nzzzzzzzzzzzz\n'
+            ]
+
+        # Anchor the forward barcode only; no reverse barcodes are provided,
+        #  so the reverse reads are expected to come out unmodified
+        with redirected_stdio(stderr=os.devnull):
+            obs_demuxed_art, obs_untrimmed_art = \
+                self.demux_paired_fn(self.muxed_sequences,
+                                     forward_barcodes=metadata,
+                                     anchor_forward_barcode=True)
+
+        self.assert_demux_results(metadata.to_series(), exp, obs_demuxed_art)
+        self.assert_untrimmed_results(exp_untrimmed, obs_untrimmed_art)
+
     def test_dual_index_success(self):
         forward_barcodes = CategoricalMetadataColumn(
             pd.Series(['AAAA', 'CCCC'], name='ForwardBarcode',
@@ -530,7 +618,7 @@ def test_dual_index_success(self):
             '@id2\nACGTACGT\n+\nzzzzzzzz\n'
             '@id4\nACGTACGT\n+\nzzzzzzzz\n'
             '@id5\nACGTACGT\n+\nzzzzzzzz\n',
-            # sample a, rev
+            # sample b, rev
             '@id2\nTGCATGCA\n+\nzzzzzzzz\n'
             '@id4\nTGCATGCA\n+\nzzzzzzzz\n'
             '@id5\nTGCATGCA\n+\nzzzzzzzz\n', ]
@@ -547,6 +635,65 @@ def test_dual_index_success(self):
                                   obs_demuxed_art)
         self.assert_untrimmed_results(exp_untrimmed, obs_untrimmed_art)
 
+    def test_dual_index_anchored(self):
+        forward_barcodes = CategoricalMetadataColumn(
+            pd.Series(['AAAA', 'CCCC', 'GGGA'], name='ForwardBarcode',
+                      index=pd.Index(['sample_a', 'sample_b', 'sample_c'],
+                                     name='id')))
+        reverse_barcodes = CategoricalMetadataColumn(
+            pd.Series(['GGGT', 'TTTT', 'TTTT'], name='ReverseBarcode',
+                      index=pd.Index(['sample_a', 'sample_b', 'sample_c'],
+                                     name='id')))
+        exp = [
+            # sample a, fwd is empty because of reverse anchoring
+            '',
+            # sample a, rev is empty because of reverse anchoring
+            '',
+            # sample b, fwd
+            '@id2\nACGTACGT\n+\nzzzzzzzz\n'
+            '@id4\nACGTACGT\n+\nzzzzzzzz\n'
+            '@id5\nACGTACGT\n+\nzzzzzzzz\n',
+            # sample b, rev
+            '@id2\nTGCATGCA\n+\nzzzzzzzz\n'
+            '@id4\nTGCATGCA\n+\nzzzzzzzz\n'
+            '@id5\nTGCATGCA\n+\nzzzzzzzz\n',
+            # sample c, fwd is empty because of forward anchoring
+            '',
+            # sample c, rev is empty because of forward anchoring
+            '',
+        ]
+        exp_untrimmed = [
+            '@id1\nAAAAACGTACGT\n+\nzzzzzzzzzzzz\n'
+            '@id3\nAAAAACGTACGT\n+\nzzzzzzzzzzzz\n'
+            '@id6\nGGGGACGTACGT\n+\nzzzzzzzzzzzz\n',
+            '@id1\nGGGGTGCATGCA\n+\nzzzzzzzzzzzz\n'
+            '@id3\nGGGGTGCATGCA\n+\nzzzzzzzzzzzz\n'
+            '@id6\nTTTTTGCATGCA\n+\nzzzzzzzzzzzz\n'
+        ]
+
+        with redirected_stdio(stderr=os.devnull):
+            obs_demuxed_art, obs_untrimmed_art = \
+                self.demux_paired_fn(self.muxed_sequences,
+                                     forward_barcodes=forward_barcodes,
+                                     reverse_barcodes=reverse_barcodes,
+                                     anchor_forward_barcode=True,
+                                     anchor_reverse_barcode=True)
+
+        self.assert_demux_results(forward_barcodes.to_series(), exp,
+                                  obs_demuxed_art)
+        self.assert_untrimmed_results(exp_untrimmed, obs_untrimmed_art)
+
+    def test_dual_index_anchor_fail_no_reverse(self):
+        metadata = CategoricalMetadataColumn(
+            pd.Series(['AAAA', 'CCCA'], name='Barcode',
+                      index=pd.Index(['sample_a', 'sample_b'], name='id')))
+
+        with self.assertRaises(ValueError):
+            self.demux_paired_fn(self.muxed_sequences,
+                                 forward_barcodes=metadata,
+                                 anchor_forward_barcode=True,
+                                 anchor_reverse_barcode=True)
+
     def test_dual_index_mixed_orientation_success(self):
         forward_barcodes = CategoricalMetadataColumn(
             pd.Series(['AAAA', 'CCCC'], name='ForwardBarcode',
@@ -582,7 +729,7 @@ def test_dual_index_mixed_orientation_success(self):
             '@id2\nACGTACGT\n+\nzzzzzzzz\n'
             '@id4\nACGTACGT\n+\nzzzzzzzz\n'
             '@id5\nACGTACGT\n+\nzzzzzzzz\n',
-            # sample a, rev
+            # sample b, rev
             '@id2\nTGCATGCA\n+\nzzzzzzzz\n'
             '@id4\nTGCATGCA\n+\nzzzzzzzz\n'
             '@id5\nTGCATGCA\n+\nzzzzzzzz\n', ]
@@ -712,6 +859,71 @@ def test_mixed_orientation_cut(self):
         # Everything should match, so untrimmed should be empty
         self.assert_untrimmed_results(['', ''], obs_untrimmed_art)
 
+    def test_mixed_orientation_anchored(self):
+        # sample_a and sample_b have reads in both fwd and rev directions.
+        # sample_c only has reads in the fwd direction.
+        # sample_d only has reads in the rev direction.
+        forward_barcodes = CategoricalMetadataColumn(
+            pd.Series(['AAAA', 'CCCA', 'GGGG', 'TTTA'], name='ForwardBarcode',
+                      index=pd.Index(['sample_a', 'sample_b', 'sample_c',
+                                      'sample_d'], name='id')))
+        mixed_orientation_sequences_f_fp = self.get_data_path(
+            'mixed-orientation/forward.fastq.gz')
+        mixed_orientation_sequences_r_fp = self.get_data_path(
+            'mixed-orientation/reverse.fastq.gz')
+        with tempfile.TemporaryDirectory() as temp:
+            shutil.copy(mixed_orientation_sequences_f_fp, temp)
+            shutil.copy(mixed_orientation_sequences_r_fp, temp)
+            mixed_orientation_sequences = Artifact.import_data(
+                'MultiplexedPairedEndBarcodeInSequence', temp)
+
+        with redirected_stdio(stderr=os.devnull):
+            obs_demuxed_art, obs_untrimmed_art = \
+                self.demux_paired_fn(mixed_orientation_sequences,
+                                     forward_barcodes=forward_barcodes,
+                                     anchor_forward_barcode=True,
+                                     anchor_reverse_barcode=True,
+                                     mixed_orientation=True)
+        exp = [
+            # sample_a fwd
+            '@id1\nACGTACGT\n+\nyyyyyyyy\n'
+            '@id3\nACGTACGT\n+\nyyyyyyyy\n',
+            # sample_a rev
+            '@id1\nTGCATGCATGCA\n+\nzzzzzzzzzzzz\n'
+            '@id3\nTGCATGCATGCA\n+\nzzzzzzzzzzzz\n',
+            # sample_b fwd is empty because the anchored barcode 'CCCA' does
+            #  not match the start of the sequence ('CCCCACGTACGT')
+            '',
+            # sample_b rev is empty for the same reason
+            '',
+            # sample_c fwd
+            '@id5\nACGTACGT\n+\nyyyyyyyy\n',
+            # sample_c rev
+            '@id5\nTGCATGCATGCA\n+\nzzzzzzzzzzzz\n',
+            # sample_d fwd is empty cf. sample_b
+            '',
+            # sample_d rev is empty cf. sample_b
+            '',
+        ]
+        exp_untrimmed = [
+            '@id2\nCCCCACGTACGT\n+\nyyyyyyyyyyyy\n'
+            '@id4\nTGCATGCATGCA\n+\nzzzzzzzzzzzz\n'
+            '@id6\nTTTTACGTACGT\n+\nyyyyyyyyyyyy\n',
+            '@id2\nTGCATGCATGCA\n+\nzzzzzzzzzzzz\n'
+            '@id4\nCCCCACGTACGT\n+\nyyyyyyyyyyyy\n'
+            '@id6\nTGCATGCATGCA\n+\nzzzzzzzzzzzz\n'
+        ]
+
+        # We want to be sure that the validation is 100%, not just `min`.
+        obs_demuxed_art.validate(level='max')
+        # checkpoint assertion for the above `validate` - nothing should fail
+        self.assertTrue(True)
+
+        self.assert_demux_results(forward_barcodes.to_series(), exp,
+                                  obs_demuxed_art)
+
+        self.assert_untrimmed_results(exp_untrimmed, obs_untrimmed_art)
+
     def test_dual_index_mismatched_barcodes(self):
         forward_barcodes = CategoricalMetadataColumn(
             pd.Series(['AAAA', 'CCCC', 'ACGT'], name='ForwardBarcode',
@@ -801,6 +1013,7 @@ def test_build_demux_command(self):
                                        0.1,
                                        2)
             self.assertTrue(barcode_fasta.name in obs[2])
+            self.assertTrue('^file' not in obs[2])  # not anchored
             self.assertTrue('0.1' in obs[4])
             self.assertTrue('2' in obs[6])
             self.assertTrue(str(self.per_sample_dir_fmt) in obs[8])
@@ -810,6 +1023,17 @@ def test_build_demux_command(self):
             self.assertTrue('0' in obs[13])  # fwd cut
             self.assertTrue('1' in obs[15])  # cores
 
+        # Check that '^' is added before 'file' when adapters are anchored
+        with tempfile.NamedTemporaryFile() as barcode_fasta:
+            obs = _build_demux_command(self.seqs_dir_fmt,
+                                       {'fwd': barcode_fasta, 'rev': None},
+                                       self.per_sample_dir_fmt,
+                                       self.untrimmed_dir_fmt,
+                                       0.1,
+                                       2,
+                                       anchor_forward=True)
+            self.assertTrue('^file' in obs[2])
+
     def test_rename_files_single(self):
         for fn in ['sample_a.1.fastq.gz', 'sample_b.1.fastq.gz']:
             shutil.copy(self.fastq_fp,
@@ -890,7 +1114,7 @@ def test_build_demux_command(self):
                                        self.untrimmed_dir_fmt,
                                        0.1,
                                        2)
-            self.assertTrue(barcode_fasta.name in obs[2])
+        self.assertTrue(barcode_fasta.name in obs[2])
         self.assertTrue('0.1' in obs[4])
         self.assertTrue('2' in obs[6])
         self.assertTrue(str(self.per_sample_dir_fmt) in obs[8])  # fwd