Don't align paired forward/reverse records to the same truncation length

qiime2 · Dec 11, 2024 · 46ce373 · 46ce373
1 parent e174ffc
commit 46ce373
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 90 deletions.
diff --git a/q2_quality_filter/_filter.py b/q2_quality_filter/_filter.py
@@ -283,39 +283,6 @@ def _is_retained(
     return True
 
 
-def _align_records(
-    forward_record: FastqRecord, reverse_record: FastqRecord
-) -> tuple[FastqRecord, FastqRecord]:
-    '''
-    Align a forward record and reverse record to the same truncation length.
-    Note that if either (forward or reverse) truncation resulted in the record
-    falling below the minimum length fraction then this was already handled
-    upstream.
-
-    Parameters
-    ----------
-    forward_record : FastqRecord
-        The record from the forward fastq file.
-    reverse_record : FastqRecord
-        The record from the reverse fastq file.
-
-    Returns
-    -------
-    tuple[FastqRecord, FastqRecord]
-        The length-aligned forward and reverse records.
-    '''
-    if len(forward_record.sequence) < len(reverse_record.sequence):
-        reverse_record = _truncate(
-            reverse_record, len(forward_record.sequence)
-        )
-    elif len(reverse_record.sequence) < len(forward_record.sequence):
-        forward_record = _truncate(
-            forward_record, len(reverse_record.sequence)
-        )
-
-    return forward_record, reverse_record
-
-
 def _write_record(fastq_record: FastqRecord, fh: gzip.GzipFile) -> None:
     '''
     Writes a fastq record to an open fastq file.
@@ -443,10 +410,6 @@ def q_score(
             # if retained write to output file(s)
             if retained:
                 if paired:
-                    # align truncations if paired
-                    forward_record, reverse_record = _align_records(
-                        forward_record, reverse_record
-                    )
                     _write_record(forward_record, forward_fh)
                     _write_record(reverse_record, reverse_fh)
                 else:

diff --git a/q2_quality_filter/tests/test_filter.py b/q2_quality_filter/tests/test_filter.py
@@ -36,7 +36,6 @@
     RecordStatus,
     _process_record,
     _is_retained,
-    _align_records,
     _write_record,
 )
 from q2_quality_filter._format import QualityFilterStatsFmt
@@ -336,51 +335,6 @@ def test_is_retained(self):
         )
         filtering_stats_df.iloc[:, :] = 0
 
-    def test_align_records(self):
-        # records unchanged if equal lengths
-        forward_record = FastqRecord(
-            b'@header', b'ATTCTGTA', b'+', b'MMLMLL++'
-        )
-        reverse_record = FastqRecord(
-            b'@header', b'TTAGCATC', b'+', b'+MM+MLM+'
-        )
-        obs_forward_record, obs_reverse_record = _align_records(
-            forward_record, reverse_record
-        )
-        self.assertEqual(obs_forward_record, forward_record)
-        self.assertEqual(obs_reverse_record, reverse_record)
-
-        # longer record truncated to shorter record
-        forward_record = FastqRecord(
-            b'@header', b'ATTCTGTA', b'+', b'MMLMLL++'
-        )
-        reverse_record = FastqRecord(
-            b'@header', b'TTAGCA', b'+', b'+MM+ML'
-        )
-        obs_forward_record, obs_reverse_record = _align_records(
-            forward_record, reverse_record
-        )
-        exp_forward_record = FastqRecord(
-            b'@header', b'ATTCTG', b'+', b'MMLMLL'
-        )
-        self.assertEqual(obs_forward_record, exp_forward_record)
-        self.assertEqual(obs_reverse_record, reverse_record)
-
-        forward_record = FastqRecord(
-            b'@header', b'ATTC', b'+', b'MMLM'
-        )
-        reverse_record = FastqRecord(
-            b'@header', b'TTAGCATC', b'+', b'+MM+MLM+'
-        )
-        obs_forward_record, obs_reverse_record = _align_records(
-            forward_record, reverse_record
-        )
-        exp_reverse_record = FastqRecord(
-            b'@header', b'TTAG', b'+', b'+MM+'
-        )
-        self.assertEqual(obs_forward_record, forward_record)
-        self.assertEqual(obs_reverse_record, exp_reverse_record)
-
     def test_write_record(self):
         fastq_record = FastqRecord(
             b'@header', b'ATTCTGTA', b'+', b'MMLMLL++'
@@ -692,9 +646,6 @@ def _assert_records_match(self, manifest_df: pd.DataFrame):
                 self.assertEqual(
                     self._get_header_diff(forward_record, reverse_record), 1
                 )
-                self.assertEqual(
-                    len(forward_record.sequence), len(reverse_record.sequence)
-                )
 
     def test_paired_end_sequences(self):
         demux_artifact = Artifact.import_data(
@@ -714,10 +665,10 @@ def test_paired_end_sequences(self):
         )
         demux_manifest_df = output_demux_format.manifest.view(pd.DataFrame)
 
-        # corresponding records should be same length and have matching headers
+        # corresponding records should have matching headers
         self._assert_records_match(demux_manifest_df)
 
-        # "Human-Kneecap2_S2" is dropped because the R2 reads have low q scores
+        # "Human-Kneecap2_S2" is dropped because the R1 reads have low q scores
         exp_sample_ids = ['Human-Kneecap', 'Human-Kneecap3']
         self.assertEqual(
             set(demux_manifest_df.index), set(exp_sample_ids)
@@ -747,9 +698,9 @@ def test_paired_end_sequences(self):
         sample1_reverse_exp = [
             # first record dropped because of R2 scores
             b'@M00899:113:000000000-A5K20:1:1101:25454:3578 2:N:0:2',
-            b'GACTACCGGGGTATCTAATCCTGTTCGATACCCGCACCTTCGAGCTTCAGCGTCAGTTGCG',
+            b'GACTACCGGGGTATCTAATCCTGTTCGATACCCGCACCTTCGAGCTTCAGCGTCAGTTGCGCTCCCGTCAGCTGC', # noqa
             b'+',
-            b'CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGG',
+            b'CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG', # noqa
             b'@M00899:113:000000000-A5K20:1:1101:25177:3605 2:N:0:2',
             b'GACTACTGGGGTATCTAATCCTGTTTGATACCCGCACCTTCGAGCTTAAGCGTCAGTTGCGCTCCCGTCAGCTGC', # noqa
             b'+',