From 4a0b057aaf41efd10f0b82da0a50503141d59542 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 8 Oct 2024 11:51:47 -0700 Subject: [PATCH 1/4] add fragment iteration --- lib/synthesis/fragment/fragment.go | 16 +++++++++++++--- py/tests/test_fragment.py | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/synthesis/fragment/fragment.go b/lib/synthesis/fragment/fragment.go index 1c8ab8a..16bcbc6 100644 --- a/lib/synthesis/fragment/fragment.go +++ b/lib/synthesis/fragment/fragment.go @@ -247,8 +247,14 @@ type Assembly struct { // The forwardFlank and reverseFlank are for preparing the sequences for // recursive assembly. Generally, this involves appending a certain sequence // to each oligo, and also to the edges of each subassembly. Do not add these -// to the maxCodingSizeOligo: that is done within the function. +// to the maxCodingSizeOligo: that is done within the function. The flanks are +// NOT added to the first iteration of the sequence: if they are desired there, +// add them manually. func RecursiveFragment(sequence string, maxCodingSizeOligo int, assemblyPattern []int, excludeOverhangs []string, includeOverhangs []string, forwardFlank string, reverseFlank string) (Assembly, error) { + return recursiveFragmentIteration(sequence, maxCodingSizeOligo, assemblyPattern, excludeOverhangs, includeOverhangs, forwardFlank, reverseFlank, 0) +} + +func recursiveFragmentIteration(sequence string, maxCodingSizeOligo int, assemblyPattern []int, excludeOverhangs []string, includeOverhangs []string, forwardFlank string, reverseFlank string, iteration int) (Assembly, error) { /* Ok, so this is a note for you hackers out there: this algorithm can be greatly improved. The optimal way to do this would be to do a continuous @@ -296,14 +302,18 @@ func RecursiveFragment(sequence string, maxCodingSizeOligo int, assemblyPattern // After the smallest possible block, begin iterating for each size. for i, size := range sizes[1:] { if sequenceLen <= size { - fragments, efficiency, err := FragmentWithOverhangs(forwardFlank+sequence+reverseFlank, sizes[i]-minFragmentSizeSubtraction, sizes[i], excludeOverhangs, includeOverhangs) + targetSequence := sequence + if iteration != 0 { + targetSequence = forwardFlank + sequence + reverseFlank + } + fragments, efficiency, err := FragmentWithOverhangs(targetSequence, sizes[i]-minFragmentSizeSubtraction, sizes[i], excludeOverhangs, includeOverhangs) if err != nil { return assembly, err } // Now we need to get the derived fragments from this overall construction var subAssemblies []Assembly for _, fragment := range fragments { - subAssembly, err := RecursiveFragment(fragment, maxCodingSizeOligo, assemblyPattern, excludeOverhangs, includeOverhangs, forwardFlank, reverseFlank) + subAssembly, err := recursiveFragmentIteration(fragment, maxCodingSizeOligo, assemblyPattern, excludeOverhangs, includeOverhangs, forwardFlank, reverseFlank, iteration+1) if err != nil { return subAssembly, err } diff --git a/py/tests/test_fragment.py b/py/tests/test_fragment.py index 1969d1d..8720250 100644 --- a/py/tests/test_fragment.py +++ b/py/tests/test_fragment.py @@ -42,4 +42,4 @@ def test_recursive_fragment(): result = recursive_fragment(gene, max_oligo_len, assembly_pattern, exclude_overhangs, default_overhangs, "GTCTCT", "CGAG") assert result is not None, "RecursiveFragment failed" # Add more specific assertions based on the expected structure of the result - assert result.fragments == ['GTCTCTATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAG', 'CCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAGCGAG'] + assert result.fragments == ['ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAG', 'CCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG'] From d96c52c581b8c26e465b124f3b5b4d57122d3411 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 8 Oct 2024 11:53:11 -0700 Subject: [PATCH 2/4] update log --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 26ff098..2c112c9 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Fixes RecursiveFragment to not add flanks to the initial input [#102](https://github.com/Koeng101/dnadesign/pull/102) - Fixes add flank bug, releases new version of python lib [#101](https://github.com/Koeng101/dnadesign/pull/101) - Adds feature for adding flanks to RecursiveFragment. [#100](https://github.com/Koeng101/dnadesign/pull/100) - Adds cloning and recursion functions to python. [#96](https://github.com/Koeng101/dnadesign/pull/96) From 84cd8a05439e4a4c696be5bb8edbd19c7086714a Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 8 Oct 2024 11:59:03 -0700 Subject: [PATCH 3/4] make equivalent go<->py test --- lib/synthesis/fragment/fragment.go | 10 +++++----- lib/synthesis/fragment/fragment_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/lib/synthesis/fragment/fragment.go b/lib/synthesis/fragment/fragment.go index 16bcbc6..522f458 100644 --- a/lib/synthesis/fragment/fragment.go +++ b/lib/synthesis/fragment/fragment.go @@ -292,8 +292,12 @@ func recursiveFragmentIteration(sequence string, maxCodingSizeOligo int, assembl } sizes[i] = sizes[i-1]*assemblyPattern[i] - smallestMinFragmentSizeSubtraction // subtract approx 60bp to give room for finding overhangs } + targetSequence := sequence + if iteration != 0 { + targetSequence = forwardFlank + sequence + reverseFlank + } if sequenceLen <= sizes[0] { - fragments, efficiency, err := FragmentWithOverhangs(forwardFlank+sequence+reverseFlank, maxCodingSizeOligo-60, maxCodingSizeOligo, excludeOverhangs, includeOverhangs) + fragments, efficiency, err := FragmentWithOverhangs(targetSequence, maxCodingSizeOligo-60, maxCodingSizeOligo, excludeOverhangs, includeOverhangs) if err != nil { return assembly, err } @@ -302,10 +306,6 @@ func recursiveFragmentIteration(sequence string, maxCodingSizeOligo int, assembl // After the smallest possible block, begin iterating for each size. for i, size := range sizes[1:] { if sequenceLen <= size { - targetSequence := sequence - if iteration != 0 { - targetSequence = forwardFlank + sequence + reverseFlank - } fragments, efficiency, err := FragmentWithOverhangs(targetSequence, sizes[i]-minFragmentSizeSubtraction, sizes[i], excludeOverhangs, includeOverhangs) if err != nil { return assembly, err diff --git a/lib/synthesis/fragment/fragment_test.go b/lib/synthesis/fragment/fragment_test.go index c5b3b31..9619a3f 100644 --- a/lib/synthesis/fragment/fragment_test.go +++ b/lib/synthesis/fragment/fragment_test.go @@ -2,6 +2,7 @@ package fragment_test import ( _ "embed" + "reflect" "strings" "testing" @@ -117,3 +118,27 @@ func TestRecursiveFragment(t *testing.T) { t.Errorf("Failed to RecursiveFragment blue1. Got error: %s", err) } } + +func TestRecursiveFragmentPy(t *testing.T) { + // These are the 46 possible overhangs I personally use, plus the two identity overhangs CGAG+GTCT + defaultOverhangs := []string{"GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"} + excludeOverhangs := []string{"CGAG", "GTCT"} // These are the recursive BsaI definitions, and must be excluded from all builds. + gene := "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" + maxOligoLen := 174 // for Agilent oligo pools + assemblyPattern := []int{5, 4, 4, 5} // seems reasonable enough + result, err := fragment.RecursiveFragment(gene, maxOligoLen, assemblyPattern, excludeOverhangs, defaultOverhangs, "GTCTCT", "CGAG") + if err != nil { + t.Errorf("Failed to RecursiveFragment blue1. Got error: %s", err) + } + + // Add more specific assertions based on the expected structure of the result + expectedFragments := []string{ + "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAG", + "CCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG", + } + + if !reflect.DeepEqual(result.Fragments, expectedFragments) { + t.Errorf("Unexpected fragments. Got %v, want %v", result.Fragments, expectedFragments) + } + +} From ec2040c830a11968797fff2b662715589f88823f Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 8 Oct 2024 12:00:49 -0700 Subject: [PATCH 4/4] make linter happy --- lib/synthesis/fragment/fragment_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/synthesis/fragment/fragment_test.go b/lib/synthesis/fragment/fragment_test.go index 9619a3f..5b818d6 100644 --- a/lib/synthesis/fragment/fragment_test.go +++ b/lib/synthesis/fragment/fragment_test.go @@ -140,5 +140,4 @@ func TestRecursiveFragmentPy(t *testing.T) { if !reflect.DeepEqual(result.Fragments, expectedFragments) { t.Errorf("Unexpected fragments. Got %v, want %v", result.Fragments, expectedFragments) } - }