Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

indexing #60

Merged
merged 18 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 0 additions & 27 deletions .github/workflows/stale.yml

This file was deleted.

3 changes: 1 addition & 2 deletions external/minimap2/minimap2.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ func Minimap2(templateFastaInput io.Reader, fastqInput io.Reader, w io.Writer) e
}

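// Minimap2Channeled is like Minimap2, but receives fastq reads from a channel and sends SAM alignments to a channel instead of using an io.Reader and an io.Writer for them.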
func Minimap2Channeled(fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error {
ctx := context.Background()
func Minimap2Channeled(ctx context.Context, fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error {
g, ctx := errgroup.WithContext(ctx)

// Create a pipe for writing fastq reads and reading them as an io.Reader
Expand Down
45 changes: 40 additions & 5 deletions external/samtools/samtools.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os/exec"
"syscall"

"github.com/koeng101/dnadesign/lib/bio/sam"
"golang.org/x/sync/errgroup"
)

Expand All @@ -22,7 +23,7 @@ import (
// The first samtools view removes unmapped sequences, the sort sorts the
// sequences for piping into pileup, and the final command builds the pileup
// file.
func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error {
func Pileup(ctx context.Context, templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error {
/*
Due to how os.exec works in Golang, we can't directly have pipes as if
the whole thing was a script. However, we can attach pipes to each
Expand All @@ -49,7 +50,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro
}
tmpFile.Close() // Close the file as it's no longer needed

g, ctx := errgroup.WithContext(context.Background())
g, ctx := errgroup.WithContext(ctx)

// Setup pipe connections between commands
viewSortReader, viewSortWriter := io.Pipe()
Expand All @@ -73,7 +74,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
viewCmd.Process.Signal(syscall.SIGTERM)
_ = viewCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return viewCmd.Wait()
Expand All @@ -93,7 +94,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
sortCmd.Process.Signal(syscall.SIGTERM)
_ = sortCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return sortCmd.Wait()
Expand All @@ -111,7 +112,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
mpileupCmd.Process.Signal(syscall.SIGTERM)
_ = mpileupCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return mpileupCmd.Wait()
Expand All @@ -125,3 +126,37 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

return nil
}

// PileupChanneled is the channel-fed variant of Pileup. Every sam.Alignment
// received on samChan is serialized with its WriteTo method into an in-memory
// pipe, and the read end of that pipe is handed to Pileup together with
// templateFastas. Pileup's output is written to w (not to a channel).
//
// The call returns when samChan has been closed and drained and Pileup has
// finished, or when either side fails. The error returned is the first
// non-nil error reported by either goroutine. Cancelling ctx stops the
// goroutine that consumes samChan; the derived context is also passed on to
// Pileup.
func PileupChanneled(ctx context.Context, templateFastas io.Reader, samChan <-chan sam.Alignment, w io.Writer) error {
	g, ctx := errgroup.WithContext(ctx)

	// The pipe turns the stream of alignments into the io.Reader Pileup expects.
	samPr, samPw := io.Pipe()

	// Producer: drain samChan into the write end of the pipe.
	g.Go(func() (err error) {
		// Close the write end with whatever error ended the loop. A nil error
		// is a normal close (the reader sees io.EOF); a non-nil error is
		// surfaced to the reader instead of looking like a clean end of input.
		defer func() { _ = samPw.CloseWithError(err) }()
		for {
			select {
			case <-ctx.Done():
				// Stop waiting on samChan once the group has been cancelled,
				// e.g. because Pileup failed or the caller gave up.
				return ctx.Err()
			case alignment, ok := <-samChan:
				if !ok {
					return nil // channel closed: all alignments were written
				}
				if _, writeErr := alignment.WriteTo(samPw); writeErr != nil {
					return writeErr
				}
			}
		}
	})

	// Consumer: run Pileup on the read end of the pipe.
	g.Go(func() error {
		err := Pileup(ctx, templateFastas, samPr, w)
		// Always close the read end when Pileup returns. Without this, a
		// producer blocked in a pipe write would never be released if Pileup
		// stopped reading early, and g.Wait would hang.
		_ = samPr.CloseWithError(err)
		return err
	})

	// Wait returns the first non-nil error from the group, or nil on success.
	return g.Wait()
}
4 changes: 3 additions & 1 deletion external/samtools/samtools_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package samtools_test

import (
"bytes"
"context"
"os"
"testing"

Expand All @@ -28,7 +29,8 @@ func TestPileup(t *testing.T) {
var buf bytes.Buffer

// Execute the pileup function
err = samtools.Pileup(templateFile, samFile, &buf)
ctx := context.Background()
err = samtools.Pileup(ctx, templateFile, samFile, &buf)
if err != nil {
t.Errorf("Pileup returned error: %s", err)
}
Expand Down
6 changes: 3 additions & 3 deletions go.work
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
go 1.22.0

use (
./lib
./external
)
./external
./lib
)
4 changes: 3 additions & 1 deletion lib/align/megamash/megamash.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package megamash
import (
"encoding/json"
"fmt"
"strings"

"github.com/koeng101/dnadesign/lib/bio/fasta"
"github.com/koeng101/dnadesign/lib/transform"
Expand All @@ -19,6 +20,7 @@ import (
// StandardizedDNA returns the alphabetically lesser strand of a double
// stranded DNA molecule.
func StandardizedDNA(sequence string) string {
sequence = strings.ToUpper(sequence)
var deterministicSequence string
reverseComplement := transform.ReverseComplement(sequence)
if sequence > reverseComplement {
Expand All @@ -32,7 +34,7 @@ func StandardizedDNA(sequence string) string {
var (
DefaultKmerSize uint = 16
DefaultMinimalKmerCount uint = 10
DefaultScoreThreshold float64 = 0.2
DefaultScoreThreshold float64 = 0.5
)

type MegamashMap struct {
Expand Down
17 changes: 9 additions & 8 deletions lib/align/megamash/megamash_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package megamash
package megamash_test

import (
"testing"

"github.com/koeng101/dnadesign/lib/align/megamash"
"github.com/koeng101/dnadesign/lib/bio/fasta"
)

Expand All @@ -12,7 +13,7 @@ func TestMegamash(t *testing.T) {
oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"

samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"}
m, err := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, DefaultKmerSize, DefaultMinimalKmerCount, DefaultScoreThreshold)
m, err := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold)
if err != nil {
t.Errorf("Failed to make NewMegamashMap: %s", err)
}
Expand All @@ -31,8 +32,8 @@ func BenchmarkMegamash(b *testing.B) {
oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"

samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"}
m, _ := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}},
DefaultKmerSize, DefaultMinimalKmerCount, DefaultScoreThreshold)
m, _ := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}},
megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold)
for _, sample := range samples {
_ = m.Match(sample)
}
Expand All @@ -41,24 +42,24 @@ func BenchmarkMegamash(b *testing.B) {

func TestMatchesConversion(t *testing.T) {
// Initial slice of Match structs
matches := []Match{
matches := []megamash.Match{
{"match1", 90.1},
{"match2", 85.5},
}
// Convert matches to JSON string
jsonStr, err := MatchesToJSON(matches)
jsonStr, err := megamash.MatchesToJSON(matches)
if err != nil {
t.Fatalf("MatchesToJSON failed with error: %v", err)
}

// Convert JSON string back to slice of Match structs
convertedMatches, err := JSONToMatches(jsonStr)
convertedMatches, err := megamash.JSONToMatches(jsonStr)
if err != nil {
t.Fatalf("JSONToMatches failed with error: %v", err)
}

// Convert the convertedMatches back to JSON to compare strings
convertedJSONStr, err := MatchesToJSON(convertedMatches)
convertedJSONStr, err := megamash.MatchesToJSON(convertedMatches)
if err != nil {
t.Fatalf("MatchesToJSON failed with error: %v", err)
}
Expand Down
35 changes: 26 additions & 9 deletions lib/bio/fastq/fastq.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"errors"
"fmt"
"io"
"sort"
"strings"
)

Expand All @@ -40,6 +41,17 @@ type Read struct {
Quality string `json:"quality"`
}

// DeepCopy returns an independent copy of the read. The Optionals map of the
// copy is a freshly allocated (always non-nil) map holding the same key/value
// pairs, so the copy's optionals can be modified and piped elsewhere without
// touching the original read.
func (read *Read) DeepCopy() Read {
	optionals := make(map[string]string)
	for name, content := range read.Optionals {
		optionals[name] = content
	}
	return Read{
		Identifier: read.Identifier,
		Optionals:  optionals,
		Sequence:   read.Sequence,
		Quality:    read.Quality,
	}
}

// Header is a blank struct, needed for compatibility with bio parsers. It contains nothing.
type Header struct{}

Expand Down Expand Up @@ -161,7 +173,11 @@ func (parser *Parser) Next() (Read, error) {
if len(line) <= 1 { // newline delimiter - actually checking for empty line
return Read{}, fmt.Errorf("empty quality sequence for %q, got to line %d: %w", seqIdentifier, parser.line, err)
}
quality = string(line[:len(line)-1])
if parser.atEOF {
quality = string(line)
} else {
quality = string(line[:len(line)-1])
}

// Parsing ended. Check for inconsistencies.
if lookingForIdentifier {
Expand All @@ -179,12 +195,6 @@ func (parser *Parser) Next() (Read, error) {
return fastq, nil
}

// Reset discards all data in buffer and resets state.
//
// It calls Reset(r) on the parser's underlying reader so that subsequent
// parsing reads from r, and sets the line counter back to 0 so that line
// numbers reported in errors start over for the new input.
// NOTE(review): only reader and line are reset here; confirm whether other
// parser state (e.g. the atEOF flag) also needs clearing.
func (parser *Parser) Reset(r io.Reader) {
parser.reader.Reset(r)
parser.line = 0
}

/******************************************************************************

Start of Write functions
Expand All @@ -200,8 +210,15 @@ func (read *Read) WriteTo(w io.Writer) (int64, error) {
if err != nil {
return writtenBytes, err
}
for key, val := range read.Optionals {
newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, val)
keys := make([]string, len(read.Optionals))
i := 0
for key := range read.Optionals {
keys[i] = key
i++
}
sort.Strings(keys)
for _, key := range keys {
newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, read.Optionals[key])
writtenBytes += int64(newWrittenBytes)
if err != nil {
return writtenBytes, err
Expand Down
1 change: 1 addition & 0 deletions lib/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ go 1.22.0

require (
github.com/google/go-cmp v0.6.0
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117
golang.org/x/sync v0.5.0
)
2 changes: 2 additions & 0 deletions lib/go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117 h1:MLWgADbigSsAmDP3yG93ESlN0Ek9QLtH5uHigmWVXwg=
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117/go.mod h1:nb80z/jm5HMCxfNZ50cBJa5TffkXxpY9okvqnBj8RrM=
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
Loading
Loading