Skip to content

Commit

Permalink
various fixes (#60)
Browse files Browse the repository at this point in the history
This PR includes a bug fix: the fastq parser would remove the last character of the last read's quality string when that line was read at EOF.

This PR mostly just improves the typing of the external functions and adds a little to the sequencing package. The original PR #60 also contained indexing for fastq files, but indexing should be added only when it is actually needed. That indexing implementation did work, however.
  • Loading branch information
Koeng101 authored Feb 28, 2024
1 parent f4998ef commit 025f71d
Show file tree
Hide file tree
Showing 12 changed files with 249 additions and 179 deletions.
27 changes: 0 additions & 27 deletions .github/workflows/stale.yml

This file was deleted.

3 changes: 1 addition & 2 deletions external/minimap2/minimap2.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ func Minimap2(templateFastaInput io.Reader, fastqInput io.Reader, w io.Writer) e
}

// Minimap2Channeled uses channels rather than io.Reader and io.Writers.
func Minimap2Channeled(fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error {
ctx := context.Background()
func Minimap2Channeled(ctx context.Context, fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error {
g, ctx := errgroup.WithContext(ctx)

// Create a pipe for writing fastq reads and reading them as an io.Reader
Expand Down
45 changes: 40 additions & 5 deletions external/samtools/samtools.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os/exec"
"syscall"

"github.com/koeng101/dnadesign/lib/bio/sam"
"golang.org/x/sync/errgroup"
)

Expand All @@ -22,7 +23,7 @@ import (
// The first samtools view removes unmapped sequences, the sort sorts the
// sequences for piping into pileup, and the final command builds the pileup
// file.
func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error {
func Pileup(ctx context.Context, templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error {
/*
Due to how os.exec works in Golang, we can't directly have pipes as if
the whole thing was a script. However, we can attach pipes to each
Expand All @@ -49,7 +50,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro
}
tmpFile.Close() // Close the file as it's no longer needed

g, ctx := errgroup.WithContext(context.Background())
g, ctx := errgroup.WithContext(ctx)

// Setup pipe connections between commands
viewSortReader, viewSortWriter := io.Pipe()
Expand All @@ -73,7 +74,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
viewCmd.Process.Signal(syscall.SIGTERM)
_ = viewCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return viewCmd.Wait()
Expand All @@ -93,7 +94,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
sortCmd.Process.Signal(syscall.SIGTERM)
_ = sortCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return sortCmd.Wait()
Expand All @@ -111,7 +112,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
mpileupCmd.Process.Signal(syscall.SIGTERM)
_ = mpileupCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return mpileupCmd.Wait()
Expand All @@ -125,3 +126,37 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

return nil
}

// PileupChanneled consumes SAM alignments from samChan, serializes them into
// an in-memory pipe, runs Pileup on that stream, and writes the resulting
// pileup output to w.
//
// The function returns once samChan has been closed and Pileup has finished,
// or as soon as either side fails or ctx is cancelled. The first non-nil error
// from either goroutine is returned.
func PileupChanneled(ctx context.Context, templateFastas io.Reader, samChan <-chan sam.Alignment, w io.Writer) error {
	g, ctx := errgroup.WithContext(ctx)

	// Pipe that turns the stream of alignments into an io.Reader for Pileup.
	samPr, samPw := io.Pipe()

	// Producer: serialize each alignment into the pipe.
	g.Go(func() (err error) {
		// Closing with the goroutine's error lets the reader see why the
		// stream ended; a nil error is a plain EOF.
		defer func() { _ = samPw.CloseWithError(err) }()
		for {
			select {
			case <-ctx.Done():
				// Stop even if the sender never closes samChan.
				return ctx.Err()
			case alignment, ok := <-samChan:
				if !ok {
					return nil
				}
				if _, err := alignment.WriteTo(samPw); err != nil {
					return err
				}
			}
		}
	})

	// Consumer: run Pileup on the piped alignments, writing output to w.
	g.Go(func() error {
		err := Pileup(ctx, templateFastas, samPr, w)
		// Close the read end so a producer blocked in Write is released
		// instead of deadlocking g.Wait when Pileup stops reading early.
		_ = samPr.CloseWithError(err)
		return err
	})

	// Wait returns the first non-nil error from the group, or nil.
	return g.Wait()
}
4 changes: 3 additions & 1 deletion external/samtools/samtools_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package samtools_test

import (
"bytes"
"context"
"os"
"testing"

Expand All @@ -28,7 +29,8 @@ func TestPileup(t *testing.T) {
var buf bytes.Buffer

// Execute the pileup function
err = samtools.Pileup(templateFile, samFile, &buf)
ctx := context.Background()
err = samtools.Pileup(ctx, templateFile, samFile, &buf)
if err != nil {
t.Errorf("Pileup returned error: %s", err)
}
Expand Down
6 changes: 3 additions & 3 deletions go.work
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
go 1.22.0

use (
./lib
./external
)
./external
./lib
)
4 changes: 3 additions & 1 deletion lib/align/megamash/megamash.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package megamash
import (
"encoding/json"
"fmt"
"strings"

"github.com/koeng101/dnadesign/lib/bio/fasta"
"github.com/koeng101/dnadesign/lib/transform"
Expand All @@ -19,6 +20,7 @@ import (
// StandardizedDNA returns the alphabetically lesser strand of a double
// stranded DNA molecule.
func StandardizedDNA(sequence string) string {
sequence = strings.ToUpper(sequence)
var deterministicSequence string
reverseComplement := transform.ReverseComplement(sequence)
if sequence > reverseComplement {
Expand All @@ -32,7 +34,7 @@ func StandardizedDNA(sequence string) string {
var (
DefaultKmerSize uint = 16
DefaultMinimalKmerCount uint = 10
DefaultScoreThreshold float64 = 0.2
DefaultScoreThreshold float64 = 0.5
)

type MegamashMap struct {
Expand Down
17 changes: 9 additions & 8 deletions lib/align/megamash/megamash_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package megamash
package megamash_test

import (
"testing"

"github.com/koeng101/dnadesign/lib/align/megamash"
"github.com/koeng101/dnadesign/lib/bio/fasta"
)

Expand All @@ -12,7 +13,7 @@ func TestMegamash(t *testing.T) {
oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"

samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"}
m, err := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, DefaultKmerSize, DefaultMinimalKmerCount, DefaultScoreThreshold)
m, err := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold)
if err != nil {
t.Errorf("Failed to make NewMegamashMap: %s", err)
}
Expand All @@ -31,8 +32,8 @@ func BenchmarkMegamash(b *testing.B) {
oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"

samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"}
m, _ := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}},
DefaultKmerSize, DefaultMinimalKmerCount, DefaultScoreThreshold)
m, _ := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}},
megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold)
for _, sample := range samples {
_ = m.Match(sample)
}
Expand All @@ -41,24 +42,24 @@ func BenchmarkMegamash(b *testing.B) {

func TestMatchesConversion(t *testing.T) {
// Initial slice of Match structs
matches := []Match{
matches := []megamash.Match{
{"match1", 90.1},
{"match2", 85.5},
}
// Convert matches to JSON string
jsonStr, err := MatchesToJSON(matches)
jsonStr, err := megamash.MatchesToJSON(matches)
if err != nil {
t.Fatalf("MatchesToJSON failed with error: %v", err)
}

// Convert JSON string back to slice of Match structs
convertedMatches, err := JSONToMatches(jsonStr)
convertedMatches, err := megamash.JSONToMatches(jsonStr)
if err != nil {
t.Fatalf("JSONToMatches failed with error: %v", err)
}

// Convert the convertedMatches back to JSON to compare strings
convertedJSONStr, err := MatchesToJSON(convertedMatches)
convertedJSONStr, err := megamash.MatchesToJSON(convertedMatches)
if err != nil {
t.Fatalf("MatchesToJSON failed with error: %v", err)
}
Expand Down
35 changes: 26 additions & 9 deletions lib/bio/fastq/fastq.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"errors"
"fmt"
"io"
"sort"
"strings"
)

Expand All @@ -40,6 +41,17 @@ type Read struct {
Quality string `json:"quality"`
}

// DeepCopy returns a copy of the read with its own Optionals map, so the
// copy's optionals can be modified and piped elsewhere without touching the
// original read. The returned Optionals map is always non-nil.
func (read *Read) DeepCopy() Read {
	optionals := make(map[string]string)
	for name, content := range read.Optionals {
		optionals[name] = content
	}
	return Read{
		Identifier: read.Identifier,
		Optionals:  optionals,
		Sequence:   read.Sequence,
		Quality:    read.Quality,
	}
}

// Header is a blank struct, needed for compatibility with bio parsers. It contains nothing.
type Header struct{}

Expand Down Expand Up @@ -161,7 +173,11 @@ func (parser *Parser) Next() (Read, error) {
if len(line) <= 1 { // newline delimiter - actually checking for empty line
return Read{}, fmt.Errorf("empty quality sequence for %q, got to line %d: %w", seqIdentifier, parser.line, err)
}
quality = string(line[:len(line)-1])
if parser.atEOF {
quality = string(line)
} else {
quality = string(line[:len(line)-1])
}

// Parsing ended. Check for inconsistencies.
if lookingForIdentifier {
Expand All @@ -179,12 +195,6 @@ func (parser *Parser) Next() (Read, error) {
return fastq, nil
}

// Reset discards all data in buffer and resets state.
//
// It calls Reset(r) on the parser's reader so subsequent parsing reads from r,
// and restarts the line counter at zero.
//
// NOTE(review): parser.atEOF (consulted by Next) is not cleared here — confirm
// whether a parser that has already hit EOF behaves correctly after Reset.
func (parser *Parser) Reset(r io.Reader) {
parser.reader.Reset(r)
parser.line = 0
}

/******************************************************************************
Start of Write functions
Expand All @@ -200,8 +210,15 @@ func (read *Read) WriteTo(w io.Writer) (int64, error) {
if err != nil {
return writtenBytes, err
}
for key, val := range read.Optionals {
newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, val)
keys := make([]string, len(read.Optionals))
i := 0
for key := range read.Optionals {
keys[i] = key
i++
}
sort.Strings(keys)
for _, key := range keys {
newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, read.Optionals[key])
writtenBytes += int64(newWrittenBytes)
if err != nil {
return writtenBytes, err
Expand Down
1 change: 1 addition & 0 deletions lib/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ go 1.22.0

require (
github.com/google/go-cmp v0.6.0
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117
golang.org/x/sync v0.5.0
)
2 changes: 2 additions & 0 deletions lib/go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117 h1:MLWgADbigSsAmDP3yG93ESlN0Ek9QLtH5uHigmWVXwg=
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117/go.mod h1:nb80z/jm5HMCxfNZ50cBJa5TffkXxpY9okvqnBj8RrM=
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
Loading

0 comments on commit 025f71d

Please sign in to comment.