From e90080b83ab6c8104a41bc1ff5febbd855f7a7af Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 00:43:26 -0800 Subject: [PATCH 01/16] init --- lib/bio/bio.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/lib/bio/bio.go b/lib/bio/bio.go index bbe6369..99d13dc 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -325,3 +325,34 @@ func FilterData[Data DataTypes](ctx context.Context, input <-chan Data, output c } } } + +/* + +We have FromIndex parsers for data types that need it: +- genbank +- fasta +- fastq +- slow5 +- sam + +We do not have them for data types that do not need it: +- pileup.Line [always small] +- uniprot.Entry [file type too complex] +*/ + +// Indexable is an interface for DataTypes to satisfy if they are indexable. +type Indexable interface { + Identifier() string +} + +func GenbankFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (genbank.Genbank, error) { + return genbank.Genbank{}, nil +} + +func FastaFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (fasta.Record, error) { + return fasta.Record{}, nil +} + +func FastqFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (fastq.Read, error) { + return fastq.Read{}, nil +} From d1e8f39af57494cf4bb1459de0101ac48d7a178b Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 13:29:09 -0800 Subject: [PATCH 02/16] add some better sequencing utils --- external/samtools/samtools.go | 35 +++++ lib/bio/fastq/fastq.go | 11 ++ lib/sequencing/example_test.go | 228 +++++++++++++++------------------ lib/sequencing/sequencing.go | 54 ++++++++ 4 files changed, 205 insertions(+), 123 deletions(-) diff --git a/external/samtools/samtools.go b/external/samtools/samtools.go index 8c4b1d2..da977e6 100644 --- a/external/samtools/samtools.go +++ b/external/samtools/samtools.go @@ -10,6 +10,7 @@ import ( "os/exec" "syscall" + "github.com/koeng101/dnadesign/lib/bio/sam" "golang.org/x/sync/errgroup" ) @@ -125,3 +126,37 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro return nil } + +// PileupChanneled processes SAM alignments from a channel and sends pileup lines to another channel. +func PileupChanneled(ctx context.Context, templateFastas io.Reader, samChan <-chan sam.Alignment, w io.Writer) error { + g, ctx := errgroup.WithContext(ctx) + + // Create a pipe for writing SAM alignments and reading them as an io.Reader + samPr, samPw := io.Pipe() + + // Goroutine to consume SAM alignments and write them to the PipeWriter + g.Go(func() error { + defer samPw.Close() + for alignment := range samChan { + // Assuming the sam.Alignment type has a WriteTo method or similar to serialize it to the writer + _, err := alignment.WriteTo(samPw) + if err != nil { + return err // return error to be handled by errgroup + } + } + return nil + }) + + // Run Pileup function in a goroutine + g.Go(func() error { + return Pileup(templateFastas, samPr, w) // Runs Pileup, writing output to pileupPw + }) + + // Wait for all goroutines in the group to finish + if err := g.Wait(); err != nil { + return err // This will return the first non-nil error from the group of goroutines + } + + // At this point, all goroutines have finished successfully + return nil +} diff --git a/lib/bio/fastq/fastq.go b/lib/bio/fastq/fastq.go index 15abd7d..78aca1a 100644 --- a/lib/bio/fastq/fastq.go +++ b/lib/bio/fastq/fastq.go @@ -40,6 +40,17 @@ type Read struct { Quality string `json:"quality"` } +// DeepCopy deep copies a read. 
Used for when you want to modify optionals then +// pipe elsewhere. +func (r *Read) DeepCopy() Read { + newRead := Read{Identifier: r.Identifier, Sequence: r.Sequence, Quality: r.Quality} + newRead.Optionals = make(map[string]string) + for key, value := range r.Optionals { + newRead.Optionals[key] = value + } + return newRead +} + // Header is a blank struct, needed for compatibility with bio parsers. It contains nothing. type Header struct{} diff --git a/lib/sequencing/example_test.go b/lib/sequencing/example_test.go index b53a794..7330151 100644 --- a/lib/sequencing/example_test.go +++ b/lib/sequencing/example_test.go @@ -1,125 +1,107 @@ package sequencing_test -import ( - "bytes" - "context" - "fmt" - "log" - "os" - "os/exec" - - "github.com/koeng101/dnadesign/external/minimap2" - "github.com/koeng101/dnadesign/lib/bio" - "github.com/koeng101/dnadesign/lib/bio/fasta" - "github.com/koeng101/dnadesign/lib/bio/fastq" - "github.com/koeng101/dnadesign/lib/bio/sam" - "github.com/koeng101/dnadesign/lib/primers/pcr" - "github.com/koeng101/dnadesign/lib/transform" - "golang.org/x/sync/errgroup" -) - -func Example_ampliconAlignment() { - // This is currently a work-in-progress. Sequencing utilities are under - // development right now. - // - // - // Only run function if minimap2 is available - _, err := exec.LookPath("minimap2") - if err != nil { - fmt.Println("oligo2") - return - } - // First, let's define the type we are looking for: amplicons in a pool. - type Amplicon struct { - Identifier string - TemplateSequence string - ForwardPrimer string - ReversePrimer string - } - - // Next, let's define data we'll be working on. In particular, the - // templates and fastq files. - - /* - Data processing steps: - - 1. Simulate PCRs of amplicons - 2. Sort for the right barcodes - 3. Trim fastq reads - 4. Minimap2 fastq reads to amplicons - 5. Filter for primary alignments - */ - var amplicons []Amplicon - var templates []fasta.Record - pcrTm := 50.0 - - forward := "CCGTGCGACAAGATTTCAAG" - reverse := transform.ReverseComplement("CGGATCGAACTTAGGTAGCC") - oligo1 := Amplicon{Identifier: "oligo1", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTGTCTCAATGACCAAACCAACGCAAGTCTTAGTTCGTTCAGTCTCTATTTTATTCTTCATCACACTGTTGCACTTGGTTGTTGCAATGAGATTTCCTAGTATTTTCACTGCTGTGCTGAGACCCGGATCGAACTTAGGTAGCCT"} - oligo2 := Amplicon{Identifier: "oligo2", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTGTGCTATTTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCACTAGTCATAAT"} - oligo3 := Amplicon{Identifier: "oligo3", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"} - amplicons = []Amplicon{oligo1, oligo2, oligo3} - - // Simulate PCRs - for _, amplicon := range amplicons { - fragments, _ := pcr.Simulate([]string{amplicon.TemplateSequence}, pcrTm, false, []string{amplicon.ForwardPrimer, amplicon.ReversePrimer}) - if len(fragments) != 1 { - log.Fatalf("Should only get 1 fragment from PCR!") - } - // In case your template will have multiple fragments - for _, fragment := range fragments { - // Make sure to reset identifier if you have more than 1 fragment. 
- templates = append(templates, fasta.Record{Identifier: amplicon.Identifier, Sequence: fragment}) - } - } - var buf bytes.Buffer - for _, template := range templates { - _, _ = template.WriteTo(&buf) - } - - // Trim fastq reads. All the following processes (trimming, minimap2, - // filtering) are all done concurrently. - - // Setup barcodes and fastq files - barcode := "barcode06" - r, _ := os.Open("data/reads.fastq") - parser := bio.NewFastqParser(r) - - // Setup errorGroups and channels - ctx := context.Background() - errorGroup, ctx := errgroup.WithContext(ctx) - - fastqReads := make(chan fastq.Read) - fastqBarcoded := make(chan fastq.Read) - samReads := make(chan sam.Alignment) - samPrimary := make(chan sam.Alignment) - - // Read fastqs into channel - errorGroup.Go(func() error { - return parser.ParseToChannel(ctx, fastqReads, false) - }) - - // Filter the right barcode fastqs from channel - errorGroup.Go(func() error { - return bio.FilterData(ctx, fastqReads, fastqBarcoded, func(data fastq.Read) bool { return data.Optionals["barcode"] == barcode }) - }) - - // Run minimap - errorGroup.Go(func() error { - return minimap2.Minimap2Channeled(&buf, fastqBarcoded, samReads) - }) - - // Sort out primary alignments - errorGroup.Go(func() error { - return bio.FilterData(ctx, samReads, samPrimary, sam.Primary) - }) - - // Read all them alignments out into memory - var outputAlignments []sam.Alignment - for alignment := range samPrimary { - outputAlignments = append(outputAlignments, alignment) - } - - fmt.Println(outputAlignments[0].RNAME) - // Output: oligo2 -} +//func Example_ampliconAlignment() { +// // This is currently a work-in-progress. Sequencing utilities are under +// // development right now. +// // +// // +// // Only run function if minimap2 is available +// _, err := exec.LookPath("minimap2") +// if err != nil { +// fmt.Println("oligo2") +// return +// } +// // First, let's define the type we are looking for: amplicons in a pool. +// type Amplicon struct { +// Identifier string +// TemplateSequence string +// ForwardPrimer string +// ReversePrimer string +// } +// +// // Next, let's define data we'll be working on. In particular, the +// // templates and fastq files. +// +// /* +// Data processing steps: +// +// 1. Simulate PCRs of amplicons +// 2. Sort for the right barcodes +// 3. Trim fastq reads +// 4. Minimap2 fastq reads to amplicons +// 5. 
Filter for primary alignments +// */ +// var amplicons []Amplicon +// var templates []fasta.Record +// pcrTm := 50.0 +// +// forward := "CCGTGCGACAAGATTTCAAG" +// reverse := transform.ReverseComplement("CGGATCGAACTTAGGTAGCC") +// oligo1 := Amplicon{Identifier: "oligo1", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTGTCTCAATGACCAAACCAACGCAAGTCTTAGTTCGTTCAGTCTCTATTTTATTCTTCATCACACTGTTGCACTTGGTTGTTGCAATGAGATTTCCTAGTATTTTCACTGCTGTGCTGAGACCCGGATCGAACTTAGGTAGCCT"} +// oligo2 := Amplicon{Identifier: "oligo2", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTGTGCTATTTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCACTAGTCATAAT"} +// oligo3 := Amplicon{Identifier: "oligo3", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"} +// amplicons = []Amplicon{oligo1, oligo2, oligo3} +// +// // Simulate PCRs +// for _, amplicon := range amplicons { +// fragments, _ := pcr.Simulate([]string{amplicon.TemplateSequence}, pcrTm, false, []string{amplicon.ForwardPrimer, amplicon.ReversePrimer}) +// if len(fragments) != 1 { +// log.Fatalf("Should only get 1 fragment from PCR!") +// } +// // In case your template will have multiple fragments +// for _, fragment := range fragments { +// // Make sure to reset identifier if you have more than 1 fragment. +// templates = append(templates, fasta.Record{Identifier: amplicon.Identifier, Sequence: fragment}) +// } +// } +// var buf bytes.Buffer +// for _, template := range templates { +// _, _ = template.WriteTo(&buf) +// } +// +// // Trim fastq reads. All the following processes (trimming, minimap2, +// // filtering) are all done concurrently. 
+// +// // Setup barcodes and fastq files +// barcode := "barcode06" +// r, _ := os.Open("data/reads.fastq") +// parser := bio.NewFastqParser(r) +// +// // Setup errorGroups and channels +// ctx := context.Background() +// errorGroup, ctx := errgroup.WithContext(ctx) +// +// fastqReads := make(chan fastq.Read) +// fastqBarcoded := make(chan fastq.Read) +// samReads := make(chan sam.Alignment) +// samPrimary := make(chan sam.Alignment) +// +// // Read fastqs into channel +// errorGroup.Go(func() error { +// return parser.ParseToChannel(ctx, fastqReads, false) +// }) +// +// // Filter the right barcode fastqs from channel +// errorGroup.Go(func() error { +// return bio.FilterData(ctx, fastqReads, fastqBarcoded, func(data fastq.Read) bool { return data.Optionals["barcode"] == barcode }) +// }) +// +// // Run minimap +// errorGroup.Go(func() error { +// return minimap2.Minimap2Channeled(&buf, fastqBarcoded, samReads) +// }) +// +// // Sort out primary alignments +// errorGroup.Go(func() error { +// return bio.FilterData(ctx, samReads, samPrimary, sam.Primary) +// }) +// +// // Read all them alignments out into memory +// var outputAlignments []sam.Alignment +// for alignment := range samPrimary { +// outputAlignments = append(outputAlignments, alignment) +// } +// +// fmt.Println(outputAlignments[0].RNAME) +// // Output: oligo2 +//} diff --git a/lib/sequencing/sequencing.go b/lib/sequencing/sequencing.go index fb049e5..f600175 100644 --- a/lib/sequencing/sequencing.go +++ b/lib/sequencing/sequencing.go @@ -2,3 +2,57 @@ Package sequencing contains functions associated with handling sequencing data. */ package sequencing + +import ( + "context" + + "github.com/koeng101/dnadesign/lib/align/megamash" + "github.com/koeng101/dnadesign/lib/bio/fastq" + "github.com/koeng101/dnadesign/lib/sequencing/barcoding" +) + +func MegamashFastq(ctx context.Context, megamashMap megamash.MegamashMap, input <-chan fastq.Read, output chan<- fastq.Read) error { + for { + select { + case <-ctx.Done(): + return ctx.Err() + case data, ok := <-input: + if !ok { + return nil + } + matches := megamashMap.Match(data.Sequence) + jsonStr, _ := megamash.MatchesToJSON(matches) + readCopy := data.DeepCopy() + readCopy.Optionals["megamash"] = jsonStr + select { + case output <- readCopy: + case <-ctx.Done(): + return ctx.Err() + } + } + } +} + +func DualBarcodeFastq(ctx context.Context, primerSet barcoding.DualBarcodePrimerSet, input <-chan fastq.Read, output chan<- fastq.Read) error { + for { + select { + case <-ctx.Done(): + return ctx.Err() + case data, ok := <-input: + if !ok { + return nil + } + well, err := barcoding.DualBarcodeSequence(data.Sequence, primerSet) + if err != nil { + return err + } + readCopy := data.DeepCopy() + readCopy.Optionals["dual_barcode"] = well + select { + case output <- readCopy: + case <-ctx.Done(): + return ctx.Err() + } + } + } +} From 8f279e7486c5785a6afa5ebb0c643a5ea25fae2c Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 15:06:25 -0800 Subject: [PATCH 03/16] add index and way to use index --- lib/bio/bio.go | 33 +++++++++++++++++++++++---------- lib/bio/example_test.go | 36 ++++++++++++++++++++++++++++++++++++ lib/bio/fastq/fastq.go | 12 +++++------- lib/go.mod | 2 ++ lib/go.sum | 2 ++ 5 files changed, 68 insertions(+), 17 deletions(-) diff --git a/lib/bio/bio.go b/lib/bio/bio.go index 99d13dc..fd8450c 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -11,8 +11,10 @@ package bio import ( "bufio" + "bytes" "context" "errors" + "fmt" "io" "math" @@ -345,14 +347,25 @@ type 
Indexable interface { Identifier() string } -func GenbankFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (genbank.Genbank, error) { - return genbank.Genbank{}, nil -} - -func FastaFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (fasta.Record, error) { - return fasta.Record{}, nil -} - -func FastqFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (fastq.Read, error) { - return fastq.Read{}, nil +func FastqFromIndex(r io.ReaderAt, startPosition uint64, length uint64) (fastq.Read, error) { + dataBytes := make([]byte, length) + n, err := r.ReadAt(dataBytes, int64(startPosition)) + if err != nil { + if !errors.Is(err, io.EOF) { + return fastq.Read{}, err + } + dataBytes = dataBytes[:len(dataBytes)-1] + } + if int(n) != len(dataBytes) { + return fastq.Read{}, fmt.Errorf("Failed to retrieve correct number of bytes Note expected may be off by 1 if at EOF. Expected: %d, Got: %d", len(dataBytes), n) + } + parser := NewFastqParserWithMaxLineLength(bytes.NewReader(dataBytes), n) + fastqRead, err := parser.Next() + if err != nil { + if errors.Is(err, io.EOF) { + err = nil // EOF not treated as parsing error. + } + return fastqRead, err + } + return fastqRead, nil } diff --git a/lib/bio/example_test.go b/lib/bio/example_test.go index ce5955f..ff63e5a 100644 --- a/lib/bio/example_test.go +++ b/lib/bio/example_test.go @@ -8,7 +8,9 @@ import ( "os" "strings" + "github.com/google/uuid" "github.com/koeng101/dnadesign/lib/bio" + "github.com/koeng101/dnadesign/lib/bio/ddidx" "github.com/koeng101/dnadesign/lib/bio/fasta" "github.com/koeng101/dnadesign/lib/bio/fastq" "github.com/koeng101/dnadesign/lib/bio/sam" @@ -478,3 +480,37 @@ $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFF fmt.Println(reads[0].Identifier) // Output: af86ed57-1cfe-486f-8205-b2c8d1186454 } + +func ExampleFastqFromIndex() { + file := strings.NewReader(`@289a197e-4c05-4143-80e6-488e23044378 runid=bb4427242f6da39e67293199a11c6c4b6ab2b141 read=34575 ch=111 start_time=2023-12-29T16:06:13.719061-08:00 flow_cell_id=AQY258 protocol_group_id=nseq28 sample_id=build3-build3gg-u11 barcode=barcode06 barcode_alias=barcode06 parent_read_id=289a197e-4c05-4143-80e6-488e23044378 basecall_model_version_id=dna_r10.4.1_e8.2_400bps_sup@v4.2.0 +TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG ++ +$%%&%%$$%&'+)**,-+)))+866788711112=>A?@@@BDB@>?746@?>A@D2@970,-+..*++++;662/.-.+,++,//+167>A@A@@B=<887-,'&&%%&''((5555644578::<==B?ABCIJA>>>>@DCAA99::>=<<<=67777+***)//+,,+)&&&+--.02:>442000/1225:=D?=<<=7;866/..../AAA226545+&%%$$ +@af86ed57-1cfe-486f-8205-b2c8d1186454 runid=bb4427242f6da39e67293199a11c6c4b6ab2b141 read=2233 ch=123 start_time=2023-12-29T10:04:32.719061-08:00 flow_cell_id=AQY258 protocol_group_id=nseq28 sample_id=build3-build3gg-u11 barcode=barcode07 barcode_alias=barcode07 parent_read_id=af86ed57-1cfe-486f-8205-b2c8d1186454 basecall_model_version_id=dna_r10.4.1_e8.2_400bps_sup@v4.2.0 +TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA ++ 
+$%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFFFGJ<<<<<=54455>@?>:::9..++?@BDCCDCGECFHD@>=<<==>@@B@?@@>>>==>>===>>>A?@ADFGDCA@?????CCCEFDDDDDGJODAA@A;;ABBD<=<:92222223:>>@?@@B?@=<62212=<<<=>AAB=<'&&&'-,-.,**)'&'(,,,-.114888&&&&&'+++++,,*`) + + parser := bio.NewFastqParser(file) + reads, _ := parser.Parse() + + // Create a ddidx + var indexes []ddidx.Index + + // Write the files to an io.Writer. + // All headers and all records implement io.WriterTo interfaces. + var buffer bytes.Buffer + var startPosition uint64 + for _, read := range reads { + uuidBytes, _ := uuid.Parse(read.Identifier) + length, _ := read.WriteTo(&buffer) + indexes = append(indexes, ddidx.Index{Identifier: uuidBytes, StartPosition: startPosition, Length: uint64(length)}) + startPosition = startPosition + uint64(length) + } + + // Now, read a fastq from an index + read, _ := bio.FastqFromIndex(file, indexes[1].StartPosition, indexes[1].Length) + + fmt.Println(read.Quality) + // Output: $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFFFGJ<<<<<=54455>@?>:::9..++?@BDCCDCGECFHD@>=<<==>@@B@?@@>>>==>>===>>>A?@ADFGDCA@?????CCCEFDDDDDGJODAA@A;;ABBD<=<:92222223:>>@?@@B?@=<62212=<<<=>AAB=<'&&&'-,-.,**)'&'(,,,-.114888&&&&&'+++++,,* +} diff --git a/lib/bio/fastq/fastq.go b/lib/bio/fastq/fastq.go index 15abd7d..806d074 100644 --- a/lib/bio/fastq/fastq.go +++ b/lib/bio/fastq/fastq.go @@ -161,7 +161,11 @@ func (parser *Parser) Next() (Read, error) { if len(line) <= 1 { // newline delimiter - actually checking for empty line return Read{}, fmt.Errorf("empty quality sequence for %q, got to line %d: %w", seqIdentifier, parser.line, err) } - quality = string(line[:len(line)-1]) + if parser.atEOF { + quality = string(line) + } else { + quality = string(line[:len(line)-1]) + } // Parsing ended. Check for inconsistencies. if lookingForIdentifier { @@ -179,12 +183,6 @@ func (parser *Parser) Next() (Read, error) { return fastq, nil } -// Reset discards all data in buffer and resets state. 
-func (parser *Parser) Reset(r io.Reader) { - parser.reader.Reset(r) - parser.line = 0 -} - /****************************************************************************** Start of Write functions diff --git a/lib/go.mod b/lib/go.mod index a062458..28b78fa 100644 --- a/lib/go.mod +++ b/lib/go.mod @@ -6,3 +6,5 @@ require ( github.com/google/go-cmp v0.6.0 golang.org/x/sync v0.5.0 ) + +require github.com/google/uuid v1.6.0 // indirect diff --git a/lib/go.sum b/lib/go.sum index e56dc15..f2f0d06 100644 --- a/lib/go.sum +++ b/lib/go.sum @@ -1,4 +1,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= From 4208874835d4915d6cb3737d2073f46cfa3942d7 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 15:11:47 -0800 Subject: [PATCH 04/16] add ddidx --- lib/bio/ddidx/ddidx.go | 83 +++++++++++++++++++++++++++++++++++++ lib/bio/ddidx/ddidx_test.go | 44 ++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 lib/bio/ddidx/ddidx.go create mode 100644 lib/bio/ddidx/ddidx_test.go diff --git a/lib/bio/ddidx/ddidx.go b/lib/bio/ddidx/ddidx.go new file mode 100644 index 0000000..1accfcc --- /dev/null +++ b/lib/bio/ddidx/ddidx.go @@ -0,0 +1,83 @@ +/* +Package ddidx contains information about the dnadesign index format. +*/ +package ddidx + +import ( + "encoding/binary" + "errors" + "io" +) + +// Index is a 32 byte index for individual objects. +type Index struct { + Identifier [16]byte + StartPosition uint64 + Length uint64 +} + +// WriteTo writes the binary representation of the Index to the given writer. +// It returns the number of bytes written and any error encountered. +func (i *Index) WriteTo(w io.Writer) (int64, error) { + // The total bytes written + var totalBytes int64 + + // Write Identifier + n, err := w.Write(i.Identifier[:]) + totalBytes += int64(n) + if err != nil { + return totalBytes, err + } + + // Create a buffer to write the uint64 values + buf := make([]byte, 8) + + // Write StartPosition + binary.BigEndian.PutUint64(buf, i.StartPosition) + n, err = w.Write(buf) + totalBytes += int64(n) + if err != nil { + return totalBytes, err + } + + // Write Length + binary.BigEndian.PutUint64(buf, i.Length) + n, err = w.Write(buf) + totalBytes += int64(n) + if err != nil { + return totalBytes, err + } + + return totalBytes, nil +} + +// ReadIndexes reads and returns a list of Index structs from the given reader. 
+func ReadIndexes(r io.Reader) ([]Index, error) { + var indexes []Index + + for { + var idx Index + + // Read Identifier + if _, err := io.ReadFull(r, idx.Identifier[:]); err != nil { + if errors.Is(err, io.EOF) { + break // End of file, stop reading + } + return indexes, err + } + + // Read StartPosition + if err := binary.Read(r, binary.BigEndian, &idx.StartPosition); err != nil { + return indexes, err + } + + // Read Length + if err := binary.Read(r, binary.BigEndian, &idx.Length); err != nil { + return indexes, err + } + + indexes = append(indexes, idx) + } + + return indexes, nil +} diff --git a/lib/bio/ddidx/ddidx_test.go b/lib/bio/ddidx/ddidx_test.go new file mode 100644 index 0000000..2ff82f0 --- /dev/null +++ b/lib/bio/ddidx/ddidx_test.go @@ -0,0 +1,44 @@ +package ddidx + +import ( + "bytes" + "reflect" + "testing" +) + +func TestIndexWriteToAndReadIndexes(t *testing.T) { + // Prepare a slice of Index instances for testing + indexes := []Index{ + { + Identifier: [16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + StartPosition: 100, + Length: 200, + }, + { + Identifier: [16]byte{16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, + StartPosition: 300, + Length: 400, + }, + } + + // Create a buffer to write the indexes to + var buf bytes.Buffer + + // Write each index to the buffer + for _, idx := range indexes { + if _, err := idx.WriteTo(&buf); err != nil { + t.Fatalf("WriteTo failed: %v", err) + } + } + + // Now read the indexes back from the buffer + readIndexes, err := ReadIndexes(&buf) + if err != nil { + t.Fatalf("ReadIndexes failed: %v", err) + } + + // Compare the original indexes with the ones read back + if !reflect.DeepEqual(indexes, readIndexes) { + t.Errorf("Original indexes %+v do not match read indexes %+v", indexes, readIndexes) + } +} From 4e401d3d63a7535c860582caa204d7c6ac4c9cbb Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 18:50:14 -0800 Subject: [PATCH 05/16] add example test --- lib/bio/example_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/bio/example_test.go b/lib/bio/example_test.go index ff63e5a..4285614 100644 --- a/lib/bio/example_test.go +++ b/lib/bio/example_test.go @@ -8,7 +8,6 @@ import ( "os" "strings" - "github.com/google/uuid" "github.com/koeng101/dnadesign/lib/bio" "github.com/koeng101/dnadesign/lib/bio/ddidx" "github.com/koeng101/dnadesign/lib/bio/fasta" @@ -502,7 +501,9 @@ $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFF var buffer bytes.Buffer var startPosition uint64 for _, read := range reads { - uuidBytes, _ := uuid.Parse(read.Identifier) + // Normally, you would want to actually parse uuidBytes, but we don't care here. 
+ var uuidBytes [16]byte + copy(uuidBytes[:], read.Identifier[:16]) length, _ := read.WriteTo(&buffer) indexes = append(indexes, ddidx.Index{Identifier: uuidBytes, StartPosition: startPosition, Length: uint64(length)}) startPosition = startPosition + uint64(length) From 0da01d1c5eea25bf076861809b25bb3ea6e31f9f Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 20:09:53 -0800 Subject: [PATCH 06/16] Add cli --- cli/cmd/fastz.go | 224 +++++++++++++++++++++++++++++++++ cli/cmd/root.go | 51 ++++++++ cli/go.mod | 14 +++ cli/go.sum | 20 +++ cli/main.go | 11 ++ go.work | 7 +- lib/align/megamash/megamash.go | 2 +- lib/go.mod | 3 +- lib/go.sum | 4 +- 9 files changed, 328 insertions(+), 8 deletions(-) create mode 100644 cli/cmd/fastz.go create mode 100644 cli/cmd/root.go create mode 100644 cli/go.mod create mode 100644 cli/go.sum create mode 100644 cli/main.go diff --git a/cli/cmd/fastz.go b/cli/cmd/fastz.go new file mode 100644 index 0000000..80a57d6 --- /dev/null +++ b/cli/cmd/fastz.go @@ -0,0 +1,224 @@ +package cmd + +import ( + "context" + "encoding/csv" + "fmt" + "io" + "os" + "runtime" + + "github.com/google/uuid" + "github.com/klauspost/compress/zstd" + "github.com/koeng101/dnadesign/lib/align/megamash" + "github.com/koeng101/dnadesign/lib/bio" + "github.com/koeng101/dnadesign/lib/bio/ddidx" + "github.com/koeng101/dnadesign/lib/bio/fasta" + "github.com/koeng101/dnadesign/lib/bio/fastq" + "github.com/koeng101/dnadesign/lib/sequencing" + "github.com/koeng101/dnadesign/lib/sequencing/barcoding" + "github.com/spf13/cobra" + "gitlab.com/rackn/seekable-zstd" + "golang.org/x/sync/errgroup" +) + +// fastzCmd represents the fastz command +var fastzCmd = &cobra.Command{ + Use: "fastz", + Short: "Compresses FASTQ files using zstd with additional indexing", + Long: `fastz is a tool for compressing FASTQ files using zstd compression, while also generating a .ddidx index file. +The command requires a primer set file and a template map file to function properly. The output is a zstd compressed FASTQ file streamed to stdout, and a .ddidx index file is generated at the specified output location. + +This command also supports optional parameters for adjusting the k-mer size and threshold used in megamash, as well as a score parameter for filtering. 
+ +Usage example: +cat input.fastq | ./dnadesign fastz --primerSet path/to/primerSet --templateMap path/to/templateMap --ddidxOutput path/to/output.ddidx --kmerSize 16 --threshold 10 --score 0.8 > output.fastq.zstd`, + Run: func(cmd *cobra.Command, args []string) { + // You can retrieve the flag values here and add your logic for processing the FASTQ file + primerSetCsvLocation, _ := cmd.Flags().GetString("primerSet") + templateMapLocation, _ := cmd.Flags().GetString("templateMap") + ddidxOutputLocation, _ := cmd.Flags().GetString("ddidxOutput") + kmerSize, _ := cmd.Flags().GetUint("kmerSize") + threshold, _ := cmd.Flags().GetUint("threshold") + score, _ := cmd.Flags().GetFloat64("score") + cpus, _ := cmd.Flags().GetInt("cpus") + + // Open the primerSet CSV file + primerSetCsv, err := os.Open(primerSetCsvLocation) + if err != nil { + // Handle error + fmt.Println("Error opening primer set CSV:", err) + return + } + defer primerSetCsv.Close() // Make sure to close the file when you're done + + // Open the templateMap file + templateMap, err := os.Open(templateMapLocation) + if err != nil { + // Handle error + fmt.Println("Error opening template map:", err) + return + } + defer templateMap.Close() // Make sure to close the file when you're done + + // Create/Open the ddidxOutput file for writing + // If you only need to write to it, use os.Create to create or truncate an existing file + ddidxOutput, err := os.Create(ddidxOutputLocation) + if err != nil { + // Handle error + fmt.Println("Error creating/opening ddidx output file:", err) + return + } + defer ddidxOutput.Close() // Make sure to close the file when you're done + + /* + Step 1: Parse initial data sets + */ + // Read primer set + primerSet, err := barcoding.ParseDualPrimerSet(primerSetCsv) + if err != nil { + fmt.Fprintf(os.Stderr, "Error parsing primerset: %v\n", err) + os.Exit(1) + } + + // Read template map + var templates []fasta.Record + reader := csv.NewReader(templateMap) + + for { + // Read each record from csv + record, err := reader.Read() + // Break the loop at the end of the file + if err == io.EOF { + break + } + // Handle any other error + if err != nil { + fmt.Fprintf(os.Stderr, "Error parsing templateMap: %v\n", err) + os.Exit(1) + } + + if len(record) == 2 { + templates = append(templates, fasta.Record{Identifier: record[0], Sequence: record[1]}) + } + } + + /* + Step 2: setup megamash + */ + m, err := megamash.NewMegamashMap(templates, kmerSize, threshold, score) + if err != nil { + fmt.Fprintf(os.Stderr, "Error creating megamash: %v\n", err) + os.Exit(1) + } + + /* + Step 3: setup concurrent processing. + */ + parser := bio.NewFastqParser(os.Stdin) + ctx := context.Background() + errorGroup, ctx := errgroup.WithContext(ctx) + + fastqReads := make(chan fastq.Read) + fastqBarcoded := make(chan fastq.Read) + fastqBarcodedFiltered := make(chan fastq.Read) + fastqBarcodedFilteredMegamashed := make(chan fastq.Read) + + // Read fastqs into channel + errorGroup.Go(func() error { + return parser.ParseToChannel(ctx, fastqReads, false) + }) + // Barcoding can be an expensive operation + errorGroup.Go(func() error { + // We're going to start multiple workers within this errorGroup. This + // helps when doing computationally intensive operations on channels. + return bio.RunWorkers(ctx, cpus, fastqBarcoded, func(ctx context.Context) error { + return sequencing.DualBarcodeFastq(ctx, primerSet, fastqReads, fastqBarcoded) + }) + }) + // Filtering is a cheap operation, so we only have 1 worker doing it. 
+ errorGroup.Go(func() error { + return bio.RunWorkers(ctx, 1, fastqBarcodedFiltered, func(ctx context.Context) error { + return bio.FilterData(ctx, fastqBarcoded, fastqBarcodedFiltered, func(data fastq.Read) bool { + _, ok := data.Optionals["dual_barcode"] + return ok + }) + }) + }) + // Megamash is very expensive, so we spawn many works to do it. + errorGroup.Go(func() error { + return bio.RunWorkers(ctx, cpus, fastqBarcodedFilteredMegamashed, func(ctx context.Context) error { + return sequencing.MegamashFastq(ctx, m, fastqBarcodedFiltered, fastqBarcodedFilteredMegamashed) + }) + }) + + /* + Step 4: Write to stdout + */ + // Setup seekable zstd + // Initialize the zstd encoder with desired settings + encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBestCompression)) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to create zstd encoder: %v\n", err) + os.Exit(1) + } + defer encoder.Close() + + // Create a seekable zstd writer on the temp file + writer, err := seekable.NewWriter(os.Stdout, encoder) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to create seekable zstd writer: %v\n", err) + os.Exit(1) + } + // Now write to stdout + var indexes []ddidx.Index + var startPosition uint64 + for read := range fastqBarcodedFilteredMegamashed { + writtenBytes, err := read.WriteTo(writer) + if err != nil { + fmt.Fprintf(os.Stderr, "Error writing to stdout: %v\n", err) + os.Exit(1) + } + identifierBytes, err := uuid.Parse(read.Identifier) + if err != nil { + fmt.Fprintf(os.Stderr, "Identifier cannot be written as 16byte uuid: %s . Got error: %v\n", read.Identifier, err) + os.Exit(1) + } + indexes = append(indexes, ddidx.Index{Identifier: identifierBytes, StartPosition: startPosition, Length: uint64(writtenBytes)}) + startPosition = startPosition + uint64(writtenBytes) + } + // Close the writer to flush the seek table + if err := writer.Close(); err != nil { + fmt.Fprintf(os.Stderr, "Failed to close seekable zstd writer: %v\n", err) + os.Exit(1) + } + // Now write ddidx file + for _, index := range indexes { + _, err := index.WriteTo(ddidxOutput) + if err != nil { + fmt.Fprintf(os.Stderr, "Error writing to ddidx: %v\n", err) + os.Exit(1) + } + } + return + }, +} + +func init() { + rootCmd.AddCommand(fastzCmd) + + // Defining flags for primerSet and templateMap files, and the output location for the ddidx file + fastzCmd.Flags().String("primerSet", "", "Path to the primer set file") + fastzCmd.Flags().String("templateMap", "", "Path to the template map file") + fastzCmd.Flags().String("ddidxOutput", "", "Output location for the .ddidx index file") + fastzCmd.Flags().Uint("kmerSize", 16, "K-mer size for megamash") + fastzCmd.Flags().Uint("threshold", 10, "Threshold for megamash") + fastzCmd.Flags().Float64("score", 0.8, "Score for filtering") + defaultCPUs := runtime.NumCPU() + fastzCmd.Flags().Int("cpus", defaultCPUs, "Number of CPUs to use") + + // Marking the flags as required + fastzCmd.MarkFlagRequired("primerSet") + fastzCmd.MarkFlagRequired("templateMap") + fastzCmd.MarkFlagRequired("ddidxOutput") +} diff --git a/cli/cmd/root.go b/cli/cmd/root.go new file mode 100644 index 0000000..7376031 --- /dev/null +++ b/cli/cmd/root.go @@ -0,0 +1,51 @@ +/* +Copyright © 2024 NAME HERE + +*/ +package cmd + +import ( + "os" + + "github.com/spf13/cobra" +) + + + +// rootCmd represents the base command when called without any subcommands +var rootCmd = &cobra.Command{ + Use: "cli", + Short: "A brief description of your application", + Long: `A longer description that spans multiple 
lines and likely contains +examples and usage of using your application. For example: + +Cobra is a CLI library for Go that empowers applications. +This application is a tool to generate the needed files +to quickly create a Cobra application.`, + // Uncomment the following line if your bare application + // has an action associated with it: + // Run: func(cmd *cobra.Command, args []string) { }, +} + +// Execute adds all child commands to the root command and sets flags appropriately. +// This is called by main.main(). It only needs to happen once to the rootCmd. +func Execute() { + err := rootCmd.Execute() + if err != nil { + os.Exit(1) + } +} + +func init() { + // Here you will define your flags and configuration settings. + // Cobra supports persistent flags, which, if defined here, + // will be global for your application. + + // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.cli.yaml)") + + // Cobra also supports local flags, which will only run + // when this action is called directly. + rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") +} + + diff --git a/cli/go.mod b/cli/go.mod new file mode 100644 index 0000000..dc0a8df --- /dev/null +++ b/cli/go.mod @@ -0,0 +1,14 @@ +module github.com/koeng101/dnadesign/cli + +go 1.22.0 + +require ( + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/klauspost/compress v1.17.6 // indirect + github.com/spf13/cobra v1.8.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + gitlab.com/rackn/seekable-zstd v0.8.2 // indirect + gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93 // indirect +) diff --git a/cli/go.sum b/cli/go.sum new file mode 100644 index 0000000..5f68dc6 --- /dev/null +++ b/cli/go.sum @@ -0,0 +1,20 @@ +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI= +github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +gitlab.com/rackn/seekable-zstd v0.8.2 h1:ARgDc1HRo/HnNR2NHsAoaxW2fE0vo0d+yI1+WvrIjF0= +gitlab.com/rackn/seekable-zstd v0.8.2/go.mod h1:9z8nf3qNXOi73VRm7KQgTmI3T0tz9YzDKKL7fzEBz9M= +gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93 h1:lXoXk/e9YrtTyWzNZs1ak/ijpwZQDaJLEwKCjhp/dCw= +gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93/go.mod h1:pXhP0EyrEy0pGf2DW4vTKub/As/UiamLFaZ1Q9YaFTs= 
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/cli/main.go b/cli/main.go new file mode 100644 index 0000000..3ce1f74 --- /dev/null +++ b/cli/main.go @@ -0,0 +1,11 @@ +/* +Copyright © 2024 NAME HERE + +*/ +package main + +import "github.com/koeng101/dnadesign/cli/cmd" + +func main() { + cmd.Execute() +} diff --git a/go.work b/go.work index 2baeb4e..a99648e 100644 --- a/go.work +++ b/go.work @@ -1,6 +1,7 @@ go 1.22.0 use ( - ./lib - ./external - ) + ./cli + ./external + ./lib +) diff --git a/lib/align/megamash/megamash.go b/lib/align/megamash/megamash.go index 0893092..9c59fa4 100644 --- a/lib/align/megamash/megamash.go +++ b/lib/align/megamash/megamash.go @@ -32,7 +32,7 @@ func StandardizedDNA(sequence string) string { var ( DefaultKmerSize uint = 16 DefaultMinimalKmerCount uint = 10 - DefaultScoreThreshold float64 = 0.2 + DefaultScoreThreshold float64 = 0.5 ) type MegamashMap struct { diff --git a/lib/go.mod b/lib/go.mod index 28b78fa..15e101f 100644 --- a/lib/go.mod +++ b/lib/go.mod @@ -4,7 +4,6 @@ go 1.22.0 require ( github.com/google/go-cmp v0.6.0 + github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117 golang.org/x/sync v0.5.0 ) - -require github.com/google/uuid v1.6.0 // indirect diff --git a/lib/go.sum b/lib/go.sum index f2f0d06..440d22d 100644 --- a/lib/go.sum +++ b/lib/go.sum @@ -1,6 +1,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117 h1:MLWgADbigSsAmDP3yG93ESlN0Ek9QLtH5uHigmWVXwg= +github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117/go.mod h1:nb80z/jm5HMCxfNZ50cBJa5TffkXxpY9okvqnBj8RrM= golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= From 56e484d09400f96955924ba0f9966923e48e752d Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:21:49 -0800 Subject: [PATCH 07/16] go work --- cli/cmd/fastz.go | 224 --------------------------------- cli/cmd/root.go | 51 -------- cli/go.mod | 14 --- cli/go.sum | 20 --- cli/main.go | 11 -- go.work | 1 - lib/align/megamash/megamash.go | 2 + lib/bio/fastq/fastq.go | 12 +- 8 files changed, 12 insertions(+), 323 deletions(-) delete mode 100644 cli/cmd/fastz.go delete mode 100644 cli/cmd/root.go delete mode 100644 cli/go.mod delete mode 100644 cli/go.sum delete mode 100644 cli/main.go diff --git a/cli/cmd/fastz.go b/cli/cmd/fastz.go deleted file mode 100644 index 80a57d6..0000000 --- a/cli/cmd/fastz.go +++ /dev/null @@ -1,224 +0,0 @@ -package cmd - -import ( - "context" - "encoding/csv" - "fmt" - "io" - "os" - "runtime" - - "github.com/google/uuid" - "github.com/klauspost/compress/zstd" - "github.com/koeng101/dnadesign/lib/align/megamash" - "github.com/koeng101/dnadesign/lib/bio" - "github.com/koeng101/dnadesign/lib/bio/ddidx" - "github.com/koeng101/dnadesign/lib/bio/fasta" - "github.com/koeng101/dnadesign/lib/bio/fastq" - "github.com/koeng101/dnadesign/lib/sequencing" - "github.com/koeng101/dnadesign/lib/sequencing/barcoding" - "github.com/spf13/cobra" - 
"gitlab.com/rackn/seekable-zstd" - "golang.org/x/sync/errgroup" -) - -// fastzCmd represents the fastz command -var fastzCmd = &cobra.Command{ - Use: "fastz", - Short: "Compresses FASTQ files using zstd with additional indexing", - Long: `fastz is a tool for compressing FASTQ files using zstd compression, while also generating a .ddidx index file. -The command requires a primer set file and a template map file to function properly. The output is a zstd compressed FASTQ file streamed to stdout, and a .ddidx index file is generated at the specified output location. - -This command also supports optional parameters for adjusting the k-mer size and threshold used in megamash, as well as a score parameter for filtering. - -Usage example: -cat input.fastq | ./dnadesign fastz --primerSet path/to/primerSet --templateMap path/to/templateMap --ddidxOutput path/to/output.ddidx --kmerSize 16 --threshold 10 --score 0.8 > output.fastq.zstd`, - Run: func(cmd *cobra.Command, args []string) { - // You can retrieve the flag values here and add your logic for processing the FASTQ file - primerSetCsvLocation, _ := cmd.Flags().GetString("primerSet") - templateMapLocation, _ := cmd.Flags().GetString("templateMap") - ddidxOutputLocation, _ := cmd.Flags().GetString("ddidxOutput") - kmerSize, _ := cmd.Flags().GetUint("kmerSize") - threshold, _ := cmd.Flags().GetUint("threshold") - score, _ := cmd.Flags().GetFloat64("score") - cpus, _ := cmd.Flags().GetInt("cpus") - - // Open the primerSet CSV file - primerSetCsv, err := os.Open(primerSetCsvLocation) - if err != nil { - // Handle error - fmt.Println("Error opening primer set CSV:", err) - return - } - defer primerSetCsv.Close() // Make sure to close the file when you're done - - // Open the templateMap file - templateMap, err := os.Open(templateMapLocation) - if err != nil { - // Handle error - fmt.Println("Error opening template map:", err) - return - } - defer templateMap.Close() // Make sure to close the file when you're done - - // Create/Open the ddidxOutput file for writing - // If you only need to write to it, use os.Create to create or truncate an existing file - ddidxOutput, err := os.Create(ddidxOutputLocation) - if err != nil { - // Handle error - fmt.Println("Error creating/opening ddidx output file:", err) - return - } - defer ddidxOutput.Close() // Make sure to close the file when you're done - - /* - Step 1: Parse initial data sets - */ - // Read primer set - primerSet, err := barcoding.ParseDualPrimerSet(primerSetCsv) - if err != nil { - fmt.Fprintf(os.Stderr, "Error parsing primerset: %v\n", err) - os.Exit(1) - } - - // Read template map - var templates []fasta.Record - reader := csv.NewReader(templateMap) - - for { - // Read each record from csv - record, err := reader.Read() - // Break the loop at the end of the file - if err == io.EOF { - break - } - // Handle any other error - if err != nil { - fmt.Fprintf(os.Stderr, "Error parsing templateMap: %v\n", err) - os.Exit(1) - } - - if len(record) == 2 { - templates = append(templates, fasta.Record{Identifier: record[0], Sequence: record[1]}) - } - } - - /* - Step 2: setup megamash - */ - m, err := megamash.NewMegamashMap(templates, kmerSize, threshold, score) - if err != nil { - fmt.Fprintf(os.Stderr, "Error creating megamash: %v\n", err) - os.Exit(1) - } - - /* - Step 3: setup concurrent processing. 
- */ - parser := bio.NewFastqParser(os.Stdin) - ctx := context.Background() - errorGroup, ctx := errgroup.WithContext(ctx) - - fastqReads := make(chan fastq.Read) - fastqBarcoded := make(chan fastq.Read) - fastqBarcodedFiltered := make(chan fastq.Read) - fastqBarcodedFilteredMegamashed := make(chan fastq.Read) - - // Read fastqs into channel - errorGroup.Go(func() error { - return parser.ParseToChannel(ctx, fastqReads, false) - }) - // Barcoding can be an expensive operation - errorGroup.Go(func() error { - // We're going to start multiple workers within this errorGroup. This - // helps when doing computationally intensive operations on channels. - return bio.RunWorkers(ctx, cpus, fastqBarcoded, func(ctx context.Context) error { - return sequencing.DualBarcodeFastq(ctx, primerSet, fastqReads, fastqBarcoded) - }) - }) - // Filtering is a cheap operation, so we only have 1 worker doing it. - errorGroup.Go(func() error { - return bio.RunWorkers(ctx, 1, fastqBarcodedFiltered, func(ctx context.Context) error { - return bio.FilterData(ctx, fastqBarcoded, fastqBarcodedFiltered, func(data fastq.Read) bool { - _, ok := data.Optionals["dual_barcode"] - return ok - }) - }) - }) - // Megamash is very expensive, so we spawn many works to do it. - errorGroup.Go(func() error { - return bio.RunWorkers(ctx, cpus, fastqBarcodedFilteredMegamashed, func(ctx context.Context) error { - return sequencing.MegamashFastq(ctx, m, fastqBarcodedFiltered, fastqBarcodedFilteredMegamashed) - }) - }) - - /* - Step 4: Write to stdout - */ - // Setup seekable zstd - // Initialize the zstd encoder with desired settings - encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBestCompression)) - if err != nil { - fmt.Fprintf(os.Stderr, "Failed to create zstd encoder: %v\n", err) - os.Exit(1) - } - defer encoder.Close() - - // Create a seekable zstd writer on the temp file - writer, err := seekable.NewWriter(os.Stdout, encoder) - if err != nil { - fmt.Fprintf(os.Stderr, "Failed to create seekable zstd writer: %v\n", err) - os.Exit(1) - } - // Now write to stdout - var indexes []ddidx.Index - var startPosition uint64 - for read := range fastqBarcodedFilteredMegamashed { - writtenBytes, err := read.WriteTo(writer) - if err != nil { - fmt.Fprintf(os.Stderr, "Error writing to stdout: %v\n", err) - os.Exit(1) - } - identifierBytes, err := uuid.Parse(read.Identifier) - if err != nil { - fmt.Fprintf(os.Stderr, "Identifier cannot be written as 16byte uuid: %s . 
Got error: %v\n", read.Identifier, err) - os.Exit(1) - } - indexes = append(indexes, ddidx.Index{Identifier: identifierBytes, StartPosition: startPosition, Length: uint64(writtenBytes)}) - startPosition = startPosition + uint64(writtenBytes) - } - // Close the writer to flush the seek table - if err := writer.Close(); err != nil { - fmt.Fprintf(os.Stderr, "Failed to close seekable zstd writer: %v\n", err) - os.Exit(1) - } - // Now write ddidx file - for _, index := range indexes { - _, err := index.WriteTo(ddidxOutput) - if err != nil { - fmt.Fprintf(os.Stderr, "Error writing to ddidx: %v\n", err) - os.Exit(1) - } - } - return - }, -} - -func init() { - rootCmd.AddCommand(fastzCmd) - - // Defining flags for primerSet and templateMap files, and the output location for the ddidx file - fastzCmd.Flags().String("primerSet", "", "Path to the primer set file") - fastzCmd.Flags().String("templateMap", "", "Path to the template map file") - fastzCmd.Flags().String("ddidxOutput", "", "Output location for the .ddidx index file") - fastzCmd.Flags().Uint("kmerSize", 16, "K-mer size for megamash") - fastzCmd.Flags().Uint("threshold", 10, "Threshold for megamash") - fastzCmd.Flags().Float64("score", 0.8, "Score for filtering") - defaultCPUs := runtime.NumCPU() - fastzCmd.Flags().Int("cpus", defaultCPUs, "Number of CPUs to use") - - // Marking the flags as required - fastzCmd.MarkFlagRequired("primerSet") - fastzCmd.MarkFlagRequired("templateMap") - fastzCmd.MarkFlagRequired("ddidxOutput") -} diff --git a/cli/cmd/root.go b/cli/cmd/root.go deleted file mode 100644 index 7376031..0000000 --- a/cli/cmd/root.go +++ /dev/null @@ -1,51 +0,0 @@ -/* -Copyright © 2024 NAME HERE - -*/ -package cmd - -import ( - "os" - - "github.com/spf13/cobra" -) - - - -// rootCmd represents the base command when called without any subcommands -var rootCmd = &cobra.Command{ - Use: "cli", - Short: "A brief description of your application", - Long: `A longer description that spans multiple lines and likely contains -examples and usage of using your application. For example: - -Cobra is a CLI library for Go that empowers applications. -This application is a tool to generate the needed files -to quickly create a Cobra application.`, - // Uncomment the following line if your bare application - // has an action associated with it: - // Run: func(cmd *cobra.Command, args []string) { }, -} - -// Execute adds all child commands to the root command and sets flags appropriately. -// This is called by main.main(). It only needs to happen once to the rootCmd. -func Execute() { - err := rootCmd.Execute() - if err != nil { - os.Exit(1) - } -} - -func init() { - // Here you will define your flags and configuration settings. - // Cobra supports persistent flags, which, if defined here, - // will be global for your application. - - // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.cli.yaml)") - - // Cobra also supports local flags, which will only run - // when this action is called directly. 
- rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") -} - - diff --git a/cli/go.mod b/cli/go.mod deleted file mode 100644 index dc0a8df..0000000 --- a/cli/go.mod +++ /dev/null @@ -1,14 +0,0 @@ -module github.com/koeng101/dnadesign/cli - -go 1.22.0 - -require ( - github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/google/uuid v1.6.0 // indirect - github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/klauspost/compress v1.17.6 // indirect - github.com/spf13/cobra v1.8.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect - gitlab.com/rackn/seekable-zstd v0.8.2 // indirect - gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93 // indirect -) diff --git a/cli/go.sum b/cli/go.sum deleted file mode 100644 index 5f68dc6..0000000 --- a/cli/go.sum +++ /dev/null @@ -1,20 +0,0 @@ -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= -github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI= -github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= -github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -gitlab.com/rackn/seekable-zstd v0.8.2 h1:ARgDc1HRo/HnNR2NHsAoaxW2fE0vo0d+yI1+WvrIjF0= -gitlab.com/rackn/seekable-zstd v0.8.2/go.mod h1:9z8nf3qNXOi73VRm7KQgTmI3T0tz9YzDKKL7fzEBz9M= -gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93 h1:lXoXk/e9YrtTyWzNZs1ak/ijpwZQDaJLEwKCjhp/dCw= -gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93/go.mod h1:pXhP0EyrEy0pGf2DW4vTKub/As/UiamLFaZ1Q9YaFTs= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/cli/main.go b/cli/main.go deleted file mode 100644 index 3ce1f74..0000000 --- a/cli/main.go +++ /dev/null @@ -1,11 +0,0 @@ -/* -Copyright © 2024 NAME HERE - -*/ -package main - -import "github.com/koeng101/dnadesign/cli/cmd" - -func main() { - cmd.Execute() -} diff --git a/go.work b/go.work index a99648e..b747922 100644 --- a/go.work +++ b/go.work @@ -1,7 +1,6 @@ go 1.22.0 use ( - ./cli ./external ./lib ) diff --git a/lib/align/megamash/megamash.go b/lib/align/megamash/megamash.go index 9c59fa4..3873d0c 100644 --- a/lib/align/megamash/megamash.go +++ b/lib/align/megamash/megamash.go @@ -11,6 +11,7 @@ package megamash import ( "encoding/json" "fmt" + "strings" "github.com/koeng101/dnadesign/lib/bio/fasta" "github.com/koeng101/dnadesign/lib/transform" @@ -19,6 +20,7 @@ import ( // StandardizedDNA returns the alphabetically lesser strand of a double 
// stranded DNA molecule. func StandardizedDNA(sequence string) string { + sequence = strings.ToUpper(sequence) var deterministicSequence string reverseComplement := transform.ReverseComplement(sequence) if sequence > reverseComplement { diff --git a/lib/bio/fastq/fastq.go b/lib/bio/fastq/fastq.go index b63fa23..2b1d8df 100644 --- a/lib/bio/fastq/fastq.go +++ b/lib/bio/fastq/fastq.go @@ -16,6 +16,7 @@ import ( "errors" "fmt" "io" + "sort" "strings" ) @@ -209,8 +210,15 @@ func (read *Read) WriteTo(w io.Writer) (int64, error) { if err != nil { return writtenBytes, err } - for key, val := range read.Optionals { - newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, val) + keys := make([]string, len(read.Optionals)) + i := 0 + for key := range read.Optionals { + keys[i] = key + i++ + } + sort.Strings(keys) + for _, key := range keys { + newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, read.Optionals[key]) writtenBytes += int64(newWrittenBytes) if err != nil { return writtenBytes, err From 80d3119e48164ff1ec6f8e843e140466c350a7b8 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:22:55 -0800 Subject: [PATCH 08/16] Remove stale --- .github/workflows/stale.yml | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index fb99c6b..0000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,27 +0,0 @@ -# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. -# -# You can adjust the behavior by modifying this file. -# For more information, see: -# https://github.com/actions/stale -name: Mark stale issues and pull requests - -on: - schedule: - - cron: '25 18 * * *' - -jobs: - stale: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v5 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - stale-issue-message: 'This issue has had no activity in the past 2 months. Marking as `stale`.' - stale-pr-message: 'This PR has had no activity in the past 2 months. Marking as `stale`.' - stale-issue-label: 'stale' - stale-pr-label: 'stale' - days-before-stale: 60 - days-before-close: -1 From cf42b4b6cc3c6a16967a5fc19ba8a8979d45e2ae Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:23:47 -0800 Subject: [PATCH 09/16] make linter happy --- lib/bio/bio.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bio/bio.go b/lib/bio/bio.go index 29adc87..98cdf0c 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -381,7 +381,7 @@ func FastqFromIndex(r io.ReaderAt, startPosition uint64, length uint64) (fastq.R } dataBytes = dataBytes[:len(dataBytes)-1] } - if int(n) != len(dataBytes) { + if n != len(dataBytes) { return fastq.Read{}, fmt.Errorf("Failed to retrieve correct number of bytes Note expected may be off by 1 if at EOF. 
Expected: %d, Got: %d", len(dataBytes), n) } parser := NewFastqParserWithMaxLineLength(bytes.NewReader(dataBytes), n) From 34df6e51d6a4387c1348b32a5ce2cadc8d53803e Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:30:39 -0800 Subject: [PATCH 10/16] Updated samtools for linter --- external/samtools/samtools.go | 12 ++++++------ external/samtools/samtools_test.go | 4 +++- lib/align/megamash/megamash_test.go | 17 +++++++++-------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/external/samtools/samtools.go b/external/samtools/samtools.go index da977e6..d78e33b 100644 --- a/external/samtools/samtools.go +++ b/external/samtools/samtools.go @@ -23,7 +23,7 @@ import ( // The first samtools view removes unmapped sequences, the sort sorts the // sequences for piping into pileup, and the final command builds the pileup // file. -func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error { +func Pileup(ctx context.Context, templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error { /* Due to how os.exec works in Golang, we can't directly have pipes as if the whole thing was a script. However, we can attach pipes to each @@ -50,7 +50,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro } tmpFile.Close() // Close the file as it's no longer needed - g, ctx := errgroup.WithContext(context.Background()) + g, ctx := errgroup.WithContext(ctx) // Setup pipe connections between commands viewSortReader, viewSortWriter := io.Pipe() @@ -74,7 +74,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro select { case <-ctx.Done(): - viewCmd.Process.Signal(syscall.SIGTERM) + _ = viewCmd.Process.Signal(syscall.SIGTERM) return ctx.Err() default: return viewCmd.Wait() @@ -94,7 +94,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro select { case <-ctx.Done(): - sortCmd.Process.Signal(syscall.SIGTERM) + _ = sortCmd.Process.Signal(syscall.SIGTERM) return ctx.Err() default: return sortCmd.Wait() @@ -112,7 +112,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro select { case <-ctx.Done(): - mpileupCmd.Process.Signal(syscall.SIGTERM) + _ = mpileupCmd.Process.Signal(syscall.SIGTERM) return ctx.Err() default: return mpileupCmd.Wait() @@ -149,7 +149,7 @@ func PileupChanneled(ctx context.Context, templateFastas io.Reader, samChan <-ch // Run Pileup function in a goroutine g.Go(func() error { - return Pileup(templateFastas, samPr, w) // Runs Pileup, writing output to pileupPw + return Pileup(ctx, templateFastas, samPr, w) // Runs Pileup, writing output to pileupPw }) // Wait for all goroutines in the group to finish diff --git a/external/samtools/samtools_test.go b/external/samtools/samtools_test.go index 8e95930..c75aadf 100644 --- a/external/samtools/samtools_test.go +++ b/external/samtools/samtools_test.go @@ -2,6 +2,7 @@ package samtools_test import ( "bytes" + "context" "os" "testing" @@ -28,7 +29,8 @@ func TestPileup(t *testing.T) { var buf bytes.Buffer // Execute the pileup function - err = samtools.Pileup(templateFile, samFile, &buf) + ctx := context.Background() + err = samtools.Pileup(ctx, templateFile, samFile, &buf) if err != nil { t.Errorf("Pileup returned error: %s", err) } diff --git a/lib/align/megamash/megamash_test.go b/lib/align/megamash/megamash_test.go index 5e53c0f..3aeaaa1 100644 --- a/lib/align/megamash/megamash_test.go +++ b/lib/align/megamash/megamash_test.go @@ -1,8 +1,9 @@ -package megamash +package 
megamash_test import ( "testing" + "github.com/koeng101/dnadesign/lib/align/megamash" "github.com/koeng101/dnadesign/lib/bio/fasta" ) @@ -12,7 +13,7 @@ func TestMegamash(t *testing.T) { oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA" samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"} - m, err := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, DefaultKmerSize, DefaultMinimalKmerCount, DefaultScoreThreshold) + m, err := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold) if err != nil { t.Errorf("Failed to make NewMegamashMap: %s", err) } @@ -31,8 +32,8 @@ func BenchmarkMegamash(b *testing.B) { oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA" samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"} - m, _ := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, - DefaultKmerSize, DefaultMinimalKmerCount, 
DefaultScoreThreshold) + m, _ := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, + megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold) for _, sample := range samples { _ = m.Match(sample) } @@ -41,24 +42,24 @@ func BenchmarkMegamash(b *testing.B) { func TestMatchesConversion(t *testing.T) { // Initial slice of Match structs - matches := []Match{ + matches := []megamash.Match{ {"match1", 90.1}, {"match2", 85.5}, } // Convert matches to JSON string - jsonStr, err := MatchesToJSON(matches) + jsonStr, err := megamash.MatchesToJSON(matches) if err != nil { t.Fatalf("MatchesToJSON failed with error: %v", err) } // Convert JSON string back to slice of Match structs - convertedMatches, err := JSONToMatches(jsonStr) + convertedMatches, err := megamash.JSONToMatches(jsonStr) if err != nil { t.Fatalf("JSONToMatches failed with error: %v", err) } // Convert the convertedMatches back to JSON to compare strings - convertedJSONStr, err := MatchesToJSON(convertedMatches) + convertedJSONStr, err := megamash.MatchesToJSON(convertedMatches) if err != nil { t.Fatalf("MatchesToJSON failed with error: %v", err) } From 411df738ed66596255e3fb56f9386a95d87ecccf Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:49:38 -0800 Subject: [PATCH 11/16] Updated to fix linter problems --- lib/bio/fastq/fastq.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/bio/fastq/fastq.go b/lib/bio/fastq/fastq.go index 2b1d8df..160cc06 100644 --- a/lib/bio/fastq/fastq.go +++ b/lib/bio/fastq/fastq.go @@ -43,10 +43,10 @@ type Read struct { // DeepCopy deep copies a read. Used for when you want to modify optionals then // pipe elsewhere. -func (r *Read) DeepCopy() Read { - newRead := Read{Identifier: r.Identifier, Sequence: r.Sequence, Quality: r.Quality} +func (read *Read) DeepCopy() Read { + newRead := Read{Identifier: read.Identifier, Sequence: read.Sequence, Quality: read.Quality} newRead.Optionals = make(map[string]string) - for key, value := range r.Optionals { + for key, value := range read.Optionals { newRead.Optionals[key] = value } return newRead From 09f9686b686d309664e7a3f1db766735cc5a6702 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Sat, 24 Feb 2024 21:08:31 -0800 Subject: [PATCH 12/16] add fastz temporarily --- go.work | 1 + 1 file changed, 1 insertion(+) diff --git a/go.work b/go.work index b747922..ebe5368 100644 --- a/go.work +++ b/go.work @@ -2,5 +2,6 @@ go 1.22.0 use ( ./external + ./fastz ./lib ) From 5cf4eafb1fe433e0a38fb9f69212da3e169a8cd1 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Wed, 28 Feb 2024 12:50:50 -0800 Subject: [PATCH 13/16] added changes to minimap2 --- external/minimap2/minimap2.go | 3 +-- go.work | 1 - lib/bio/pileup/pileup.go | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/external/minimap2/minimap2.go b/external/minimap2/minimap2.go index 578d24c..91ac73e 100644 --- a/external/minimap2/minimap2.go +++ b/external/minimap2/minimap2.go @@ -82,8 +82,7 @@ func Minimap2(templateFastaInput io.Reader, fastqInput io.Reader, w io.Writer) e } // Minimap2Channeled uses channels rather than io.Reader and io.Writers. 
-func Minimap2Channeled(fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error { - ctx := context.Background() +func Minimap2Channeled(ctx context.Context, fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error { g, ctx := errgroup.WithContext(ctx) // Create a pipe for writing fastq reads and reading them as an io.Reader diff --git a/go.work b/go.work index ebe5368..b747922 100644 --- a/go.work +++ b/go.work @@ -2,6 +2,5 @@ go 1.22.0 use ( ./external - ./fastz ./lib ) diff --git a/lib/bio/pileup/pileup.go b/lib/bio/pileup/pileup.go index e87cfd9..cfed71c 100644 --- a/lib/bio/pileup/pileup.go +++ b/lib/bio/pileup/pileup.go @@ -39,6 +39,7 @@ import ( "bufio" "fmt" "io" + "regexp" "strconv" "strings" "unicode" From dd222b58407ebeefcb4775ca3f373092aa0be56d Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Wed, 28 Feb 2024 12:53:33 -0800 Subject: [PATCH 14/16] remove ddidx --- lib/bio/bio.go | 44 -------------------- lib/bio/ddidx/ddidx.go | 83 ------------------------------------- lib/bio/ddidx/ddidx_test.go | 44 -------------------- lib/bio/example_test.go | 37 ----------------- 4 files changed, 208 deletions(-) delete mode 100644 lib/bio/ddidx/ddidx.go delete mode 100644 lib/bio/ddidx/ddidx_test.go diff --git a/lib/bio/bio.go b/lib/bio/bio.go index 98cdf0c..09eb0ff 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -11,10 +11,8 @@ package bio import ( "bufio" - "bytes" "context" "errors" - "fmt" "io" "math" @@ -352,45 +350,3 @@ func FilterData[Data DataTypes](ctx context.Context, input <-chan Data, output c } } } - -/* - -We have FromIndex parsers for data types that need it: -- genbank -- fasta -- fastq -- slow5 -- sam - -We do not have them for data types that do not need it: -- pileup.Line [always small] -- uniprot.Entry [file type too complex] -*/ - -// Indexable is an interface for DataTypes to satisfy if they are indexable. -type Indexable interface { - Identifier() string -} - -func FastqFromIndex(r io.ReaderAt, startPosition uint64, length uint64) (fastq.Read, error) { - dataBytes := make([]byte, length) - n, err := r.ReadAt(dataBytes, int64(startPosition)) - if err != nil { - if !errors.Is(err, io.EOF) { - return fastq.Read{}, err - } - dataBytes = dataBytes[:len(dataBytes)-1] - } - if n != len(dataBytes) { - return fastq.Read{}, fmt.Errorf("Failed to retrieve correct number of bytes Note expected may be off by 1 if at EOF. Expected: %d, Got: %d", len(dataBytes), n) - } - parser := NewFastqParserWithMaxLineLength(bytes.NewReader(dataBytes), n) - fastqRead, err := parser.Next() - if err != nil { - if errors.Is(err, io.EOF) { - err = nil // EOF not treated as parsing error. - } - return fastqRead, err - } - return fastqRead, nil -} diff --git a/lib/bio/ddidx/ddidx.go b/lib/bio/ddidx/ddidx.go deleted file mode 100644 index 1accfcc..0000000 --- a/lib/bio/ddidx/ddidx.go +++ /dev/null @@ -1,83 +0,0 @@ -/* -Package ddidx contains information about the dnadesign index format. -*/ -package ddidx - -import ( - "encoding/binary" - "errors" - "io" -) - -// Index is a 32 byte index for individual objects. -type Index struct { - Identifier [16]byte - StartPosition uint64 - Length uint64 -} - -// WriteTo writes the binary representation of the Index to the given writer. -// It returns the number of bytes written and any error encountered. 
-func (i *Index) WriteTo(w io.Writer) (int64, error) { - // The total bytes written - var totalBytes int64 - - // Write Identifier - n, err := w.Write(i.Identifier[:]) - totalBytes += int64(n) - if err != nil { - return totalBytes, err - } - - // Create a buffer to write the uint64 values - buf := make([]byte, 8) - - // Write StartPosition - binary.BigEndian.PutUint64(buf, i.StartPosition) - n, err = w.Write(buf) - totalBytes += int64(n) - if err != nil { - return totalBytes, err - } - - // Write Length - binary.BigEndian.PutUint64(buf, i.Length) - n, err = w.Write(buf) - totalBytes += int64(n) - if err != nil { - return totalBytes, err - } - - return totalBytes, nil -} - -// ReadIndexes reads and returns a list of Index structs from the given reader. -func ReadIndexes(r io.Reader) ([]Index, error) { - var indexes []Index - - for { - var idx Index - - // Read Identifier - if _, err := io.ReadFull(r, idx.Identifier[:]); err != nil { - if errors.Is(err, io.EOF) { - break // End of file, stop reading - } - return indexes, err - } - - // Read StartPosition - if err := binary.Read(r, binary.BigEndian, &idx.StartPosition); err != nil { - return indexes, err - } - - // Read Length - if err := binary.Read(r, binary.BigEndian, &idx.Length); err != nil { - return indexes, err - } - - indexes = append(indexes, idx) - } - - return indexes, nil -} diff --git a/lib/bio/ddidx/ddidx_test.go b/lib/bio/ddidx/ddidx_test.go deleted file mode 100644 index 2ff82f0..0000000 --- a/lib/bio/ddidx/ddidx_test.go +++ /dev/null @@ -1,44 +0,0 @@ -package ddidx - -import ( - "bytes" - "reflect" - "testing" -) - -func TestIndexWriteToAndReadIndexes(t *testing.T) { - // Prepare a slice of Index instances for testing - indexes := []Index{ - { - Identifier: [16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - StartPosition: 100, - Length: 200, - }, - { - Identifier: [16]byte{16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, - StartPosition: 300, - Length: 400, - }, - } - - // Create a buffer to write the indexes to - var buf bytes.Buffer - - // Write each index to the buffer - for _, idx := range indexes { - if _, err := idx.WriteTo(&buf); err != nil { - t.Fatalf("WriteTo failed: %v", err) - } - } - - // Now read the indexes back from the buffer - readIndexes, err := ReadIndexes(&buf) - if err != nil { - t.Fatalf("ReadIndexes failed: %v", err) - } - - // Compare the original indexes with the ones read back - if !reflect.DeepEqual(indexes, readIndexes) { - t.Errorf("Original indexes %+v do not match read indexes %+v", indexes, readIndexes) - } -} diff --git a/lib/bio/example_test.go b/lib/bio/example_test.go index e79fead..3f7e471 100644 --- a/lib/bio/example_test.go +++ b/lib/bio/example_test.go @@ -9,7 +9,6 @@ import ( "strings" "github.com/koeng101/dnadesign/lib/bio" - "github.com/koeng101/dnadesign/lib/bio/ddidx" "github.com/koeng101/dnadesign/lib/bio/fasta" "github.com/koeng101/dnadesign/lib/bio/fastq" "github.com/koeng101/dnadesign/lib/bio/sam" @@ -485,39 +484,3 @@ $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFF fmt.Println(reads[0].Identifier) // Output: af86ed57-1cfe-486f-8205-b2c8d1186454 } - -func ExampleFastqFromIndex() { - file := strings.NewReader(`@289a197e-4c05-4143-80e6-488e23044378 runid=bb4427242f6da39e67293199a11c6c4b6ab2b141 read=34575 ch=111 start_time=2023-12-29T16:06:13.719061-08:00 flow_cell_id=AQY258 protocol_group_id=nseq28 sample_id=build3-build3gg-u11 barcode=barcode06 barcode_alias=barcode06 parent_read_id=289a197e-4c05-4143-80e6-488e23044378 
basecall_model_version_id=dna_r10.4.1_e8.2_400bps_sup@v4.2.0 -TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG -+ -$%%&%%$$%&'+)**,-+)))+866788711112=>A?@@@BDB@>?746@?>A@D2@970,-+..*++++;662/.-.+,++,//+167>A@A@@B=<887-,'&&%%&''((5555644578::<==B?ABCIJA>>>>@DCAA99::>=<<<=67777+***)//+,,+)&&&+--.02:>442000/1225:=D?=<<=7;866/..../AAA226545+&%%$$ -@af86ed57-1cfe-486f-8205-b2c8d1186454 runid=bb4427242f6da39e67293199a11c6c4b6ab2b141 read=2233 ch=123 start_time=2023-12-29T10:04:32.719061-08:00 flow_cell_id=AQY258 protocol_group_id=nseq28 sample_id=build3-build3gg-u11 barcode=barcode07 barcode_alias=barcode07 parent_read_id=af86ed57-1cfe-486f-8205-b2c8d1186454 basecall_model_version_id=dna_r10.4.1_e8.2_400bps_sup@v4.2.0 -TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA -+ -$%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFFFGJ<<<<<=54455>@?>:::9..++?@BDCCDCGECFHD@>=<<==>@@B@?@@>>>==>>===>>>A?@ADFGDCA@?????CCCEFDDDDDGJODAA@A;;ABBD<=<:92222223:>>@?@@B?@=<62212=<<<=>AAB=<'&&&'-,-.,**)'&'(,,,-.114888&&&&&'+++++,,*`) - - parser := bio.NewFastqParser(file) - reads, _ := parser.Parse() - - // Create a ddidx - var indexes []ddidx.Index - - // Write the files to an io.Writer. - // All headers and all records implement io.WriterTo interfaces. - var buffer bytes.Buffer - var startPosition uint64 - for _, read := range reads { - // Normally, you would want to actually parse uuidBytes, but we don't care here. - var uuidBytes [16]byte - copy(uuidBytes[:], read.Identifier[:16]) - length, _ := read.WriteTo(&buffer) - indexes = append(indexes, ddidx.Index{Identifier: uuidBytes, StartPosition: startPosition, Length: uint64(length)}) - startPosition = startPosition + uint64(length) - } - - // Now, read a fastq from an index - read, _ := bio.FastqFromIndex(file, indexes[1].StartPosition, indexes[1].Length) - - fmt.Println(read.Quality) - // Output: $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFFFGJ<<<<<=54455>@?>:::9..++?@BDCCDCGECFHD@>=<<==>@@B@?@@>>>==>>===>>>A?@ADFGDCA@?????CCCEFDDDDDGJODAA@A;;ABBD<=<:92222223:>>@?@@B?@=<62212=<<<=>AAB=<'&&&'-,-.,**)'&'(,,,-.114888&&&&&'+++++,,* -} From c1b17cc494301dbc173caa366b64d6a3c669bf7d Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Wed, 28 Feb 2024 12:55:18 -0800 Subject: [PATCH 15/16] add work in progress to sequencing.go --- lib/sequencing/sequencing.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/sequencing/sequencing.go b/lib/sequencing/sequencing.go index f600175..0839139 100644 --- a/lib/sequencing/sequencing.go +++ b/lib/sequencing/sequencing.go @@ -1,5 +1,7 @@ /* Package sequencing contains functions associated with handling sequencing data. + +This is a work-in-progess, and not ready for production. 
*/ package sequencing From 1de5d7559bd7c2623d3fa6a9bd67191db88b05ec Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Wed, 28 Feb 2024 13:13:18 -0800 Subject: [PATCH 16/16] Fix pileup --- lib/bio/pileup/pileup.go | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/bio/pileup/pileup.go b/lib/bio/pileup/pileup.go index cfed71c..e87cfd9 100644 --- a/lib/bio/pileup/pileup.go +++ b/lib/bio/pileup/pileup.go @@ -39,7 +39,6 @@ import ( "bufio" "fmt" "io" - "regexp" "strconv" "strings" "unicode"
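
Taken together, these patches leave the channel-based helpers in a consistent, context-aware state: Pileup and Minimap2Channeled now accept a context.Context, and PileupChanneled bridges a sam.Alignment channel into the samtools pipeline. The sketch below (not part of the patch series) shows one way these pieces might be wired together with errgroup. The input file names, the value-typed reads returned by Parse, and the assumption that the caller, not Minimap2Channeled, closes samChan are all illustrative assumptions, since the sequencing pipeline itself is still a work in progress.

package main

import (
	"bytes"
	"context"
	"fmt"
	"log"
	"os"

	"github.com/koeng101/dnadesign/external/minimap2"
	"github.com/koeng101/dnadesign/external/samtools"
	"github.com/koeng101/dnadesign/lib/bio"
	"github.com/koeng101/dnadesign/lib/bio/fastq"
	"github.com/koeng101/dnadesign/lib/bio/sam"
	"golang.org/x/sync/errgroup"
)

func main() {
	// Hypothetical inputs; each consumer gets its own reader for the templates,
	// since an io.Reader can only be drained once.
	templatesForMinimap, err := os.Open("templates.fasta")
	if err != nil {
		log.Fatal(err)
	}
	defer templatesForMinimap.Close()
	templatesForPileup, err := os.Open("templates.fasta")
	if err != nil {
		log.Fatal(err)
	}
	defer templatesForPileup.Close()
	fastqFile, err := os.Open("reads.fastq")
	if err != nil {
		log.Fatal(err)
	}
	defer fastqFile.Close()

	g, ctx := errgroup.WithContext(context.Background())
	fastqChan := make(chan fastq.Read)
	samChan := make(chan sam.Alignment)
	var pileup bytes.Buffer

	// Feed fastq reads into the channel.
	g.Go(func() error {
		defer close(fastqChan)
		reads, err := bio.NewFastqParser(fastqFile).Parse()
		if err != nil {
			return err
		}
		for _, read := range reads {
			select {
			case fastqChan <- read:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		return nil
	})

	// Align reads against the templates, emitting SAM alignments.
	g.Go(func() error {
		// Assumes closing samChan is the caller's job, not Minimap2Channeled's.
		defer close(samChan)
		return minimap2.Minimap2Channeled(ctx, templatesForMinimap, fastqChan, samChan)
	})

	// Pile up the alignments into a buffer.
	g.Go(func() error {
		return samtools.PileupChanneled(ctx, templatesForPileup, samChan, &pileup)
	})

	if err := g.Wait(); err != nil {
		log.Fatal(err)
	}
	fmt.Print(pileup.String())
}

The design choice mirrors what the samtools and minimap2 wrappers already do internally: errgroup propagates the first error and cancels the shared context, while channels and io.Pipe keep reads streaming between stages without buffering whole files in memory.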