From e90080b83ab6c8104a41bc1ff5febbd855f7a7af Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 00:43:26 -0800 Subject: [PATCH 01/16] init --- lib/bio/bio.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/lib/bio/bio.go b/lib/bio/bio.go index bbe6369..99d13dc 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -325,3 +325,34 @@ func FilterData[Data DataTypes](ctx context.Context, input <-chan Data, output c } } } + +/* + +We have FromIndex parsers for data types that need it: +- genbank +- fasta +- fastq +- slow5 +- sam + +We do not have them for data types that do not need it: +- pileup.Line [always small] +- uniprot.Entry [file type too complex] +*/ + +// Indexable is an interface for DataTypes to satisfy if they are indexable. +type Indexable interface { + Identifier() string +} + +func GenbankFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (genbank.Genbank, error) { + return genbank.Genbank{}, nil +} + +func FastaFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (fasta.Record, error) { + return fasta.Record{}, nil +} + +func FastqFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (fastq.Read, error) { + return fastq.Read{}, nil +} From d1e8f39af57494cf4bb1459de0101ac48d7a178b Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 13:29:09 -0800 Subject: [PATCH 02/16] add some better sequencing utils --- external/samtools/samtools.go | 35 +++++ lib/bio/fastq/fastq.go | 11 ++ lib/sequencing/example_test.go | 228 +++++++++++++++------------------ lib/sequencing/sequencing.go | 54 ++++++++ 4 files changed, 205 insertions(+), 123 deletions(-) diff --git a/external/samtools/samtools.go b/external/samtools/samtools.go index 8c4b1d2..da977e6 100644 --- a/external/samtools/samtools.go +++ b/external/samtools/samtools.go @@ -10,6 +10,7 @@ import ( "os/exec" "syscall" + "github.com/koeng101/dnadesign/lib/bio/sam" "golang.org/x/sync/errgroup" ) @@ -125,3 +126,37 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro return nil } + +// PileupChanneled processes SAM alignments from a channel and sends pileup lines to another channel. +func PileupChanneled(ctx context.Context, templateFastas io.Reader, samChan <-chan sam.Alignment, w io.Writer) error { + g, ctx := errgroup.WithContext(ctx) + + // Create a pipe for writing SAM alignments and reading them as an io.Reader + samPr, samPw := io.Pipe() + + // Goroutine to consume SAM alignments and write them to the PipeWriter + g.Go(func() error { + defer samPw.Close() + for alignment := range samChan { + // Assuming the sam.Alignment type has a WriteTo method or similar to serialize it to the writer + _, err := alignment.WriteTo(samPw) + if err != nil { + return err // return error to be handled by errgroup + } + } + return nil + }) + + // Run Pileup function in a goroutine + g.Go(func() error { + return Pileup(templateFastas, samPr, w) // Runs Pileup, writing output to pileupPw + }) + + // Wait for all goroutines in the group to finish + if err := g.Wait(); err != nil { + return err // This will return the first non-nil error from the group of goroutines + } + + // At this point, all goroutines have finished successfully + return nil +} diff --git a/lib/bio/fastq/fastq.go b/lib/bio/fastq/fastq.go index 15abd7d..78aca1a 100644 --- a/lib/bio/fastq/fastq.go +++ b/lib/bio/fastq/fastq.go @@ -40,6 +40,17 @@ type Read struct { Quality string `json:"quality"` } +// DeepCopy deep copies a read. 
Used for when you want to modify optionals then +// pipe elsewhere. +func (r *Read) DeepCopy() Read { + newRead := Read{Identifier: r.Identifier, Sequence: r.Sequence, Quality: r.Quality} + newRead.Optionals = make(map[string]string) + for key, value := range r.Optionals { + newRead.Optionals[key] = value + } + return newRead +} + // Header is a blank struct, needed for compatibility with bio parsers. It contains nothing. type Header struct{} diff --git a/lib/sequencing/example_test.go b/lib/sequencing/example_test.go index b53a794..7330151 100644 --- a/lib/sequencing/example_test.go +++ b/lib/sequencing/example_test.go @@ -1,125 +1,107 @@ package sequencing_test -import ( - "bytes" - "context" - "fmt" - "log" - "os" - "os/exec" - - "github.com/koeng101/dnadesign/external/minimap2" - "github.com/koeng101/dnadesign/lib/bio" - "github.com/koeng101/dnadesign/lib/bio/fasta" - "github.com/koeng101/dnadesign/lib/bio/fastq" - "github.com/koeng101/dnadesign/lib/bio/sam" - "github.com/koeng101/dnadesign/lib/primers/pcr" - "github.com/koeng101/dnadesign/lib/transform" - "golang.org/x/sync/errgroup" -) - -func Example_ampliconAlignment() { - // This is currently a work-in-progress. Sequencing utilities are under - // development right now. - // - // - // Only run function if minimap2 is available - _, err := exec.LookPath("minimap2") - if err != nil { - fmt.Println("oligo2") - return - } - // First, let's define the type we are looking for: amplicons in a pool. - type Amplicon struct { - Identifier string - TemplateSequence string - ForwardPrimer string - ReversePrimer string - } - - // Next, let's define data we'll be working on. In particular, the - // templates and fastq files. - - /* - Data processing steps: - - 1. Simulate PCRs of amplicons - 2. Sort for the right barcodes - 3. Trim fastq reads - 4. Minimap2 fastq reads to amplicons - 5. Filter for primary alignments - */ - var amplicons []Amplicon - var templates []fasta.Record - pcrTm := 50.0 - - forward := "CCGTGCGACAAGATTTCAAG" - reverse := transform.ReverseComplement("CGGATCGAACTTAGGTAGCC") - oligo1 := Amplicon{Identifier: "oligo1", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTGTCTCAATGACCAAACCAACGCAAGTCTTAGTTCGTTCAGTCTCTATTTTATTCTTCATCACACTGTTGCACTTGGTTGTTGCAATGAGATTTCCTAGTATTTTCACTGCTGTGCTGAGACCCGGATCGAACTTAGGTAGCCT"} - oligo2 := Amplicon{Identifier: "oligo2", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTGTGCTATTTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCACTAGTCATAAT"} - oligo3 := Amplicon{Identifier: "oligo3", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"} - amplicons = []Amplicon{oligo1, oligo2, oligo3} - - // Simulate PCRs - for _, amplicon := range amplicons { - fragments, _ := pcr.Simulate([]string{amplicon.TemplateSequence}, pcrTm, false, []string{amplicon.ForwardPrimer, amplicon.ReversePrimer}) - if len(fragments) != 1 { - log.Fatalf("Should only get 1 fragment from PCR!") - } - // In case your template will have multiple fragments - for _, fragment := range fragments { - // Make sure to reset identifier if you have more than 1 fragment. 
- templates = append(templates, fasta.Record{Identifier: amplicon.Identifier, Sequence: fragment}) - } - } - var buf bytes.Buffer - for _, template := range templates { - _, _ = template.WriteTo(&buf) - } - - // Trim fastq reads. All the following processes (trimming, minimap2, - // filtering) are all done concurrently. - - // Setup barcodes and fastq files - barcode := "barcode06" - r, _ := os.Open("data/reads.fastq") - parser := bio.NewFastqParser(r) - - // Setup errorGroups and channels - ctx := context.Background() - errorGroup, ctx := errgroup.WithContext(ctx) - - fastqReads := make(chan fastq.Read) - fastqBarcoded := make(chan fastq.Read) - samReads := make(chan sam.Alignment) - samPrimary := make(chan sam.Alignment) - - // Read fastqs into channel - errorGroup.Go(func() error { - return parser.ParseToChannel(ctx, fastqReads, false) - }) - - // Filter the right barcode fastqs from channel - errorGroup.Go(func() error { - return bio.FilterData(ctx, fastqReads, fastqBarcoded, func(data fastq.Read) bool { return data.Optionals["barcode"] == barcode }) - }) - - // Run minimap - errorGroup.Go(func() error { - return minimap2.Minimap2Channeled(&buf, fastqBarcoded, samReads) - }) - - // Sort out primary alignments - errorGroup.Go(func() error { - return bio.FilterData(ctx, samReads, samPrimary, sam.Primary) - }) - - // Read all them alignments out into memory - var outputAlignments []sam.Alignment - for alignment := range samPrimary { - outputAlignments = append(outputAlignments, alignment) - } - - fmt.Println(outputAlignments[0].RNAME) - // Output: oligo2 -} +//func Example_ampliconAlignment() { +// // This is currently a work-in-progress. Sequencing utilities are under +// // development right now. +// // +// // +// // Only run function if minimap2 is available +// _, err := exec.LookPath("minimap2") +// if err != nil { +// fmt.Println("oligo2") +// return +// } +// // First, let's define the type we are looking for: amplicons in a pool. +// type Amplicon struct { +// Identifier string +// TemplateSequence string +// ForwardPrimer string +// ReversePrimer string +// } +// +// // Next, let's define data we'll be working on. In particular, the +// // templates and fastq files. +// +// /* +// Data processing steps: +// +// 1. Simulate PCRs of amplicons +// 2. Sort for the right barcodes +// 3. Trim fastq reads +// 4. Minimap2 fastq reads to amplicons +// 5. 
Filter for primary alignments +// */ +// var amplicons []Amplicon +// var templates []fasta.Record +// pcrTm := 50.0 +// +// forward := "CCGTGCGACAAGATTTCAAG" +// reverse := transform.ReverseComplement("CGGATCGAACTTAGGTAGCC") +// oligo1 := Amplicon{Identifier: "oligo1", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTGTCTCAATGACCAAACCAACGCAAGTCTTAGTTCGTTCAGTCTCTATTTTATTCTTCATCACACTGTTGCACTTGGTTGTTGCAATGAGATTTCCTAGTATTTTCACTGCTGTGCTGAGACCCGGATCGAACTTAGGTAGCCT"} +// oligo2 := Amplicon{Identifier: "oligo2", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTGTGCTATTTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCACTAGTCATAAT"} +// oligo3 := Amplicon{Identifier: "oligo3", ForwardPrimer: forward, ReversePrimer: reverse, TemplateSequence: "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"} +// amplicons = []Amplicon{oligo1, oligo2, oligo3} +// +// // Simulate PCRs +// for _, amplicon := range amplicons { +// fragments, _ := pcr.Simulate([]string{amplicon.TemplateSequence}, pcrTm, false, []string{amplicon.ForwardPrimer, amplicon.ReversePrimer}) +// if len(fragments) != 1 { +// log.Fatalf("Should only get 1 fragment from PCR!") +// } +// // In case your template will have multiple fragments +// for _, fragment := range fragments { +// // Make sure to reset identifier if you have more than 1 fragment. +// templates = append(templates, fasta.Record{Identifier: amplicon.Identifier, Sequence: fragment}) +// } +// } +// var buf bytes.Buffer +// for _, template := range templates { +// _, _ = template.WriteTo(&buf) +// } +// +// // Trim fastq reads. All the following processes (trimming, minimap2, +// // filtering) are all done concurrently. 
+// +// // Setup barcodes and fastq files +// barcode := "barcode06" +// r, _ := os.Open("data/reads.fastq") +// parser := bio.NewFastqParser(r) +// +// // Setup errorGroups and channels +// ctx := context.Background() +// errorGroup, ctx := errgroup.WithContext(ctx) +// +// fastqReads := make(chan fastq.Read) +// fastqBarcoded := make(chan fastq.Read) +// samReads := make(chan sam.Alignment) +// samPrimary := make(chan sam.Alignment) +// +// // Read fastqs into channel +// errorGroup.Go(func() error { +// return parser.ParseToChannel(ctx, fastqReads, false) +// }) +// +// // Filter the right barcode fastqs from channel +// errorGroup.Go(func() error { +// return bio.FilterData(ctx, fastqReads, fastqBarcoded, func(data fastq.Read) bool { return data.Optionals["barcode"] == barcode }) +// }) +// +// // Run minimap +// errorGroup.Go(func() error { +// return minimap2.Minimap2Channeled(&buf, fastqBarcoded, samReads) +// }) +// +// // Sort out primary alignments +// errorGroup.Go(func() error { +// return bio.FilterData(ctx, samReads, samPrimary, sam.Primary) +// }) +// +// // Read all them alignments out into memory +// var outputAlignments []sam.Alignment +// for alignment := range samPrimary { +// outputAlignments = append(outputAlignments, alignment) +// } +// +// fmt.Println(outputAlignments[0].RNAME) +// // Output: oligo2 +//} diff --git a/lib/sequencing/sequencing.go b/lib/sequencing/sequencing.go index fb049e5..f600175 100644 --- a/lib/sequencing/sequencing.go +++ b/lib/sequencing/sequencing.go @@ -2,3 +2,57 @@ Package sequencing contains functions associated with handling sequencing data. */ package sequencing + +import ( + "context" + + "github.com/koeng101/dnadesign/lib/align/megamash" + "github.com/koeng101/dnadesign/lib/bio/fastq" + "github.com/koeng101/dnadesign/lib/sequencing/barcoding" +) + +func MegamashFastq(ctx context.Context, megamashMap megamash.MegamashMap, input <-chan fastq.Read, output chan<- fastq.Read) error { + for { + select { + case <-ctx.Done(): + return ctx.Err() + case data, ok := <-input: + if !ok { + return nil + } + matches := megamashMap.Match(data.Sequence) + jsonStr, _ := megamash.MatchesToJSON(matches) + readCopy := data.DeepCopy() + readCopy.Optionals["megamash"] = jsonStr + select { + case output <- readCopy: + case <-ctx.Done(): + return ctx.Err() + } + } + } +} + +func DualBarcodeFastq(ctx context.Context, primerSet barcoding.DualBarcodePrimerSet, input <-chan fastq.Read, output chan<- fastq.Read) error { + for { + select { + case <-ctx.Done(): + return ctx.Err() + case data, ok := <-input: + if !ok { + return nil + } + well, err := barcoding.DualBarcodeSequence(data.Sequence, primerSet) + if err != nil { + return err + } + readCopy := data.DeepCopy() + readCopy.Optionals["dual_barcode"] = well + select { + case output <- readCopy: + case <-ctx.Done(): + return ctx.Err() + } + } + } +} From 8f279e7486c5785a6afa5ebb0c643a5ea25fae2c Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 15:06:25 -0800 Subject: [PATCH 03/16] add index and way to use index --- lib/bio/bio.go | 33 +++++++++++++++++++++++---------- lib/bio/example_test.go | 36 ++++++++++++++++++++++++++++++++++++ lib/bio/fastq/fastq.go | 12 +++++------- lib/go.mod | 2 ++ lib/go.sum | 2 ++ 5 files changed, 68 insertions(+), 17 deletions(-) diff --git a/lib/bio/bio.go b/lib/bio/bio.go index 99d13dc..fd8450c 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -11,8 +11,10 @@ package bio import ( "bufio" + "bytes" "context" "errors" + "fmt" "io" "math" @@ -345,14 +347,25 @@ type 
Indexable interface { Identifier() string } -func GenbankFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (genbank.Genbank, error) { - return genbank.Genbank{}, nil -} - -func FastaFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (fasta.Record, error) { - return fasta.Record{}, nil -} - -func FastqFromIndex(r io.ReaderAt, StartPosition uint64, Length uint64) (fastq.Read, error) { - return fastq.Read{}, nil +func FastqFromIndex(r io.ReaderAt, startPosition uint64, length uint64) (fastq.Read, error) { + dataBytes := make([]byte, length) + n, err := r.ReadAt(dataBytes, int64(startPosition)) + if err != nil { + if !errors.Is(err, io.EOF) { + return fastq.Read{}, err + } + dataBytes = dataBytes[:len(dataBytes)-1] + } + if int(n) != len(dataBytes) { + return fastq.Read{}, fmt.Errorf("Failed to retrieve correct number of bytes Note expected may be off by 1 if at EOF. Expected: %d, Got: %d", len(dataBytes), n) + } + parser := NewFastqParserWithMaxLineLength(bytes.NewReader(dataBytes), n) + fastqRead, err := parser.Next() + if err != nil { + if errors.Is(err, io.EOF) { + err = nil // EOF not treated as parsing error. + } + return fastqRead, err + } + return fastqRead, nil } diff --git a/lib/bio/example_test.go b/lib/bio/example_test.go index ce5955f..ff63e5a 100644 --- a/lib/bio/example_test.go +++ b/lib/bio/example_test.go @@ -8,7 +8,9 @@ import ( "os" "strings" + "github.com/google/uuid" "github.com/koeng101/dnadesign/lib/bio" + "github.com/koeng101/dnadesign/lib/bio/ddidx" "github.com/koeng101/dnadesign/lib/bio/fasta" "github.com/koeng101/dnadesign/lib/bio/fastq" "github.com/koeng101/dnadesign/lib/bio/sam" @@ -478,3 +480,37 @@ $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFF fmt.Println(reads[0].Identifier) // Output: af86ed57-1cfe-486f-8205-b2c8d1186454 } + +func ExampleFastqFromIndex() { + file := strings.NewReader(`@289a197e-4c05-4143-80e6-488e23044378 runid=bb4427242f6da39e67293199a11c6c4b6ab2b141 read=34575 ch=111 start_time=2023-12-29T16:06:13.719061-08:00 flow_cell_id=AQY258 protocol_group_id=nseq28 sample_id=build3-build3gg-u11 barcode=barcode06 barcode_alias=barcode06 parent_read_id=289a197e-4c05-4143-80e6-488e23044378 basecall_model_version_id=dna_r10.4.1_e8.2_400bps_sup@v4.2.0 +TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG ++ +$%%&%%$$%&'+)**,-+)))+866788711112=>A?@@@BDB@>?746@?>A@D2@970,-+..*++++;662/.-.+,++,//+167>A@A@@B=<887-,'&&%%&''((5555644578::<==B?ABCIJA>>>>@DCAA99::>=<<<=67777+***)//+,,+)&&&+--.02:>442000/1225:=D?=<<=7;866/..../AAA226545+&%%$$ +@af86ed57-1cfe-486f-8205-b2c8d1186454 runid=bb4427242f6da39e67293199a11c6c4b6ab2b141 read=2233 ch=123 start_time=2023-12-29T10:04:32.719061-08:00 flow_cell_id=AQY258 protocol_group_id=nseq28 sample_id=build3-build3gg-u11 barcode=barcode07 barcode_alias=barcode07 parent_read_id=af86ed57-1cfe-486f-8205-b2c8d1186454 basecall_model_version_id=dna_r10.4.1_e8.2_400bps_sup@v4.2.0 +TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA ++ 
+$%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFFFGJ<<<<<=54455>@?>:::9..++?@BDCCDCGECFHD@>=<<==>@@B@?@@>>>==>>===>>>A?@ADFGDCA@?????CCCEFDDDDDGJODAA@A;;ABBD<=<:92222223:>>@?@@B?@=<62212=<<<=>AAB=<'&&&'-,-.,**)'&'(,,,-.114888&&&&&'+++++,,*`) + + parser := bio.NewFastqParser(file) + reads, _ := parser.Parse() + + // Create a ddidx + var indexes []ddidx.Index + + // Write the files to an io.Writer. + // All headers and all records implement io.WriterTo interfaces. + var buffer bytes.Buffer + var startPosition uint64 + for _, read := range reads { + uuidBytes, _ := uuid.Parse(read.Identifier) + length, _ := read.WriteTo(&buffer) + indexes = append(indexes, ddidx.Index{Identifier: uuidBytes, StartPosition: startPosition, Length: uint64(length)}) + startPosition = startPosition + uint64(length) + } + + // Now, read a fastq from an index + read, _ := bio.FastqFromIndex(file, indexes[1].StartPosition, indexes[1].Length) + + fmt.Println(read.Quality) + // Output: $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFFFGJ<<<<<=54455>@?>:::9..++?@BDCCDCGECFHD@>=<<==>@@B@?@@>>>==>>===>>>A?@ADFGDCA@?????CCCEFDDDDDGJODAA@A;;ABBD<=<:92222223:>>@?@@B?@=<62212=<<<=>AAB=<'&&&'-,-.,**)'&'(,,,-.114888&&&&&'+++++,,* +} diff --git a/lib/bio/fastq/fastq.go b/lib/bio/fastq/fastq.go index 15abd7d..806d074 100644 --- a/lib/bio/fastq/fastq.go +++ b/lib/bio/fastq/fastq.go @@ -161,7 +161,11 @@ func (parser *Parser) Next() (Read, error) { if len(line) <= 1 { // newline delimiter - actually checking for empty line return Read{}, fmt.Errorf("empty quality sequence for %q, got to line %d: %w", seqIdentifier, parser.line, err) } - quality = string(line[:len(line)-1]) + if parser.atEOF { + quality = string(line) + } else { + quality = string(line[:len(line)-1]) + } // Parsing ended. Check for inconsistencies. if lookingForIdentifier { @@ -179,12 +183,6 @@ func (parser *Parser) Next() (Read, error) { return fastq, nil } -// Reset discards all data in buffer and resets state. 
-func (parser *Parser) Reset(r io.Reader) { - parser.reader.Reset(r) - parser.line = 0 -} - /****************************************************************************** Start of Write functions diff --git a/lib/go.mod b/lib/go.mod index a062458..28b78fa 100644 --- a/lib/go.mod +++ b/lib/go.mod @@ -6,3 +6,5 @@ require ( github.com/google/go-cmp v0.6.0 golang.org/x/sync v0.5.0 ) + +require github.com/google/uuid v1.6.0 // indirect diff --git a/lib/go.sum b/lib/go.sum index e56dc15..f2f0d06 100644 --- a/lib/go.sum +++ b/lib/go.sum @@ -1,4 +1,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= From 4208874835d4915d6cb3737d2073f46cfa3942d7 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 15:11:47 -0800 Subject: [PATCH 04/16] add ddidx --- lib/bio/ddidx/ddidx.go | 83 +++++++++++++++++++++++++++++++++++++ lib/bio/ddidx/ddidx_test.go | 44 ++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 lib/bio/ddidx/ddidx.go create mode 100644 lib/bio/ddidx/ddidx_test.go diff --git a/lib/bio/ddidx/ddidx.go b/lib/bio/ddidx/ddidx.go new file mode 100644 index 0000000..1accfcc --- /dev/null +++ b/lib/bio/ddidx/ddidx.go @@ -0,0 +1,83 @@ +/* +Package ddidx contains information about the dnadesign index format. +*/ +package ddidx + +import ( + "encoding/binary" + "errors" + "io" +) + +// Index is a 32 byte index for individual objects. +type Index struct { + Identifier [16]byte + StartPosition uint64 + Length uint64 +} + +// WriteTo writes the binary representation of the Index to the given writer. +// It returns the number of bytes written and any error encountered. +func (i *Index) WriteTo(w io.Writer) (int64, error) { + // The total bytes written + var totalBytes int64 + + // Write Identifier + n, err := w.Write(i.Identifier[:]) + totalBytes += int64(n) + if err != nil { + return totalBytes, err + } + + // Create a buffer to write the uint64 values + buf := make([]byte, 8) + + // Write StartPosition + binary.BigEndian.PutUint64(buf, i.StartPosition) + n, err = w.Write(buf) + totalBytes += int64(n) + if err != nil { + return totalBytes, err + } + + // Write Length + binary.BigEndian.PutUint64(buf, i.Length) + n, err = w.Write(buf) + totalBytes += int64(n) + if err != nil { + return totalBytes, err + } + + return totalBytes, nil +} + +// ReadIndexes reads and returns a list of Index structs from the given reader. 
+func ReadIndexes(r io.Reader) ([]Index, error) { + var indexes []Index + + for { + var idx Index + + // Read Identifier + if _, err := io.ReadFull(r, idx.Identifier[:]); err != nil { + if errors.Is(err, io.EOF) { + break // End of file, stop reading + } + return indexes, err + } + + // Read StartPosition + if err := binary.Read(r, binary.BigEndian, &idx.StartPosition); err != nil { + return indexes, err + } + + // Read Length + if err := binary.Read(r, binary.BigEndian, &idx.Length); err != nil { + return indexes, err + } + + indexes = append(indexes, idx) + } + + return indexes, nil +} diff --git a/lib/bio/ddidx/ddidx_test.go b/lib/bio/ddidx/ddidx_test.go new file mode 100644 index 0000000..2ff82f0 --- /dev/null +++ b/lib/bio/ddidx/ddidx_test.go @@ -0,0 +1,44 @@ +package ddidx + +import ( + "bytes" + "reflect" + "testing" +) + +func TestIndexWriteToAndReadIndexes(t *testing.T) { + // Prepare a slice of Index instances for testing + indexes := []Index{ + { + Identifier: [16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + StartPosition: 100, + Length: 200, + }, + { + Identifier: [16]byte{16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, + StartPosition: 300, + Length: 400, + }, + } + + // Create a buffer to write the indexes to + var buf bytes.Buffer + + // Write each index to the buffer + for _, idx := range indexes { + if _, err := idx.WriteTo(&buf); err != nil { + t.Fatalf("WriteTo failed: %v", err) + } + } + + // Now read the indexes back from the buffer + readIndexes, err := ReadIndexes(&buf) + if err != nil { + t.Fatalf("ReadIndexes failed: %v", err) + } + + // Compare the original indexes with the ones read back + if !reflect.DeepEqual(indexes, readIndexes) { + t.Errorf("Original indexes %+v do not match read indexes %+v", indexes, readIndexes) + } +} From 4e401d3d63a7535c860582caa204d7c6ac4c9cbb Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 18:50:14 -0800 Subject: [PATCH 05/16] add example test --- lib/bio/example_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/bio/example_test.go b/lib/bio/example_test.go index ff63e5a..4285614 100644 --- a/lib/bio/example_test.go +++ b/lib/bio/example_test.go @@ -8,7 +8,6 @@ import ( "os" "strings" - "github.com/google/uuid" "github.com/koeng101/dnadesign/lib/bio" "github.com/koeng101/dnadesign/lib/bio/ddidx" "github.com/koeng101/dnadesign/lib/bio/fasta" @@ -502,7 +501,9 @@ $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFF var buffer bytes.Buffer var startPosition uint64 for _, read := range reads { - uuidBytes, _ := uuid.Parse(read.Identifier) + // Normally, you would want to actually parse uuidBytes, but we don't care here. 
+ var uuidBytes [16]byte + copy(uuidBytes[:], read.Identifier[:16]) length, _ := read.WriteTo(&buffer) indexes = append(indexes, ddidx.Index{Identifier: uuidBytes, StartPosition: startPosition, Length: uint64(length)}) startPosition = startPosition + uint64(length) From 0da01d1c5eea25bf076861809b25bb3ea6e31f9f Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Tue, 13 Feb 2024 20:09:53 -0800 Subject: [PATCH 06/16] Add cli --- cli/cmd/fastz.go | 224 +++++++++++++++++++++++++++++++++ cli/cmd/root.go | 51 ++++++++ cli/go.mod | 14 +++ cli/go.sum | 20 +++ cli/main.go | 11 ++ go.work | 7 +- lib/align/megamash/megamash.go | 2 +- lib/go.mod | 3 +- lib/go.sum | 4 +- 9 files changed, 328 insertions(+), 8 deletions(-) create mode 100644 cli/cmd/fastz.go create mode 100644 cli/cmd/root.go create mode 100644 cli/go.mod create mode 100644 cli/go.sum create mode 100644 cli/main.go diff --git a/cli/cmd/fastz.go b/cli/cmd/fastz.go new file mode 100644 index 0000000..80a57d6 --- /dev/null +++ b/cli/cmd/fastz.go @@ -0,0 +1,224 @@ +package cmd + +import ( + "context" + "encoding/csv" + "fmt" + "io" + "os" + "runtime" + + "github.com/google/uuid" + "github.com/klauspost/compress/zstd" + "github.com/koeng101/dnadesign/lib/align/megamash" + "github.com/koeng101/dnadesign/lib/bio" + "github.com/koeng101/dnadesign/lib/bio/ddidx" + "github.com/koeng101/dnadesign/lib/bio/fasta" + "github.com/koeng101/dnadesign/lib/bio/fastq" + "github.com/koeng101/dnadesign/lib/sequencing" + "github.com/koeng101/dnadesign/lib/sequencing/barcoding" + "github.com/spf13/cobra" + "gitlab.com/rackn/seekable-zstd" + "golang.org/x/sync/errgroup" +) + +// fastzCmd represents the fastz command +var fastzCmd = &cobra.Command{ + Use: "fastz", + Short: "Compresses FASTQ files using zstd with additional indexing", + Long: `fastz is a tool for compressing FASTQ files using zstd compression, while also generating a .ddidx index file. +The command requires a primer set file and a template map file to function properly. The output is a zstd compressed FASTQ file streamed to stdout, and a .ddidx index file is generated at the specified output location. + +This command also supports optional parameters for adjusting the k-mer size and threshold used in megamash, as well as a score parameter for filtering. 
+ +Usage example: +cat input.fastq | ./dnadesign fastz --primerSet path/to/primerSet --templateMap path/to/templateMap --ddidxOutput path/to/output.ddidx --kmerSize 16 --threshold 10 --score 0.8 > output.fastq.zstd`, + Run: func(cmd *cobra.Command, args []string) { + // You can retrieve the flag values here and add your logic for processing the FASTQ file + primerSetCsvLocation, _ := cmd.Flags().GetString("primerSet") + templateMapLocation, _ := cmd.Flags().GetString("templateMap") + ddidxOutputLocation, _ := cmd.Flags().GetString("ddidxOutput") + kmerSize, _ := cmd.Flags().GetUint("kmerSize") + threshold, _ := cmd.Flags().GetUint("threshold") + score, _ := cmd.Flags().GetFloat64("score") + cpus, _ := cmd.Flags().GetInt("cpus") + + // Open the primerSet CSV file + primerSetCsv, err := os.Open(primerSetCsvLocation) + if err != nil { + // Handle error + fmt.Println("Error opening primer set CSV:", err) + return + } + defer primerSetCsv.Close() // Make sure to close the file when you're done + + // Open the templateMap file + templateMap, err := os.Open(templateMapLocation) + if err != nil { + // Handle error + fmt.Println("Error opening template map:", err) + return + } + defer templateMap.Close() // Make sure to close the file when you're done + + // Create/Open the ddidxOutput file for writing + // If you only need to write to it, use os.Create to create or truncate an existing file + ddidxOutput, err := os.Create(ddidxOutputLocation) + if err != nil { + // Handle error + fmt.Println("Error creating/opening ddidx output file:", err) + return + } + defer ddidxOutput.Close() // Make sure to close the file when you're done + + /* + Step 1: Parse initial data sets + */ + // Read primer set + primerSet, err := barcoding.ParseDualPrimerSet(primerSetCsv) + if err != nil { + fmt.Fprintf(os.Stderr, "Error parsing primerset: %v\n", err) + os.Exit(1) + } + + // Read template map + var templates []fasta.Record + reader := csv.NewReader(templateMap) + + for { + // Read each record from csv + record, err := reader.Read() + // Break the loop at the end of the file + if err == io.EOF { + break + } + // Handle any other error + if err != nil { + fmt.Fprintf(os.Stderr, "Error parsing templateMap: %v\n", err) + os.Exit(1) + } + + if len(record) == 2 { + templates = append(templates, fasta.Record{Identifier: record[0], Sequence: record[1]}) + } + } + + /* + Step 2: setup megamash + */ + m, err := megamash.NewMegamashMap(templates, kmerSize, threshold, score) + if err != nil { + fmt.Fprintf(os.Stderr, "Error creating megamash: %v\n", err) + os.Exit(1) + } + + /* + Step 3: setup concurrent processing. + */ + parser := bio.NewFastqParser(os.Stdin) + ctx := context.Background() + errorGroup, ctx := errgroup.WithContext(ctx) + + fastqReads := make(chan fastq.Read) + fastqBarcoded := make(chan fastq.Read) + fastqBarcodedFiltered := make(chan fastq.Read) + fastqBarcodedFilteredMegamashed := make(chan fastq.Read) + + // Read fastqs into channel + errorGroup.Go(func() error { + return parser.ParseToChannel(ctx, fastqReads, false) + }) + // Barcoding can be an expensive operation + errorGroup.Go(func() error { + // We're going to start multiple workers within this errorGroup. This + // helps when doing computationally intensive operations on channels. + return bio.RunWorkers(ctx, cpus, fastqBarcoded, func(ctx context.Context) error { + return sequencing.DualBarcodeFastq(ctx, primerSet, fastqReads, fastqBarcoded) + }) + }) + // Filtering is a cheap operation, so we only have 1 worker doing it. 
+ errorGroup.Go(func() error { + return bio.RunWorkers(ctx, 1, fastqBarcodedFiltered, func(ctx context.Context) error { + return bio.FilterData(ctx, fastqBarcoded, fastqBarcodedFiltered, func(data fastq.Read) bool { + _, ok := data.Optionals["dual_barcode"] + return ok + }) + }) + }) + // Megamash is very expensive, so we spawn many works to do it. + errorGroup.Go(func() error { + return bio.RunWorkers(ctx, cpus, fastqBarcodedFilteredMegamashed, func(ctx context.Context) error { + return sequencing.MegamashFastq(ctx, m, fastqBarcodedFiltered, fastqBarcodedFilteredMegamashed) + }) + }) + + /* + Step 4: Write to stdout + */ + // Setup seekable zstd + // Initialize the zstd encoder with desired settings + encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBestCompression)) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to create zstd encoder: %v\n", err) + os.Exit(1) + } + defer encoder.Close() + + // Create a seekable zstd writer on the temp file + writer, err := seekable.NewWriter(os.Stdout, encoder) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to create seekable zstd writer: %v\n", err) + os.Exit(1) + } + // Now write to stdout + var indexes []ddidx.Index + var startPosition uint64 + for read := range fastqBarcodedFilteredMegamashed { + writtenBytes, err := read.WriteTo(writer) + if err != nil { + fmt.Fprintf(os.Stderr, "Error writing to stdout: %v\n", err) + os.Exit(1) + } + identifierBytes, err := uuid.Parse(read.Identifier) + if err != nil { + fmt.Fprintf(os.Stderr, "Identifier cannot be written as 16byte uuid: %s . Got error: %v\n", read.Identifier, err) + os.Exit(1) + } + indexes = append(indexes, ddidx.Index{Identifier: identifierBytes, StartPosition: startPosition, Length: uint64(writtenBytes)}) + startPosition = startPosition + uint64(writtenBytes) + } + // Close the writer to flush the seek table + if err := writer.Close(); err != nil { + fmt.Fprintf(os.Stderr, "Failed to close seekable zstd writer: %v\n", err) + os.Exit(1) + } + // Now write ddidx file + for _, index := range indexes { + _, err := index.WriteTo(ddidxOutput) + if err != nil { + fmt.Fprintf(os.Stderr, "Error writing to ddidx: %v\n", err) + os.Exit(1) + } + } + return + }, +} + +func init() { + rootCmd.AddCommand(fastzCmd) + + // Defining flags for primerSet and templateMap files, and the output location for the ddidx file + fastzCmd.Flags().String("primerSet", "", "Path to the primer set file") + fastzCmd.Flags().String("templateMap", "", "Path to the template map file") + fastzCmd.Flags().String("ddidxOutput", "", "Output location for the .ddidx index file") + fastzCmd.Flags().Uint("kmerSize", 16, "K-mer size for megamash") + fastzCmd.Flags().Uint("threshold", 10, "Threshold for megamash") + fastzCmd.Flags().Float64("score", 0.8, "Score for filtering") + defaultCPUs := runtime.NumCPU() + fastzCmd.Flags().Int("cpus", defaultCPUs, "Number of CPUs to use") + + // Marking the flags as required + fastzCmd.MarkFlagRequired("primerSet") + fastzCmd.MarkFlagRequired("templateMap") + fastzCmd.MarkFlagRequired("ddidxOutput") +} diff --git a/cli/cmd/root.go b/cli/cmd/root.go new file mode 100644 index 0000000..7376031 --- /dev/null +++ b/cli/cmd/root.go @@ -0,0 +1,51 @@ +/* +Copyright © 2024 NAME HERE + +*/ +package cmd + +import ( + "os" + + "github.com/spf13/cobra" +) + + + +// rootCmd represents the base command when called without any subcommands +var rootCmd = &cobra.Command{ + Use: "cli", + Short: "A brief description of your application", + Long: `A longer description that spans multiple 
lines and likely contains +examples and usage of using your application. For example: + +Cobra is a CLI library for Go that empowers applications. +This application is a tool to generate the needed files +to quickly create a Cobra application.`, + // Uncomment the following line if your bare application + // has an action associated with it: + // Run: func(cmd *cobra.Command, args []string) { }, +} + +// Execute adds all child commands to the root command and sets flags appropriately. +// This is called by main.main(). It only needs to happen once to the rootCmd. +func Execute() { + err := rootCmd.Execute() + if err != nil { + os.Exit(1) + } +} + +func init() { + // Here you will define your flags and configuration settings. + // Cobra supports persistent flags, which, if defined here, + // will be global for your application. + + // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.cli.yaml)") + + // Cobra also supports local flags, which will only run + // when this action is called directly. + rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") +} + + diff --git a/cli/go.mod b/cli/go.mod new file mode 100644 index 0000000..dc0a8df --- /dev/null +++ b/cli/go.mod @@ -0,0 +1,14 @@ +module github.com/koeng101/dnadesign/cli + +go 1.22.0 + +require ( + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/klauspost/compress v1.17.6 // indirect + github.com/spf13/cobra v1.8.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + gitlab.com/rackn/seekable-zstd v0.8.2 // indirect + gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93 // indirect +) diff --git a/cli/go.sum b/cli/go.sum new file mode 100644 index 0000000..5f68dc6 --- /dev/null +++ b/cli/go.sum @@ -0,0 +1,20 @@ +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI= +github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +gitlab.com/rackn/seekable-zstd v0.8.2 h1:ARgDc1HRo/HnNR2NHsAoaxW2fE0vo0d+yI1+WvrIjF0= +gitlab.com/rackn/seekable-zstd v0.8.2/go.mod h1:9z8nf3qNXOi73VRm7KQgTmI3T0tz9YzDKKL7fzEBz9M= +gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93 h1:lXoXk/e9YrtTyWzNZs1ak/ijpwZQDaJLEwKCjhp/dCw= +gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93/go.mod h1:pXhP0EyrEy0pGf2DW4vTKub/As/UiamLFaZ1Q9YaFTs= 
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/cli/main.go b/cli/main.go new file mode 100644 index 0000000..3ce1f74 --- /dev/null +++ b/cli/main.go @@ -0,0 +1,11 @@ +/* +Copyright © 2024 NAME HERE + +*/ +package main + +import "github.com/koeng101/dnadesign/cli/cmd" + +func main() { + cmd.Execute() +} diff --git a/go.work b/go.work index 2baeb4e..a99648e 100644 --- a/go.work +++ b/go.work @@ -1,6 +1,7 @@ go 1.22.0 use ( - ./lib - ./external - ) + ./cli + ./external + ./lib +) diff --git a/lib/align/megamash/megamash.go b/lib/align/megamash/megamash.go index 0893092..9c59fa4 100644 --- a/lib/align/megamash/megamash.go +++ b/lib/align/megamash/megamash.go @@ -32,7 +32,7 @@ func StandardizedDNA(sequence string) string { var ( DefaultKmerSize uint = 16 DefaultMinimalKmerCount uint = 10 - DefaultScoreThreshold float64 = 0.2 + DefaultScoreThreshold float64 = 0.5 ) type MegamashMap struct { diff --git a/lib/go.mod b/lib/go.mod index 28b78fa..15e101f 100644 --- a/lib/go.mod +++ b/lib/go.mod @@ -4,7 +4,6 @@ go 1.22.0 require ( github.com/google/go-cmp v0.6.0 + github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117 golang.org/x/sync v0.5.0 ) - -require github.com/google/uuid v1.6.0 // indirect diff --git a/lib/go.sum b/lib/go.sum index f2f0d06..440d22d 100644 --- a/lib/go.sum +++ b/lib/go.sum @@ -1,6 +1,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117 h1:MLWgADbigSsAmDP3yG93ESlN0Ek9QLtH5uHigmWVXwg= +github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117/go.mod h1:nb80z/jm5HMCxfNZ50cBJa5TffkXxpY9okvqnBj8RrM= golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= From 56e484d09400f96955924ba0f9966923e48e752d Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:21:49 -0800 Subject: [PATCH 07/16] go work --- cli/cmd/fastz.go | 224 --------------------------------- cli/cmd/root.go | 51 -------- cli/go.mod | 14 --- cli/go.sum | 20 --- cli/main.go | 11 -- go.work | 1 - lib/align/megamash/megamash.go | 2 + lib/bio/fastq/fastq.go | 12 +- 8 files changed, 12 insertions(+), 323 deletions(-) delete mode 100644 cli/cmd/fastz.go delete mode 100644 cli/cmd/root.go delete mode 100644 cli/go.mod delete mode 100644 cli/go.sum delete mode 100644 cli/main.go diff --git a/cli/cmd/fastz.go b/cli/cmd/fastz.go deleted file mode 100644 index 80a57d6..0000000 --- a/cli/cmd/fastz.go +++ /dev/null @@ -1,224 +0,0 @@ -package cmd - -import ( - "context" - "encoding/csv" - "fmt" - "io" - "os" - "runtime" - - "github.com/google/uuid" - "github.com/klauspost/compress/zstd" - "github.com/koeng101/dnadesign/lib/align/megamash" - "github.com/koeng101/dnadesign/lib/bio" - "github.com/koeng101/dnadesign/lib/bio/ddidx" - "github.com/koeng101/dnadesign/lib/bio/fasta" - "github.com/koeng101/dnadesign/lib/bio/fastq" - "github.com/koeng101/dnadesign/lib/sequencing" - "github.com/koeng101/dnadesign/lib/sequencing/barcoding" - "github.com/spf13/cobra" - 
"gitlab.com/rackn/seekable-zstd" - "golang.org/x/sync/errgroup" -) - -// fastzCmd represents the fastz command -var fastzCmd = &cobra.Command{ - Use: "fastz", - Short: "Compresses FASTQ files using zstd with additional indexing", - Long: `fastz is a tool for compressing FASTQ files using zstd compression, while also generating a .ddidx index file. -The command requires a primer set file and a template map file to function properly. The output is a zstd compressed FASTQ file streamed to stdout, and a .ddidx index file is generated at the specified output location. - -This command also supports optional parameters for adjusting the k-mer size and threshold used in megamash, as well as a score parameter for filtering. - -Usage example: -cat input.fastq | ./dnadesign fastz --primerSet path/to/primerSet --templateMap path/to/templateMap --ddidxOutput path/to/output.ddidx --kmerSize 16 --threshold 10 --score 0.8 > output.fastq.zstd`, - Run: func(cmd *cobra.Command, args []string) { - // You can retrieve the flag values here and add your logic for processing the FASTQ file - primerSetCsvLocation, _ := cmd.Flags().GetString("primerSet") - templateMapLocation, _ := cmd.Flags().GetString("templateMap") - ddidxOutputLocation, _ := cmd.Flags().GetString("ddidxOutput") - kmerSize, _ := cmd.Flags().GetUint("kmerSize") - threshold, _ := cmd.Flags().GetUint("threshold") - score, _ := cmd.Flags().GetFloat64("score") - cpus, _ := cmd.Flags().GetInt("cpus") - - // Open the primerSet CSV file - primerSetCsv, err := os.Open(primerSetCsvLocation) - if err != nil { - // Handle error - fmt.Println("Error opening primer set CSV:", err) - return - } - defer primerSetCsv.Close() // Make sure to close the file when you're done - - // Open the templateMap file - templateMap, err := os.Open(templateMapLocation) - if err != nil { - // Handle error - fmt.Println("Error opening template map:", err) - return - } - defer templateMap.Close() // Make sure to close the file when you're done - - // Create/Open the ddidxOutput file for writing - // If you only need to write to it, use os.Create to create or truncate an existing file - ddidxOutput, err := os.Create(ddidxOutputLocation) - if err != nil { - // Handle error - fmt.Println("Error creating/opening ddidx output file:", err) - return - } - defer ddidxOutput.Close() // Make sure to close the file when you're done - - /* - Step 1: Parse initial data sets - */ - // Read primer set - primerSet, err := barcoding.ParseDualPrimerSet(primerSetCsv) - if err != nil { - fmt.Fprintf(os.Stderr, "Error parsing primerset: %v\n", err) - os.Exit(1) - } - - // Read template map - var templates []fasta.Record - reader := csv.NewReader(templateMap) - - for { - // Read each record from csv - record, err := reader.Read() - // Break the loop at the end of the file - if err == io.EOF { - break - } - // Handle any other error - if err != nil { - fmt.Fprintf(os.Stderr, "Error parsing templateMap: %v\n", err) - os.Exit(1) - } - - if len(record) == 2 { - templates = append(templates, fasta.Record{Identifier: record[0], Sequence: record[1]}) - } - } - - /* - Step 2: setup megamash - */ - m, err := megamash.NewMegamashMap(templates, kmerSize, threshold, score) - if err != nil { - fmt.Fprintf(os.Stderr, "Error creating megamash: %v\n", err) - os.Exit(1) - } - - /* - Step 3: setup concurrent processing. 
- */ - parser := bio.NewFastqParser(os.Stdin) - ctx := context.Background() - errorGroup, ctx := errgroup.WithContext(ctx) - - fastqReads := make(chan fastq.Read) - fastqBarcoded := make(chan fastq.Read) - fastqBarcodedFiltered := make(chan fastq.Read) - fastqBarcodedFilteredMegamashed := make(chan fastq.Read) - - // Read fastqs into channel - errorGroup.Go(func() error { - return parser.ParseToChannel(ctx, fastqReads, false) - }) - // Barcoding can be an expensive operation - errorGroup.Go(func() error { - // We're going to start multiple workers within this errorGroup. This - // helps when doing computationally intensive operations on channels. - return bio.RunWorkers(ctx, cpus, fastqBarcoded, func(ctx context.Context) error { - return sequencing.DualBarcodeFastq(ctx, primerSet, fastqReads, fastqBarcoded) - }) - }) - // Filtering is a cheap operation, so we only have 1 worker doing it. - errorGroup.Go(func() error { - return bio.RunWorkers(ctx, 1, fastqBarcodedFiltered, func(ctx context.Context) error { - return bio.FilterData(ctx, fastqBarcoded, fastqBarcodedFiltered, func(data fastq.Read) bool { - _, ok := data.Optionals["dual_barcode"] - return ok - }) - }) - }) - // Megamash is very expensive, so we spawn many works to do it. - errorGroup.Go(func() error { - return bio.RunWorkers(ctx, cpus, fastqBarcodedFilteredMegamashed, func(ctx context.Context) error { - return sequencing.MegamashFastq(ctx, m, fastqBarcodedFiltered, fastqBarcodedFilteredMegamashed) - }) - }) - - /* - Step 4: Write to stdout - */ - // Setup seekable zstd - // Initialize the zstd encoder with desired settings - encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBestCompression)) - if err != nil { - fmt.Fprintf(os.Stderr, "Failed to create zstd encoder: %v\n", err) - os.Exit(1) - } - defer encoder.Close() - - // Create a seekable zstd writer on the temp file - writer, err := seekable.NewWriter(os.Stdout, encoder) - if err != nil { - fmt.Fprintf(os.Stderr, "Failed to create seekable zstd writer: %v\n", err) - os.Exit(1) - } - // Now write to stdout - var indexes []ddidx.Index - var startPosition uint64 - for read := range fastqBarcodedFilteredMegamashed { - writtenBytes, err := read.WriteTo(writer) - if err != nil { - fmt.Fprintf(os.Stderr, "Error writing to stdout: %v\n", err) - os.Exit(1) - } - identifierBytes, err := uuid.Parse(read.Identifier) - if err != nil { - fmt.Fprintf(os.Stderr, "Identifier cannot be written as 16byte uuid: %s . 
Got error: %v\n", read.Identifier, err) - os.Exit(1) - } - indexes = append(indexes, ddidx.Index{Identifier: identifierBytes, StartPosition: startPosition, Length: uint64(writtenBytes)}) - startPosition = startPosition + uint64(writtenBytes) - } - // Close the writer to flush the seek table - if err := writer.Close(); err != nil { - fmt.Fprintf(os.Stderr, "Failed to close seekable zstd writer: %v\n", err) - os.Exit(1) - } - // Now write ddidx file - for _, index := range indexes { - _, err := index.WriteTo(ddidxOutput) - if err != nil { - fmt.Fprintf(os.Stderr, "Error writing to ddidx: %v\n", err) - os.Exit(1) - } - } - return - }, -} - -func init() { - rootCmd.AddCommand(fastzCmd) - - // Defining flags for primerSet and templateMap files, and the output location for the ddidx file - fastzCmd.Flags().String("primerSet", "", "Path to the primer set file") - fastzCmd.Flags().String("templateMap", "", "Path to the template map file") - fastzCmd.Flags().String("ddidxOutput", "", "Output location for the .ddidx index file") - fastzCmd.Flags().Uint("kmerSize", 16, "K-mer size for megamash") - fastzCmd.Flags().Uint("threshold", 10, "Threshold for megamash") - fastzCmd.Flags().Float64("score", 0.8, "Score for filtering") - defaultCPUs := runtime.NumCPU() - fastzCmd.Flags().Int("cpus", defaultCPUs, "Number of CPUs to use") - - // Marking the flags as required - fastzCmd.MarkFlagRequired("primerSet") - fastzCmd.MarkFlagRequired("templateMap") - fastzCmd.MarkFlagRequired("ddidxOutput") -} diff --git a/cli/cmd/root.go b/cli/cmd/root.go deleted file mode 100644 index 7376031..0000000 --- a/cli/cmd/root.go +++ /dev/null @@ -1,51 +0,0 @@ -/* -Copyright © 2024 NAME HERE - -*/ -package cmd - -import ( - "os" - - "github.com/spf13/cobra" -) - - - -// rootCmd represents the base command when called without any subcommands -var rootCmd = &cobra.Command{ - Use: "cli", - Short: "A brief description of your application", - Long: `A longer description that spans multiple lines and likely contains -examples and usage of using your application. For example: - -Cobra is a CLI library for Go that empowers applications. -This application is a tool to generate the needed files -to quickly create a Cobra application.`, - // Uncomment the following line if your bare application - // has an action associated with it: - // Run: func(cmd *cobra.Command, args []string) { }, -} - -// Execute adds all child commands to the root command and sets flags appropriately. -// This is called by main.main(). It only needs to happen once to the rootCmd. -func Execute() { - err := rootCmd.Execute() - if err != nil { - os.Exit(1) - } -} - -func init() { - // Here you will define your flags and configuration settings. - // Cobra supports persistent flags, which, if defined here, - // will be global for your application. - - // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.cli.yaml)") - - // Cobra also supports local flags, which will only run - // when this action is called directly. 
- rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") -} - - diff --git a/cli/go.mod b/cli/go.mod deleted file mode 100644 index dc0a8df..0000000 --- a/cli/go.mod +++ /dev/null @@ -1,14 +0,0 @@ -module github.com/koeng101/dnadesign/cli - -go 1.22.0 - -require ( - github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/google/uuid v1.6.0 // indirect - github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/klauspost/compress v1.17.6 // indirect - github.com/spf13/cobra v1.8.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect - gitlab.com/rackn/seekable-zstd v0.8.2 // indirect - gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93 // indirect -) diff --git a/cli/go.sum b/cli/go.sum deleted file mode 100644 index 5f68dc6..0000000 --- a/cli/go.sum +++ /dev/null @@ -1,20 +0,0 @@ -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= -github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI= -github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= -github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -gitlab.com/rackn/seekable-zstd v0.8.2 h1:ARgDc1HRo/HnNR2NHsAoaxW2fE0vo0d+yI1+WvrIjF0= -gitlab.com/rackn/seekable-zstd v0.8.2/go.mod h1:9z8nf3qNXOi73VRm7KQgTmI3T0tz9YzDKKL7fzEBz9M= -gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93 h1:lXoXk/e9YrtTyWzNZs1ak/ijpwZQDaJLEwKCjhp/dCw= -gitlab.com/rackn/simplecache v0.0.0-20230324193231-44368de53d93/go.mod h1:pXhP0EyrEy0pGf2DW4vTKub/As/UiamLFaZ1Q9YaFTs= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/cli/main.go b/cli/main.go deleted file mode 100644 index 3ce1f74..0000000 --- a/cli/main.go +++ /dev/null @@ -1,11 +0,0 @@ -/* -Copyright © 2024 NAME HERE - -*/ -package main - -import "github.com/koeng101/dnadesign/cli/cmd" - -func main() { - cmd.Execute() -} diff --git a/go.work b/go.work index a99648e..b747922 100644 --- a/go.work +++ b/go.work @@ -1,7 +1,6 @@ go 1.22.0 use ( - ./cli ./external ./lib ) diff --git a/lib/align/megamash/megamash.go b/lib/align/megamash/megamash.go index 9c59fa4..3873d0c 100644 --- a/lib/align/megamash/megamash.go +++ b/lib/align/megamash/megamash.go @@ -11,6 +11,7 @@ package megamash import ( "encoding/json" "fmt" + "strings" "github.com/koeng101/dnadesign/lib/bio/fasta" "github.com/koeng101/dnadesign/lib/transform" @@ -19,6 +20,7 @@ import ( // StandardizedDNA returns the alphabetically lesser strand of a double 
// stranded DNA molecule. func StandardizedDNA(sequence string) string { + sequence = strings.ToUpper(sequence) var deterministicSequence string reverseComplement := transform.ReverseComplement(sequence) if sequence > reverseComplement { diff --git a/lib/bio/fastq/fastq.go b/lib/bio/fastq/fastq.go index b63fa23..2b1d8df 100644 --- a/lib/bio/fastq/fastq.go +++ b/lib/bio/fastq/fastq.go @@ -16,6 +16,7 @@ import ( "errors" "fmt" "io" + "sort" "strings" ) @@ -209,8 +210,15 @@ func (read *Read) WriteTo(w io.Writer) (int64, error) { if err != nil { return writtenBytes, err } - for key, val := range read.Optionals { - newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, val) + keys := make([]string, len(read.Optionals)) + i := 0 + for key := range read.Optionals { + keys[i] = key + i++ + } + sort.Strings(keys) + for _, key := range keys { + newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, read.Optionals[key]) writtenBytes += int64(newWrittenBytes) if err != nil { return writtenBytes, err From 80d3119e48164ff1ec6f8e843e140466c350a7b8 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:22:55 -0800 Subject: [PATCH 08/16] Remove stale --- .github/workflows/stale.yml | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index fb99c6b..0000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,27 +0,0 @@ -# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. -# -# You can adjust the behavior by modifying this file. -# For more information, see: -# https://github.com/actions/stale -name: Mark stale issues and pull requests - -on: - schedule: - - cron: '25 18 * * *' - -jobs: - stale: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v5 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - stale-issue-message: 'This issue has had no activity in the past 2 months. Marking as `stale`.' - stale-pr-message: 'This PR has had no activity in the past 2 months. Marking as `stale`.' - stale-issue-label: 'stale' - stale-pr-label: 'stale' - days-before-stale: 60 - days-before-close: -1 From cf42b4b6cc3c6a16967a5fc19ba8a8979d45e2ae Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:23:47 -0800 Subject: [PATCH 09/16] make linter happy --- lib/bio/bio.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bio/bio.go b/lib/bio/bio.go index 29adc87..98cdf0c 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -381,7 +381,7 @@ func FastqFromIndex(r io.ReaderAt, startPosition uint64, length uint64) (fastq.R } dataBytes = dataBytes[:len(dataBytes)-1] } - if int(n) != len(dataBytes) { + if n != len(dataBytes) { return fastq.Read{}, fmt.Errorf("Failed to retrieve correct number of bytes Note expected may be off by 1 if at EOF. 
Expected: %d, Got: %d", len(dataBytes), n) } parser := NewFastqParserWithMaxLineLength(bytes.NewReader(dataBytes), n) From 34df6e51d6a4387c1348b32a5ce2cadc8d53803e Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:30:39 -0800 Subject: [PATCH 10/16] Updated samtools for linter --- external/samtools/samtools.go | 12 ++++++------ external/samtools/samtools_test.go | 4 +++- lib/align/megamash/megamash_test.go | 17 +++++++++-------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/external/samtools/samtools.go b/external/samtools/samtools.go index da977e6..d78e33b 100644 --- a/external/samtools/samtools.go +++ b/external/samtools/samtools.go @@ -23,7 +23,7 @@ import ( // The first samtools view removes unmapped sequences, the sort sorts the // sequences for piping into pileup, and the final command builds the pileup // file. -func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error { +func Pileup(ctx context.Context, templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error { /* Due to how os.exec works in Golang, we can't directly have pipes as if the whole thing was a script. However, we can attach pipes to each @@ -50,7 +50,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro } tmpFile.Close() // Close the file as it's no longer needed - g, ctx := errgroup.WithContext(context.Background()) + g, ctx := errgroup.WithContext(ctx) // Setup pipe connections between commands viewSortReader, viewSortWriter := io.Pipe() @@ -74,7 +74,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro select { case <-ctx.Done(): - viewCmd.Process.Signal(syscall.SIGTERM) + _ = viewCmd.Process.Signal(syscall.SIGTERM) return ctx.Err() default: return viewCmd.Wait() @@ -94,7 +94,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro select { case <-ctx.Done(): - sortCmd.Process.Signal(syscall.SIGTERM) + _ = sortCmd.Process.Signal(syscall.SIGTERM) return ctx.Err() default: return sortCmd.Wait() @@ -112,7 +112,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro select { case <-ctx.Done(): - mpileupCmd.Process.Signal(syscall.SIGTERM) + _ = mpileupCmd.Process.Signal(syscall.SIGTERM) return ctx.Err() default: return mpileupCmd.Wait() @@ -149,7 +149,7 @@ func PileupChanneled(ctx context.Context, templateFastas io.Reader, samChan <-ch // Run Pileup function in a goroutine g.Go(func() error { - return Pileup(templateFastas, samPr, w) // Runs Pileup, writing output to pileupPw + return Pileup(ctx, templateFastas, samPr, w) // Runs Pileup, writing output to pileupPw }) // Wait for all goroutines in the group to finish diff --git a/external/samtools/samtools_test.go b/external/samtools/samtools_test.go index 8e95930..c75aadf 100644 --- a/external/samtools/samtools_test.go +++ b/external/samtools/samtools_test.go @@ -2,6 +2,7 @@ package samtools_test import ( "bytes" + "context" "os" "testing" @@ -28,7 +29,8 @@ func TestPileup(t *testing.T) { var buf bytes.Buffer // Execute the pileup function - err = samtools.Pileup(templateFile, samFile, &buf) + ctx := context.Background() + err = samtools.Pileup(ctx, templateFile, samFile, &buf) if err != nil { t.Errorf("Pileup returned error: %s", err) } diff --git a/lib/align/megamash/megamash_test.go b/lib/align/megamash/megamash_test.go index 5e53c0f..3aeaaa1 100644 --- a/lib/align/megamash/megamash_test.go +++ b/lib/align/megamash/megamash_test.go @@ -1,8 +1,9 @@ -package megamash +package 
megamash_test import ( "testing" + "github.com/koeng101/dnadesign/lib/align/megamash" "github.com/koeng101/dnadesign/lib/bio/fasta" ) @@ -12,7 +13,7 @@ func TestMegamash(t *testing.T) { oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA" samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"} - m, err := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, DefaultKmerSize, DefaultMinimalKmerCount, DefaultScoreThreshold) + m, err := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold) if err != nil { t.Errorf("Failed to make NewMegamashMap: %s", err) } @@ -31,8 +32,8 @@ func BenchmarkMegamash(b *testing.B) { oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA" samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"} - m, _ := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, - DefaultKmerSize, DefaultMinimalKmerCount, 
DefaultScoreThreshold) + m, _ := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, + megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold) for _, sample := range samples { _ = m.Match(sample) } @@ -41,24 +42,24 @@ func BenchmarkMegamash(b *testing.B) { func TestMatchesConversion(t *testing.T) { // Initial slice of Match structs - matches := []Match{ + matches := []megamash.Match{ {"match1", 90.1}, {"match2", 85.5}, } // Convert matches to JSON string - jsonStr, err := MatchesToJSON(matches) + jsonStr, err := megamash.MatchesToJSON(matches) if err != nil { t.Fatalf("MatchesToJSON failed with error: %v", err) } // Convert JSON string back to slice of Match structs - convertedMatches, err := JSONToMatches(jsonStr) + convertedMatches, err := megamash.JSONToMatches(jsonStr) if err != nil { t.Fatalf("JSONToMatches failed with error: %v", err) } // Convert the convertedMatches back to JSON to compare strings - convertedJSONStr, err := MatchesToJSON(convertedMatches) + convertedJSONStr, err := megamash.MatchesToJSON(convertedMatches) if err != nil { t.Fatalf("MatchesToJSON failed with error: %v", err) } From 411df738ed66596255e3fb56f9386a95d87ecccf Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Mon, 19 Feb 2024 20:49:38 -0800 Subject: [PATCH 11/16] Updated to fix linter problems --- lib/bio/fastq/fastq.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/bio/fastq/fastq.go b/lib/bio/fastq/fastq.go index 2b1d8df..160cc06 100644 --- a/lib/bio/fastq/fastq.go +++ b/lib/bio/fastq/fastq.go @@ -43,10 +43,10 @@ type Read struct { // DeepCopy deep copies a read. Used for when you want to modify optionals then // pipe elsewhere. -func (r *Read) DeepCopy() Read { - newRead := Read{Identifier: r.Identifier, Sequence: r.Sequence, Quality: r.Quality} +func (read *Read) DeepCopy() Read { + newRead := Read{Identifier: read.Identifier, Sequence: read.Sequence, Quality: read.Quality} newRead.Optionals = make(map[string]string) - for key, value := range r.Optionals { + for key, value := range read.Optionals { newRead.Optionals[key] = value } return newRead From 09f9686b686d309664e7a3f1db766735cc5a6702 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Sat, 24 Feb 2024 21:08:31 -0800 Subject: [PATCH 12/16] add fastz temporarily --- go.work | 1 + 1 file changed, 1 insertion(+) diff --git a/go.work b/go.work index b747922..ebe5368 100644 --- a/go.work +++ b/go.work @@ -2,5 +2,6 @@ go 1.22.0 use ( ./external + ./fastz ./lib ) From 5cf4eafb1fe433e0a38fb9f69212da3e169a8cd1 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Wed, 28 Feb 2024 12:50:50 -0800 Subject: [PATCH 13/16] added changes to minimap2 --- external/minimap2/minimap2.go | 3 +-- go.work | 1 - lib/bio/pileup/pileup.go | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/external/minimap2/minimap2.go b/external/minimap2/minimap2.go index 578d24c..91ac73e 100644 --- a/external/minimap2/minimap2.go +++ b/external/minimap2/minimap2.go @@ -82,8 +82,7 @@ func Minimap2(templateFastaInput io.Reader, fastqInput io.Reader, w io.Writer) e } // Minimap2Channeled uses channels rather than io.Reader and io.Writers. 
-func Minimap2Channeled(fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error { - ctx := context.Background() +func Minimap2Channeled(ctx context.Context, fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error { g, ctx := errgroup.WithContext(ctx) // Create a pipe for writing fastq reads and reading them as an io.Reader diff --git a/go.work b/go.work index ebe5368..b747922 100644 --- a/go.work +++ b/go.work @@ -2,6 +2,5 @@ go 1.22.0 use ( ./external - ./fastz ./lib ) diff --git a/lib/bio/pileup/pileup.go b/lib/bio/pileup/pileup.go index e87cfd9..cfed71c 100644 --- a/lib/bio/pileup/pileup.go +++ b/lib/bio/pileup/pileup.go @@ -39,6 +39,7 @@ import ( "bufio" "fmt" "io" + "regexp" "strconv" "strings" "unicode" From dd222b58407ebeefcb4775ca3f373092aa0be56d Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Wed, 28 Feb 2024 12:53:33 -0800 Subject: [PATCH 14/16] remove ddidx --- lib/bio/bio.go | 44 -------------------- lib/bio/ddidx/ddidx.go | 83 ------------------------------------- lib/bio/ddidx/ddidx_test.go | 44 -------------------- lib/bio/example_test.go | 37 ----------------- 4 files changed, 208 deletions(-) delete mode 100644 lib/bio/ddidx/ddidx.go delete mode 100644 lib/bio/ddidx/ddidx_test.go diff --git a/lib/bio/bio.go b/lib/bio/bio.go index 98cdf0c..09eb0ff 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -11,10 +11,8 @@ package bio import ( "bufio" - "bytes" "context" "errors" - "fmt" "io" "math" @@ -352,45 +350,3 @@ func FilterData[Data DataTypes](ctx context.Context, input <-chan Data, output c } } } - -/* - -We have FromIndex parsers for data types that need it: -- genbank -- fasta -- fastq -- slow5 -- sam - -We do not have them for data types that do not need it: -- pileup.Line [always small] -- uniprot.Entry [file type too complex] -*/ - -// Indexable is an interface for DataTypes to satisfy if they are indexable. -type Indexable interface { - Identifier() string -} - -func FastqFromIndex(r io.ReaderAt, startPosition uint64, length uint64) (fastq.Read, error) { - dataBytes := make([]byte, length) - n, err := r.ReadAt(dataBytes, int64(startPosition)) - if err != nil { - if !errors.Is(err, io.EOF) { - return fastq.Read{}, err - } - dataBytes = dataBytes[:len(dataBytes)-1] - } - if n != len(dataBytes) { - return fastq.Read{}, fmt.Errorf("Failed to retrieve correct number of bytes Note expected may be off by 1 if at EOF. Expected: %d, Got: %d", len(dataBytes), n) - } - parser := NewFastqParserWithMaxLineLength(bytes.NewReader(dataBytes), n) - fastqRead, err := parser.Next() - if err != nil { - if errors.Is(err, io.EOF) { - err = nil // EOF not treated as parsing error. - } - return fastqRead, err - } - return fastqRead, nil -} diff --git a/lib/bio/ddidx/ddidx.go b/lib/bio/ddidx/ddidx.go deleted file mode 100644 index 1accfcc..0000000 --- a/lib/bio/ddidx/ddidx.go +++ /dev/null @@ -1,83 +0,0 @@ -/* -Package ddidx contains information about the dnadesign index format. -*/ -package ddidx - -import ( - "encoding/binary" - "errors" - "io" -) - -// Index is a 32 byte index for individual objects. -type Index struct { - Identifier [16]byte - StartPosition uint64 - Length uint64 -} - -// WriteTo writes the binary representation of the Index to the given writer. -// It returns the number of bytes written and any error encountered. 
-func (i *Index) WriteTo(w io.Writer) (int64, error) { - // The total bytes written - var totalBytes int64 - - // Write Identifier - n, err := w.Write(i.Identifier[:]) - totalBytes += int64(n) - if err != nil { - return totalBytes, err - } - - // Create a buffer to write the uint64 values - buf := make([]byte, 8) - - // Write StartPosition - binary.BigEndian.PutUint64(buf, i.StartPosition) - n, err = w.Write(buf) - totalBytes += int64(n) - if err != nil { - return totalBytes, err - } - - // Write Length - binary.BigEndian.PutUint64(buf, i.Length) - n, err = w.Write(buf) - totalBytes += int64(n) - if err != nil { - return totalBytes, err - } - - return totalBytes, nil -} - -// ReadIndexes reads and returns a list of Index structs from the given reader. -func ReadIndexes(r io.Reader) ([]Index, error) { - var indexes []Index - - for { - var idx Index - - // Read Identifier - if _, err := io.ReadFull(r, idx.Identifier[:]); err != nil { - if errors.Is(err, io.EOF) { - break // End of file, stop reading - } - return indexes, err - } - - // Read StartPosition - if err := binary.Read(r, binary.BigEndian, &idx.StartPosition); err != nil { - return indexes, err - } - - // Read Length - if err := binary.Read(r, binary.BigEndian, &idx.Length); err != nil { - return indexes, err - } - - indexes = append(indexes, idx) - } - - return indexes, nil -} diff --git a/lib/bio/ddidx/ddidx_test.go b/lib/bio/ddidx/ddidx_test.go deleted file mode 100644 index 2ff82f0..0000000 --- a/lib/bio/ddidx/ddidx_test.go +++ /dev/null @@ -1,44 +0,0 @@ -package ddidx - -import ( - "bytes" - "reflect" - "testing" -) - -func TestIndexWriteToAndReadIndexes(t *testing.T) { - // Prepare a slice of Index instances for testing - indexes := []Index{ - { - Identifier: [16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - StartPosition: 100, - Length: 200, - }, - { - Identifier: [16]byte{16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, - StartPosition: 300, - Length: 400, - }, - } - - // Create a buffer to write the indexes to - var buf bytes.Buffer - - // Write each index to the buffer - for _, idx := range indexes { - if _, err := idx.WriteTo(&buf); err != nil { - t.Fatalf("WriteTo failed: %v", err) - } - } - - // Now read the indexes back from the buffer - readIndexes, err := ReadIndexes(&buf) - if err != nil { - t.Fatalf("ReadIndexes failed: %v", err) - } - - // Compare the original indexes with the ones read back - if !reflect.DeepEqual(indexes, readIndexes) { - t.Errorf("Original indexes %+v do not match read indexes %+v", indexes, readIndexes) - } -} diff --git a/lib/bio/example_test.go b/lib/bio/example_test.go index e79fead..3f7e471 100644 --- a/lib/bio/example_test.go +++ b/lib/bio/example_test.go @@ -9,7 +9,6 @@ import ( "strings" "github.com/koeng101/dnadesign/lib/bio" - "github.com/koeng101/dnadesign/lib/bio/ddidx" "github.com/koeng101/dnadesign/lib/bio/fasta" "github.com/koeng101/dnadesign/lib/bio/fastq" "github.com/koeng101/dnadesign/lib/bio/sam" @@ -485,39 +484,3 @@ $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFF fmt.Println(reads[0].Identifier) // Output: af86ed57-1cfe-486f-8205-b2c8d1186454 } - -func ExampleFastqFromIndex() { - file := strings.NewReader(`@289a197e-4c05-4143-80e6-488e23044378 runid=bb4427242f6da39e67293199a11c6c4b6ab2b141 read=34575 ch=111 start_time=2023-12-29T16:06:13.719061-08:00 flow_cell_id=AQY258 protocol_group_id=nseq28 sample_id=build3-build3gg-u11 barcode=barcode06 barcode_alias=barcode06 parent_read_id=289a197e-4c05-4143-80e6-488e23044378 
basecall_model_version_id=dna_r10.4.1_e8.2_400bps_sup@v4.2.0 -TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG -+ -$%%&%%$$%&'+)**,-+)))+866788711112=>A?@@@BDB@>?746@?>A@D2@970,-+..*++++;662/.-.+,++,//+167>A@A@@B=<887-,'&&%%&''((5555644578::<==B?ABCIJA>>>>@DCAA99::>=<<<=67777+***)//+,,+)&&&+--.02:>442000/1225:=D?=<<=7;866/..../AAA226545+&%%$$ -@af86ed57-1cfe-486f-8205-b2c8d1186454 runid=bb4427242f6da39e67293199a11c6c4b6ab2b141 read=2233 ch=123 start_time=2023-12-29T10:04:32.719061-08:00 flow_cell_id=AQY258 protocol_group_id=nseq28 sample_id=build3-build3gg-u11 barcode=barcode07 barcode_alias=barcode07 parent_read_id=af86ed57-1cfe-486f-8205-b2c8d1186454 basecall_model_version_id=dna_r10.4.1_e8.2_400bps_sup@v4.2.0 -TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA -+ -$%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFFFGJ<<<<<=54455>@?>:::9..++?@BDCCDCGECFHD@>=<<==>@@B@?@@>>>==>>===>>>A?@ADFGDCA@?????CCCEFDDDDDGJODAA@A;;ABBD<=<:92222223:>>@?@@B?@=<62212=<<<=>AAB=<'&&&'-,-.,**)'&'(,,,-.114888&&&&&'+++++,,*`) - - parser := bio.NewFastqParser(file) - reads, _ := parser.Parse() - - // Create a ddidx - var indexes []ddidx.Index - - // Write the files to an io.Writer. - // All headers and all records implement io.WriterTo interfaces. - var buffer bytes.Buffer - var startPosition uint64 - for _, read := range reads { - // Normally, you would want to actually parse uuidBytes, but we don't care here. - var uuidBytes [16]byte - copy(uuidBytes[:], read.Identifier[:16]) - length, _ := read.WriteTo(&buffer) - indexes = append(indexes, ddidx.Index{Identifier: uuidBytes, StartPosition: startPosition, Length: uint64(length)}) - startPosition = startPosition + uint64(length) - } - - // Now, read a fastq from an index - read, _ := bio.FastqFromIndex(file, indexes[1].StartPosition, indexes[1].Length) - - fmt.Println(read.Quality) - // Output: $%&$$$$$#')+)+,<>@B?>==<>>;;<<?@DA@?=>==>??<>??7;<706=>=>CBCCB????@CCBDAGFFFGJ<<<<<=54455>@?>:::9..++?@BDCCDCGECFHD@>=<<==>@@B@?@@>>>==>>===>>>A?@ADFGDCA@?????CCCEFDDDDDGJODAA@A;;ABBD<=<:92222223:>>@?@@B?@=<62212=<<<=>AAB=<'&&&'-,-.,**)'&'(,,,-.114888&&&&&'+++++,,* -} From c1b17cc494301dbc173caa366b64d6a3c669bf7d Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Wed, 28 Feb 2024 12:55:18 -0800 Subject: [PATCH 15/16] add work in progress to sequencing.go --- lib/sequencing/sequencing.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/sequencing/sequencing.go b/lib/sequencing/sequencing.go index f600175..0839139 100644 --- a/lib/sequencing/sequencing.go +++ b/lib/sequencing/sequencing.go @@ -1,5 +1,7 @@ /* Package sequencing contains functions associated with handling sequencing data. + +This is a work-in-progess, and not ready for production. 
*/ package sequencing From 1de5d7559bd7c2623d3fa6a9bd67191db88b05ec Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Wed, 28 Feb 2024 13:13:18 -0800 Subject: [PATCH 16/16] Fix pileup --- lib/bio/pileup/pileup.go | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/bio/pileup/pileup.go b/lib/bio/pileup/pileup.go index cfed71c..e87cfd9 100644 --- a/lib/bio/pileup/pileup.go +++ b/lib/bio/pileup/pileup.go @@ -39,7 +39,6 @@ import ( "bufio" "fmt" "io" - "regexp" "strconv" "strings" "unicode"
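
Taken together, these patches leave the channel-based helpers in a consistent, context-aware state: Pileup and Minimap2Channeled now accept a context.Context, and PileupChanneled bridges a sam.Alignment channel into the samtools pipeline. The sketch below (not part of the patch series) shows one way these pieces might be wired together with errgroup. The input file names, the value-typed reads returned by Parse, and the assumption that the caller, not Minimap2Channeled, closes samChan are all illustrative assumptions, since the sequencing pipeline itself is still a work in progress.

package main

import (
	"bytes"
	"context"
	"fmt"
	"log"
	"os"

	"github.com/koeng101/dnadesign/external/minimap2"
	"github.com/koeng101/dnadesign/external/samtools"
	"github.com/koeng101/dnadesign/lib/bio"
	"github.com/koeng101/dnadesign/lib/bio/fastq"
	"github.com/koeng101/dnadesign/lib/bio/sam"
	"golang.org/x/sync/errgroup"
)

func main() {
	// Hypothetical inputs; each consumer gets its own reader for the templates,
	// since an io.Reader can only be drained once.
	templatesForMinimap, err := os.Open("templates.fasta")
	if err != nil {
		log.Fatal(err)
	}
	defer templatesForMinimap.Close()
	templatesForPileup, err := os.Open("templates.fasta")
	if err != nil {
		log.Fatal(err)
	}
	defer templatesForPileup.Close()
	fastqFile, err := os.Open("reads.fastq")
	if err != nil {
		log.Fatal(err)
	}
	defer fastqFile.Close()

	g, ctx := errgroup.WithContext(context.Background())
	fastqChan := make(chan fastq.Read)
	samChan := make(chan sam.Alignment)
	var pileup bytes.Buffer

	// Feed fastq reads into the channel.
	g.Go(func() error {
		defer close(fastqChan)
		reads, err := bio.NewFastqParser(fastqFile).Parse()
		if err != nil {
			return err
		}
		for _, read := range reads {
			select {
			case fastqChan <- read:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		return nil
	})

	// Align reads against the templates, emitting SAM alignments.
	g.Go(func() error {
		// Assumes closing samChan is the caller's job, not Minimap2Channeled's.
		defer close(samChan)
		return minimap2.Minimap2Channeled(ctx, templatesForMinimap, fastqChan, samChan)
	})

	// Pile up the alignments into a buffer.
	g.Go(func() error {
		return samtools.PileupChanneled(ctx, templatesForPileup, samChan, &pileup)
	})

	if err := g.Wait(); err != nil {
		log.Fatal(err)
	}
	fmt.Print(pileup.String())
}

The design choice mirrors what the samtools and minimap2 wrappers already do internally: errgroup propagates the first error and cancels the shared context, while channels and io.Pipe keep reads streaming between stages without buffering whole files in memory.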