Skip to content

Commit

Permalink
Add priorityqueue
Browse files Browse the repository at this point in the history
  • Loading branch information
hupe1980 committed Mar 19, 2024
1 parent 88a217a commit 654158c
Showing 1 changed file with 90 additions and 18 deletions.
108 changes: 90 additions & 18 deletions vectorstore/in_memory.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package vectorstore

import (
"container/heap"
"context"
"sort"

"github.com/hupe1980/golc/internal/util"
"github.com/hupe1980/golc/metric"
Expand All @@ -19,13 +19,64 @@ type InMemoryItem struct {
Metadata map[string]any `json:"metadata"`
}

// priorityQueueItem represents an item in the priority queue.
type priorityQueueItem struct {
Data InMemoryItem // Data associated with the item
distance float32 // Distance from the query vector

index int // Index of the item in the priority queue
}

// priorityQueue is a priority queue for InMemoryItem items.
type priorityQueue []*priorityQueueItem

// Len returns the length of the priority queue.
func (pq priorityQueue) Len() int { return len(pq) }

// Less reports whether the element with index i should sort before the element with index j.
func (pq priorityQueue) Less(i, j int) bool { return pq[i].distance < pq[j].distance }

// Swap swaps the elements with indexes i and j.
func (pq priorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].index = i
pq[j].index = j
}

// Push adds an item to the priority queue.
func (pq *priorityQueue) Push(x any) {
n := len(*pq)
item, _ := x.(*priorityQueueItem)
item.index = n
*pq = append(*pq, item)
}

// Pop removes and returns the item with the highest priority (distance).
func (pq *priorityQueue) Pop() any {
old := *pq
n := len(old)
item := old[n-1]
item.index = -1
*pq = old[0 : n-1]

return item
}

// Top returns the top element of the priority queue.
func (pq *priorityQueue) Top() any {
return (*pq)[0]
}

// DistanceFunc represents a function for calculating the distance between two vectors
type DistanceFunc func(v1, v2 []float32) (float32, error)

// InMemoryOptions represents options for the in-memory vector store.
type InMemoryOptions struct {
TopK int
TopK int
DistanceFunc DistanceFunc
}

// InMemory represents an in-memory vector store.
// Note: This implementation is intended for testing and demonstration purposes, not for production use.
type InMemory struct {
embedder schema.Embedder
data []InMemoryItem
Expand All @@ -35,7 +86,8 @@ type InMemory struct {
// NewInMemory creates a new instance of the in-memory vector store.
func NewInMemory(embedder schema.Embedder, optFns ...func(*InMemoryOptions)) *InMemory {
opts := InMemoryOptions{
TopK: 3,
TopK: 3,
DistanceFunc: metric.SquaredL2,
}

for _, fn := range optFns {
Expand Down Expand Up @@ -89,36 +141,56 @@ func (vs *InMemory) SimilaritySearch(ctx context.Context, query string) ([]schem
return nil, err
}

topCandidates := &priorityQueue{}

heap.Init(topCandidates)

type searchResult struct {
Item InMemoryItem
Similarity float32
}

results := make([]searchResult, len(vs.data))

for i, item := range vs.data {
similarity, err := metric.CosineSimilarity(queryVector, item.Vector)
for _, item := range vs.data {
similarity, err := vs.opts.DistanceFunc(queryVector, item.Vector)
if err != nil {
return nil, err
}

results[i] = searchResult{Item: item, Similarity: similarity}
}
if topCandidates.Len() < vs.opts.TopK {
heap.Push(topCandidates, &priorityQueueItem{
Data: item,
distance: similarity,
})

continue
}

largestDist, _ := topCandidates.Top().(*priorityQueueItem)

// Sort results by similarity in descending order
sort.Slice(results, func(i, j int) bool {
return results[i].Similarity > results[j].Similarity
})
if similarity < largestDist.distance {
_ = heap.Pop(topCandidates)

heap.Push(topCandidates, &priorityQueueItem{
Data: item,
distance: similarity,
})
}
}

docLen := util.Min(len(results), vs.opts.TopK)

// Extract documents from sorted results
documents := make([]schema.Document, docLen)
for i := 0; i < docLen; i++ {
documents[i] = schema.Document{
PageContent: results[i].Item.Content,
Metadata: results[i].Item.Metadata,
}
documents := make([]schema.Document, 0, docLen)

for topCandidates.Len() > 0 {
item, _ := heap.Pop(topCandidates).(*priorityQueueItem)

documents = append(documents, schema.Document{
PageContent: item.Data.Content,
Metadata: item.Data.Metadata,
})
}

return documents, nil
Expand Down

0 comments on commit 654158c

Please sign in to comment.