Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
varungandhi-src committed Dec 3, 2024
1 parent 3376768 commit fc33e49
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
1 change: 0 additions & 1 deletion bindings/go/scip/internal/old_symbol_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ func tryParseLocalSymbol(symbol string) (string, error) {
// reasons. We can remove this in the future once we're confident that the new
// parser handles everything correctly.
func ParsePartialSymbolV1ToBeDeleted(symbol string, includeDescriptors bool) (*scip.Symbol, error) {
// TODO: Rip out this, and call
local, err := tryParseLocalSymbol(symbol)
if err != nil {
return nil, err
Expand Down
18 changes: 9 additions & 9 deletions bindings/go/scip/speedtest/speedtest_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ func benchmark(path string) {
for i := 0; i < b.N; i++ {
occ := allOccurrences[i]
_, err = internal.ParsePartialSymbolV1ToBeDeleted(occ.Symbol, true)
if err != nil {
//panic(fmt.Sprintf("v1: index path: %v: error: %v", path, err))
}
// Ignore errors here as error compatibility is handled by TestParseCompat,
// Currently, a bunch of indexes use invalid syntax for locals.
_ = err
}
}
parserV2Benchmark := func(b *simpleBenchmark) {
Expand All @@ -61,19 +61,19 @@ func benchmark(path string) {
str := beaut.NewUTF8StringUnchecked(occ.Symbol, knownwf.UTF8DeserializedFromProtobufString)
sym := scip.Symbol{}
err = scip.ParseSymbolUTF8With(str, scip.ParseSymbolOptions{IncludeDescriptors: true, RecordOutput: &sym})
if err != nil {
//panic(fmt.Sprintf("v2: index path: %v: error: %v", path, err))
}
// Ignore errors here as error compatibility is handled by TestParseCompat.
// Currently, a bunch of indexes use invalid syntax for locals.
_ = err
}
}
parserV2ValidateBenchmark := func(b *simpleBenchmark) {
for i := 0; i < b.N; i++ {
occ := allOccurrences[i]
str := beaut.NewUTF8StringUnchecked(occ.Symbol, knownwf.UTF8DeserializedFromProtobufString)
err = scip.ValidateSymbolUTF8(str)
if err != nil {
//panic(fmt.Sprintf("v2: index path: %v: error: %v", path, err))
}
// Ignore errors here as error compatibility is handled by TestParseCompat.
// Currently, a bunch of indexes use invalid syntax for locals.
_ = err
}
}
sb := simpleBenchmark{MaxN: 100 * 1000}
Expand Down
19 changes: 17 additions & 2 deletions bindings/go/scip/symbol_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func (dw *descriptorsWriter) getDisambiguatorForLast() *stringWriter {
//
// Pre-condition: This should only be called in the shouldWrite case.
func (dw *descriptorsWriter) internalPrepareSlot(suffix Descriptor_Suffix) (*string, *Descriptor_Suffix) {
assert(dw.shouldWrite, "should call internalPrepareSlot in the shouldWrite case")
assert(dw.shouldWrite, "should only call internalPrepareSlot in the shouldWrite case")
zeroLastSlot := func() {
lastPtr := &dw.descriptors[dw.count]
if last := *lastPtr; last == nil {
Expand Down Expand Up @@ -258,14 +258,29 @@ func (dw *descriptorsWriter) validDescriptors() []*Descriptor {

// symbolParserV2 parses symbols while minimizing heap allocations.
type symbolParserV2 struct {
SymbolString string
SymbolString string
// byteIndex is the index in SymbolString for the first byte of currentRune.
// However, at the end of parsing, this can be equal to len(SymbolString);
// see advanceOneByte.
byteIndex int
currentRune rune
bytesToNextRune int32
}

// findRuneAtIndex returns the rune at the given index.
//
// Pre-condition: string is well-formed UTF-8
// Pre-condition: byteIndex is in bounds
//
// NOTE: We could technically use utf8.DecodeRuneInString here, but that
// has a more complex implementation which tries to handle invalid UTF-8.
// Looking at the code itself, it seems like it would be slower given that
// it is doing more operations.
//
// https://sourcegraph.com/github.com/golang/go/-/blob/src/unicode/utf8/utf8.go?L205-243
//
// It might be worth benchmarking to see if that's faster/doesn't make
// a difference.
func findRuneAtIndex(s string, byteIndex int) (r rune, bytesRead int32) {
b1 := s[byteIndex]
if b1 < 0x80 {
Expand Down

0 comments on commit fc33e49

Please sign in to comment.