diff --git a/bindings/go/scip/internal/old_symbol_parser.go b/bindings/go/scip/internal/old_symbol_parser.go index 6d13894c..524e284b 100644 --- a/bindings/go/scip/internal/old_symbol_parser.go +++ b/bindings/go/scip/internal/old_symbol_parser.go @@ -27,7 +27,6 @@ func tryParseLocalSymbol(symbol string) (string, error) { // reasons. We can remove this in the future once we're confident that the new // parser handles everything correctly. func ParsePartialSymbolV1ToBeDeleted(symbol string, includeDescriptors bool) (*scip.Symbol, error) { - // TODO: Rip out this, and call local, err := tryParseLocalSymbol(symbol) if err != nil { return nil, err diff --git a/bindings/go/scip/speedtest/speedtest_main.go b/bindings/go/scip/speedtest/speedtest_main.go index fff5612f..b7b44fd5 100644 --- a/bindings/go/scip/speedtest/speedtest_main.go +++ b/bindings/go/scip/speedtest/speedtest_main.go @@ -50,9 +50,9 @@ func benchmark(path string) { for i := 0; i < b.N; i++ { occ := allOccurrences[i] _, err = internal.ParsePartialSymbolV1ToBeDeleted(occ.Symbol, true) - if err != nil { - //panic(fmt.Sprintf("v1: index path: %v: error: %v", path, err)) - } + // Ignore errors here as error compatibility is handled by TestParseCompat, + // Currently, a bunch of indexes use invalid syntax for locals. + _ = err } } parserV2Benchmark := func(b *simpleBenchmark) { @@ -61,9 +61,9 @@ func benchmark(path string) { str := beaut.NewUTF8StringUnchecked(occ.Symbol, knownwf.UTF8DeserializedFromProtobufString) sym := scip.Symbol{} err = scip.ParseSymbolUTF8With(str, scip.ParseSymbolOptions{IncludeDescriptors: true, RecordOutput: &sym}) - if err != nil { - //panic(fmt.Sprintf("v2: index path: %v: error: %v", path, err)) - } + // Ignore errors here as error compatibility is handled by TestParseCompat. + // Currently, a bunch of indexes use invalid syntax for locals. + _ = err } } parserV2ValidateBenchmark := func(b *simpleBenchmark) { @@ -71,9 +71,9 @@ func benchmark(path string) { occ := allOccurrences[i] str := beaut.NewUTF8StringUnchecked(occ.Symbol, knownwf.UTF8DeserializedFromProtobufString) err = scip.ValidateSymbolUTF8(str) - if err != nil { - //panic(fmt.Sprintf("v2: index path: %v: error: %v", path, err)) - } + // Ignore errors here as error compatibility is handled by TestParseCompat. + // Currently, a bunch of indexes use invalid syntax for locals. + _ = err } } sb := simpleBenchmark{MaxN: 100 * 1000} diff --git a/bindings/go/scip/symbol_parser.go b/bindings/go/scip/symbol_parser.go index 4656ffef..816b4476 100644 --- a/bindings/go/scip/symbol_parser.go +++ b/bindings/go/scip/symbol_parser.go @@ -229,7 +229,7 @@ func (dw *descriptorsWriter) getDisambiguatorForLast() *stringWriter { // // Pre-condition: This should only be called in the shouldWrite case. func (dw *descriptorsWriter) internalPrepareSlot(suffix Descriptor_Suffix) (*string, *Descriptor_Suffix) { - assert(dw.shouldWrite, "should call internalPrepareSlot in the shouldWrite case") + assert(dw.shouldWrite, "should only call internalPrepareSlot in the shouldWrite case") zeroLastSlot := func() { lastPtr := &dw.descriptors[dw.count] if last := *lastPtr; last == nil { @@ -258,14 +258,29 @@ func (dw *descriptorsWriter) validDescriptors() []*Descriptor { // symbolParserV2 parses symbols while minimizing heap allocations. type symbolParserV2 struct { - SymbolString string + SymbolString string + // byteIndex is the index in SymbolString for the first byte of currentRune. + // However, at the end of parsing, this can be equal to len(SymbolString); + // see advanceOneByte. byteIndex int currentRune rune bytesToNextRune int32 } +// findRuneAtIndex returns the rune at the given index. +// // Pre-condition: string is well-formed UTF-8 // Pre-condition: byteIndex is in bounds +// +// NOTE: We could technically use utf8.DecodeRuneInString here, but that +// has a more complex implementation which tries to handle invalid UTF-8. +// Looking at the code itself, it seems like it would be slower given that +// it is doing more operations. +// +// https://sourcegraph.com/github.com/golang/go/-/blob/src/unicode/utf8/utf8.go?L205-243 +// +// It might be worth benchmarking to see if that's faster/doesn't make +// a difference. func findRuneAtIndex(s string, byteIndex int) (r rune, bytesRead int32) { b1 := s[byteIndex] if b1 < 0x80 {