From a426c37e21b278ebe116c97dc48000e5aea25273 Mon Sep 17 00:00:00 2001 From: Noah Gorstein Date: Fri, 7 Jun 2024 13:43:58 -0400 Subject: [PATCH 1/2] fix: dynamically increase buffer size to handle processing large JSON lines --- tui/bubbles/jqplayground/commands.go | 15 +++--- tui/utils/json.go | 68 +++++++++++++++------------- tui/utils/scan.go | 50 ++++++++++++++++++++ 3 files changed, 93 insertions(+), 40 deletions(-) create mode 100644 tui/utils/scan.go diff --git a/tui/bubbles/jqplayground/commands.go b/tui/bubbles/jqplayground/commands.go index 792b636..a4eb4d9 100644 --- a/tui/bubbles/jqplayground/commands.go +++ b/tui/bubbles/jqplayground/commands.go @@ -1,8 +1,6 @@ package jqplayground import ( - "bufio" - "bytes" "context" "encoding/json" "fmt" @@ -66,14 +64,13 @@ func processJSONWithQuery(ctx context.Context, results *strings.Builder, query * } func processJSONLinesWithQuery(ctx context.Context, results *strings.Builder, query *gojq.Query, data []byte) error { - scanner := bufio.NewScanner(bytes.NewReader(data)) - for scanner.Scan() { - line := scanner.Bytes() - if err := processJSONWithQuery(ctx, results, query, line); err != nil { - return err - } + const maxBufferSize = 100 * 1024 * 1024 // 100MB max buffer size + + processLine := func(line []byte) error { + return processJSONWithQuery(ctx, results, query, line) } - return nil + + return utils.ScanLinesWithDynamicBufferSize(data, maxBufferSize, processLine) } func (b *Bubble) executeQueryOnInput(ctx context.Context) (string, error) { diff --git a/tui/utils/json.go b/tui/utils/json.go index c2ed3d6..d6de047 100644 --- a/tui/utils/json.go +++ b/tui/utils/json.go @@ -1,7 +1,6 @@ package utils import ( - "bufio" "bytes" "encoding/json" "errors" @@ -21,11 +20,19 @@ const FourSpaces = " " // whether the data is valid JSON and valid JSON lines, along with an error // if the data is not valid in either format. 
func IsValidInput(data []byte) (isValidJSON bool, isValidJSONLines bool, err error) { - isValidJSON = IsValidJSON(data) - isValidJSONLines = IsValidJSONLines(data) + if len(data) == 0 { + err = errors.New("Data is not valid JSON or NDJSON") + return false, false, err + } + + isValidJSON = IsValidJSON(data) == nil + isValidJSONLines = IsValidJSONLines(data) == nil + if !isValidJSON && !isValidJSONLines { - return false, false, errors.New("Data is not valid JSON or NDJSON") + err = errors.New("Data is not valid JSON or NDJSON") + return false, false, err } + return isValidJSON, isValidJSONLines, nil } @@ -52,33 +59,28 @@ func highlightJSON(w io.Writer, source string, style *chroma.Style) error { return f.Format(w, style, it) } -func IsValidJSON(input []byte) bool { +func IsValidJSON(input []byte) error { var js json.RawMessage - return json.Unmarshal(input, &js) == nil + return json.Unmarshal(input, &js) } -func IsValidJSONLines(input []byte) bool { - if len(input) == 0 { - return false - } - - reader := bytes.NewReader(input) - scanner := bufio.NewScanner(reader) - scanner.Split(bufio.ScanLines) - for scanner.Scan() { - if !IsValidJSON(scanner.Bytes()) { - return false - } +func IsValidJSONLines(input []byte) error { + maxBufferSize := 10 * 1024 * 1024 // 10MB + err := ScanLinesWithDynamicBufferSize(input, maxBufferSize, IsValidJSON) + if err != nil { + return err } - return true + return nil } func indentJSON(input *[]byte, output *bytes.Buffer) error { - if IsValidJSON(*input) { - err := json.Indent(output, []byte(*input), "", FourSpaces) - if err != nil { - return err - } + err := IsValidJSON(*input) + if err != nil { + return nil + } + err = json.Indent(output, []byte(*input), "", FourSpaces) + if err != nil { + return err } return nil } @@ -108,20 +110,24 @@ func Prettify(inputJSON []byte, chromaStyle *chroma.Style, isJSONLines bool) (*b if !isJSONLines { return prettifyJSON(inputJSON, chromaStyle) } + var buf bytes.Buffer - reader := bytes.NewReader(inputJSON) 
- scanner := bufio.NewScanner(reader) - scanner.Split(bufio.ScanLines) - for scanner.Scan() { - line := scanner.Bytes() + processLine := func(line []byte) error { hightlighedLine, err := prettifyJSON(line, chromaStyle) if err != nil { - return nil, err + return err } _, err = buf.WriteString(fmt.Sprintf("%v\n", hightlighedLine)) if err != nil { - return nil, err + return err } + return nil + } + + const maxBufferSize = 100 * 1024 * 1024 // 100MB max buffer size + err := ScanLinesWithDynamicBufferSize(inputJSON, maxBufferSize, processLine) + if err != nil { + return nil, err } return &buf, nil } diff --git a/tui/utils/scan.go b/tui/utils/scan.go new file mode 100644 index 0000000..7f2bf1d --- /dev/null +++ b/tui/utils/scan.go @@ -0,0 +1,50 @@ +package utils + +import ( + "bufio" + "bytes" + "errors" + "fmt" +) + +// ScanLinesWithDynamicBufferSize scans the input byte slice line by line, using a dynamically +// increasing buffer size. It starts with an initial buffer size of 64KB and doubles the buffer +// size each time a line exceeds the current buffer size, up to the specified maximum buffer size. +// +// If a line exceeds the maximum buffer size, it returns an error. +// +// The processLine function is called for each line and should return an error if processing fails. +// +// The function returns an error if the input exceeds the maximum buffer size or if any other +// error occurs during line processing. It returns nil if all lines are processed successfully. 
// ScanLinesWithDynamicBufferSize splits input into lines and calls processLine
// once per line, in input order.
//
// The scanner starts with a 64KB token buffer (or maxBufferSize, when that is
// smaller) and bufio.Scanner grows it on demand up to maxBufferSize, so a
// single pass is enough and processLine never sees the same line twice.
//
// A line that does not fit within maxBufferSize stops the scan with an
// "input exceeds maximum buffer size" error; lines already handed to
// processLine before that point are not undone. A non-positive maxBufferSize
// is reported with the same error without scanning anything.
//
// The slice passed to processLine is backed by the scanner's buffer and may be
// overwritten when the next line is read, so processLine must copy it if it
// needs to keep the bytes.
//
// Any error returned by processLine aborts the scan and is returned unchanged.
func ScanLinesWithDynamicBufferSize(input []byte, maxBufferSize int, processLine func([]byte) error) error {
	const initialBufferSize = 64 * 1024 // 64KB initial buffer size

	if maxBufferSize <= 0 {
		return fmt.Errorf("input exceeds maximum buffer size of %d bytes", maxBufferSize)
	}

	// Never start larger than the caller's limit: bufio.Scanner uses the larger
	// of cap(buf) and max as its effective ceiling.
	bufferSize := initialBufferSize
	if maxBufferSize < bufferSize {
		bufferSize = maxBufferSize
	}

	scanner := bufio.NewScanner(bytes.NewReader(input))
	err := scanWithBufferSize(scanner, bufferSize, maxBufferSize, processLine)
	if errors.Is(err, bufio.ErrTooLong) {
		// The scanner already grew its buffer to the limit, so retrying with a
		// bigger initial buffer cannot help (and Buffer would panic on reuse).
		return fmt.Errorf("input exceeds maximum buffer size of %d bytes", maxBufferSize)
	}
	return err
}

// scanWithBufferSize gives scanner a token buffer with bufferSize bytes of
// initial capacity and a growth limit of maxBufferSize, then feeds every line
// to processLine until the input ends or an error occurs.
//
// It must be the first use of scanner: bufio.Scanner.Buffer panics once
// scanning has started, so a scanner cannot be passed here twice.
func scanWithBufferSize(scanner *bufio.Scanner, bufferSize, maxBufferSize int, processLine func([]byte) error) error {
	scanner.Buffer(make([]byte, 0, bufferSize), maxBufferSize)

	for scanner.Scan() {
		if err := processLine(scanner.Bytes()); err != nil {
			return err
		}
	}

	return scanner.Err()
}