Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: dynamically increase buffer size to handle processing large JSON lines #93

Merged
merged 2 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions tui/bubbles/jqplayground/commands.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package jqplayground

import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
Expand Down Expand Up @@ -66,14 +64,13 @@ func processJSONWithQuery(ctx context.Context, results *strings.Builder, query *
}

func processJSONLinesWithQuery(ctx context.Context, results *strings.Builder, query *gojq.Query, data []byte) error {
scanner := bufio.NewScanner(bytes.NewReader(data))
for scanner.Scan() {
line := scanner.Bytes()
if err := processJSONWithQuery(ctx, results, query, line); err != nil {
return err
}
const maxBufferSize = 100 * 1024 * 1024 // 100MB max buffer size

processLine := func(line []byte) error {
return processJSONWithQuery(ctx, results, query, line)
}
return nil

return utils.ScanLinesWithDynamicBufferSize(data, maxBufferSize, processLine)
}

func (b *Bubble) executeQueryOnInput(ctx context.Context) (string, error) {
Expand Down
68 changes: 37 additions & 31 deletions tui/utils/json.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package utils

import (
"bufio"
"bytes"
"encoding/json"
"errors"
Expand All @@ -21,11 +20,19 @@ const FourSpaces = " "
// whether the data is valid JSON and valid JSON lines, along with an error
// if the data is not valid in either format.
func IsValidInput(data []byte) (isValidJSON bool, isValidJSONLines bool, err error) {
isValidJSON = IsValidJSON(data)
isValidJSONLines = IsValidJSONLines(data)
if len(data) == 0 {
err = errors.New("Data is not valid JSON or NDJSON")
return false, false, err
}

isValidJSON = IsValidJSON(data) == nil
isValidJSONLines = IsValidJSONLines(data) == nil

if !isValidJSON && !isValidJSONLines {
return false, false, errors.New("Data is not valid JSON or NDJSON")
err = errors.New("Data is not valid JSON or NDJSON")
return false, false, err
}

return isValidJSON, isValidJSONLines, nil
}

Expand All @@ -52,33 +59,28 @@ func highlightJSON(w io.Writer, source string, style *chroma.Style) error {
return f.Format(w, style, it)
}

func IsValidJSON(input []byte) bool {
func IsValidJSON(input []byte) error {
var js json.RawMessage
return json.Unmarshal(input, &js) == nil
return json.Unmarshal(input, &js)
}

func IsValidJSONLines(input []byte) bool {
if len(input) == 0 {
return false
}

reader := bytes.NewReader(input)
scanner := bufio.NewScanner(reader)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
if !IsValidJSON(scanner.Bytes()) {
return false
}
func IsValidJSONLines(input []byte) error {
maxBufferSize := 100 * 1024 * 1024 // 100MB
err := ScanLinesWithDynamicBufferSize(input, maxBufferSize, IsValidJSON)
if err != nil {
return err
}
return true
return nil
}

func indentJSON(input *[]byte, output *bytes.Buffer) error {
if IsValidJSON(*input) {
err := json.Indent(output, []byte(*input), "", FourSpaces)
if err != nil {
return err
}
err := IsValidJSON(*input)
if err != nil {
return nil
}
err = json.Indent(output, []byte(*input), "", FourSpaces)
if err != nil {
return err
}
return nil
}
Expand Down Expand Up @@ -108,20 +110,24 @@ func Prettify(inputJSON []byte, chromaStyle *chroma.Style, isJSONLines bool) (*b
if !isJSONLines {
return prettifyJSON(inputJSON, chromaStyle)
}

var buf bytes.Buffer
reader := bytes.NewReader(inputJSON)
scanner := bufio.NewScanner(reader)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
line := scanner.Bytes()
processLine := func(line []byte) error {
hightlighedLine, err := prettifyJSON(line, chromaStyle)
if err != nil {
return nil, err
return err
}
_, err = buf.WriteString(fmt.Sprintf("%v\n", hightlighedLine))
if err != nil {
return nil, err
return err
}
return nil
}

const maxBufferSize = 100 * 1024 * 1024 // 100MB max buffer size
err := ScanLinesWithDynamicBufferSize(inputJSON, maxBufferSize, processLine)
if err != nil {
return nil, err
}
return &buf, nil
}
50 changes: 50 additions & 0 deletions tui/utils/scan.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package utils

import (
"bufio"
"bytes"
"errors"
"fmt"
)

// ScanLinesWithDynamicBufferSize scans the input byte slice line by line and calls
// processLine for every line, in order.
//
// The underlying bufio.Scanner starts with a 64KB buffer (or maxBufferSize, if that
// is smaller) and grows it on demand, up to maxBufferSize, whenever a line does not
// fit. The input is scanned exactly once, so processLine is never invoked twice for
// the same line.
//
// The slice passed to processLine is only valid until processLine returns; callers
// that need to retain the line must copy it.
//
// It returns:
//   - nil if every line was processed successfully;
//   - the error returned by processLine, unchanged, as soon as one call fails;
//   - an "input exceeds maximum buffer size" error if a line does not fit within
//     maxBufferSize bytes, or if maxBufferSize is not positive.
func ScanLinesWithDynamicBufferSize(input []byte, maxBufferSize int, processLine func([]byte) error) error {
	const initialBufferSize = 64 * 1024 // 64KB initial buffer size

	if maxBufferSize <= 0 {
		// No usable buffer can be allocated for a non-positive limit.
		return fmt.Errorf("input exceeds maximum buffer size of %d bytes", maxBufferSize)
	}

	// Never start with a buffer larger than the caller's limit; otherwise the
	// scanner would accept lines longer than maxBufferSize.
	bufferSize := initialBufferSize
	if bufferSize > maxBufferSize {
		bufferSize = maxBufferSize
	}

	scanner := bufio.NewScanner(bytes.NewReader(input))
	if err := scanWithBufferSize(scanner, bufferSize, maxBufferSize, processLine); err != nil {
		// Consult the scanner itself so that only a genuine overflow is reported
		// as a size error; a processLine error is passed through untouched even
		// if it happens to wrap bufio.ErrTooLong.
		if errors.Is(scanner.Err(), bufio.ErrTooLong) {
			return fmt.Errorf("input exceeds maximum buffer size of %d bytes", maxBufferSize)
		}
		return err
	}

	// All lines are processed successfully
	return nil
}

// scanWithBufferSize configures scanner with an initial buffer of bufferSize bytes
// that may grow up to maxBufferSize, then feeds every scanned line to processLine.
//
// It must be called on a scanner that has not been used yet: bufio.Scanner.Buffer
// panics if it is called after scanning has started.
//
// It returns the first error reported by processLine, or otherwise the scanner's
// own error (nil when the input was fully consumed).
func scanWithBufferSize(scanner *bufio.Scanner, bufferSize, maxBufferSize int, processLine func([]byte) error) error {
	scanner.Buffer(make([]byte, bufferSize), maxBufferSize)

	for scanner.Scan() {
		if err := processLine(scanner.Bytes()); err != nil {
			return err
		}
	}

	return scanner.Err()
}
Loading