fix: dynamically increase buffer size to handle processing large JSON lines (#93)
noahgorstein authored Jun 7, 2024
1 parent 2731b74 commit 2121bb4
Showing 3 changed files with 93 additions and 40 deletions.
15 changes: 6 additions & 9 deletions tui/bubbles/jqplayground/commands.go
@@ -1,8 +1,6 @@
package jqplayground

import (
-    "bufio"
-    "bytes"
    "context"
    "encoding/json"
    "fmt"
@@ -66,14 +64,13 @@ func processJSONWithQuery(ctx context.Context, results *strings.Builder, query *
}

func processJSONLinesWithQuery(ctx context.Context, results *strings.Builder, query *gojq.Query, data []byte) error {
-    scanner := bufio.NewScanner(bytes.NewReader(data))
-    for scanner.Scan() {
-        line := scanner.Bytes()
-        if err := processJSONWithQuery(ctx, results, query, line); err != nil {
-            return err
-        }
+    const maxBufferSize = 100 * 1024 * 1024 // 100MB max buffer size
+
+    processLine := func(line []byte) error {
+        return processJSONWithQuery(ctx, results, query, line)
    }
-    return nil
+
+    return utils.ScanLinesWithDynamicBufferSize(data, maxBufferSize, processLine)
}

func (b *Bubble) executeQueryOnInput(ctx context.Context) (string, error) {
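For background (not part of this diff): the failure mode being fixed is that a default bufio.Scanner rejects any token longer than bufio.MaxScanTokenSize (64KB), so JSON lines larger than that could not be scanned by the loop removed above. A minimal reproduction sketch, with an illustrative 70KB payload:

package main

import (
    "bufio"
    "bytes"
    "fmt"
    "strings"
)

func main() {
    // A single JSON line just over the scanner's 64KB default token limit.
    line := `{"payload":"` + strings.Repeat("x", 70*1024) + `"}` + "\n"

    scanner := bufio.NewScanner(bytes.NewReader([]byte(line)))
    for scanner.Scan() {
        // Never reached: the line exceeds bufio.MaxScanTokenSize.
    }
    fmt.Println(scanner.Err()) // bufio.Scanner: token too long
}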
68 changes: 37 additions & 31 deletions tui/utils/json.go
@@ -1,7 +1,6 @@
package utils

import (
-    "bufio"
    "bytes"
    "encoding/json"
    "errors"
@@ -21,11 +20,19 @@ const FourSpaces = "    "
// whether the data is valid JSON and valid JSON lines, along with an error
// if the data is not valid in either format.
func IsValidInput(data []byte) (isValidJSON bool, isValidJSONLines bool, err error) {
-    isValidJSON = IsValidJSON(data)
-    isValidJSONLines = IsValidJSONLines(data)
+    if len(data) == 0 {
+        err = errors.New("Data is not valid JSON or NDJSON")
+        return false, false, err
+    }
+
+    isValidJSON = IsValidJSON(data) == nil
+    isValidJSONLines = IsValidJSONLines(data) == nil
+
    if !isValidJSON && !isValidJSONLines {
-        return false, false, errors.New("Data is not valid JSON or NDJSON")
+        err = errors.New("Data is not valid JSON or NDJSON")
+        return false, false, err
    }

    return isValidJSON, isValidJSONLines, nil
}

@@ -52,33 +59,28 @@ func highlightJSON(w io.Writer, source string, style *chroma.Style) error {
    return f.Format(w, style, it)
}

-func IsValidJSON(input []byte) bool {
+func IsValidJSON(input []byte) error {
    var js json.RawMessage
-    return json.Unmarshal(input, &js) == nil
+    return json.Unmarshal(input, &js)
}

-func IsValidJSONLines(input []byte) bool {
-    if len(input) == 0 {
-        return false
-    }
-
-    reader := bytes.NewReader(input)
-    scanner := bufio.NewScanner(reader)
-    scanner.Split(bufio.ScanLines)
-    for scanner.Scan() {
-        if !IsValidJSON(scanner.Bytes()) {
-            return false
-        }
+func IsValidJSONLines(input []byte) error {
+    maxBufferSize := 100 * 1024 * 1024 // 100MB
+    err := ScanLinesWithDynamicBufferSize(input, maxBufferSize, IsValidJSON)
+    if err != nil {
+        return err
    }
-    return true
+    return nil
}

func indentJSON(input *[]byte, output *bytes.Buffer) error {
-    if IsValidJSON(*input) {
-        err := json.Indent(output, []byte(*input), "", FourSpaces)
-        if err != nil {
-            return err
-        }
+    err := IsValidJSON(*input)
+    if err != nil {
+        return nil
+    }
+    err = json.Indent(output, []byte(*input), "", FourSpaces)
+    if err != nil {
+        return err
    }
    return nil
}
@@ -108,20 +110,24 @@ func Prettify(inputJSON []byte, chromaStyle *chroma.Style, isJSONLines bool) (*b
    if !isJSONLines {
        return prettifyJSON(inputJSON, chromaStyle)
    }

    var buf bytes.Buffer
-    reader := bytes.NewReader(inputJSON)
-    scanner := bufio.NewScanner(reader)
-    scanner.Split(bufio.ScanLines)
-    for scanner.Scan() {
-        line := scanner.Bytes()
+    processLine := func(line []byte) error {
        hightlighedLine, err := prettifyJSON(line, chromaStyle)
        if err != nil {
-            return nil, err
+            return err
        }
        _, err = buf.WriteString(fmt.Sprintf("%v\n", hightlighedLine))
        if err != nil {
-            return nil, err
+            return err
        }
+        return nil
    }
+
+    const maxBufferSize = 100 * 1024 * 1024 // 100MB max buffer size
+    err := ScanLinesWithDynamicBufferSize(inputJSON, maxBufferSize, processLine)
+    if err != nil {
+        return nil, err
+    }
    return &buf, nil
}
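A small usage sketch (not part of the commit) of the reworked validators, which now return errors rather than booleans. It is written as if it sat in the same utils package, to avoid guessing the module import path, and the sample input is an assumption:

package utils

import "fmt"

// Illustrative only: multi-line NDJSON is rejected by IsValidJSON but
// accepted by IsValidJSONLines, so IsValidInput reports (false, true, nil).
func exampleValidate() {
    ndjson := []byte("{\"a\":1}\n{\"b\":2}\n")

    if err := IsValidJSON(ndjson); err != nil {
        fmt.Println("not a single JSON document:", err)
    }
    if err := IsValidJSONLines(ndjson); err != nil {
        fmt.Println("not valid NDJSON:", err)
    }

    isJSON, isJSONLines, err := IsValidInput(ndjson)
    fmt.Println(isJSON, isJSONLines, err) // false true <nil>
}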
50 changes: 50 additions & 0 deletions tui/utils/scan.go
@@ -0,0 +1,50 @@
package utils

import (
    "bufio"
    "bytes"
    "errors"
    "fmt"
)

// ScanLinesWithDynamicBufferSize scans the input byte slice line by line, using a dynamically
// increasing buffer size. It starts with an initial buffer size of 64KB and doubles the buffer
// size each time a line exceeds the current buffer size, up to the specified maximum buffer size.
//
// If a line exceeds the maximum buffer size, it returns an error.
//
// The processLine function is called for each line and should return an error if processing fails.
//
// The function returns an error if the input exceeds the maximum buffer size or if any other
// error occurs during line processing. It returns nil if all lines are processed successfully.
func ScanLinesWithDynamicBufferSize(input []byte, maxBufferSize int, processLine func([]byte) error) error {
    scanner := bufio.NewScanner(bytes.NewReader(input))
    initialBufferSize := 64 * 1024 // 64KB initial buffer size

    for bufferSize := initialBufferSize; bufferSize <= maxBufferSize; bufferSize *= 2 {
        if err := scanWithBufferSize(scanner, bufferSize, maxBufferSize, processLine); err != nil {
            if errors.Is(err, bufio.ErrTooLong) {
                // Buffer size is too small, retry with a larger buffer
                continue
            }
            return err
        }
        // All lines are processed successfully
        return nil
    }

    // Input exceeds maximum buffer size
    return fmt.Errorf("input exceeds maximum buffer size of %d bytes", maxBufferSize)
}

func scanWithBufferSize(scanner *bufio.Scanner, bufferSize, maxBufferSize int, processLine func([]byte) error) error {
    scanner.Buffer(make([]byte, bufferSize), maxBufferSize)

    for scanner.Scan() {
        if err := processLine(scanner.Bytes()); err != nil {
            return err
        }
    }

    return scanner.Err()
}
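And a minimal caller sketch for the new helper (again not in the commit, written as if it lived in the utils package); the NDJSON input and the 1MB cap are illustrative assumptions, while the call sites in this commit use a 100MB cap:

package utils

import (
    "fmt"
    "strings"
)

// Illustrative only: scan two NDJSON lines, one larger than the 64KB
// initial buffer, and record how long each scanned line was.
func exampleScan() error {
    input := []byte(`{"id":1}` + "\n" +
        `{"id":2,"payload":"` + strings.Repeat("x", 80*1024) + `"}` + "\n")

    var lineLengths []int
    processLine := func(line []byte) error {
        lineLengths = append(lineLengths, len(line))
        return nil
    }

    // Hypothetical 1MB cap for the example.
    if err := ScanLinesWithDynamicBufferSize(input, 1024*1024, processLine); err != nil {
        return err
    }
    fmt.Println(lineLengths) // lengths of the two scanned lines
    return nil
}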
