Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Numeric range support #403

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions PATTERNS.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,51 @@ Thus, the following Pattern would match both JSON events above:
Quamina can match numeric values with precision and range exactly the same as that provided by
Go's `float64` data type, which is said to conform to IEEE 754 `binary64`.

## Numeric Range Matching

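Quamina supports matching numeric values against ranges. A range is written as an array of alternating operators and numbers; a lower-bound operator and an upper-bound operator can be combined to form a closed or open interval: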

```json
{
"item": {
"quantity": [ { "numeric": [ ">", 0, "<=", 5 ] } ],
"price": [ { "numeric": [ "<", 10 ] } ],
"weight": [ { "numeric": [ "=", 35 ] } ]
}
}
```

### Operators
- `=`: Exact match
- `<`: Less than
- `<=`: Less than or equal to
- `>`: Greater than
- `>=`: Greater than or equal to

### Examples
```json
// Match prices between $50 and $100 (exclusive)
{
"price": [ {"numeric": [">", 50, "<", 100]} ]
}

// Match quantities greater than or equal to 10
{
"quantity": [ {"numeric": [">=", 10]} ]
}

// Match temperatures less than 0
{
"temperature": [ {"numeric": ["<", 0]} ]
}
```

### Notes
- Operators can be combined to create ranges
- Each bound (upper/lower) can only be specified once
- Values must be numeric (integers or floating point)
- Ranges support negative numbers and decimals

## Extended Patterns
An **Extended Pattern** **MUST** be a JSON object containing
a single field whose name is called the **Pattern Type**.
Expand Down
45 changes: 45 additions & 0 deletions cl2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,47 @@ var (
"}",
}
regexpMatches = []int{220}

// numericRules are patterns that exercise the "numeric" operator on the
// geometry.firstCoordinates fields; TestRulerCl2 loads them with bm.addRules.
numericRules = []string{
// Rule 1: type is "Polygon" and x is exactly -122.42916360922355.
"{\n" +
" \"geometry\": {\n" +
" \"type\": [ \"Polygon\" ],\n" +
" \"firstCoordinates\": {\n" +
" \"x\": [ { \"numeric\": [ \"=\", -122.42916360922355 ] } ]\n" +
" }\n" +
" }\n" +
"}",
// Rule 2: type is "MultiPolygon" and z is exactly 0.
"{\n" +
" \"geometry\": {\n" +
" \"type\": [ \"MultiPolygon\" ],\n" +
" \"firstCoordinates\": {\n" +
" \"z\": [ { \"numeric\": [ \"=\", 0 ] } ]\n" +
" }\n" +
" }\n" +
"}",
// Rule 3: x strictly below -122.41600944012424 (upper bound only).
"{\n" +
" \"geometry\": {\n" +
" \"firstCoordinates\": {\n" +
" \"x\": [ { \"numeric\": [ \"<\", -122.41600944012424 ] } ]\n" +
" }\n" +
" }\n" +
"}",
// Rule 4: x strictly above -122.41600944012424 (lower bound only).
"{\n" +
" \"geometry\": {\n" +
" \"firstCoordinates\": {\n" +
" \"x\": [ { \"numeric\": [ \">\", -122.41600944012424 ] } ]\n" +
" }\n" +
" }\n" +
"}",
// Rule 5: x strictly between -122.46471267081272 and -122.4063085128395.
"{\n" +
" \"geometry\": {\n" +
" \"firstCoordinates\": {\n" +
" \"x\": [ { \"numeric\": [ \">\", -122.46471267081272, \"<\", -122.4063085128395 ] } ]\n" +
" }\n" +
" }\n" +
"}",
}
// numericMatches is passed to bm.addRules together with numericRules.
// NOTE(review): presumably the expected number of matching events for each
// rule, in the same order as numericRules — confirm against the benchmarker.
numericMatches = []int{2, 120, 148948, 64120, 127053}
/* will add when we have numeric
complexArraysRules := []string{
"{\n" +
Expand Down Expand Up @@ -280,6 +321,10 @@ func TestRulerCl2(t *testing.T) {
bm = newBenchmarker()
bm.addRules(regexpRules, regexpMatches, true)
fmt.Printf("REGEXP events/sec: %.1f\n", bm.run(t, lines))

bm = newBenchmarker()
bm.addRules(numericRules, numericMatches, true)
fmt.Printf("NUMERIC MATCHES events/sec: %.1f\n", bm.run(t, lines))
}

type benchmarker struct {
Expand Down
Binary file added code_gen/code_gen
Binary file not shown.
6 changes: 5 additions & 1 deletion field_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
// thread-safe.
type fieldMatcher struct {
// updateable holds the fmFields value behind an atomic pointer.
updateable atomic.Pointer[fmFields]
// NOTE(review): table is not referenced in any of the visible hunks —
// confirm it is actually needed before merging.
table *valueMatcher
// vals accumulates the pattern values whose transitions lead to this
// matcher; addTransition appends to it for every value it processes.
// NOTE(review): this slice is mutated directly rather than through the
// atomic updateable pointer — confirm that is safe under concurrent use.
vals []typedVal
}

// fmFields contains the updateable fields in fieldMatcher.
Expand Down Expand Up @@ -131,7 +133,9 @@ func (m *fieldMatcher) addTransition(field *patternField, printer printer) []*fi
// cases where this doesn't happen and reduce the number of fieldMatchStates
var nextFieldMatchers []*fieldMatcher
for _, val := range field.vals {
nextFieldMatchers = append(nextFieldMatchers, vm.addTransition(val, printer))
fm := vm.addTransition(val, printer)
fm.vals = append(fm.vals, val)
nextFieldMatchers = append(nextFieldMatchers, fm)
}
m.update(freshStart)
return nextFieldMatchers
Expand Down
42 changes: 42 additions & 0 deletions numbits.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,45 @@
}
return b
}

// qNumberToFloat64 converts a qNumber back to float64
func qNumberToFloat64(qn qNumber) float64 {
// Convert from base-128 back to numbits
var nb numbits
// Process bytes in big-endian order
for i := 0; i < len(qn); i++ {
// Shift existing bits left by 7 and add new 7 bits
nb = (nb << 7) | numbits(qn[i]&0x7f)
}

// Determine how many 7-bit groups were dropped during encoding.
// The original encoding uses MaxBytesInEncoding groups.
dropped := MaxBytesInEncoding - len(qn)

// Restore the original numbits value by left-shifting to “recreate” the dropped 7-bit groups.
restored := nb << (7 * uint(dropped))

Check failure on line 78 in numbits.go

View workflow job for this annotation

GitHub Actions / Code Linting (1.22, ubuntu-latest)

G115: integer overflow conversion int -> uint (gosec)

// Convert numbits to a uint64
u := uint64(restored)

// Unmask.
// Notice: The original masking did:
// mask = (if original was positive: 1<<63, or if negative: ^0)
// and then: masked = original ^ mask.
// Since our masked value now (u) has its sign bit inverted relative to the original,
// we can recover the original by testing u’s top bit.
var mask uint64
if u&(1<<63) != 0 {
// Originally positive: mask was 1<<63.
mask = 1 << 63
} else {
// Originally negative: mask was all ones.
mask = 0xffffffffffffffff
}

// Unmask the value by XORing with the mask.
u = u ^ mask

// Convert the result back into a float64.
return math.Float64frombits(u)
}
102 changes: 102 additions & 0 deletions pattern.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const (
monocaseType
wildcardType
regexpType
numericRangeType
)

// typedVal represents the value of a field in a pattern, giving the value and the type of pattern.
Expand All @@ -33,6 +34,7 @@ type typedVal struct {
val string
list [][]byte
parsedRegexp regexpRoot
numericRange *Range
}

// patternField represents a field in a pattern.
Expand Down Expand Up @@ -186,6 +188,7 @@ func readPatternArray(pb *patternBuild) error {
func readSpecialPattern(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, containsExclusive string, err error) {
containsExclusive = ""
pathVals = valsIn

t, err := pb.jd.Token()
if err != nil {
return
Expand All @@ -211,9 +214,12 @@ func readSpecialPattern(pb *patternBuild, valsIn []typedVal) (pathVals []typedVa
case "regexp":
containsExclusive = tt
pathVals, err = readRegexpSpecial(pb, pathVals)
case "numeric":
pathVals, err = readNumericRangeSpecial(pb, valsIn)
default:
err = errors.New("unrecognized in special pattern: " + tt)
}

return
}

Expand Down Expand Up @@ -270,3 +276,99 @@ func readExistsSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal
}
return
}

// readNumericRangeSpecial parses the body of a "numeric" special pattern,
// for example the array and closing brace in
//
//	{ "numeric": [ ">", 0, "<=", 5 ] }
//
// It is called after the "numeric" key has been read from pb.jd. The array
// must hold alternating operator strings ("=", "<", "<=", ">", ">=") and
// numbers. "=" sets both bounds to the same closed value; the other
// operators set one bound each. An error is returned when
//   - the next token is not the start of an array,
//   - an operator is not a string or is not one of the five above,
//   - a bound is specified more than once (including via "="),
//   - an operator is not followed by a number,
//   - no bound is specified at all,
//   - NewRange rejects the bounds, or
//   - the array is not followed by the closing brace of the pattern object.
//
// On success the returned slice is valsIn with one numericRangeType value
// appended; on failure it is nil.
func readNumericRangeSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, err error) {
	t, err := pb.jd.Token()
	if err != nil {
		return nil, err
	}
	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return nil, errors.New("numeric range pattern must be an array")
	}

	// A bound that is never mentioned stays empty and open (unbounded).
	var bottom, top string
	openBottom, openTop := true, true
	var haveBottom, haveTop bool

	for {
		t, err = pb.jd.Token()
		if err != nil {
			return nil, err
		}
		if delim, ok := t.(json.Delim); ok && delim == ']' {
			break
		}
		op, ok := t.(string)
		if !ok {
			return nil, errors.New("numeric range operator must be a string")
		}

		// Work out which bound(s) the operator sets before touching the
		// value, so that a bad operator is reported as such.
		var setsBottom, setsTop, open bool
		switch op {
		case "=":
			setsBottom, setsTop = true, true
		case "<":
			setsTop, open = true, true
		case "<=":
			setsTop = true
		case ">":
			setsBottom, open = true, true
		case ">=":
			setsBottom = true
		default:
			return nil, fmt.Errorf("invalid numeric range operator: %s", op)
		}
		if setsTop && haveTop {
			return nil, errors.New("duplicate upper bound in numeric range")
		}
		if setsBottom && haveBottom {
			return nil, errors.New("duplicate lower bound in numeric range")
		}

		// The operand must be a JSON number. Without this check a missing
		// operand would turn the closing ']' into a bound.
		t, err = pb.jd.Token()
		if err != nil {
			return nil, err
		}
		var valStr string
		switch v := t.(type) {
		case float64:
			valStr = fmt.Sprintf("%v", v)
		case json.Number:
			valStr = v.String()
		default:
			return nil, fmt.Errorf("numeric range operator %q must be followed by a number", op)
		}

		if setsBottom {
			bottom, openBottom, haveBottom = valStr, open, true
		}
		if setsTop {
			top, openTop, haveTop = valStr, open, true
		}
	}

	if !haveBottom && !haveTop {
		return nil, errors.New("numeric range must specify at least one bound")
	}

	r, err := NewRange(bottom, openBottom, top, openTop, false)
	if err != nil {
		return nil, err
	}

	// Consume the closing brace of the {"numeric": [...]} object and make
	// sure that is really what follows the array.
	t, err = pb.jd.Token()
	if err != nil {
		return nil, err
	}
	if delim, ok := t.(json.Delim); !ok || delim != '}' {
		return nil, errors.New("trailing garbage in numeric range pattern")
	}

	return append(valsIn, typedVal{vType: numericRangeType, numericRange: r}), nil
}
Loading
Loading