Skip to content

Commit

Permalink
materialize-elasticsearch: extend index name normalization
Browse files Browse the repository at this point in the history
Previously we only normalized the problematic characters that can appear in Flow collection
names, but that is not sufficient, since the index name configured in the resource
configuration can be anything. It is usually the collection name, but not always.

This extends normalization to all characters that pose problems in index names.
  • Loading branch information
williamhbaker committed Feb 15, 2024
1 parent a03bf81 commit 4d9d8cc
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
21 changes: 13 additions & 8 deletions materialize-elasticsearch/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,19 +305,24 @@ func normalizeIndexName(index string, truncateLimit int) string {
afterPrefix := false
var b strings.Builder
for _, r := range index {
// Replace disallowed characters with underscore. Of the various problematic characters in
// an Elasticsearch index name, a "." is the only one that Flow allows in a collection name.
if r == '.' {
r = '_'
// Strip disallowed prefixes.
if !afterPrefix && slices.Contains([]rune{'_', '-', '+', '.'}, r) {
continue
}

// Strip disallowed prefixes that may be present in a Flow collection name. A '.' is also a
// bad prefix, but those were already replaced with '_' above.
if !afterPrefix && (r == '-' || r == '_') {
continue
// Replace disallowed characters with an underscore, unless we are still at the beginning
// of the string, in which case they are dropped. Most of the characters in this list are
// named in the docs, but ES will also truncate index names that contain a '#' character,
// dropping the '#' and everything after it, so we'll normalize those too.
if slices.Contains([]rune{'*', '<', '"', ' ', '\\', '/', ',', '|', '>', '?', ':', '#'}, r) {
if !afterPrefix {
continue
}
r = '_'
}
afterPrefix = true

// Index names must be lowercase.
char := strings.ToLower(string(r))
if b.Len()+len(char) > truncateLimit {
// Truncate extremely long names. These must be less than 255 bytes.
Expand Down
8 changes: 7 additions & 1 deletion materialize-elasticsearch/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,13 @@ func TestNormalizeIndexName(t *testing.T) {
name: "dots",
byteLength: maxByteLength,
input: ".some..collection",
want: "some__collection",
want: "some..collection",
},
{
name: "normalized characters",
byteLength: maxByteLength,
input: `??weird*<"\/,|>?:#index`,
want: "weird___________index",
},
{
name: "capitalized",
Expand Down

0 comments on commit 4d9d8cc

Please sign in to comment.