diff --git a/materialize-elasticsearch/driver.go b/materialize-elasticsearch/driver.go index bd984337b9..7f590a67c5 100644 --- a/materialize-elasticsearch/driver.go +++ b/materialize-elasticsearch/driver.go @@ -305,19 +305,24 @@ func normalizeIndexName(index string, truncateLimit int) string { afterPrefix := false var b strings.Builder for _, r := range index { - // Replace disallowed characters with underscore. Of the various problematic characters in - // an Elasticsearch index name, a "." is the only one that Flow allows in a collection name. - if r == '.' { - r = '_' + // Strip disallowed prefixes. + if !afterPrefix && slices.Contains([]rune{'_', '-', '+', '.'}, r) { + continue } - // Strip disallowed prefixes that may be present in a Flow collection name. A '.' is also a - // bad prefix, but those were already replaced with '_' above. - if !afterPrefix && (r == '-' || r == '_') { - continue + // Replace disallowed characters with underscore as long as we aren't still at the beginning + // of the string. Most of the characters in this list are named in the docs, but ES will + // also truncate index names that contain a '#' character to drop everything including & + // after that '#' character, so we'll normalize those too. + if slices.Contains([]rune{'*', '<', '"', ' ', '\\', '/', ',', '|', '>', '?', ':', '#'}, r) { + if !afterPrefix { + continue + } + r = '_' } afterPrefix = true + // Index names must be lowercase. char := strings.ToLower(string(r)) if b.Len()+len(char) > truncateLimit { // Truncate extremely long names. These must be less than 255 bytes. diff --git a/materialize-elasticsearch/driver_test.go b/materialize-elasticsearch/driver_test.go index 2c75ed98c0..51f815a4d7 100644 --- a/materialize-elasticsearch/driver_test.go +++ b/materialize-elasticsearch/driver_test.go @@ -118,7 +118,13 @@ func TestNormalizeIndexName(t *testing.T) { name: "dots", byteLength: maxByteLength, input: ".some..collection", - want: "some__collection", + want: "some..collection", + }, + { + name: "normalized characters", + byteLength: maxByteLength, + input: `??weird*<"\/,|>?:#index`, + want: "weird___________index", }, { name: "capitalized",