diff --git a/CHANGELOG.md b/CHANGELOG.md index 2355614..684e567 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html ### Breaking changes +- only parse strings with single colons as fields + ### Compatible changes diff --git a/lib/minidusen/parser.rb b/lib/minidusen/parser.rb index 4f1ccda..faa2466 100644 --- a/lib/minidusen/parser.rb +++ b/lib/minidusen/parser.rb @@ -3,8 +3,8 @@ class Parser class CannotParse < StandardError; end - TEXT_QUERY = /(?:(\-)?"([^"]+)"|(\-)?([\S]+))/ - FIELD_QUERY = /(?:\s|^|(\-))(\w+)\:#{TEXT_QUERY}/ + TEXT_QUERY = /(?:(-)?"([^"]+)"|(-)?(\S+))/ + FIELD_QUERY = /(?:\s|^|(-))(\w+):(?!:)#{TEXT_QUERY}/ class << self diff --git a/spec/minidusen/parser_spec.rb b/spec/minidusen/parser_spec.rb index 9219597..ae37a81 100644 --- a/spec/minidusen/parser_spec.rb +++ b/spec/minidusen/parser_spec.rb @@ -63,6 +63,15 @@ query[0].exclude.should == true end + it 'only parses single colons as fields' do + query = Minidusen::Parser.parse('filetype:docx Namespaced::Klass') + expect(query.size).to eq(2) + expect(query[0].field).to eq('filetype') + expect(query[0].value).to eq('docx') + expect(query[1].field).to eq('text') + expect(query[1].value).to eq('Namespaced::Klass') + end + it 'should ignore invalid utf-8 byte sequences' do term_with_invalid_byte_sequence = "word\255".force_encoding('UTF-8')