Skip to content

Commit

Permalink
feat: can find words that include special characters
Browse files Browse the repository at this point in the history
Previously, only words that consisted of letters, numbers, underscores,
and dashes were matched. This change allows for words that include
special word-like characters such as:

- umlauts (e.g. 'ö', 'ä', 'å', 'ü')
- accented characters (e.g. 'é', 'ç', 'ñ', 'ü')
- Cyrillic characters (e.g. 'к', 'о', 'с', 'м')
- Chinese characters (e.g. '你', '好')
- Japanese characters (e.g. '日', '本', '語')
- Korean characters (e.g. '한', '국', '어')
- Greek characters (e.g. 'τ', 'ο', 'π', 'ι', 'κ', 'ή')
- emoji (e.g. '😄', '👍', '👎')
  • Loading branch information
mikavilpas committed Nov 7, 2024
1 parent 41276a0 commit 81cc172
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 1 deletion.
24 changes: 23 additions & 1 deletion lua/blink-ripgrep/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,34 @@
---@field get_completions? fun(self: blink.cmp.Source, context: blink.cmp.Context, callback: fun(response: blink.cmp.CompletionResponse | nil)): nil
local RgSource = {}

-- A single byte that may be part of a word: "_" or "-", an ASCII digit or
-- letter, or any byte in the range 128-255. The high range is what lets
-- multi-byte UTF-8 sequences (umlauts, CJK, emoji, ...) count as word
-- characters; individual bytes are not validated as well-formed UTF-8.
local word_character = vim.lpeg.S("_-")
  + vim.lpeg.R("09", "AZ", "az", "\128\255")

-- Any single byte that is not a word character.
local non_word_character = 1 - word_character

-- Captures every run of word characters in the subject into a table, in
-- order of appearance. Non-word characters before and after each run are
-- consumed without being captured.
local collect_pattern
do
  local separators = non_word_character ^ 0
  local word = vim.lpeg.C(word_character ^ 1)

  collect_pattern = vim.lpeg.Ct((separators * word * separators) ^ 0)
end

---Returns the last word found in the given text, or an empty string when the
---text contains no word at all.
---@param text_before_cursor string "The text of the entire line before the cursor"
---@return string
function RgSource.match_prefix(text_before_cursor)
  local words = vim.lpeg.match(collect_pattern, text_before_cursor)
  if not words then
    return ""
  end

  -- The capture table is empty when no word was found.
  return words[#words] or ""
end

---Default prefix extractor: takes the part of the current line that ends at
---`context.cursor[2]` and returns the last word found in it.
---@param context blink.cmp.Context
---@return string
local function default_get_prefix(context)
  -- Only the text up to the cursor position is relevant for the prefix.
  local text_before_cursor = context.line:sub(1, context.cursor[2])

  -- The former ASCII-only `[%w_-]+$` match was dead code: its result was
  -- immediately shadowed by the call below, so it has been removed.
  return RgSource.match_prefix(text_before_cursor)
end

Expand Down
129 changes: 129 additions & 0 deletions spec/blink-ripgrep/get_prefix_spec.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
local assert = require("luassert")
local blink_ripgrep = require("blink-ripgrep")

describe("match_prefix", function()
-- Plain ASCII words (letters, digits, "-" and "_") are returned unchanged.
-- luassert expects the arguments in (expected, actual) order.
it("for simple strings", function()
  assert.are_same("hello", blink_ripgrep.match_prefix("hello"))
  assert.are_same("abc123", blink_ripgrep.match_prefix("abc123"))
  assert.are_same("abc-123", blink_ripgrep.match_prefix("abc-123"))
  assert.are_same("abc_123", blink_ripgrep.match_prefix("abc_123"))
end)

-- Text in front of the final word is ignored; only the last word is returned.
-- luassert expects the arguments in (expected, actual) order.
it(
  "matches when there is one nonmatching piece of input in the beginning",
  function()
    assert.are_same("hello", blink_ripgrep.match_prefix(".hello"))
    assert.are_same("hello", blink_ripgrep.match_prefix(",hello"))
    assert.are_same(
      "hello",
      blink_ripgrep.match_prefix(".,,!@!@$@%<<@$<hello")
    )
    assert.are_same("hello", blink_ripgrep.match_prefix(" hello"))
    assert.are_same("hello", blink_ripgrep.match_prefix("random_text hello"))
    assert.are_same("hello", blink_ripgrep.match_prefix("-- hello"))
    assert.are_same("abc123", blink_ripgrep.match_prefix("-- abc123"))
    assert.are_same("abc-123", blink_ripgrep.match_prefix("-- abc-123"))
    assert.are_same("abc_123", blink_ripgrep.match_prefix("-- abc_123"))
  end
)

-- With several words separated by non-word text, the last word wins.
-- luassert expects the arguments in (expected, actual) order.
it(
  "matches when there are multiple nonmatching pieces of input in the beginning",
  function()
    assert.are_same("hello", blink_ripgrep.match_prefix(".hello.hello"))
    assert.are_same("hello", blink_ripgrep.match_prefix(",hello,hello"))
    assert.are_same(
      "hello",
      blink_ripgrep.match_prefix(".,,!@!@$@%<<@$<hello.,,!@!@$@%<<@$<hello")
    )
    assert.are_same("hello", blink_ripgrep.match_prefix(" hello hello"))

    assert.are_same(
      "hello",
      blink_ripgrep.match_prefix("random_text hello hello")
    )
    assert.are_same("hello", blink_ripgrep.match_prefix("-- hello hello"))
    assert.are_same("abc123", blink_ripgrep.match_prefix("-- abc123 abc123"))
    assert.are_same(
      "abc-123",
      blink_ripgrep.match_prefix("-- abc-123 abc-123")
    )
    assert.are_same(
      "abc_123",
      blink_ripgrep.match_prefix("-- abc_123 abc_123")
    )
  end
)

-- Dashes and underscores join parts into a single word.
-- luassert expects the arguments in (expected, actual) order.
it("for multipart strings", function()
  -- three parts
  assert.are_same(
    "hello-world-today",
    blink_ripgrep.match_prefix("hello-world-today")
  )

  -- three parts with numbers
  assert.are_same(
    "abc123-def456-ghi789",
    blink_ripgrep.match_prefix("abc123-def456-ghi789")
  )

  -- multiple parts with mixed dashes and underscores
  assert.are_same(
    "abc-123_def-456_ghi-789",
    blink_ripgrep.match_prefix("abc-123_def-456_ghi-789")
  )
end)

-- Words made of multi-byte UTF-8 characters are returned whole.
-- luassert expects the arguments in (expected, actual) order.
it("matches special characters", function()
  -- umlauts and other special characters
  assert.are_same("yöllä", blink_ripgrep.match_prefix("yöllä")) -- Finnish word with 'ö' and 'ä'
  assert.are_same("über", blink_ripgrep.match_prefix("über")) -- German word with 'ü'
  assert.are_same("übermensch", blink_ripgrep.match_prefix("übermensch")) -- German compound word with 'ü'
  assert.are_same("mañana", blink_ripgrep.match_prefix("mañana")) -- Spanish word with 'ñ'
  assert.are_same("Ångström", blink_ripgrep.match_prefix("Ångström")) -- Swedish word with 'Å' and 'ö'
  assert.are_same("Straße", blink_ripgrep.match_prefix("Straße")) -- German word with 'ß'
  assert.are_same("český", blink_ripgrep.match_prefix("český")) -- Czech word with 'č'
  assert.are_same("naïve", blink_ripgrep.match_prefix("naïve")) -- French word with 'ï'
  assert.are_same("façade", blink_ripgrep.match_prefix("façade")) -- French word with 'ç'
  assert.are_same("résumé", blink_ripgrep.match_prefix("résumé")) -- French word with 'é'
  assert.are_same("космос", blink_ripgrep.match_prefix("космос")) -- Russian word with Cyrillic characters
  assert.are_same("你好", blink_ripgrep.match_prefix("你好")) -- Chinese characters
  assert.are_same("日本語", blink_ripgrep.match_prefix("日本語")) -- Japanese characters
  assert.are_same("한국어", blink_ripgrep.match_prefix("한국어")) -- Korean characters
  assert.are_same("τοπική", blink_ripgrep.match_prefix("τοπική")) -- Greek word with 'π' and 'ή'
end)

-- Emoji are multi-byte UTF-8 too, so they count as word characters.
-- luassert expects the arguments in (expected, actual) order.
it("matches emoji", function()
  -- because why not 😄
  assert.are_same("👍👎", blink_ripgrep.match_prefix("👍👎"))
  assert.are_same("👍-👎", blink_ripgrep.match_prefix("👍-👎"))
end)

-- ASCII punctuation in front of a word is not part of the prefix.
-- luassert expects the arguments in (expected, actual) order.
it("does not include punctuation characters", function()
  assert.are_same("hello", blink_ripgrep.match_prefix("!hello"))
  assert.are_same("world", blink_ripgrep.match_prefix("?world"))
  assert.are_same("hashtag", blink_ripgrep.match_prefix("#hashtag"))
  assert.are_same("money", blink_ripgrep.match_prefix("$money"))
  assert.are_same("value", blink_ripgrep.match_prefix("%value"))
  assert.are_same("and", blink_ripgrep.match_prefix("&and"))
  assert.are_same("star", blink_ripgrep.match_prefix("*star"))
  assert.are_same("email", blink_ripgrep.match_prefix("@email"))
  assert.are_same("tilde", blink_ripgrep.match_prefix("~tilde"))
  assert.are_same("semicolon", blink_ripgrep.match_prefix(";semicolon"))
  assert.are_same("colon", blink_ripgrep.match_prefix(":colon"))
end)

-- Whitespace and control characters around a word are stripped, including
-- whitespace that trails the last word.
-- luassert expects the arguments in (expected, actual) order.
it("does not include whitespace and control characters", function()
  assert.are_same("hello", blink_ripgrep.match_prefix(" hello"))
  assert.are_same("world", blink_ripgrep.match_prefix("world "))
  assert.are_same("text", blink_ripgrep.match_prefix("\t\ttext"))
  assert.are_same("newline", blink_ripgrep.match_prefix("\nnewline"))
end)

-- Non-ASCII symbols are multi-byte in UTF-8 and therefore kept in the prefix.
-- luassert expects the arguments in (expected, actual) order.
it("includes symbols", function()
  assert.are_same("©copyright", blink_ripgrep.match_prefix("©copyright"))
  assert.are_same("®registered", blink_ripgrep.match_prefix("®registered"))
  assert.are_same("™trademark", blink_ripgrep.match_prefix("™trademark"))
end)
end)

0 comments on commit 81cc172

Please sign in to comment.