Skip to content

Commit

Permalink
Only lowercase ascii, add software terms
Browse files Browse the repository at this point in the history
  • Loading branch information
blopker committed Feb 13, 2025
1 parent 19b2bc5 commit 996919e
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 32 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
[0.1.16]
[0.1.17]

- Added a download manager so that many different dictionaries can be added later
- Using a larger en_us dictionary as default
- Now checks on every change, instead of on save. May add an option later to toggle this off
- Add a command to the LSP binary to clear cache
- Don't give a code action when a word is not misspelled
- Vendor OpenSSL
- Add 'software_terms'
- Lowercase only ASCII letters when checking words (non-ASCII characters are left unchanged)

[0.1.15]

Expand Down
8 changes: 4 additions & 4 deletions crates/codebook-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl<'de> Deserialize<'de> for ConfigSettings {
D: serde::Deserializer<'de>,
{
/// Lowercases the ASCII letters of every string in `v`.
///
/// Non-ASCII characters are left untouched. Each `String` is modified in
/// place, so no per-element reallocation happens.
fn to_lowercase_vec(v: Vec<String>) -> Vec<String> {
    v.into_iter()
        .map(|mut s| {
            s.make_ascii_lowercase();
            s
        })
        .collect()
}
#[derive(Deserialize)]
struct Helper {
Expand Down Expand Up @@ -163,7 +163,7 @@ impl CodebookConfig {
/// Add a word to the allowlist and save the configuration
pub fn add_word(&self, word: &str) -> Result<bool> {
{
let word = word.to_lowercase();
let word = word.to_ascii_lowercase();
let settings = &mut self.settings.write().unwrap();
// Check if word already exists
if settings.words.contains(&word.to_string()) {
Expand Down Expand Up @@ -289,7 +289,7 @@ impl CodebookConfig {

/// Check if a word is in the custom allowlist
pub fn is_allowed_word(&self, word: &str) -> bool {
let word = word.to_lowercase();
let word = word.to_ascii_lowercase();
self.settings
.read()
.unwrap()
Expand All @@ -300,7 +300,7 @@ impl CodebookConfig {

/// Check if a word should be flagged
pub fn should_flag_word(&self, word: &str) -> bool {
let word = word.to_lowercase();
let word = word.to_ascii_lowercase();
self.settings
.read()
.unwrap()
Expand Down
9 changes: 7 additions & 2 deletions crates/codebook/src/dictionaries/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use lru::LruCache;

use std::{
num::NonZeroUsize,
path::PathBuf,
sync::{Arc, RwLock},
};

Expand Down Expand Up @@ -122,7 +123,7 @@ pub struct TextDictionary {

impl Dictionary for TextDictionary {
fn check(&self, word: &str) -> bool {
let lower = word.to_lowercase();
let lower = word.to_ascii_lowercase();
let words = self
.word_list
.lines()
Expand All @@ -142,9 +143,13 @@ impl Dictionary for TextDictionary {
impl TextDictionary {
    /// Builds a dictionary from an in-memory word list.
    ///
    /// The list is stored with its ASCII letters lowercased; non-ASCII
    /// characters are kept as they are.
    pub fn new(word_list: &str) -> Self {
        Self {
            word_list: word_list.to_ascii_lowercase(),
        }
    }

    /// Builds a dictionary from the word list stored in the file at `path`.
    ///
    /// The file contents get the same ASCII lowercasing as in [`Self::new`].
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be read, e.g. because it does not exist or
    /// is not valid UTF-8. The panic message names the offending path.
    pub fn new_from_path(path: &PathBuf) -> Self {
        let mut word_list = std::fs::read_to_string(path).unwrap_or_else(|err| {
            panic!("failed to read word list {}: {err}", path.display())
        });
        // Lowercase in place instead of allocating a second copy of the file.
        word_list.make_ascii_lowercase();
        Self { word_list }
    }
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion crates/codebook/src/dictionaries/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl DictionaryManager {
return None;
}
};
let dict = TextDictionary::new(text_path.to_str().unwrap());
let dict = TextDictionary::new_from_path(&text_path);
Some(Arc::new(dict))
}
}
47 changes: 27 additions & 20 deletions crates/codebook/src/dictionaries/repo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,47 +42,54 @@ pub enum DictionaryRepo {
Text(TextRepo),
}

static DICTIONARIES: LazyLock<Vec<DictionaryRepo>> = LazyLock::new(|| {
vec![DictionaryRepo::Hunspell(
static HUNSPELL_DICTIONARIES: LazyLock<Vec<HunspellRepo>> = LazyLock::new(|| {
vec![
HunspellRepo::new(
"en_us",
"https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_US/src/hunspell/en_US-large.aff",
"https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_US/src/hunspell/en_US-large.dic",
)),
DictionaryRepo::Hunspell(
),
HunspellRepo::new(
"en",
"https://raw.githubusercontent.com/blopker/dictionaries/refs/heads/main/dictionaries/en/index.aff",
"https://raw.githubusercontent.com/blopker/dictionaries/refs/heads/main/dictionaries/en/index.dic",
)),
DictionaryRepo::Hunspell(
),
HunspellRepo::new(
"en_gb",
"https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_GB/src/hunspell/en_GB-large.aff",
"https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_GB/src/hunspell/en_GB-large.dic",
)),
DictionaryRepo::Text(
),]
});

static TEXT_DICTIONARIES: LazyLock<Vec<TextRepo>> = LazyLock::new(|| {
vec![
TextRepo::new(
"rust",
"https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/rust/dict/rust.txt",
)),
DictionaryRepo::Text(
),
TextRepo::new(
"software_terms",
"https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/software-terms/dict/softwareTerms.txt",
),
TextRepo::new(
"computing_acronyms",
"https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/software-terms/dict/computing-acronyms.txt",
),
TextRepo{
name: "codebook".to_string(),
text: Some(CODEBOOK_DICTIONARY),
url: None
}),
]
},]
});

pub fn get_repo(name: &str) -> Option<DictionaryRepo> {
let res = DICTIONARIES.iter().find(|d| match d {
DictionaryRepo::Hunspell(h) => h.name == name,
DictionaryRepo::Text(t) => t.name == name,
});

match res {
Some(d) => Some(d.clone()),
None => None,
let res = HUNSPELL_DICTIONARIES.iter().find(|d| d.name == name);
if res.is_some() {
return Some(DictionaryRepo::Hunspell(res.unwrap().clone()));
}
let res = TEXT_DICTIONARIES.iter().find(|d| d.name == name);
if res.is_some() {
return Some(DictionaryRepo::Text(res.unwrap().clone()));
}
None
}
6 changes: 4 additions & 2 deletions crates/codebook/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ pub struct Codebook {
manager: DictionaryManager,
}

// Dictionary ids that are appended to the list of dictionary ids before the
// dictionaries are fetched from the manager.
// The custom 'codebook' dictionary could be removed later for a more general solution.
static DEFAULT_DICTIONARIES: &[&str; 3] = &["codebook", "software_terms", "computing_acronyms"];

impl Codebook {
pub fn new(config: Arc<CodebookConfig>) -> Result<Self, Box<dyn std::error::Error>> {
crate::log::init_logging();
Expand Down Expand Up @@ -76,8 +79,7 @@ impl Codebook {
}
None => {}
};
// Push custom codebook dictionary. Could be removed later for a more general solution.
dictionary_ids.push("codebook".to_string());
dictionary_ids.extend(DEFAULT_DICTIONARIES.iter().map(|f| f.to_string()));
let mut dictionaries = Vec::with_capacity(dictionary_ids.len());
for dictionary_id in dictionary_ids {
let dictionary = self.manager.get_dictionary(&dictionary_id);
Expand Down
5 changes: 3 additions & 2 deletions word_lists/codebook.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,20 @@ htm
jsx
linter
linux
lru
lsp
macos
msvc
pbkdf
pc
prerelease
pst
redownload
roadmap
rpc
semibold
sha
ssl
tsx
ttf
typescript
Expand All @@ -36,5 +39,3 @@ uncomment
validator
validators
webp
redownload
LRU
1 change: 1 addition & 0 deletions word_lists/combined.gen.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1997,6 +1997,7 @@ sqrt
square
src
srv
ssl
st
stable
stack
Expand Down

0 comments on commit 996919e

Please sign in to comment.