diff --git a/codebook-config/src/lib.rs b/codebook-config/src/lib.rs index 00f0f16..49de660 100644 --- a/codebook-config/src/lib.rs +++ b/codebook-config/src/lib.rs @@ -32,7 +32,7 @@ pub struct ConfigSettings { impl Default for ConfigSettings { fn default() -> Self { Self { - dictionaries: vec!["en".to_string()], + dictionaries: vec!["en_us".to_string()], words: Vec::new(), flag_words: Vec::new(), ignore_paths: Vec::new(), diff --git a/codebook.toml b/codebook.toml index 5605224..1338112 100644 --- a/codebook.toml +++ b/codebook.toml @@ -1,11 +1,22 @@ -# List of dictionaries to use -dictionaries = ["en_US", "tech_terms"] - -# Custom allowed words -words = ["codebook", "allowlist", "aff", "dic", "blopker", "spellbook"] - -# Words to always flag -flag_words = ["todo", "fixme"] - -# Paths to ignore (glob patterns) -ignore_paths = ["target/**/*", "**/*.json", ".git/**/*"] +dictionaries = [ + "en_us", + "tech_terms", +] +words = [ + "aff", + "allowlist", + "blopker", + "codebook", + "declarator", + "dic", + "spellbook", +] +flag_words = [ + "todo", + "fixme", +] +ignore_paths = [ + "target/**/*", + "**/*.json", + ".git/**/*", +] diff --git a/codebook/src/dictionary.rs b/codebook/src/dictionary.rs index 42f594c..6c1e53c 100644 --- a/codebook/src/dictionary.rs +++ b/codebook/src/dictionary.rs @@ -1,4 +1,4 @@ -use crate::splitter; +use crate::{dictionary_repo::get_codebook_dictionary, splitter}; use codebook_config::CodebookConfig; use log::{debug, info}; use lru::LruCache; @@ -14,11 +14,6 @@ use std::{ use streaming_iterator::StreamingIterator; use tree_sitter::{Parser, Query, QueryCursor}; -static COMMON_DICTIONARY: &str = include_str!("../../word_lists/combined.gen.txt"); -fn get_common_dictionary() -> impl Iterator { - COMMON_DICTIONARY.lines().filter(|l| !l.contains('#')) -} - #[derive(Debug, Clone, PartialEq)] pub struct SpellCheckResult { pub word: String, @@ -58,7 +53,7 @@ impl CodeDictionary { let dict = spellbook::Dictionary::new(&aff, &dic) .map_err(|e| format!("Dictionary parse error: {}", e))?; let mut custom_dictionary: HashSet = HashSet::new(); - for word in get_common_dictionary() { + for word in get_codebook_dictionary() { custom_dictionary.insert(word.to_string()); } Ok(CodeDictionary { diff --git a/codebook/src/dictionary_repo.rs b/codebook/src/dictionary_repo.rs new file mode 100644 index 0000000..f77dd5a --- /dev/null +++ b/codebook/src/dictionary_repo.rs @@ -0,0 +1,64 @@ +use std::sync::LazyLock; + +static CODEBOOK_DICTIONARY: &str = include_str!("../../word_lists/combined.gen.txt"); + +#[derive(Clone, Debug)] +struct HunspellDictionaryLocation { + pub aff_url: String, + pub dict_url: String, + pub name: String, +} + +impl HunspellDictionaryLocation { + pub fn new(name: &str, aff_url: &str, dict_url: &str) -> Self { + Self { + aff_url: aff_url.to_string(), + dict_url: dict_url.to_string(), + name: name.to_string(), + } + } +} + +#[derive(Clone, Debug)] +struct TextDictionaryLocation { + pub url: String, + pub name: String, +} + +#[derive(Clone, Debug)] +enum DictionaryLocation { + Hunspell(HunspellDictionaryLocation), + Text(TextDictionaryLocation), +} + +static NATRUAL_DICTIONARIES: LazyLock> = LazyLock::new(|| { + vec![DictionaryLocation::Hunspell( + HunspellDictionaryLocation::new( + "en_us", + "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_US/src/hunspell/en_US-large.aff", + "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_US/src/hunspell/en_US-large.dic", + )), + DictionaryLocation::Hunspell( + HunspellDictionaryLocation::new( + "en_gb", + "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_GB/src/hunspell/en_GB-large.aff", + "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_GB/src/hunspell/en_GB-large.dic", + )), + ] +}); + +pub fn get_codebook_dictionary() -> impl Iterator { + CODEBOOK_DICTIONARY.lines().filter(|l| !l.contains('#')) +} + +pub fn get_natural_dictionary(name: &str) -> Option { + let res = NATRUAL_DICTIONARIES.iter().find(|d| match d { + DictionaryLocation::Hunspell(h) => h.name == name, + _ => false, + }); + + match res { + Some(d) => Some(d.clone()), + None => None, + } +} diff --git a/codebook/src/lib.rs b/codebook/src/lib.rs index ee802e0..25f00b3 100644 --- a/codebook/src/lib.rs +++ b/codebook/src/lib.rs @@ -1,4 +1,5 @@ pub mod dictionary; +mod dictionary_repo; pub mod downloader; mod log; mod queries; diff --git a/codebook/src/queries.rs b/codebook/src/queries.rs index 7a2a973..6794289 100644 --- a/codebook/src/queries.rs +++ b/codebook/src/queries.rs @@ -31,7 +31,7 @@ impl LanguageType { } // Use https://intmainreturn0.com/ts-visualizer/ to help with writing grammar queries -pub static LANGUAGE_SETTINGS: [LanguageSetting; 8] = [ +pub static LANGUAGE_SETTINGS: &[LanguageSetting] = &[ LanguageSetting { type_: LanguageType::Rust, name: "rust",