Skip to content

Commit

Permalink
Start work on more dict support
Browse files Browse the repository at this point in the history
  • Loading branch information
blopker committed Jan 21, 2025
1 parent 4b5a337 commit a06808a
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 20 deletions.
2 changes: 1 addition & 1 deletion codebook-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub struct ConfigSettings {
impl Default for ConfigSettings {
fn default() -> Self {
Self {
dictionaries: vec!["en".to_string()],
dictionaries: vec!["en_us".to_string()],
words: Vec::new(),
flag_words: Vec::new(),
ignore_paths: Vec::new(),
Expand Down
33 changes: 22 additions & 11 deletions codebook.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
# List of dictionaries to use
dictionaries = ["en_US", "tech_terms"]

# Custom allowed words
words = ["codebook", "allowlist", "aff", "dic", "blopker", "spellbook"]

# Words to always flag
flag_words = ["todo", "fixme"]

# Paths to ignore (glob patterns)
ignore_paths = ["target/**/*", "**/*.json", ".git/**/*"]
dictionaries = [
"en_us",
"tech_terms",
]
words = [
"aff",
"allowlist",
"blopker",
"codebook",
"declarator",
"dic",
"spellbook",
]
flag_words = [
"todo",
"fixme",
]
ignore_paths = [
"target/**/*",
"**/*.json",
".git/**/*",
]
9 changes: 2 additions & 7 deletions codebook/src/dictionary.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::splitter;
use crate::{dictionary_repo::get_codebook_dictionary, splitter};
use codebook_config::CodebookConfig;
use log::{debug, info};
use lru::LruCache;
Expand All @@ -14,11 +14,6 @@ use std::{
use streaming_iterator::StreamingIterator;
use tree_sitter::{Parser, Query, QueryCursor};

static COMMON_DICTIONARY: &str = include_str!("../../word_lists/combined.gen.txt");
fn get_common_dictionary() -> impl Iterator<Item = &'static str> {
COMMON_DICTIONARY.lines().filter(|l| !l.contains('#'))
}

#[derive(Debug, Clone, PartialEq)]
pub struct SpellCheckResult {
pub word: String,
Expand Down Expand Up @@ -58,7 +53,7 @@ impl CodeDictionary {
let dict = spellbook::Dictionary::new(&aff, &dic)
.map_err(|e| format!("Dictionary parse error: {}", e))?;
let mut custom_dictionary: HashSet<String> = HashSet::new();
for word in get_common_dictionary() {
for word in get_codebook_dictionary() {
custom_dictionary.insert(word.to_string());
}
Ok(CodeDictionary {
Expand Down
64 changes: 64 additions & 0 deletions codebook/src/dictionary_repo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use std::sync::LazyLock;

static CODEBOOK_DICTIONARY: &str = include_str!("../../word_lists/combined.gen.txt");

/// Remote location of a single Hunspell dictionary.
///
/// A Hunspell dictionary is stored as two separate resources, so two URLs
/// are kept alongside the name the dictionary is registered under.
#[derive(Debug, Clone)]
struct HunspellDictionaryLocation {
    /// URL of the affix rules file.
    pub aff_url: String,
    /// URL of the word list file.
    pub dict_url: String,
    /// Name of the dictionary.
    pub name: String,
}

impl HunspellDictionaryLocation {
    /// Creates a location from borrowed string slices.
    ///
    /// Each argument is copied into an owned `String`. Note the parameter
    /// order (`name` first) differs from the field declaration order.
    pub fn new(name: &str, aff_url: &str, dict_url: &str) -> Self {
        let name = String::from(name);
        let aff_url = String::from(aff_url);
        let dict_url = String::from(dict_url);

        Self {
            aff_url,
            dict_url,
            name,
        }
    }
}

/// Remote location of a dictionary that is available as a single resource.
#[derive(Debug, Clone)]
struct TextDictionaryLocation {
    /// URL the dictionary can be found at.
    pub url: String,
    /// Name of the dictionary.
    pub name: String,
}

/// Where a dictionary can be obtained from, tagged by its on-disk format.
#[derive(Debug, Clone)]
enum DictionaryLocation {
    /// A Hunspell dictionary, described by a [`HunspellDictionaryLocation`].
    Hunspell(HunspellDictionaryLocation),
    /// A single-resource dictionary, described by a [`TextDictionaryLocation`].
    Text(TextDictionaryLocation),
}

/// Registry of the natural-language dictionaries that can be looked up by
/// name through [`get_natural_dictionary`].
///
/// The list is built on first access: every entry owns `String`s, which are
/// allocated at runtime by `HunspellDictionaryLocation::new`.
static NATURAL_DICTIONARIES: LazyLock<Vec<DictionaryLocation>> = LazyLock::new(|| {
    vec![
        DictionaryLocation::Hunspell(HunspellDictionaryLocation::new(
            "en_us",
            "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_US/src/hunspell/en_US-large.aff",
            "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_US/src/hunspell/en_US-large.dic",
        )),
        DictionaryLocation::Hunspell(HunspellDictionaryLocation::new(
            "en_gb",
            "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_GB/src/hunspell/en_GB-large.aff",
            "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/en_GB/src/hunspell/en_GB-large.dic",
        )),
    ]
});

/// Returns an iterator over the lines of the word list embedded in the binary.
///
/// Any line that contains a `#` anywhere is skipped; all other lines are
/// yielded unchanged.
pub fn get_codebook_dictionary() -> impl Iterator<Item = &'static str> {
    CODEBOOK_DICTIONARY.lines().filter(|l| !l.contains('#'))
}

/// Looks up a registered natural-language dictionary by `name`.
///
/// The comparison is an exact string match against the entry's `name`
/// (so `"en_us"` matches but `"en_US"` does not). Returns a clone of the
/// first matching entry, or `None` when no entry has that name.
///
/// Only Hunspell entries are considered; text entries never match.
pub fn get_natural_dictionary(name: &str) -> Option<DictionaryLocation> {
    NATURAL_DICTIONARIES
        .iter()
        .find(|location| match location {
            DictionaryLocation::Hunspell(hunspell) => hunspell.name == name,
            // Spelled out instead of `_` so that adding a new variant forces
            // this lookup to be revisited at compile time.
            DictionaryLocation::Text(_) => false,
        })
        .cloned()
}
1 change: 1 addition & 0 deletions codebook/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod dictionary;
mod dictionary_repo;
pub mod downloader;
mod log;
mod queries;
Expand Down
2 changes: 1 addition & 1 deletion codebook/src/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ impl LanguageType {
}

// Use https://intmainreturn0.com/ts-visualizer/ to help with writing grammar queries
pub static LANGUAGE_SETTINGS: [LanguageSetting; 8] = [
pub static LANGUAGE_SETTINGS: &[LanguageSetting] = &[
LanguageSetting {
type_: LanguageType::Rust,
name: "rust",
Expand Down

0 comments on commit a06808a

Please sign in to comment.