Skip to content

Commit

Permalink
Ignore words less than 3 chars long
Browse files Browse the repository at this point in the history
  • Loading branch information
blopker committed Jan 21, 2025
1 parent d68c9f1 commit 4b5a337
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 12 deletions.
19 changes: 9 additions & 10 deletions codebook/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ impl CodeDictionary {
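/// Returns a Vec of words, each paired with its (start char, line) position.
/// Skips URLs, and drops any word shorter than 3 bytes (`MIN_WORD_LENGTH`);
/// the length check runs before camel-case splitting.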
fn get_words_from_text(&self, text: &str) -> Vec<(String, (u32, u32))> {
const MIN_WORD_LENGTH: usize = 3;
let mut words = Vec::new();
let mut current_word = String::new();
let mut word_start_char: u32 = 0;
Expand All @@ -184,6 +185,10 @@ impl CodeDictionary {
word_start_char: u32,
current_line: u32| {
if !current_word.is_empty() {
if current_word.len() < MIN_WORD_LENGTH {
current_word.clear();
return;
}
let split = splitter::split_camel_case(&current_word);
for split_word in split {
words.push((
Expand Down Expand Up @@ -349,15 +354,10 @@ mod dictionary_tests {
("calc", (23, 1)),
("wrld", (28, 1)),
("I'm", (12, 2)),
("a", (16, 2)),
("contraction", (18, 2)),
("don't", (31, 2)),
("ignore", (37, 2)),
("me", (44, 2)),
("this", (12, 3)),
("is", (17, 3)),
("a", (20, 3)),
("rd", (23, 3)),
("line", (26, 3)),
];
let words = dict.get_words_from_text(text);
Expand Down Expand Up @@ -397,11 +397,10 @@ mod dictionary_tests {
let words = dict.get_words_from_text(text);
println!("{:?}", words);
assert_eq!(words[0].0, "I'm");
assert_eq!(words[1].0, "a");
assert_eq!(words[2].0, "contraction");
assert_eq!(words[3].0, "wouldn't");
assert_eq!(words[4].0, "you");
assert_eq!(words[5].0, "agree");
assert_eq!(words[1].0, "contraction");
assert_eq!(words[2].0, "wouldn't");
assert_eq!(words[3].0, "you");
assert_eq!(words[4].0, "agree");
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion codebook/tests/test_files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ fn test_example_files() {
"example.md",
vec!["Wolrd", "bvd", "regulr", "splellin", "wolrd"],
),
("example.txt", vec!["Splellin", "bd"]),
("example.txt", vec!["Splellin"]),
("example.rs", vec!["birt", "calclate", "curent", "jalopin"]),
(
"example.go",
Expand Down
2 changes: 1 addition & 1 deletion codebook/tests/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ pub fn get_processor() -> CodeDictionary {
}

/// Initializes `env_logger` for the test suite (builder configured with `is_test(true)`).
pub fn init_logging() {
// NOTE(review): this span is a diff rendering; the next line appears to be the
// pre-commit version (removed). It panics via `unwrap()` if `try_init()` returns Err.
env_logger::builder().is_test(true).try_init().unwrap();
// Appears to be the post-commit version (added): the `Result` of `try_init()` is
// deliberately discarded, so a failed initialization no longer panics.
// Presumably this is for the case where a logger is already installed by an
// earlier test — confirm against the env_logger docs.
let _ = env_logger::builder().is_test(true).try_init();
}

0 comments on commit 4b5a337

Please sign in to comment.