From 766839bf4bb26416028261431456cdf083f40303 Mon Sep 17 00:00:00 2001 From: travolin Date: Fri, 22 Nov 2024 09:24:54 -0800 Subject: [PATCH] Add paging to results (#557) * add page offset for paging based on selection * add check for empty queries --------- Co-authored-by: travolin --- .../src/pages/bigmode/BigMode.tsx | 1 + .../src/pages/search/SearchPage.tsx | 32 ++++++ apps/tauri/src/cmd.rs | 2 + crates/shared/src/request.rs | 1 + crates/spyglass-searcher/src/client/local.rs | 3 +- crates/spyglass-searcher/src/lib.rs | 7 +- crates/spyglass/src/api/handler/search.rs | 104 +++++++++--------- 7 files changed, 97 insertions(+), 53 deletions(-) diff --git a/apps/desktop-client/src/pages/bigmode/BigMode.tsx b/apps/desktop-client/src/pages/bigmode/BigMode.tsx index fd3f5fca5..13fbbb596 100644 --- a/apps/desktop-client/src/pages/bigmode/BigMode.tsx +++ b/apps/desktop-client/src/pages/bigmode/BigMode.tsx @@ -111,6 +111,7 @@ export function BigMode() { const resp = await invoke("search_docs", { query, lenses: selectedLenses, + offset: 0, }); setResultMode(ResultDisplayMode.Documents); setDocResults(resp.results); diff --git a/apps/desktop-client/src/pages/search/SearchPage.tsx b/apps/desktop-client/src/pages/search/SearchPage.tsx index e64291f18..6d3d54ec7 100644 --- a/apps/desktop-client/src/pages/search/SearchPage.tsx +++ b/apps/desktop-client/src/pages/search/SearchPage.tsx @@ -45,6 +45,7 @@ export function SearchPage() { const [selectedActionIdx, setSelectedActionIdx] = useState(0); const [searchMeta, setSearchMeta] = useState(null); + const [offset, setOffset] = useState(0); const [query, setQuery] = useState(""); @@ -64,6 +65,7 @@ export function SearchPage() { setShowActions(false); setSelectedActionIdx(0); setSearchMeta(null); + setOffset(0); await requestResize(); }, []); @@ -192,6 +194,35 @@ export function SearchPage() { setShowActions(false); }; + useEffect(() => { + if (resultMode === ResultDisplayMode.Documents) { + const doc_count = docResults.length; + const max = doc_count - 1; + if (selectedIdx === max) { + const remainder = doc_count % 5; + if (remainder === 0) { + setOffset(doc_count); + } + } + } + }, [selectedIdx, resultMode, docResults.length]); + + useEffect(() => { + invoke("search_docs", { + query, + lenses: selectedLenses, + offset, + }).then((resp: SearchResults) => { + setDocResults((results: SearchResult[]) => { + const values = [...results]; + for (const result of resp.results) { + values.push(result); + } + return values; + }); + }); + }, [offset]); + // when the query changes shoot it over to the server. useEffect(() => { if (query.length === 0) { @@ -218,6 +249,7 @@ export function SearchPage() { const resp = await invoke("search_docs", { query, lenses: selectedLenses, + offset: 0, }); setResultMode(ResultDisplayMode.Documents); setDocResults(resp.results); diff --git a/apps/tauri/src/cmd.rs b/apps/tauri/src/cmd.rs index f59bf5b93..e5606fc06 100644 --- a/apps/tauri/src/cmd.rs +++ b/apps/tauri/src/cmd.rs @@ -140,11 +140,13 @@ pub async fn search_docs<'r>( win: tauri::Window, lenses: Vec, query: &str, + offset: u32, ) -> Result { if let Some(rpc) = win.app_handle().try_state::() { let data = request::SearchParam { lenses, query: query.to_string(), + offset: Some(offset), }; let rpc = rpc.lock().await; diff --git a/crates/shared/src/request.rs b/crates/shared/src/request.rs index 441f05b8e..bcd2bf591 100644 --- a/crates/shared/src/request.rs +++ b/crates/shared/src/request.rs @@ -6,6 +6,7 @@ use strum_macros::{Display, EnumString}; pub struct SearchParam { pub lenses: Vec, pub query: String, + pub offset: Option, } #[derive(Debug, Deserialize, Serialize)] diff --git a/crates/spyglass-searcher/src/client/local.rs b/crates/spyglass-searcher/src/client/local.rs index 1bc4bcd95..2bc374977 100644 --- a/crates/spyglass-searcher/src/client/local.rs +++ b/crates/spyglass-searcher/src/client/local.rs @@ -101,6 +101,7 @@ impl SearchTrait for Searcher { filters: &[QueryBoost], boosts: &[QueryBoost], num_results: usize, + offset: usize, ) -> SearchQueryResult { let start_timer = Instant::now(); @@ -116,7 +117,7 @@ impl SearchTrait for Searcher { QueryOptions::default(), ); - let collector = TopDocs::with_limit(num_results); + let collector = TopDocs::with_limit(num_results).and_offset(offset); let top_docs = searcher .search(&query, &collector) diff --git a/crates/spyglass-searcher/src/lib.rs b/crates/spyglass-searcher/src/lib.rs index eac6f23ff..7ef208fbb 100644 --- a/crates/spyglass-searcher/src/lib.rs +++ b/crates/spyglass-searcher/src/lib.rs @@ -102,6 +102,7 @@ pub trait SearchTrait { filters: &[QueryBoost], boosts: &[QueryBoost], num_results: usize, + offset: usize, ) -> SearchQueryResult; } @@ -284,7 +285,7 @@ mod test { let query = "salinas"; let filters = vec![QueryBoost::new(Boost::Tag(2_u64))]; - let results = searcher.search(query, &filters, &[], 5).await; + let results = searcher.search(query, &filters, &[], 5, 0).await; assert_eq!(results.documents.len(), 1); } @@ -297,7 +298,7 @@ mod test { let query = "salinas"; let filters = vec![QueryBoost::new(Boost::Tag(2_u64))]; - let results = searcher.search(query, &filters, &[], 5).await; + let results = searcher.search(query, &filters, &[], 5, 0).await; assert_eq!(results.documents.len(), 1); } @@ -310,7 +311,7 @@ mod test { let query = "salinasd"; let filters = vec![QueryBoost::new(Boost::Tag(2_u64))]; - let results = searcher.search(query, &filters, &[], 5).await; + let results = searcher.search(query, &filters, &[], 5, 0).await; assert_eq!(results.documents.len(), 0); } } diff --git a/crates/spyglass/src/api/handler/search.rs b/crates/spyglass/src/api/handler/search.rs index 4af72bd22..1ae71c978 100644 --- a/crates/spyglass/src/api/handler/search.rs +++ b/crates/spyglass/src/api/handler/search.rs @@ -64,63 +64,69 @@ pub async fn search_docs( } if let Some(embedding_api) = state.embedding_api.load_full().as_ref() { - match embedding_api - .embed(&query, EmbeddingContentType::Query) - .map(|embedding| embedding.first().map(|val| val.to_owned())) - { - Ok(Some(embedding)) => { - let mut distances = vec_documents::get_document_distance( - &state.db, - &lens_ids, - &embedding.embedding, - 10, - ) - .await; - - match distances.as_mut() { - Ok(distances) => { - let mut distances = distances - .iter() - .filter(|dist| dist.distance < 25.0) - .collect::>(); - distances.sort_by(|a, b| a.distance.total_cmp(&b.distance)); - - let min_value = distances - .iter() - .map(|distance| distance.distance) - .reduce(f64::min); - let max_value = distances - .iter() - .map(|distance| distance.distance) - .reduce(f64::max); - if let (Some(min), Some(max)) = (min_value, max_value) { - for distance in distances { - let boost_normalized = - (distance.distance - min) / (max - min) * 3.0; - let boost = 3.0 - boost_normalized; - - boosts.push(QueryBoost::with_value( - Boost::DocId(distance.doc_id.clone()), - boost as f32, - )); + if !query.trim().is_empty() { + match embedding_api + .embed(&query, EmbeddingContentType::Query) + .map(|embedding| embedding.first().map(|val| val.to_owned())) + { + Ok(Some(embedding)) => { + let mut distances = vec_documents::get_document_distance( + &state.db, + &lens_ids, + &embedding.embedding, + 10, + ) + .await; + + match distances.as_mut() { + Ok(distances) => { + let mut distances = distances + .iter() + .filter(|dist| dist.distance < 25.0) + .collect::>(); + distances.sort_by(|a, b| a.distance.total_cmp(&b.distance)); + + let min_value = distances + .iter() + .map(|distance| distance.distance) + .reduce(f64::min); + let max_value = distances + .iter() + .map(|distance| distance.distance) + .reduce(f64::max); + if let (Some(min), Some(max)) = (min_value, max_value) { + for distance in distances { + let boost_normalized = + (distance.distance - min) / (max - min) * 3.0; + let boost = 3.0 - boost_normalized; + + boosts.push(QueryBoost::with_value( + Boost::DocId(distance.doc_id.clone()), + boost as f32, + )); + } } } - } - Err(error) => { - log::error!("Error accessing distances {:?}", error); + Err(error) => { + log::error!("Error accessing distances {:?}", error); + } } } - } - Ok(None) => { - log::error!("No embedding could be generated"); - } - Err(err) => { - log::error!("Error embedding query {:?}", err); + Ok(None) => { + log::error!("No embedding could be generated"); + } + Err(err) => { + log::error!("Error embedding query {:?}", err); + } } } } - let search_result = state.index.search(&query, &filters, &boosts, 5).await; + let offset = search_req.offset.unwrap_or(0); + let search_result = state + .index + .search(&query, &filters, &boosts, 5, offset as usize) + .await; log::debug!( "query {}: {} results from {} docs in {}ms", query,