Skip to content

Commit

Permalink
Support fast text fields (#180)
Browse files Browse the repository at this point in the history
  • Loading branch information
GodTamIt authored Dec 28, 2023
1 parent 8e589c9 commit 3219743
Showing 1 changed file with 35 additions and 0 deletions.
35 changes: 35 additions & 0 deletions src/schemabuilder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub(crate) struct SchemaBuilder {
pub(crate) builder: Arc<RwLock<Option<schema::SchemaBuilder>>>,
}

/// Tokenizer name that `build_text_option` treats as "no tokenizer": when a
/// fast field is requested with this name, `None` is passed to `set_fast`
/// instead of the name itself.
const NO_TOKENIZER_NAME: &str = "raw";
/// Default value of the `tokenizer_name` argument in the `add_text_field` and
/// `add_json_field` signatures.
const TOKENIZER: &str = "default";
/// Default value of the `index_option` argument in the `add_text_field` and
/// `add_json_field` signatures.
const RECORD: &str = "position";

Expand All @@ -53,6 +54,14 @@ impl SchemaBuilder {
/// stored (bool, optional): If true sets the field as stored, the
/// content of the field can be later restored from a Searcher.
/// Defaults to False.
/// fast (bool, optional): If true sets the field as a fast field. A
/// fast field is column-oriented storage in tantivy. Text fast
/// fields store the term ids in the fast field, and the fast field
/// is multivalued. The "raw" tokenizer is recommended, since it
/// stores the original text unchanged. The "default" tokenizer
/// stores the terms as lower case, and this is reflected in the
/// dictionary. Defaults to False.
/// tokenizer_name (str, optional): The name of the tokenizer that
/// should be used to process the field. Defaults to 'default'
/// index_option (str, optional): Sets which information should be
Expand All @@ -68,19 +77,22 @@ impl SchemaBuilder {
#[pyo3(signature = (
name,
stored = false,
fast = false,
tokenizer_name = TOKENIZER,
index_option = RECORD
))]
fn add_text_field(
&mut self,
name: &str,
stored: bool,
fast: bool,
tokenizer_name: &str,
index_option: &str,
) -> PyResult<Self> {
let builder = &mut self.builder;
let options = SchemaBuilder::build_text_option(
stored,
fast,
tokenizer_name,
index_option,
)?;
Expand Down Expand Up @@ -296,6 +308,14 @@ impl SchemaBuilder {
/// stored (bool, optional): If true sets the field as stored, the
/// content of the field can be later restored from a Searcher.
/// Defaults to False.
/// fast (bool, optional): If true sets the field as a fast field. A
/// fast field is column-oriented storage in tantivy. Text fast
/// fields store the term ids in the fast field, and the fast field
/// is multivalued. The "raw" tokenizer is recommended, since it
/// stores the original text unchanged. The "default" tokenizer
/// stores the terms as lower case, and this is reflected in the
/// dictionary. Defaults to False.
/// tokenizer_name (str, optional): The name of the tokenizer that
/// should be used to process the field. Defaults to 'default'
/// index_option (str, optional): Sets which information should be
Expand All @@ -311,19 +331,22 @@ impl SchemaBuilder {
#[pyo3(signature = (
name,
stored = false,
fast = false,
tokenizer_name = TOKENIZER,
index_option = RECORD
))]
fn add_json_field(
&mut self,
name: &str,
stored: bool,
fast: bool,
tokenizer_name: &str,
index_option: &str,
) -> PyResult<Self> {
let builder = &mut self.builder;
let options = SchemaBuilder::build_text_option(
stored,
fast,
tokenizer_name,
index_option,
)?;
Expand Down Expand Up @@ -482,6 +505,7 @@ impl SchemaBuilder {

fn build_text_option(
stored: bool,
fast: bool,
tokenizer_name: &str,
index_option: &str,
) -> PyResult<schema::TextOptions> {
Expand All @@ -506,6 +530,17 @@ impl SchemaBuilder {
options
};

let options = if fast {
let text_tokenizer = if tokenizer_name != NO_TOKENIZER_NAME {
Some(tokenizer_name)
} else {
None
};
options.set_fast(text_tokenizer)
} else {
options
};

Ok(options)
}
}

0 comments on commit 3219743

Please sign in to comment.