From 7651d2b2cbba41aa12f1c4d4eaf109d932175896 Mon Sep 17 00:00:00 2001 From: Caleb Hattingh Date: Wed, 24 Apr 2024 14:01:32 +0200 Subject: [PATCH 1/3] chore: update python versions in CI (#251) --- .github/workflows/ci.yml | 10 ++++++++-- noxfile.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2bbd0630..1b4b1e58 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,16 +40,21 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.9] + python-version: ["3.12"] + allow-prereleases: [false] include: - os: ubuntu-latest - python-version: "3.12" + python-version: "3.13" + allow-prereleases: true - os: ubuntu-latest python-version: "3.11" + allow-prereleases: false - os: ubuntu-latest python-version: "3.10" + allow-prereleases: false - os: ubuntu-latest python-version: 3.8 + allow-prereleases: false runs-on: "${{ matrix.os }}" steps: - name: Harden Runner @@ -72,6 +77,7 @@ jobs: - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # 5.1.0 with: python-version: ${{ matrix.python-version }} + allow-prereleases: ${{ matrix.allow-prereleases }} - uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07 with: diff --git a/noxfile.py b/noxfile.py index 61652492..388d359d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,7 +1,7 @@ import nox -@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12"]) +@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) def test(session): session.install("-rrequirements-dev.txt") session.install("-e", ".", "--no-build-isolation") From deb88ccdcdbbb1aad0bca3e691bc58bfaca23133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?A=C3=A9cio=20Santos?= <150570+aecio@users.noreply.github.com> Date: Wed, 24 Apr 2024 05:12:24 -0700 Subject: [PATCH 2/3] Expose Tantivy's DisjunctionMaxQuery (#244) Co-authored-by: Caleb Hattingh --- src/query.rs | 29 ++++++++++++++++++++++++++++- 
tantivy/tantivy.pyi | 5 +++++ tests/tantivy_test.py | 23 ++++++++++++++++++++++- 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/query.rs b/src/query.rs index bf036fe9..a310d040 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,6 +1,8 @@ use crate::{make_term, Schema}; use pyo3::{ - exceptions, prelude::*, types::PyAny, types::PyString, types::PyTuple, + exceptions, + prelude::*, + types::{PyAny, PyFloat, PyString, PyTuple}, }; use tantivy as tv; @@ -151,4 +153,29 @@ impl Query { inner: Box::new(inner), }) } + + /// Construct a Tantivy's DisjunctionMaxQuery + #[staticmethod] + pub(crate) fn disjunction_max_query( + subqueries: Vec<Query>, + tie_breaker: Option<&PyFloat>, + ) -> PyResult<Query> { + let inner_queries: Vec<Box<dyn tv::query::Query>> = subqueries + .iter() + .map(|query| query.inner.box_clone()) + .collect(); + + let dismax_query = if let Some(tie_breaker) = tie_breaker { + tv::query::DisjunctionMaxQuery::with_tie_breaker( + inner_queries, + tie_breaker.extract::<f32>()?, + ) + } else { + tv::query::DisjunctionMaxQuery::new(inner_queries) + }; + + Ok(Query { + inner: Box::new(dismax_query), + }) + } } diff --git a/tantivy/tantivy.pyi b/tantivy/tantivy.pyi index 466a7442..ee267b8d 100644 --- a/tantivy/tantivy.pyi +++ b/tantivy/tantivy.pyi @@ -209,6 +209,11 @@ class Query: def boolean_query(subqueries: Sequence[tuple[Occur, Query]]) -> Query: pass + @staticmethod + def disjunction_max_query(subqueries: Sequence[Query], tie_breaker: Optional[float] = None) -> Query: + pass + + class Order(Enum): Asc = 1 Desc = 2 diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 90f3b63e..0124c2f7 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -877,4 +877,25 @@ def test_boolean_query(self, ram_index): with pytest.raises(TypeError, match = r"'Query' object cannot be converted to 'Occur'"): Query.boolean_query([ (query1, Occur.Must), - ]) \ No newline at end of file + ]) + + def test_disjunction_max_query(self, ram_index): + index = ram_index + + # query1 should
match the doc: "The Old Man and the Sea" + query1 = Query.term_query(index.schema, "title", "sea") + # query2 should match the doc: "Of Mice and Men" + query2 = Query.term_query(index.schema, "title", "mice") + # the disjunction max query should match both docs. + query = Query.disjunction_max_query([query1, query2]) + + result = index.searcher().search(query, 10) + assert len(result.hits) == 2 + + # the disjunction max query should also take a tie_breaker parameter + query = Query.disjunction_max_query([query1, query2], tie_breaker=0.5) + result = index.searcher().search(query, 10) + assert len(result.hits) == 2 + + with pytest.raises(TypeError, match = r"'str' object cannot be converted to 'Query'"): + query = Query.disjunction_max_query([query1, "not a query"], tie_breaker=0.5) From ed7374c7bd2277ccfc795e3fdada555f0e610f50 Mon Sep 17 00:00:00 2001 From: Caleb Hattingh Date: Wed, 24 Apr 2024 15:10:45 +0200 Subject: [PATCH 3/3] fix: incorrect test name for fuzzy_fields (#252) --- tests/tantivy_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 0124c2f7..806cd673 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -101,7 +101,7 @@ def test_parse_query_field_boosts(self, ram_index): == """Query(BooleanQuery { subqueries: [(Should, Boost(query=TermQuery(Term(field=0, type=Str, "winter")), boost=2.3)), (Should, TermQuery(Term(field=1, type=Str, "winter")))] })""" ) - def test_parse_query_field_boosts(self, ram_index): + def test_parse_query_fuzzy_fields(self, ram_index): query = ram_index.parse_query("winter", fuzzy_fields={"title": (True, 1, False)}) assert ( repr(query)