From 1004215bdf7642575eb63bf649d96fedd7b17b3d Mon Sep 17 00:00:00 2001 From: David Leifker Date: Thu, 27 Feb 2025 18:37:01 -0600 Subject: [PATCH] feat(graphql): implement sort and facets for scroll --- .../GetMetadataAnalyticsResolver.java | 2 +- .../container/ContainerEntitiesResolver.java | 3 +- .../ListDataProductAssetsResolver.java | 1 - .../domain/DomainEntitiesResolver.java | 3 +- .../search/GetQuickFiltersResolver.java | 3 +- .../search/ScrollAcrossEntitiesResolver.java | 3 + .../search/SearchAcrossEntitiesResolver.java | 3 +- .../graphql/resolvers/search/SearchUtils.java | 21 +- .../siblings/SiblingsSearchResolver.java | 3 + .../src/main/resources/search.graphql | 5 + .../ContainerEntitiesResolverTest.java | 3 +- .../domain/DomainEntitiesResolverTest.java | 3 +- .../search/GetQuickFiltersResolverTest.java | 3 +- .../VersionsSearchResolverTest.java | 43 ++-- docs/api/graphql/graphql-best-practices.md | 44 +++- .../metadata/client/JavaEntityClient.java | 26 +-- .../metadata/search/LineageSearchService.java | 3 +- .../metadata/search/SearchService.java | 188 ++++++++++++------ .../client/CachingEntitySearchService.java | 31 +-- .../elasticsearch/ElasticSearchService.java | 12 +- .../elasticsearch/query/ESSearchDAO.java | 8 +- .../request/AggregationQueryBuilder.java | 8 +- .../query/request/SearchRequestHandler.java | 4 +- .../fixtures/SampleDataFixtureTestBase.java | 69 ++++++- .../search/query/SearchDAOTestBase.java | 2 +- .../request/SearchRequestHandlerTest.java | 16 +- .../test/search/SearchTestUtils.java | 25 ++- .../datahub/authorization/PolicyFetcher.java | 1 + .../authorization/DataHubAuthorizerTest.java | 7 + .../elastic/OperationsController.java | 8 +- .../linkedin/entity/client/EntityClient.java | 40 +++- .../entity/client/RestliEntityClient.java | 25 +-- .../metadata/search/EntitySearchService.java | 78 +++++++- .../SearchBasedFormAssignmentManager.java | 1 + .../search/EntitySearchServiceTest.java | 26 ++- .../gms/servlet/ConfigSearchExport.java | 
3 +- 36 files changed, 519 insertions(+), 205 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java index 6045b1e726c7a..74f5a81dc0150 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java @@ -78,7 +78,7 @@ private List getCharts(MetadataAnalyticsInput input, OperationCo SearchResult searchResult = _entityClient.searchAcrossEntities( - opContext, entities, query, filter, 0, 0, Collections.emptyList(), null); + opContext, entities, query, filter, 0, 0, Collections.emptyList()); List aggregationMetadataList = searchResult.getMetadata().getAggregations(); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java index 82a476ec56ddc..82d3dc642c99e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java @@ -91,8 +91,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro new CriterionArray(ImmutableList.of(filterCriterion))))), start, count, - Collections.emptyList(), - null)); + Collections.emptyList())); } catch (Exception e) { throw new RuntimeException( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java index e59f7b3116acd..f544b4a4f1f89 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java @@ -185,7 +185,6 @@ public CompletableFuture get(DataFetchingEnvironment environment) finalFilter, start, count, - null, null)); results .getSearchResults() diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java index c27fa1d195a76..ed79829ddc94a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java @@ -95,8 +95,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro new ConjunctiveCriterion().setAnd(criteria))), start, count, - Collections.emptyList(), - null)); + Collections.emptyList())); } catch (Exception e) { throw new RuntimeException( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java index b07e3fa912641..27f96f588082b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java @@ -108,8 +108,7 @@ private SearchResult getSearchResults( : null, 0, 0, - Collections.emptyList(), - null); + Collections.emptyList()); } /** 
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java index 77eef1b9a25c6..180ba39fb8588 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java @@ -16,6 +16,7 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.service.ViewService; import com.linkedin.view.DataHubViewInfo; import graphql.schema.DataFetcher; @@ -80,6 +81,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) } else { searchFlags = null; } + List sortCriteria = SearchUtils.getSortCriteria(input.getSortInput()); try { log.debug( @@ -108,6 +110,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) : baseFilter, scrollId, keepAlive, + sortCriteria, count)); } catch (Exception e) { log.error( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java index 29bc3a82a1649..791215a78764e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java @@ -146,8 +146,7 @@ private List getStructuredPropertyFacets(final QueryContext context) { createStructuredPropertyFilter(), 0, 100, - Collections.emptyList(), - null); + Collections.emptyList()); return 
result.getEntities().stream() .map(entity -> String.format("structuredProperties.%s", entity.getEntity().getId())) .collect(Collectors.toList()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java index 1591b206d99b9..3cd5043ee73b8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java @@ -378,6 +378,8 @@ public static CompletableFuture scrollAcrossEntities( Integer inputCount, String scrollId, String inputKeepAlive, + List sortCriteria, + List facets, String className) { final List entityTypes = @@ -431,7 +433,15 @@ public static CompletableFuture scrollAcrossEntities( try { final ScrollResult scrollResult = _entityClient.scrollAcrossEntities( - context, finalEntityNames, query, finalFilters, scrollId, keepAlive, count); + context, + finalEntityNames, + query, + finalFilters, + scrollId, + keepAlive, + sortCriteria, + count, + facets); return UrnScrollResultsMapper.map(inputContext, scrollResult); } catch (Exception e) { log.warn( @@ -518,14 +528,7 @@ public static CompletableFuture searchAcrossEntities( try { final SearchResult searchResult = _entityClient.searchAcrossEntities( - context, - finalEntityNames, - query, - finalFilters, - start, - count, - sortCriteria, - null); + context, finalEntityNames, query, finalFilters, start, count, sortCriteria); return UrnSearchResultsMapper.map(inputContext, searchResult); } catch (Exception e) { log.warn( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/siblings/SiblingsSearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/siblings/SiblingsSearchResolver.java index 6e1425bc44166..2d7b1a4354c9c 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/siblings/SiblingsSearchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/siblings/SiblingsSearchResolver.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.utils.CriterionUtils; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.List; import java.util.concurrent.CompletableFuture; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -61,6 +62,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) input.getCount(), input.getScrollId(), input.getKeepAlive(), + List.of(), + List.of(), this.getClass().getSimpleName()); } } diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 34169d8c6d18a..c0bec68cc23c5 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -347,6 +347,11 @@ input ScrollAcrossEntitiesInput { Flags controlling search options """ searchFlags: SearchFlags + + """ + Optional - Information on how to sort this search result + """ + sortInput: SearchSortInput } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java index 5af236d7e81e5..654a22a078e3c 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java @@ -60,8 +60,7 @@ public void testGetSuccess() throws Exception { new CriterionArray(ImmutableList.of(filterCriterion)))))), Mockito.eq(0), Mockito.eq(20), - Mockito.eq(Collections.emptyList()), - 
Mockito.eq(null))) + Mockito.eq(Collections.emptyList()))) .thenReturn( new SearchResult() .setFrom(0) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java index 5be65703846a9..dafcea37adcb7 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java @@ -65,8 +65,7 @@ public void testGetSuccess() throws Exception { new CriterionArray(ImmutableList.of(filterCriterion)))))), Mockito.eq(0), Mockito.eq(20), - Mockito.eq(Collections.emptyList()), - Mockito.eq(null))) + Mockito.eq(Collections.emptyList()))) .thenReturn( new SearchResult() .setFrom(0) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolverTest.java index 64042e82bbfe8..96ae7c231549a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolverTest.java @@ -301,8 +301,7 @@ private static EntityClient initMockEntityClient( Mockito.eq(filter), Mockito.eq(start), Mockito.eq(limit), - Mockito.eq(Collections.emptyList()), - Mockito.eq(null))) + Mockito.eq(Collections.emptyList()))) .thenReturn(result); return client; } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java index 3554df074df69..04abb5b5e7231 
100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java @@ -4,6 +4,7 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.utils.CriterionUtils.*; import static org.mockito.ArgumentMatchers.*; +import static org.mockito.ArgumentMatchers.anyString; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertThrows; @@ -39,6 +40,7 @@ import com.linkedin.view.DataHubViewInfo; import com.linkedin.view.DataHubViewType; import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; import java.util.List; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -144,8 +146,7 @@ public void testGetSuccessBasic() throws Exception { List.of( new com.linkedin.metadata.query.filter.SortCriterion() .setField(VERSION_SORT_ID_FIELD_NAME) - .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), - any()); + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING)))); } @Test @@ -240,8 +241,7 @@ public void testGetSuccessComplex() throws Exception { .setOrder(com.linkedin.metadata.query.filter.SortOrder.ASCENDING), new com.linkedin.metadata.query.filter.SortCriterion() .setField(VERSION_SORT_ID_FIELD_NAME) - .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), - any()); + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING)))); } @Test @@ -251,7 +251,7 @@ public void testThrowsError() throws Exception { Mockito.when( mockEntityClient.searchAcrossEntities( - any(), any(), any(), any(), Mockito.anyInt(), Mockito.anyInt(), any(), any())) + any(), any(), any(), any(), Mockito.anyInt(), Mockito.anyInt(), any())) .thenThrow(new RemoteInvocationException()); VersionsSearchResolver resolver = new 
VersionsSearchResolver(mockEntityClient, mockViewService); @@ -271,23 +271,36 @@ public void testThrowsError() throws Exception { private EntityClient initMockEntityClient() throws Exception { EntityClient client = Mockito.mock(EntityClient.class); + SearchResult result = + new SearchResult() + .setEntities(new SearchEntityArray()) + .setNumEntities(0) + .setFrom(0) + .setPageSize(0) + .setMetadata(new SearchResultMetadata()); + Mockito.when( client.searchAcrossEntities( - any(), + any(OperationContext.class), any(), Mockito.anyString(), - any(), + any(Filter.class), Mockito.anyInt(), Mockito.anyInt(), + anyList(), + anyList())) + .thenReturn(result); + + Mockito.when( + client.searchAcrossEntities( + any(OperationContext.class), any(), - Mockito.eq(null))) - .thenReturn( - new SearchResult() - .setEntities(new SearchEntityArray()) - .setNumEntities(0) - .setFrom(0) - .setPageSize(0) - .setMetadata(new SearchResultMetadata())); + anyString(), + any(Filter.class), + anyInt(), + anyInt(), + anyList())) + .thenReturn(result); return client; } diff --git a/docs/api/graphql/graphql-best-practices.md b/docs/api/graphql/graphql-best-practices.md index d4c85d52e29f9..a39cba3ad7190 100644 --- a/docs/api/graphql/graphql-best-practices.md +++ b/docs/api/graphql/graphql-best-practices.md @@ -33,7 +33,14 @@ This technique makes maintaining your GraphQL queries much more doable. For exam `search*` APIs such as [`searchAcrossEntities`](https://datahubproject.io/docs/GraphQL/queries/#searchacrossentities) are designed for minimal pagination (< ~50). They do not perform well for deep pagination requests. Use the equivalent `scroll*` APIs such as [`scrollAcrossEntities`](https://datahubproject.io/docs/GraphQL/queries/#scrollacrossentities) when expecting the need to paginate deeply into the result set. -Note: that it is impossible to use `search*` for paginating beyond 10k results. +:::note +It is impossible to use `search*` for paginating beyond 10k results. 
+::: + +:::caution +In order to `scroll*` through the entire result set it is required to use a stable sort order. This means `_score` cannot be used as +the first sort order. Use the `urn` field as the sort order instead. +::: #### Examples @@ -54,7 +61,15 @@ Page 1 Request: orFilters: [ { and: [{ field: "name", condition: CONTAIN, values: ["pet"] }] }, { and: [{ field: "title", condition: CONTAIN, values: ["pet"] }] } - ] + ], + sortInput: { + sortCriteria: [ + { + field: "urn", + sortOrder: ASCENDING + } + ] + } } ) { nextScrollId @@ -110,7 +125,15 @@ Page 2 Request: orFilters: [ { and: [{ field: "name", condition: CONTAIN, values: ["pet"] }] }, { and: [{ field: "title", condition: CONTAIN, values: ["pet"] }] } - ] + ], + sortInput: { + sortCriteria: [ + { + field: "urn", + sortOrder: ASCENDING + } + ] + } } ) { nextScrollId @@ -282,7 +305,20 @@ Example for skipping highlighting and aggregates, typically used for scrolling s ```graphql { scrollAcrossEntities( - input: {types: [DATASET], count: 2, query: "pet", searchFlags: {skipAggregates: true, skipHighlighting: true}} + input: { + types: [DATASET], + count: 2, + query: "pet", + searchFlags: {skipAggregates: true, skipHighlighting: true}, + sortInput: { + sortCriteria: [ + { + field: "urn", + sortOrder: ASCENDING + } + ] + }, + } ) { searchResults { entity { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 71e1aea59c711..d643b4eceadf5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -460,21 +460,6 @@ public SearchResult search( entityService); } - @Override - @Nonnull - public SearchResult searchAcrossEntities( - @Nonnull OperationContext opContext, - @Nonnull List entities, - @Nonnull String input, - @Nullable Filter filter, - int start, - int count, - 
List sortCriteria) - throws RemoteInvocationException { - return searchAcrossEntities( - opContext, entities, input, filter, start, count, sortCriteria, null); - } - /** * Searches for entities matching to a given query and filters across multiple entity types * @@ -497,7 +482,7 @@ public SearchResult searchAcrossEntities( int start, int count, List sortCriteria, - @Nullable List facets) + @Nonnull List facets) throws RemoteInvocationException { return ValidationUtils.validateSearchResult( @@ -523,7 +508,9 @@ public ScrollResult scrollAcrossEntities( @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, - int count) + List sortCriteria, + int count, + @Nullable List facets) throws RemoteInvocationException { return ValidationUtils.validateScrollResult( @@ -533,10 +520,11 @@ public ScrollResult scrollAcrossEntities( entities, input, filter, - null, + sortCriteria, scrollId, keepAlive, - count), + count, + facets), entityService); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index f77b5097db80c..9ae2618e1f07c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -838,7 +838,8 @@ private LineageScrollResult getScrollResultInBatches( sortCriteria, scrollId, keepAlive, - querySize), + querySize, + List.of()), urnToRelationship); querySize = Math.max(0, size - resultForBatch.getEntities().size()); finalResult = mergeScrollResult(finalResult, resultForBatch); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java index 731517ba3290f..84a285f574e4e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java @@ -87,7 +87,7 @@ public SearchResult search( } SearchResult result = _cachingEntitySearchService.search( - opContext, entitiesToSearch, input, postFilters, sortCriteria, from, size, null); + opContext, entitiesToSearch, input, postFilters, sortCriteria, from, size, List.of()); try { return result @@ -141,18 +141,8 @@ public SearchResult searchAcrossEntities( String.format( "Searching Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s", entities, input, postFilters, sortCriteria, from, size)); - // DEPRECATED - // This is the legacy version of `_entityType`-- it operates as a special case and does not - // support ORs, Unions, etc. - // We will still provide it for backwards compatibility but when sending filters to the backend - // use the new - // filter name `_entityType` that we provide above. This is just provided to prevent a breaking - // change for old clients. 
- boolean aggregateByLegacyEntityFacet = facets != null && facets.contains("entity"); - if (aggregateByLegacyEntityFacet) { - facets = new ArrayList<>(facets); - facets.add(INDEX_VIRTUAL_FIELD); - } + + final List finalFacets = facetInput(facets); List nonEmptyEntities = getEntitiesToSearch(opContext, entities, size); if (nonEmptyEntities.isEmpty()) { // Optimization: If the indices are all empty, return empty result @@ -160,49 +150,12 @@ public SearchResult searchAcrossEntities( } SearchResult result = _cachingEntitySearchService.search( - opContext, nonEmptyEntities, input, postFilters, sortCriteria, from, size, facets); - if (facets == null || facets.contains("entity") || facets.contains("_entityType")) { - Optional entityTypeAgg = - result.getMetadata().getAggregations().stream() - .filter(aggMeta -> aggMeta.getName().equals(INDEX_VIRTUAL_FIELD)) - .findFirst(); - if (entityTypeAgg.isPresent()) { - LongMap numResultsPerEntity = entityTypeAgg.get().getAggregations(); - result - .getMetadata() - .getAggregations() - .add( - new AggregationMetadata() - .setName("entity") - .setDisplayName("Type") - .setAggregations(numResultsPerEntity) - .setFilterValues( - new FilterValueArray( - SearchUtil.convertToFilters( - numResultsPerEntity, Collections.emptySet())))); - } else { - // Should not happen due to the adding of the _entityType aggregation before, but if it - // does, best-effort count of entity types - // Will not include entity types that had 0 results - Map numResultsPerEntity = - result.getEntities().stream() - .collect( - Collectors.groupingBy( - entity -> entity.getEntity().getEntityType(), Collectors.counting())); - result - .getMetadata() - .getAggregations() - .add( - new AggregationMetadata() - .setName("entity") - .setDisplayName("Type") - .setAggregations(new LongMap(numResultsPerEntity)) - .setFilterValues( - new FilterValueArray( - SearchUtil.convertToFilters( - numResultsPerEntity, Collections.emptySet())))); - } - } + opContext, nonEmptyEntities, 
input, postFilters, sortCriteria, from, size, finalFacets); + result + .getMetadata() + .setAggregations( + withAdditionalAggregates( + result.getEntities(), result.getMetadata().getAggregations(), finalFacets)); return result; } @@ -242,7 +195,6 @@ public List getEntitiesToSearch( * @return a {@link ScrollResult} that contains a list of matched documents and related search * result metadata */ - @Nonnull public ScrollResult scrollAcrossEntities( @Nonnull OperationContext opContext, @Nonnull Collection entities, @@ -252,6 +204,46 @@ public ScrollResult scrollAcrossEntities( @Nullable String scrollId, @Nullable String keepAlive, int size) { + return scrollAcrossEntities( + opContext, + entities, + input, + postFilters, + sortCriteria, + scrollId, + keepAlive, + size, + List.of()); + } + + /** + * Gets a list of documents that match given search request across multiple entities. The results + * are aggregated and filters are applied to the search hits and not the aggregation results. + * + * @param entities list of entities to search (If empty, searches across all entities) + * @param input the search input text + * @param postFilters the request map with fields and values as filters to be applied to search + * hits + * @param sortCriteria list of {@link SortCriterion} to be applied to search results + * @param scrollId opaque scroll identifier for passing to search backend + * @param size the number of search hits to return + * @param facets list of facets we want aggregations for + * @return a {@link ScrollResult} that contains a list of matched documents and related search + * result metadata + */ + @Nonnull + public ScrollResult scrollAcrossEntities( + @Nonnull OperationContext opContext, + @Nonnull Collection entities, + @Nonnull String input, + @Nullable Filter postFilters, + List sortCriteria, + @Nullable String scrollId, + @Nullable String keepAlive, + int size, + @Nullable List facets) { + + final List finalFacets = facetInput(facets); log.debug( 
String.format( "Searching Search documents entities: %s, input: %s, postFilters: %s, sortCriteria: %s, from: %s, size: %s", @@ -261,8 +253,86 @@ public ScrollResult scrollAcrossEntities( // No indices with non-zero entries: skip querying and return empty result return getEmptyScrollResult(size); } - return _cachingEntitySearchService.scroll( - opContext, entitiesToSearch, input, postFilters, sortCriteria, scrollId, keepAlive, size); + ScrollResult result = + _cachingEntitySearchService.scroll( + opContext, + entitiesToSearch, + input, + postFilters, + sortCriteria, + scrollId, + keepAlive, + size, + finalFacets); + + result + .getMetadata() + .setAggregations( + withAdditionalAggregates( + result.getEntities(), result.getMetadata().getAggregations(), finalFacets)); + return result; + } + + @Nonnull + private static List facetInput(@Nullable List facets) { + // DEPRECATED + // This is the legacy version of `_entityType`-- it operates as a special case and does not + // support ORs, Unions, etc. + // We will still provide it for backwards compatibility but when sending filters to the backend + // use the new + // filter name `_entityType` that we provide above. This is just provided to prevent a breaking + // change for old clients. + boolean aggregateByLegacyEntityFacet = facets != null && facets.contains("entity"); + if (aggregateByLegacyEntityFacet) { + facets = new ArrayList<>(facets); + facets.add(INDEX_VIRTUAL_FIELD); + } + return facets == null ? List.of() : facets; + } + + @Nonnull + private static AggregationMetadataArray withAdditionalAggregates( + @Nonnull SearchEntityArray entities, + @Nullable AggregationMetadataArray aggregates, + @Nonnull List facets) { + AggregationMetadataArray aggregationMetadata = + aggregates == null ? 
new AggregationMetadataArray() : aggregates; + + if (facets.isEmpty() || facets.contains("entity") || facets.contains("_entityType")) { + Optional entityTypeAgg = + aggregationMetadata.stream() + .filter(aggMeta -> aggMeta.getName().equals(INDEX_VIRTUAL_FIELD)) + .findFirst(); + if (entityTypeAgg.isPresent()) { + LongMap numResultsPerEntity = entityTypeAgg.get().getAggregations(); + aggregationMetadata.add( + new AggregationMetadata() + .setName("entity") + .setDisplayName("Type") + .setAggregations(numResultsPerEntity) + .setFilterValues( + new FilterValueArray( + SearchUtil.convertToFilters(numResultsPerEntity, Collections.emptySet())))); + } else { + // Should not happen due to the adding of the _entityType aggregation before, but if it + // does, best-effort count of entity types + // Will not include entity types that had 0 results + Map numResultsPerEntity = + entities.stream() + .collect( + Collectors.groupingBy( + entity -> entity.getEntity().getEntityType(), Collectors.counting())); + aggregationMetadata.add( + new AggregationMetadata() + .setName("entity") + .setDisplayName("Type") + .setAggregations(new LongMap(numResultsPerEntity)) + .setFilterValues( + new FilterValueArray( + SearchUtil.convertToFilters(numResultsPerEntity, Collections.emptySet())))); + } + } + return aggregationMetadata; } private static SearchResult getEmptySearchResult(int from, int size) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index 7272809bb1221..8a5a90c234828 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -22,6 +22,7 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import org.apache.commons.collections.CollectionUtils; +import 
org.javatuples.Octet; import org.javatuples.Septet; import org.javatuples.Sextet; import org.springframework.cache.Cache; @@ -63,7 +64,7 @@ public SearchResult search( List sortCriteria, int from, int size, - @Nullable List facets) { + @Nonnull List facets) { return getCachedSearchResults( opContext, entityNames, query, filters, sortCriteria, from, size, facets); } @@ -131,9 +132,10 @@ public ScrollResult scroll( List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, - int size) { + int size, + @Nonnull List facets) { return getCachedScrollResults( - opContext, entities, query, filters, sortCriteria, scrollId, keepAlive, size); + opContext, entities, query, filters, sortCriteria, scrollId, keepAlive, size, facets); } /** @@ -150,7 +152,7 @@ public SearchResult getCachedSearchResults( List sortCriteria, int from, int size, - @Nullable List facets) { + @Nonnull List facets) { return new CacheableSearcher<>( cacheManager.getCache(ENTITY_SEARCH_SERVICE_SEARCH_CACHE_NAME), batchSize, @@ -276,7 +278,8 @@ public ScrollResult getCachedScrollResults( List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, - int size) { + int size, + @Nonnull List facets) { return opContext.withSpan( "getScrollResults", @@ -291,13 +294,14 @@ public ScrollResult getCachedScrollResults( if (enableCache(opContext.getSearchContext().getSearchFlags())) { Object cacheKey = - Septet.with( + Octet.with( opContext.getSearchContextId(), entities, query, filters != null ? toJsonString(filters) : null, CollectionUtils.isNotEmpty(sortCriteria) ? toJsonString(sortCriteria) : null, scrollId, + facets, size); String json = cache.get(cacheKey, String.class); result = json != null ? 
toRecordTemplate(ScrollResult.class, json) : null; @@ -313,7 +317,8 @@ public ScrollResult getCachedScrollResults( scrollId, keepAlive, size, - isFullText); + isFullText, + facets); cache.put(cacheKey, toJsonString(result)); Span.current().setAttribute(CACHE_HIT_ATTR, false); MetricUtils.counter(this.getClass(), "scroll_cache_miss_count").inc(); @@ -332,7 +337,8 @@ public ScrollResult getCachedScrollResults( scrollId, keepAlive, size, - isFullText); + isFullText, + facets); } return result; }, @@ -349,7 +355,7 @@ private SearchResult getRawSearchResults( final List sortCriteria, final int start, final int count, - @Nullable final List facets) { + @Nonnull final List facets) { return entitySearchService.search( opContext, entityNames, input, filters, sortCriteria, start, count, facets); } @@ -386,13 +392,14 @@ private ScrollResult getRawScrollResults( @Nullable final String scrollId, @Nullable final String keepAlive, final int count, - final boolean fulltext) { + final boolean fulltext, + @Nonnull List facets) { if (fulltext) { return entitySearchService.fullTextScroll( - opContext, entities, input, filters, sortCriteria, scrollId, keepAlive, count); + opContext, entities, input, filters, sortCriteria, scrollId, keepAlive, count, facets); } else { return entitySearchService.structuredScroll( - opContext, entities, input, filters, sortCriteria, scrollId, keepAlive, count); + opContext, entities, input, filters, sortCriteria, scrollId, keepAlive, count, facets); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index afdc40014d9a0..8ec6d4c699e37 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -150,7 +150,7 @@ public SearchResult search( List 
sortCriteria, int from, int size) { - return search(opContext, entityNames, input, postFilters, sortCriteria, from, size, null); + return search(opContext, entityNames, input, postFilters, sortCriteria, from, size, List.of()); } @Nonnull @@ -162,7 +162,7 @@ public SearchResult search( List sortCriteria, int from, int size, - @Nullable List facets) { + @Nonnull List facets) { log.debug( String.format( "Searching FullText Search documents entityName: %s, input: %s, postFilters: %s, sortCriteria: %s, from: %s, size: %s", @@ -344,7 +344,8 @@ public ScrollResult fullTextScroll( List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, - int size) { + int size, + @Nonnull List facets) { log.debug( String.format( "Scrolling Structured Search documents entities: %s, input: %s, postFilters: %s, sortCriteria: %s, scrollId: %s, size: %s", @@ -374,7 +375,8 @@ public ScrollResult structuredScroll( List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, - int size) { + int size, + @Nonnull List facets) { log.debug( String.format( "Scrolling FullText Search documents entities: %s, input: %s, postFilters: %s, sortCriteria: %s, scrollId: %s, size: %s", @@ -415,7 +417,7 @@ public ExplainResponse explain( @Nullable String scrollId, @Nullable String keepAlive, int size, - @Nullable List facets) { + @Nonnull List facets) { return esSearchDAO.explain( opContext.withSearchFlags( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index ae483ad7711b0..519322c572080 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -296,7 +296,7 @@ public SearchResult search( List sortCriteria, int from, int size, - @Nullable List facets) { + @Nonnull List facets) { final 
String finalInput = input.isEmpty() ? "*" : input; List entitySpecs = entityNames.stream() @@ -533,7 +533,7 @@ public ScrollResult scroll( entitySpecs, finalInput, sortCriteria, - null); + List.of()); // PIT specifies indices in creation so it doesn't support specifying indices on the // request, so @@ -564,7 +564,7 @@ private SearchRequest getScrollRequest( List entitySpecs, String finalInput, List sortCriteria, - @Nullable List facets) { + @Nonnull List facets) { String pitId = null; Object[] sort = null; if (scrollId != null) { @@ -645,7 +645,7 @@ public ExplainResponse explain( @Nullable String scrollId, @Nullable String keepAlive, int size, - @Nullable List facets) { + @Nonnull List facets) { IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index 3d1c03524811c..d4854d47801e1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -76,7 +76,7 @@ public AggregationQueryBuilder( /** Get the set of default aggregations, across all facets. */ public List getAggregations(@Nonnull OperationContext opContext) { - return getAggregations(opContext, null); + return getAggregations(opContext, new ArrayList<>()); } /** @@ -84,15 +84,13 @@ public List getAggregations(@Nonnull OperationContext opCont * then get aggregations for all. 
*/ public List getAggregations( - @Nonnull OperationContext opContext, @Nullable List facets) { + @Nonnull OperationContext opContext, @Nonnull List facets) { final Set facetsToAggregate = new HashSet<>(); if (Boolean.TRUE.equals( opContext.getSearchContext().getSearchFlags().isIncludeDefaultFacets())) { facetsToAggregate.addAll(defaultFacetFields); } - if (facets != null) { - facets.stream().filter(this::isValidAggregate).forEach(facetsToAggregate::add); - } + facets.stream().filter(this::isValidAggregate).forEach(facetsToAggregate::add); return facetsToAggregate.stream() .map(f -> facetToAggregationBuilder(opContext, f)) .collect(Collectors.toList()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 90ab6cd5979fa..2f2b64aa924f5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -229,7 +229,7 @@ public SearchRequest getSearchRequest( List sortCriteria, int from, int size, - @Nullable List facets) { + @Nonnull List facets) { SearchFlags searchFlags = opContext.getSearchContext().getSearchFlags(); SearchRequest searchRequest = new SearchRequest(); @@ -293,7 +293,7 @@ public SearchRequest getSearchRequest( @Nullable String pitId, @Nullable String keepAlive, int size, - @Nullable List facets) { + @Nonnull List facets) { SearchFlags searchFlags = opContext.getSearchContext().getSearchFlags(); SearchRequest searchRequest = new PITAwareSearchRequest(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 476f0114817be..88c5aa77fc18c 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -1079,7 +1079,7 @@ public void testUnderscore() throws IOException { } @Test - public void testFacets() { + public void testSearchFacets() { Set expectedFacets = Set.of("entity", "typeNames", "platform", "origin", "tags"); SearchResult testResult = searchAcrossEntities(getOperationContext(), getSearchService(), "cypress"); @@ -1121,6 +1121,49 @@ public void testFacets() { assertEquals(entityAggMeta.getAggregations(), expectedEntityTypeCounts); } + @Test + public void testScrollFacets() { + Set expectedFacets = Set.of("entity", "typeNames", "platform", "origin", "tags"); + ScrollResult testResult = + scrollAcrossEntities(getOperationContext(), getSearchService(), "cypress"); + expectedFacets.forEach( + facet -> { + assertTrue( + testResult.getMetadata().getAggregations().stream() + .anyMatch(agg -> agg.getName().equals(facet)), + String.format( + "Failed to find facet `%s` in %s", + facet, + testResult.getMetadata().getAggregations().stream() + .map(AggregationMetadata::getName) + .collect(Collectors.toList()))); + }); + AggregationMetadata entityAggMeta = + testResult.getMetadata().getAggregations().stream() + .filter(aggMeta -> aggMeta.getName().equals("entity")) + .findFirst() + .get(); + Map expectedEntityTypeCounts = new HashMap<>(); + expectedEntityTypeCounts.put("container", 0L); + expectedEntityTypeCounts.put("corpuser", 0L); + expectedEntityTypeCounts.put("corpgroup", 0L); + expectedEntityTypeCounts.put("mlmodel", 0L); + expectedEntityTypeCounts.put("mlfeaturetable", 1L); + expectedEntityTypeCounts.put("mlmodelgroup", 1L); + expectedEntityTypeCounts.put("dataflow", 1L); + expectedEntityTypeCounts.put("glossarynode", 1L); + expectedEntityTypeCounts.put("mlfeature", 0L); + expectedEntityTypeCounts.put("datajob", 2L); + expectedEntityTypeCounts.put("domain", 0L); 
+ expectedEntityTypeCounts.put("tag", 0L); + expectedEntityTypeCounts.put("glossaryterm", 2L); + expectedEntityTypeCounts.put("mlprimarykey", 1L); + expectedEntityTypeCounts.put("dataset", 9L); + expectedEntityTypeCounts.put("chart", 0L); + expectedEntityTypeCounts.put("dashboard", 0L); + assertEquals(entityAggMeta.getAggregations(), expectedEntityTypeCounts); + } + @Test public void testNestedAggregation() { Set expectedFacets = Set.of("platform"); @@ -2026,7 +2069,7 @@ public void testColumnExactMatch() { } @Test - public void testSortOrdering() { + public void testSearchSortOrdering() { String query = "unit_data"; SortCriterion criterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField("lastOperationTime"); @@ -2047,6 +2090,28 @@ public void testSortOrdering() { String.format("%s - Expected search results to have at least two results", query)); } + @Test + public void testScrollSortOrdering() { + String query = "unit_data"; + SortCriterion criterion = + new SortCriterion().setOrder(SortOrder.ASCENDING).setField("lastOperationTime"); + ScrollResult result = + getSearchService() + .scrollAcrossEntities( + getOperationContext() + .withSearchFlags(flags -> flags.setFulltext(true).setSkipCache(true)), + SEARCHABLE_ENTITIES, + query, + null, + Collections.singletonList(criterion), + null, + null, + 100); + assertTrue( + result.getEntities().size() > 2, + String.format("%s - Expected search results to have at least two results", query)); + } + @Test public void testFilterOnHasValuesField() { assertNotNull(getSearchService()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java index 6779b8f3d825c..b1e677db4f854 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java @@ -436,7 +436,7 @@ public void 
testExplain() { null, null, 10, - null); + List.of()); assertNotNull(explainResponse); assertEquals(explainResponse.getIndex(), "smpldat_datasetindex_v2"); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index f1e56e63502ff..af6f6edacc0dd 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -155,7 +155,7 @@ public void testCustomHighlights() { null, 0, 10, - null); + List.of()); SearchSourceBuilder sourceBuilder = searchRequest.source(); assertNotNull(sourceBuilder.highlighter()); assertEquals(4, sourceBuilder.highlighter().fields().size()); @@ -184,7 +184,7 @@ public void testSearchRequestHandlerHighlightingTurnedOff() { null, 0, 10, - null); + List.of()); SearchSourceBuilder sourceBuilder = searchRequest.source(); assertEquals(sourceBuilder.from(), 0); assertEquals(sourceBuilder.size(), 10); @@ -231,7 +231,7 @@ public void testSearchRequestHandler() { null, 0, 10, - null); + List.of()); SearchSourceBuilder sourceBuilder = searchRequest.source(); assertEquals(sourceBuilder.from(), 0); assertEquals(sourceBuilder.size(), 10); @@ -412,7 +412,7 @@ private BoolQueryBuilder constructFilterQuery( null, "5m", 10, - null) + List.of()) .source() .query(); } else { @@ -426,7 +426,7 @@ private BoolQueryBuilder constructFilterQuery( null, 0, 10, - null) + List.of()) .source() .query(); } @@ -482,7 +482,7 @@ private BoolQueryBuilder constructRemovedQuery( null, "5m", 10, - null) + List.of()) .source() .query(); } else { @@ -496,7 +496,7 @@ private BoolQueryBuilder constructRemovedQuery( null, 0, 10, - null) + List.of()) .source() .query(); } @@ -1031,7 +1031,7 @@ private BoolQueryBuilder getQuery( null, 0, 10, - null) + List.of()) .source() .query(); } 
diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java index f10c6e59bae7c..5338b8445aeaf 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java @@ -111,7 +111,7 @@ public static SearchResult searchAcrossEntities( null, 0, 100, - null); + List.of()); } public static SearchResult search( @@ -151,6 +151,29 @@ public static ScrollResult scroll( batchSize); } + public static ScrollResult scrollAcrossEntities( + OperationContext opContext, SearchService searchService, String query) { + return scrollAcrossEntities(opContext, searchService, SEARCHABLE_ENTITIES, query, null); + } + + public static ScrollResult scrollAcrossEntities( + OperationContext opContext, + SearchService searchService, + List entityNames, + String query, + Filter filter) { + return searchService.scrollAcrossEntities( + opContext.withSearchFlags( + flags -> flags.setFulltext(true).setSkipCache(true).setSkipHighlighting(false)), + entityNames, + query, + filter, + null, + null, + null, + 100); + } + public static SearchResult searchStructured( OperationContext opContext, SearchService searchService, String query) { return searchService.searchAcrossEntities( diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java index 6c281959feb70..ee503f91a46ad 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java @@ -98,6 +98,7 @@ public PolicyFetchResult fetchPolicies( filter, scrollId, null, + List.of(), count); List policyUrns = result.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList()); diff 
--git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java index 74232efc84d9c..7cf9ee800cc62 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java @@ -4,6 +4,7 @@ import static com.linkedin.metadata.authorization.PoliciesConfig.ACTIVE_POLICY_STATE; import static com.linkedin.metadata.authorization.PoliciesConfig.INACTIVE_POLICY_STATE; import static com.linkedin.metadata.authorization.PoliciesConfig.METADATA_POLICY_TYPE; +import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.anySet; import static org.mockito.ArgumentMatchers.isNull; import static org.mockito.ArgumentMatchers.nullable; @@ -173,6 +174,7 @@ public void setupTest() throws Exception { nullable(Filter.class), isNull(), isNull(), + anyList(), anyInt())) .thenReturn(policySearchResult1); when(_entityClient.scrollAcrossEntities( @@ -182,6 +184,7 @@ public void setupTest() throws Exception { nullable(Filter.class), eq("1"), isNull(), + anyList(), anyInt())) .thenReturn(policySearchResult2); when(_entityClient.scrollAcrossEntities( @@ -191,6 +194,7 @@ public void setupTest() throws Exception { nullable(Filter.class), eq("2"), isNull(), + anyList(), anyInt())) .thenReturn(policySearchResult3); when(_entityClient.scrollAcrossEntities( @@ -200,6 +204,7 @@ public void setupTest() throws Exception { nullable(Filter.class), eq("3"), isNull(), + anyList(), anyInt())) .thenReturn(policySearchResult4); when(_entityClient.scrollAcrossEntities( @@ -209,6 +214,7 @@ public void setupTest() throws Exception { nullable(Filter.class), eq("4"), isNull(), + anyList(), anyInt())) .thenReturn(policySearchResult5); @@ -419,6 +425,7 @@ public void testInvalidateCache() throws Exception { isNull(), any(), 
any(), + anyList(), anyInt())) .thenReturn(emptyResult); when(_entityClient.batchGetV2( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index ea437f4cf3511..6b20b4e65a586 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java @@ -275,7 +275,7 @@ public ResponseEntity explainSearchQuery( scrollId, keepAlive, size, - null); + List.of()); return ResponseEntity.ok(response); } @@ -385,8 +385,7 @@ public ResponseEntity explainSearchQueryDiff( sortCriteria, scrollId, keepAlive, - size, - null); + size); ExplainResponse responseB = searchService.explain( @@ -398,8 +397,7 @@ public ResponseEntity explainSearchQueryDiff( sortCriteria, scrollId, keepAlive, - size, - null); + size); String a = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(responseA); String b = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(responseB); diff --git a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java index b85f22e781d0b..9a805400252fc 100644 --- a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -308,7 +308,7 @@ SearchResult search( * @throws RemoteInvocationException when unable to execute request */ @Nonnull - SearchResult searchAcrossEntities( + default SearchResult searchAcrossEntities( @Nonnull OperationContext opContext, @Nonnull List entities, @Nonnull String input, @@ 
-316,7 +316,10 @@ SearchResult searchAcrossEntities( int start, int count, List sortCriteria) - throws RemoteInvocationException; + throws RemoteInvocationException { + return searchAcrossEntities( + opContext, entities, input, filter, start, count, sortCriteria, List.of()); + } /** * Searches for entities matching to a given query and filters across multiple entity types @@ -338,7 +341,7 @@ SearchResult searchAcrossEntities( int start, int count, List sortCriteria, - List facets) + @Nonnull List facets) throws RemoteInvocationException; /** @@ -349,19 +352,48 @@ SearchResult searchAcrossEntities( * @param filter search filters * @param scrollId opaque scroll ID indicating offset * @param keepAlive string representation of time to keep point in time alive, ex: 5m + * @param sortCriteria sort criteria * @param count max number of search results requested * @return Snapshot key * @throws RemoteInvocationException when unable to execute request */ @Nonnull - ScrollResult scrollAcrossEntities( + default ScrollResult scrollAcrossEntities( @Nonnull OperationContext opContext, @Nonnull List entities, @Nonnull String input, @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, + List sortCriteria, int count) + throws RemoteInvocationException { + return scrollAcrossEntities( + opContext, entities, input, filter, scrollId, keepAlive, sortCriteria, count, List.of()); + } + + /** + * Searches for entities matching to a given query and filters across multiple entity types + * + * @param entities entity types to search (if empty, searches all entities) + * @param input search query + * @param filter search filters + * @param scrollId opaque scroll ID indicating offset + * @param keepAlive string representation of time to keep point in time alive, ex: 5m + * @param facets list of facets we want aggregations for + * @return Snapshot key + * @throws RemoteInvocationException when unable to execute request + */ + ScrollResult scrollAcrossEntities( + 
@Nonnull OperationContext opContext, + @Nonnull List entities, + @Nonnull String input, + @Nullable Filter filter, + @Nullable String scrollId, + @Nullable String keepAlive, + List sortCriteria, + int count, + List facets) throws RemoteInvocationException; /** diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index ca77561922083..8b82b49319748 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -654,21 +654,6 @@ public SearchResult search( return sendClientRequest(requestBuilder, opContext.getAuthentication()).getEntity(); } - @Override - @Nonnull - public SearchResult searchAcrossEntities( - @Nonnull OperationContext opContext, - @Nonnull List entities, - @Nonnull String input, - @Nullable Filter filter, - int start, - int count, - List sortCriteria) - throws RemoteInvocationException { - return searchAcrossEntities( - opContext, entities, input, filter, start, count, sortCriteria, null); - } - /** * Searches for entities matching to a given query and filters across multiple entity types * @@ -720,7 +705,6 @@ public SearchResult searchAcrossEntities( return sendClientRequest(requestBuilder, opContext.getAuthentication()).getEntity(); } - @Nonnull @Override public ScrollResult scrollAcrossEntities( @Nonnull OperationContext opContext, @@ -729,7 +713,9 @@ public ScrollResult scrollAcrossEntities( @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, - int count) + List sortCriteria, + int count, + @Nullable List facets) throws RemoteInvocationException { final SearchFlags searchFlags = opContext.getSearchContext().getSearchFlags(); final EntitiesDoScrollAcrossEntitiesRequestBuilder requestBuilder = @@ -751,6 +737,11 @@ public 
ScrollResult scrollAcrossEntities( requestBuilder.keepAliveParam(keepAlive); } + if (!CollectionUtils.isEmpty(sortCriteria)) { + requestBuilder.sortParam(sortCriteria.get(0)); + requestBuilder.sortCriteriaParam(new SortCriterionArray(sortCriteria)); + } + return sendClientRequest(requestBuilder, opContext.getAuthentication()).getEntity(); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index da9ba8c684f61..bbe96d4935351 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -126,7 +126,7 @@ SearchResult search( List sortCriteria, int from, int size, - @Nullable List facets); + @Nonnull List facets); /** * Gets a list of documents after applying the input filters. @@ -270,6 +270,7 @@ List getBrowsePaths( * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return + * @param facets list of facets we want aggregations for * @return a {@link ScrollResult} that contains a list of matched documents and related search * result metadata */ @@ -282,7 +283,30 @@ ScrollResult fullTextScroll( List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, - int size); + int size, + @Nonnull List facets); + + @Nonnull + default ScrollResult fullTextScroll( + @Nonnull OperationContext opContext, + @Nonnull List entities, + @Nonnull String input, + @Nullable Filter postFilters, + List sortCriteria, + @Nullable String scrollId, + @Nullable String keepAlive, + int size) { + return fullTextScroll( + opContext, + entities, + input, + postFilters, + sortCriteria, + scrollId, + keepAlive, + size, + List.of()); + } /** * Gets a 
list of documents that match given search request. The results are aggregated and @@ -295,6 +319,7 @@ ScrollResult fullTextScroll( * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return + * @param facets list of facets we want aggregations for * @return a {@link ScrollResult} that contains a list of matched documents and related search * result metadata */ @@ -307,11 +332,56 @@ ScrollResult structuredScroll( List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, - int size); + int size, + @Nonnull List facets); + + default ScrollResult structuredScroll( + @Nonnull OperationContext opContext, + @Nonnull List entities, + @Nonnull String input, + @Nullable Filter postFilters, + List sortCriteria, + @Nullable String scrollId, + @Nullable String keepAlive, + int size) { + return structuredScroll( + opContext, + entities, + input, + postFilters, + sortCriteria, + scrollId, + keepAlive, + size, + List.of()); + } /** Max result size returned by the underlying search backend */ int maxResultSize(); + default ExplainResponse explain( + @Nonnull OperationContext opContext, + @Nonnull String query, + @Nonnull String documentId, + @Nonnull String entityName, + @Nullable Filter postFilters, + List sortCriteria, + @Nullable String scrollId, + @Nullable String keepAlive, + int size) { + return explain( + opContext, + query, + documentId, + entityName, + postFilters, + sortCriteria, + scrollId, + keepAlive, + size, + List.of()); + } + ExplainResponse explain( @Nonnull OperationContext opContext, @Nonnull String query, @@ -322,7 +392,7 @@ ExplainResponse explain( @Nullable String scrollId, @Nullable String keepAlive, int size, - @Nullable List facets); + @Nonnull List facets); /** * Return index convention diff --git 
a/metadata-service/services/src/main/java/com/linkedin/metadata/service/util/SearchBasedFormAssignmentManager.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/util/SearchBasedFormAssignmentManager.java index 8a3eb463aa15c..bb5af6c446f4d 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/service/util/SearchBasedFormAssignmentManager.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/util/SearchBasedFormAssignmentManager.java @@ -62,6 +62,7 @@ public static void apply( formFilters.getFilter(), scrollId, "5m", + List.of(), batchFormEntityCount); if (!results.hasEntities() diff --git a/metadata-service/services/src/test/java/com/linkedin/metadata/service/search/EntitySearchServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/search/EntitySearchServiceTest.java index 41e2c2f006e94..7bd6f3abe3713 100644 --- a/metadata-service/services/src/test/java/com/linkedin/metadata/service/search/EntitySearchServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/search/EntitySearchServiceTest.java @@ -31,6 +31,7 @@ import java.util.Map; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import org.apache.commons.lang3.NotImplementedException; import org.opensearch.action.explain.ExplainResponse; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -214,7 +215,7 @@ public SearchResult search( List sortCriteria, int from, int size, - List facets) { + @Nonnull List facets) { return null; } @@ -291,17 +292,19 @@ public List getBrowsePaths(OperationContext opContext, String entityName return null; } + @Nonnull @Override public ScrollResult fullTextScroll( - OperationContext opContext, - List entities, - String input, - Filter postFilters, + @Nonnull OperationContext opContext, + @Nonnull List entities, + @Nonnull String input, + @Nullable Filter postFilters, List sortCriteria, - String 
scrollId, - String keepAlive, - int size) { - return null; + @Nullable String scrollId, + @Nullable String keepAlive, + int size, + @Nonnull List facets) { + throw new NotImplementedException(); } @Override @@ -313,7 +316,8 @@ public ScrollResult structuredScroll( List sortCriteria, String scrollId, String keepAlive, - int size) { + int size, + @Nonnull List facets) { return null; } @@ -333,7 +337,7 @@ public ExplainResponse explain( String scrollId, String keepAlive, int size, - List facets) { + @Nonnull List facets) { return null; } diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index 720845e7ac1a3..daea573b2c05f 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -16,6 +16,7 @@ import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; import java.io.PrintWriter; +import java.util.List; import java.util.Map; import java.util.Optional; import lombok.extern.slf4j.Slf4j; @@ -105,7 +106,7 @@ private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { null, 0, 0, - null); + List.of()); FunctionScoreQueryBuilder rankingQuery = ((FunctionScoreQueryBuilder)