diff --git a/docs/asciidoc/modules/ROOT/pages/ml/openai.adoc b/docs/asciidoc/modules/ROOT/pages/ml/openai.adoc index 0bb6a198d7..f549768569 100644 --- a/docs/asciidoc/modules/ROOT/pages/ml/openai.adoc +++ b/docs/asciidoc/modules/ROOT/pages/ml/openai.adoc @@ -331,3 +331,80 @@ RETURN DISTINCT a.name | name | description | value | the description of the dataset |=== + +== Create explanation of the subgraph from a set of queries + +This procedure `apoc.ml.fromQueries` returns an explanation, in natural language, of the given set of queries. + +It uses the `chat/completions` API which is https://platform.openai.com/docs/api-reference/chat/create[documented here^]. + +.Query call +[source,cypher] +---- +CALL apoc.ml.fromQueries(['MATCH (n:Movie) RETURN n', 'MATCH (n:Person) RETURN n'], + {apiKey: }) +YIELD value +RETURN * +---- + +.Example response +[source, bash] +---- ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| value | 
++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| "The database represents movies and people, like in a movie database or social network. + There are no defined relationships between nodes, allowing flexibility for future connections. + The Movie node includes properties like title, tagline, and release year." | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row +---- + +.Query call with path +[source,cypher] +---- +CALL apoc.ml.fromQueries(['MATCH (n:Movie) RETURN n', 'MATCH p=(n:Movie)--() RETURN p'], + {apiKey: }) +YIELD value +RETURN * +---- + +.Example response +[source, bash] +---- 
++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| value | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| "models relationships in the movie industry, connecting :Person nodes to :Movie nodes. + It represents actors, directors, writers, producers, and reviewers connected to movies they are involved with. + Similar to a social network graph but specialized for the entertainment industry. + Each relationship type corresponds to common roles in movie production and reviewing. + Allows for querying and analyzing connections and collaborations within the movie business." 
| ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row +---- + + +.Input Parameters +[%autowidth, opts=header] +|=== +| name | description +| queries | The list of queries +| conf | An optional configuration map, please check the next section +|=== + +.Configuration map +[%autowidth, opts=header] +|=== +| name | description | mandatory +| apiKey | OpenAI API key | in case `apoc.openai.key` is not defined +| model | The Open AI model | no, default `gpt-3.5-turbo` +| sample | The number of nodes to skip, e.g. a sample of 1000 will read every 1000th node. 
It's used as a parameter to `apoc.meta.data` procedure that computes the schema | no, default is a random number +|=== + +.Results +[%autowidth, opts=header] +|=== +| name | description +| value | the description of the dataset +|=== \ No newline at end of file diff --git a/full/src/main/resources/extended.txt b/full/src/main/resources/extended.txt index 43029f7d16..f52194c086 100644 --- a/full/src/main/resources/extended.txt +++ b/full/src/main/resources/extended.txt @@ -98,6 +98,8 @@ apoc.metrics.get apoc.metrics.list apoc.metrics.storage apoc.ml.cypher +apoc.ml.fromCypher +apoc.ml.fromQueries apoc.ml.query apoc.ml.schema apoc.ml.openai.chat diff --git a/full/src/test/java/apoc/ml/PromptIT.java b/full/src/test/java/apoc/ml/PromptIT.java index e265a277e1..ee22a324b3 100644 --- a/full/src/test/java/apoc/ml/PromptIT.java +++ b/full/src/test/java/apoc/ml/PromptIT.java @@ -1,6 +1,9 @@ package apoc.ml; +import static apoc.util.TestUtil.testCall; import static apoc.util.TestUtil.testResult; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.fail; import apoc.coll.Coll; import apoc.meta.Meta; @@ -12,7 +15,6 @@ import java.util.Objects; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assume; import org.junit.Before; @@ -58,8 +60,8 @@ public void testQuery() { Map.of("query", "What movies did Tom Hanks play in?", "retries", 2L, "apiKey", OPENAI_KEY), (r) -> { List<Map<String, Object>> list = r.stream().collect(Collectors.toList()); - Assertions.assertThat(list).hasSize(12); - Assertions.assertThat(list.stream() + assertThat(list).hasSize(12); + assertThat(list.stream() .map(m -> m.get("query")) .filter(Objects::nonNull) .map(Object::toString) @@ -72,7 +74,7 @@ public void testQuery() { public void testSchema() { testResult(db, "CALL apoc.ml.schema({apiKey: $apiKey})", Map.of("apiKey", OPENAI_KEY), (r) -> { List<Map<String, Object>> list = 
r.stream().collect(Collectors.toList()); - Assertions.assertThat(list).hasSize(1); + assertThat(list).hasSize(1); }); } @@ -88,8 +90,8 @@ public void testCypher() { "apiKey", OPENAI_KEY), (r) -> { List<Map<String, Object>> list = r.stream().collect(Collectors.toList()); - Assertions.assertThat(list).hasSize((int) numOfQueries); - Assertions.assertThat(list.stream() + assertThat(list).hasSize((int) numOfQueries); + assertThat(list.stream() .map(m -> m.get("query")) .filter(Objects::nonNull) .map(Object::toString) @@ -97,4 +99,85 @@ public void testCypher() { .hasSize((int) numOfQueries); }); } + + /** Calls apoc.ml.fromQueries with Movie, Person and path queries; the explanation must mention movies and either "person" or "people". */ + @Test + public void testSchemaFromQueries() { + List<String> queries = List.of( + "MATCH p=(n:Movie)--() RETURN p", + "MATCH (n:Person) RETURN n", + "MATCH (n:Movie) RETURN n", + "MATCH p=(n)-[r]->() RETURN r"); + + testCall( + db, + "CALL apoc.ml.fromQueries($queries, {apiKey: $apiKey})", + Map.of( + "queries", queries, + "apiKey", OPENAI_KEY), + (r) -> { + String value = ((String) r.get("value")).toLowerCase(); + assertThat(value).containsIgnoringCase("movie"); + assertThat(value).satisfiesAnyOf(s -> assertThat(s).contains("person"), s -> assertThat(s) + .contains("people")); + }); + } + + /** Calls apoc.ml.fromQueries with a single Movie query; the explanation must mention movies and must not mention "person" or "people". */ + @Test + public void testSchemaFromQueriesWithSingleQuery() { + List<String> queries = List.of("MATCH (n:Movie) RETURN n"); + + testCall( + db, + "CALL apoc.ml.fromQueries($queries, {apiKey: $apiKey})", + Map.of( + "queries", queries, + "apiKey", OPENAI_KEY), + (r) -> { + String value = ((String) r.get("value")).toLowerCase(); + assertThat(value).containsIgnoringCase("movie"); + assertThat(value).doesNotContainIgnoringCase("person", "people"); + }); + } + + /** Passes a query returning the undefined variable a; any returned row fails the test, and a thrown exception must carry the "Variable `a` not defined" message. */ + @Test + public void testSchemaFromQueriesWithWrongQuery() { + List<String> queries = List.of("MATCH (n:Movie) RETURN a"); + try { + testCall( + db, + "CALL apoc.ml.fromQueries($queries, {apiKey: $apiKey})", + Map.of( + "queries", queries, + "apiKey", OPENAI_KEY), + (r) -> fail()); + } catch (Exception e) { + assertThat(e.getMessage()).contains(" Variable `a` not defined"); + } 
+ } + + /** Passes a query that returns only the literal 1; the explanation must contain one of "does not contain", "empty", "undefined" or "doesn't have". */ + @Test + public void testSchemaFromEmptyQueries() { + List<String> queries = List.of("MATCH (n:Movie) RETURN 1"); + + testCall( + db, + "CALL apoc.ml.fromQueries($queries, {apiKey: $apiKey})", + Map.of( + "queries", queries, + "apiKey", OPENAI_KEY), + (r) -> { + String value = ((String) r.get("value")).toLowerCase(); + + assertThat(value) + .satisfiesAnyOf( + s -> assertThat(s).contains("does not contain"), + s -> assertThat(s).contains("empty"), + s -> assertThat(s).contains("undefined"), + s -> assertThat(s).contains("doesn't have")); + }); + } }