From d0a4b81554dc55c27c45a792c18cc7288190e5b0 Mon Sep 17 00:00:00 2001 From: Erika Cardenas <110841617+erika-cardenas@users.noreply.github.com> Date: Wed, 17 Apr 2024 13:50:43 -0400 Subject: [PATCH 01/11] Add Weaviate and Gemini cookbook --- .../docker-compose.yml | 25 + ...description_with_weaviate_and_gemini.ipynb | 963 ++++++++++++++++++ 2 files changed, 988 insertions(+) create mode 100644 examples/Building_with_Weaviate_and_Gemini/docker-compose.yml create mode 100644 examples/Building_with_Weaviate_and_Gemini/personalized_description_with_weaviate_and_gemini.ipynb diff --git a/examples/Building_with_Weaviate_and_Gemini/docker-compose.yml b/examples/Building_with_Weaviate_and_Gemini/docker-compose.yml new file mode 100644 index 000000000..2b31bc58d --- /dev/null +++ b/examples/Building_with_Weaviate_and_Gemini/docker-compose.yml @@ -0,0 +1,25 @@ +--- +version: '3.4' +services: + weaviate: + command: + - --host + - 0.0.0.0 + - --port + - '8080' + - --scheme + - http + image: cr.weaviate.io/semitechnologies/weaviate:1.24.8 + ports: + - 8080:8080 + - 50051:50051 + restart: on-failure:0 + environment: + PALM_APIKEY: 'PALM_APIKEY' + QUERY_DEFAULTS_LIMIT: 25 + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' + PERSISTENCE_DATA_PATH: '/var/lib/weaviate' + DEFAULT_VECTORIZER_MODULE: 'text2vec-palm' + ENABLE_MODULES: 'text2vec-palm, generative-palm' + CLUSTER_HOSTNAME: 'node1' +... diff --git a/examples/Building_with_Weaviate_and_Gemini/personalized_description_with_weaviate_and_gemini.ipynb b/examples/Building_with_Weaviate_and_Gemini/personalized_description_with_weaviate_and_gemini.ipynb new file mode 100644 index 000000000..786bd2476 --- /dev/null +++ b/examples/Building_with_Weaviate_and_Gemini/personalized_description_with_weaviate_and_gemini.ipynb @@ -0,0 +1,963 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b19f6be7", + "metadata": { + "id": "b19f6be7" + }, + "source": [ + "# Personalized Product Descriptions with Weaviate and Gemini\n", + "\n", + "Weaviate is an open-source vector database that enables you to build AI-Native applications with Gemini! This notebook has four parts:\n", + "1. [Part 1: Connect to Weaviate, Define Schema, and Import Data](#part-1-install-dependencies-and-connect-to-weaviate)\n", + "\n", + "2. [Part 2: Run Vector Search Queries](#part-2-vector-search)\n", + "\n", + "3. [Part 3: Generative Feedback Loops](#part-3-generative-feedback-loops)\n", + "\n", + "4. [Part 4: Personalized Product Descriptions](#part-4-personalization)\n", + "\n", + "\n", + "In this demo, we will show you how to embed your data, run a semantic search, make a generative call to Gemini and store the output in your vector database, and personalize the description based on the user profile. We are using the Google merch products as our dataset and will generate product descriptions by calling the Gemini API.\n", + "\n", + "# Use Case\n", + "\n", + "We will be working with an e-commerce dataset containing Google merch. We will load the data into the Weaviate vector database and use the semantic search features to retrieve data. Next, we will generate product descriptions and store them back into the database with a vector embedding for retrieval (aka, generative feedback loops). Lastly, we will create a small knowledge graph with uniquely generated product descriptions for the buyer personas Alice and Bob.\n", + "\n", + "### Requirements\n", + "1. Weaviate vector database\n", + " 1. Serverless\n", + " 1. Embedded\n", + " 1. Local (Docker)\n", + "1. 
Gemini API key\n", + "\n", + "### Video\n", + "**For an awesome walk through of this demo, check out [this](https://youtu.be/WORgeRAAN-4?si=-WvqNkPn8oCmnLGQ&t=1138) presentation from Google Cloud Next!**\n", + "\n", + "[![From RAG to autonomous apps with Weaviate and Gemini on Google Kubernetes Engine](http://i3.ytimg.com/vi/WORgeRAAN-4/hqdefault.jpg)](https://youtu.be/WORgeRAAN-4?si=-WvqNkPn8oCmnLGQ&t=1138)" + ] + }, + { + "cell_type": "markdown", + "id": "7Wlb0vCDUK3h", + "metadata": { + "id": "7Wlb0vCDUK3h" + }, + "source": [ + "## Install Dependencies and Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1eQSHZzRx3n6", + "metadata": { + "id": "1eQSHZzRx3n6" + }, + "outputs": [], + "source": [ + "!pip install weaviate-client==4.5.5\n", + "!pip install google-generativeai\n", + "!pip install requests\n", + "!pip install python-dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "iKmPS8v7s_Xc", + "metadata": { + "id": "iKmPS8v7s_Xc" + }, + "outputs": [], + "source": [ + "import weaviate\n", + "import weaviate.classes.config as wvcc\n", + "from weaviate.embedded import EmbeddedOptions\n", + "import weaviate.classes as wvc\n", + "from weaviate.classes.config import Property, DataType, ReferenceProperty\n", + "from weaviate.util import generate_uuid5\n", + "from weaviate.classes.query import QueryReference\n", + "\n", + "import os\n", + "from dotenv import load_dotenv\n", + "import json\n", + "import requests\n", + "import PIL\n", + "import IPython\n", + "\n", + "from PIL import Image\n", + "from io import BytesIO\n", + "import google.generativeai as genai\n", + "\n", + "# Convert image links to PIL object\n", + "def url_to_pil(url):\n", + " response = requests.get(url)\n", + " return Image.open(BytesIO(response.content))" + ] + }, + { + "cell_type": "markdown", + "id": "cee8989d", + "metadata": { + "id": "cee8989d" + }, + "source": [ + "## Part 1: Connect to Weaviate, Define Schema, and Import Data" + ] + }, + { + "cell_type": "markdown", + "id": "t1Uc93joUOAR", + "metadata": { + "id": "t1Uc93joUOAR" + }, + "source": [ + "### Connect to Weaviate\n", + "\n", + "You will need to create a Weaviate cluster. There are a few ways to do this:\n", + "\n", + "1. [Weaviate Cloud Services](console.weaviate.cloud): Create a sandbox on our managed service. You will need to deploy it in US West, US East, or Australia.\n", + "\n", + "2. [Weaviate Embedded](https://weaviate.io/developers/weaviate/installation/embedded): Run Weaviate in your runtime (Note: It will disconnect once you stop the terminal.)\n", + "\n", + "3. Local Host: [Docker](https://weaviate.io/developers/weaviate/installation/docker-compose#starter-docker-compose-file) or [Kubernetes](https://weaviate.io/developers/weaviate/installation/kubernetes)\n", + "\n", + "For the full list of installation options, see [this page](https://weaviate.io/developers/weaviate/installation)." + ] + }, + { + "cell_type": "markdown", + "id": "1199263a", + "metadata": { + "id": "1199263a" + }, + "source": [ + "### Choose **only one** installation option\n", + "\n", + "Pick one of the three options below to run Weaviate" + ] + }, + { + "cell_type": "markdown", + "id": "11886426", + "metadata": { + "id": "11886426" + }, + "source": [ + "#### 1. Weaviate Cloud Service\n", + "\n", + "The first option is the [Weaviate Cloud Service](https://console.weaviate.cloud/), you can connect your notebook to a serverless Weaviate to keep the data persistent in the cloud." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f984616a", + "metadata": { + "id": "f984616a" + }, + "outputs": [], + "source": [ + "load_dotenv()\n", + "\n", + "client = weaviate.connect_to_wcs(\n", + " cluster_url=os.getenv(WCS_DEMO_URL), # Replace with your WCS URL\n", + " auth_credentials=weaviate.auth.AuthApiKey(os.getenv(WCS_DEMO_RO_KEY)), # Replace with your WCS key\n", + " headers={\"X-PaLM-Api-Key\": os.getenv(\"PALM-API-KEY\")}, # Replace with your Gemini API key\n", + ")\n", + "\n", + "print(client.is_ready())" + ] + }, + { + "cell_type": "markdown", + "id": "897684f3", + "metadata": { + "id": "897684f3" + }, + "source": [ + "#### 2. Weaviate Embedded\n", + "\n", + "The second option is Weaviate embedded. This runs Weaviate inside your notebook. Ideal for quick experimentation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "QveBlU9aL7jI", + "metadata": { + "id": "QveBlU9aL7jI" + }, + "outputs": [], + "source": [ + "client = weaviate.WeaviateClient(\n", + " embedded_options=EmbeddedOptions(\n", + " version=\"1.24.8\",\n", + " additional_env_vars={\n", + " \"ENABLE_MODULES\": \"text2vec-palm, generative-palm\"\n", + " }),\n", + " additional_headers={\n", + " \"X-PaLM-Api-Key\": 'PALM-API-KEY' # Replace with your Gemini API key\n", + " }\n", + ")\n", + "\n", + "client.connect()" + ] + }, + { + "cell_type": "markdown", + "id": "a1c36425", + "metadata": { + "id": "a1c36425" + }, + "source": [ + "#### 3. Local (Docker)\n", + "\n", + "If you like to run Weaviate yourself, you can download the [Docker files](https://weaviate.io/developers/weaviate/installation/docker-compose) and run it locally on your machine or in the cloud. Make sure to include the Google module in the configurator." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f042619", + "metadata": { + "id": "1f042619" + }, + "outputs": [], + "source": [ + "client = weaviate.connect_to_local()\n", + "\n", + "print(client.is_ready())" + ] + }, + { + "cell_type": "markdown", + "id": "mBahZ-eCrjJD", + "metadata": { + "id": "mBahZ-eCrjJD" + }, + "source": [ + "### Create schema\n", + "The schema tells Weaviate how you want to store your data. We will have two collections: Products and Personas. Each collection has metadata (properties) and specifies the embedding and language model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "842e00de", + "metadata": { + "id": "842e00de" + }, + "outputs": [], + "source": [ + "# This is optional to empty your database\n", + "result = client.collections.delete(\"Products\")\n", + "print(result)\n", + "result = client.collections.delete(\"Personas\")\n", + "print(result)\n", + "result = client.collections.delete(\"Personalized\")\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "LUmbb63eOGvX", + "metadata": { + "id": "LUmbb63eOGvX" + }, + "outputs": [], + "source": [ + "# Products Collection\n", + "if not client.collections.exists(\"Products\"):\n", + " collection = client.collections.create(\n", + " name=\"Products\",\n", + " vectorizer_config=wvcc.Configure.Vectorizer.text2vec_palm\n", + " (\n", + " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", + " api_endpoint=\"generativelanguage.googleapis.com\",\n", + " model_id=\"embedding-gecko-001\" # default model. 
You can switch to another model if desired\n", + " ),\n", + " generative_config=wvcc.Configure.Generative.palm(\n", + " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", + " api_endpoint=\"generativelanguage.googleapis.com\",\n", + " model_id=\"gemini-pro-vision\" # You can switch to another model if desired\n", + " ),\n", + " properties=[ # properties for the Products collection\n", + " Property(name=\"product_id\", data_type=DataType.TEXT),\n", + " Property(name=\"title\", data_type=DataType.TEXT),\n", + " Property(name=\"category\", data_type=DataType.TEXT),\n", + " Property(name=\"link\", data_type=DataType.TEXT),\n", + " Property(name=\"description\", data_type=DataType.TEXT),\n", + " Property(name=\"brand\", data_type=DataType.TEXT),\n", + " Property(name=\"generated_description\", data_type=DataType.TEXT),\n", + " ]\n", + " )\n", + "\n", + "# Personas Collection\n", + "if not client.collections.exists(\"Personas\"):\n", + " collection = client.collections.create(\n", + " name=\"Personas\",\n", + " vectorizer_config=wvcc.Configure.Vectorizer.text2vec_palm\n", + " (\n", + " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", + " api_endpoint=\"generativelanguage.googleapis.com\",\n", + " model_id=\"embedding-gecko-001\" # default model. You can switch to another model if desired\n", + " ),\n", + " generative_config=wvcc.Configure.Generative.palm(\n", + " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", + " api_endpoint=\"generativelanguage.googleapis.com\",\n", + " model_id=\"gemini-pro-vision\" # You can switch to another model if desired\n", + " ),\n", + " properties=[ # properties for the Personas collection\n", + " Property(name=\"name\", data_type=DataType.TEXT),\n", + " Property(name=\"description\", data_type=DataType.TEXT),\n", + " ]\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "v5sYXBkMAZZm", + "metadata": { + "id": "v5sYXBkMAZZm" + }, + "source": [ + "### Import Objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "vo0WckWt_gyq", + "metadata": { + "id": "vo0WckWt_gyq" + }, + "outputs": [], + "source": [ + "# URL to the raw JSON file\n", + "url = 'https://raw.githubusercontent.com/bkauf/next-store/main/first_99_objects.json'\n", + "response = requests.get(url)\n", + "\n", + "# Load the entire JSON content\n", + "data = json.loads(response.text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "-uxOVFZ6_iA7", + "metadata": { + "id": "-uxOVFZ6_iA7" + }, + "outputs": [], + "source": [ + "# Print first object\n", + "\n", + "data[0]" + ] + }, + { + "cell_type": "markdown", + "id": "3QhqNKBsvTND", + "metadata": { + "id": "3QhqNKBsvTND" + }, + "source": [ + "#### Upload to Weaviate\n", + "We will use Weaviate's batch import to get the 99 objects into our database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "UUZ1yJAvuQXT", + "metadata": { + "id": "UUZ1yJAvuQXT" + }, + "outputs": [], + "source": [ + "products = client.collections.get(\"Products\")\n", + "\n", + "with products.batch.dynamic() as batch:\n", + " for item in data:\n", + " batch.add_object(\n", + " properties={\n", + " \"product_id\": item['product_id'],\n", + " \"title\": item['title'],\n", + " \"category\": item['category'],\n", + " \"link\": item['link'],\n", + " \"description\": item['description'],\n", + " \"brand\": item['brand']\n", + " }\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "7XWKsV920vje", + "metadata": { + "id": "7XWKsV920vje" + }, + "outputs": [], + "source": [ + "# count how many objects are in the database\n", + "products = client.collections.get(\"Products\")\n", + "response = products.aggregate.over_all(total_count=True)\n", + "print(response.total_count)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "P5gC08735iVZ", + "metadata": { + "id": "P5gC08735iVZ" + }, + "outputs": [], + "source": [ + "# print the objects uuid and properties\n", + "\n", + "for product in products.iterator():\n", + " print(product.uuid, product.properties)" + ] + }, + { + "cell_type": "markdown", + "id": "a8c64f21", + "metadata": { + "id": "a8c64f21" + }, + "source": [ + "From the printed list above, select one `uuid` and paste it in the below cell.\n", + "\n", + "Note: If you run the cell below without grabbing a `uuid`, it will result in an error." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "i-OEzKw85-LY", + "metadata": { + "id": "i-OEzKw85-LY" + }, + "outputs": [], + "source": [ + "product = products.query.fetch_object_by_id(\n", + " \"87e5a137-d943-4863-90df-7eed6415fd58\", # <== paste a new product UUID here after importing\n", + " include_vector=True\n", + ")\n", + "\n", + "print(product.properties[\"title\"], product.vector[\"default\"])" + ] + }, + { + "cell_type": "markdown", + "id": "1c6202c0", + "metadata": { + "id": "1c6202c0" + }, + "source": [ + "## Part 2: Vector Search" + ] + }, + { + "cell_type": "markdown", + "id": "RVKqhfrcyHOb", + "metadata": { + "id": "RVKqhfrcyHOb" + }, + "source": [ + "### Vector Search\n", + "Vector search returns the objects with most similar vectors to that of the query. We will use the `near_text` operator to find objects with the nearest vector to an input text." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "F70gYJJzxYHt", + "metadata": { + "id": "F70gYJJzxYHt" + }, + "outputs": [], + "source": [ + "products = client.collections.get(\"Products\")\n", + "\n", + "response = products.query.near_text(\n", + " query=\"travel mug\",\n", + " return_properties=[\"title\", \"description\", \"link\"], # only return these 3 properties\n", + " limit=3 # limited to 3 objects\n", + ")\n", + "\n", + "for product in response.objects:\n", + " print(json.dumps(product.properties, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "Wm5SR_0nrlju", + "metadata": { + "id": "Wm5SR_0nrlju" + }, + "source": [ + "### Hybrid Search\n", + "[Hybrid search](https://weaviate.io/developers/weaviate/search/hybrid) combines keyword (BM25) and vector search together, giving you the best of both algorithms.\n", + "\n", + "To use hybrid search in Weaviate, all you have to do is define the `alpha` parameter to determine the weighting.\n", + "\n", + "`alpha` = 0 --> pure BM25\n", + "\n", + "`alpha` = 0.5 --> half BM25, half vector search\n", + "\n", + "`alpha` = 1 --> pure vector search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "egqvUe2-rpnh", + "metadata": { + "id": "egqvUe2-rpnh" + }, + "outputs": [], + "source": [ + "products = client.collections.get(\"Products\")\n", + "\n", + "response = products.query.hybrid(\n", + " query = \"dishwasher safe container\", # query\n", + " alpha = 0.75, # leaning more towards vector search\n", + " return_properties=[\"title\", \"description\", \"link\"], # return these 3 properties\n", + " limit = 3 # limited to only 3 objects\n", + ")\n", + "\n", + "for product in response.objects:\n", + " print(json.dumps(product.properties, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "M7bX-bV5rqnR", + "metadata": { + "id": "M7bX-bV5rqnR" + }, + "source": [ + "### Autocut\n", + "Rather than hard-coding the limit on the number of objects (seen above), we can use [autocut](https://weaviate.io/developers/weaviate/api/graphql/additional-operators#autocut) to cut off the result set. Autocut limits the number of results returned based on significant variations in the result set's metrics, such as vector distance or score.\n", + "\n", + "\n", + "To use autocut, you must specify the `auto_limit` parameter, which will stop returning results after the specified number of variations, or \"jumps,\" is reached.\n", + "\n", + "We will use the same hybrid search query above but use `auto_limit` rather than `limit`. Notice how there are actually 4 objects retrieved in this case, compared to the 3 objects returned in the previous query." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "JaEOs-mVruBf", + "metadata": { + "id": "JaEOs-mVruBf" + }, + "outputs": [], + "source": [ + "# auto_limit set to 1\n", + "\n", + "products = client.collections.get(\"Products\")\n", + "\n", + "response = products.query.hybrid(\n", + " query = \"dishwasher safe container\", # query\n", + " alpha = 0.75, # leaning more towards vector search\n", + " return_properties=[\"title\", \"description\", \"link\"], # return these 3 properties\n", + " auto_limit = 1 # autocut after 1 jump\n", + ")\n", + "\n", + "for product in response.objects:\n", + " print(json.dumps(product.properties, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "WJG6LXJ3yKYl", + "metadata": { + "id": "WJG6LXJ3yKYl" + }, + "source": [ + "### Filters\n", + "We can narrow down our results by adding a filter to the query.\n", + "\n", + "We will look for objects where `category` is equal to `drinkware`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "JnuEwgEG0PVM", + "metadata": { + "id": "JnuEwgEG0PVM" + }, + "outputs": [], + "source": [ + "products = client.collections.get(\"Products\")\n", + "\n", + "response = products.query.near_text(\n", + " query=\"travel cup\",\n", + " return_properties=[\"title\", \"description\", \"category\", \"link\"], # returned properties\n", + " filters=wvc.query.Filter.by_property(\"category\").equal(\"Drinkware\"), # filter\n", + " limit=3, # limit to 3 objects\n", + ")\n", + "\n", + "for product in response.objects:\n", + " print(product.properties)\n", + " print('===')" + ] + }, + { + "cell_type": "markdown", + "id": "oU8Uc1FQimaf", + "metadata": { + "id": "oU8Uc1FQimaf" + }, + "source": [ + "## Part 3: Generative Feedback Loops\n", + "\n", + "[Generative Feedback Loops](https://weaviate.io/blog/generative-feedback-loops-with-llms) refers to the process of storing the output from the language model back to the database.\n", + "\n", + "We will generate a description for each product in our database using Gemini and save it to the `generated_description` property in the `Products` collection." + ] + }, + { + "cell_type": "markdown", + "id": "rCUXn9q0rDxf", + "metadata": { + "id": "rCUXn9q0rDxf" + }, + "source": [ + "### Connect and configure Gemini model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "JajEq9ihp6ed", + "metadata": { + "id": "JajEq9ihp6ed" + }, + "outputs": [], + "source": [ + "genai.configure(api_key='gemini-api-key') # gemini api key\n", + "\n", + "# Multimodal model\n", + "model_pro_vision = genai.GenerativeModel(model_name='gemini-pro-vision') # multi-modal model (text and image)\n", + "\n", + "# LLM\n", + "model_pro = genai.GenerativeModel(model_name='gemini-pro') # text only model" + ] + }, + { + "cell_type": "markdown", + "id": "4h7iw16Viny1", + "metadata": { + "id": "4h7iw16Viny1" + }, + "source": [ + "### Generate a description and store it in the `Products` collection\n", + "\n", + "Steps for the below cell:\n", + "1. Run a vector search query to find travel jackets\n", + " 1. Learn more about autocut (`auto_limit`) [here](https://weaviate.io/developers/weaviate/api/graphql/additional-operators#autocut).\n", + "\n", + "2. 
Grab the returned objects, prompt Gemini with the task and image, store the description in the `generated_description` property" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "Qjz4iYtbP5Ni", + "metadata": { + "id": "Qjz4iYtbP5Ni" + }, + "outputs": [], + "source": [ + "response = products.query.near_text( # first find travel jackets\n", + " query=\"travel jacket\",\n", + " return_properties=[\"title\", \"description\", \"category\", \"link\"],\n", + " auto_limit=1, # limit it to 1 close group\n", + ")\n", + "\n", + "for product in response.objects:\n", + " if \"link\" in product.properties:\n", + " id = product.uuid\n", + " img_url = product.properties[\"link\"]\n", + "\n", + " pil_image = url_to_pil(img_url) # convert image to PIL object\n", + " generated_description = model_pro_vision.generate_content([\"Write a short Facebook ad about this product photo.\", pil_image]) # prompt to Gemini\n", + " generated_description = generated_description.text\n", + " print(img_url)\n", + " print(generated_description)\n", + " print('===')\n", + "\n", + " # Update the Product collection with the generated description\n", + " products.data.update(uuid=id, properties={\"generated_description\": generated_description})" + ] + }, + { + "cell_type": "markdown", + "id": "7LjeX-2_vVb5", + "metadata": { + "id": "7LjeX-2_vVb5" + }, + "source": [ + "### Vector Search on the `generated_description` property\n", + "\n", + "Since the product description was saved in our `Products` collection, we can run a vector search query on it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "suWRGv6Zu6g2", + "metadata": { + "id": "suWRGv6Zu6g2" + }, + "outputs": [], + "source": [ + "products = client.collections.get(\"Products\")\n", + "\n", + "response = products.query.near_text(\n", + " query=\"travel jacket\",\n", + " return_properties=[\"generated_description\", \"description\", \"title\"],\n", + " limit=1\n", + " )\n", + "\n", + "for o in response.objects:\n", + " print(o.uuid)\n", + " print(json.dumps(o.properties, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "Qs5hE2k0OuOh", + "metadata": { + "id": "Qs5hE2k0OuOh" + }, + "source": [ + "## Part 4: Personalization\n", + "\n", + "So far, we've generated product descriptions using Gemini's multi-modal model. In Part 4, we will generate product descriptions tailored to the persona.\n", + "\n", + "We will use [cross-references](https://weaviate.io/developers/weaviate/manage-data/cross-references) to establish directional relationships between collections." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba56WznY53C1", + "metadata": { + "id": "ba56WznY53C1" + }, + "outputs": [], + "source": [ + "# Personalized Collection\n", + "\n", + "if not client.collections.exists(\"Personalized\"):\n", + " collection = client.collections.create(\n", + " name=\"Personalized\",\n", + " vectorizer_config=wvcc.Configure.Vectorizer.text2vec_palm\n", + " (\n", + " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", + " api_endpoint=\"generativelanguage.googleapis.com\",\n", + " model_id=\"embedding-gecko-001\" # default model. You can switch to another model if desired\n", + " ),\n", + " generative_config=wvcc.Configure.Generative.palm(\n", + " project_id=\"project-id\", # Only required if you're using Vertex AI. 
Replace with your project id\n", + " api_endpoint=\"generativelanguage.googleapis.com\",\n", + " model_id=\"gemini-pro-vision\" # You can switch to another model if desired\n", + " ),\n", + " properties=[\n", + " Property(name=\"description\", data_type=DataType.TEXT),\n", + " ],\n", + " # cross-references\n", + " references=[\n", + " ReferenceProperty(\n", + " name=\"ofProduct\",\n", + " target_collection=\"Products\" # connect personalized to the products collection\n", + " ),\n", + " ReferenceProperty(\n", + " name=\"ofPersona\",\n", + " target_collection=\"Personas\" # connect personalized to the personas collection\n", + " )\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a44952d7", + "metadata": { + "id": "a44952d7" + }, + "source": [ + "### Create two personas (Alice and Bob)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "j1RlEbYpOw_5", + "metadata": { + "id": "j1RlEbYpOw_5" + }, + "outputs": [], + "source": [ + "personas = client.collections.get(\"Personas\")\n", + "\n", + "for persona in ['Alice', 'Bob']:\n", + " generated_description = model_pro.generate_content([\"Create a fictional buyer persona named \" + persona + \", write a short description about them\"]) # use gemini-pro to generate persona description\n", + " uuid = personas.data.insert({\n", + " \"name\": persona,\n", + " \"description\": generated_description.text\n", + " })\n", + " print(uuid)\n", + " print(generated_description.text)\n", + " print(\"===\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "B0oKIH5vQhhw", + "metadata": { + "id": "B0oKIH5vQhhw" + }, + "outputs": [], + "source": [ + "# print objects in the Personas collection\n", + "\n", + "personas = client.collections.get(\"Personas\")\n", + "\n", + "for persona in personas.iterator():\n", + " print(persona.uuid, persona.properties)" + ] + }, + { + "cell_type": "markdown", + "id": "4442bbc4", + "metadata": { + "id": "4442bbc4" + }, + "source": [ + "### Generate a product description tailored to the persona\n", + "\n", + "Grab the product uuid from Part 1 and paste it below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "UHi0V1MX2uNO", + "metadata": { + "id": "UHi0V1MX2uNO" + }, + "outputs": [], + "source": [ + "personalized = client.collections.get(\"Personalized\")\n", + "\n", + "product = products.query.fetch_object_by_id(\"87e5a137-d943-4863-90df-7eed6415fd58\") # <== paste a new product UUID here after importing\n", + "print(product.properties['link'])\n", + "print('===')\n", + "\n", + "personas = client.collections.get(\"Personas\")\n", + "\n", + "for persona in personas.iterator():\n", + " generated_description = model_pro.generate_content([\"Create a product description tailored to the following person, make sure to use the name (\", persona.properties[\"name\"],\") of the persona.\\n\\n\", \"# Product Description\\n\", product.properties[\"description\"], \"# Persona\", persona.properties[\"description\"]]) # generate a description tailored to the persona\n", + " print(generated_description.text)\n", + " # Add the personalized description to the `description` property in the Personalized collection\n", + " new_uuid = personalized.data.insert(\n", + " properties={\n", + " \"description\": generated_description.text },\n", + " references={\n", + " \"ofProduct\": product.uuid, # add cross-reference to the Product collection\n", + " \"ofPersona\": persona.uuid # add cross-reference to the Persona collection\n", + " },\n", + " )\n", + " print(\"New UUID\", 
new_uuid)\n", + " print('===')" + ] + }, + { + "cell_type": "markdown", + "id": "f3f56984", + "metadata": { + "id": "f3f56984" + }, + "source": [ + "### Fetch the objects in the `Personalized` collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "gxZ1W_cK4_dw", + "metadata": { + "id": "gxZ1W_cK4_dw" + }, + "outputs": [], + "source": [ + "personalized = client.collections.get(\"Personalized\")\n", + "\n", + "response = personalized.query.fetch_objects(\n", + " limit=2,\n", + " include_vector=True,\n", + " return_references=[QueryReference(\n", + " link_on=\"ofProduct\", # return the title property from the Product collection\n", + " return_properties=[\"title\"]\n", + " ),\n", + " QueryReference(\n", + " link_on=\"ofPersona\",\n", + " return_properties=[\"name\"] # return the name property from the Persona collection\n", + " )\n", + " ]\n", + ")\n", + "\n", + "for item in response.objects:\n", + " print(item.properties)\n", + " for ref_obj in item.references[\"ofProduct\"].objects:\n", + " print(ref_obj.properties)\n", + " for ref_obj in item.references[\"ofPersona\"].objects:\n", + " print(ref_obj.properties)\n", + " print(item.vector[\"default\"])\n", + " print(\"===\")" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "7Wlb0vCDUK3h", + "t1Uc93joUOAR" + ], + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From a0719df2816ddb5f936d05d1ec2ad0eaf0875cc8 Mon Sep 17 00:00:00 2001 From: Erika Cardenas <110841617+erika-cardenas@users.noreply.github.com> Date: Wed, 17 Apr 2024 13:50:43 -0400 Subject: [PATCH 02/11] Add Weaviate and Gemini cookbook --- .../docker-compose.yml | 25 + ...description_with_weaviate_and_gemini.ipynb | 963 ++++++++++++++++++ 2 files changed, 988 insertions(+) create mode 100644 examples/Building_with_Weaviate_and_Gemini/docker-compose.yml create mode 100644 examples/Building_with_Weaviate_and_Gemini/personalized_description_with_weaviate_and_gemini.ipynb diff --git a/examples/Building_with_Weaviate_and_Gemini/docker-compose.yml b/examples/Building_with_Weaviate_and_Gemini/docker-compose.yml new file mode 100644 index 000000000..2b31bc58d --- /dev/null +++ b/examples/Building_with_Weaviate_and_Gemini/docker-compose.yml @@ -0,0 +1,25 @@ +--- +version: '3.4' +services: + weaviate: + command: + - --host + - 0.0.0.0 + - --port + - '8080' + - --scheme + - http + image: cr.weaviate.io/semitechnologies/weaviate:1.24.8 + ports: + - 8080:8080 + - 50051:50051 + restart: on-failure:0 + environment: + PALM_APIKEY: 'PALM_APIKEY' + QUERY_DEFAULTS_LIMIT: 25 + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' + PERSISTENCE_DATA_PATH: '/var/lib/weaviate' + DEFAULT_VECTORIZER_MODULE: 'text2vec-palm' + ENABLE_MODULES: 'text2vec-palm, generative-palm' + CLUSTER_HOSTNAME: 'node1' +... 
new_uuid)\n", + " print('===')" + ] + }, + { + "cell_type": "markdown", + "id": "f3f56984", + "metadata": { + "id": "f3f56984" + }, + "source": [ + "### Fetch the objects in the `Personalized` collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "gxZ1W_cK4_dw", + "metadata": { + "id": "gxZ1W_cK4_dw" + }, + "outputs": [], + "source": [ + "personalized = client.collections.get(\"Personalized\")\n", + "\n", + "response = personalized.query.fetch_objects(\n", + " limit=2,\n", + " include_vector=True,\n", + " return_references=[QueryReference(\n", + " link_on=\"ofProduct\", # return the title property from the Product collection\n", + " return_properties=[\"title\"]\n", + " ),\n", + " QueryReference(\n", + " link_on=\"ofPersona\",\n", + " return_properties=[\"name\"] # return the name property from the Persona collection\n", + " )\n", + " ]\n", + ")\n", + "\n", + "for item in response.objects:\n", + " print(item.properties)\n", + " for ref_obj in item.references[\"ofProduct\"].objects:\n", + " print(ref_obj.properties)\n", + " for ref_obj in item.references[\"ofPersona\"].objects:\n", + " print(ref_obj.properties)\n", + " print(item.vector[\"default\"])\n", + " print(\"===\")" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "7Wlb0vCDUK3h", + "t1Uc93joUOAR" + ], + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d7dcf325cdb01a5c166026c3f5b9fb42d1934f0e Mon Sep 17 00:00:00 2001 From: Erika Cardenas <110841617+erika-cardenas@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:56:34 -0400 Subject: [PATCH 03/11] Update the notebook based on Mark's feedback 1. Created a Weaviate folder and moved the notebook 2. Wrote in the 2nd person 3. Upgraded the Weaviate version 4. 
Switched "Gemini" to "Gemini API" --- .../docker-compose.yml | 2 +- ...description_with_weaviate_and_gemini.ipynb | 42 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) rename examples/{Building_with_Weaviate_and_Gemini => weaviate}/docker-compose.yml (89%) rename examples/{Building_with_Weaviate_and_Gemini => weaviate}/personalized_description_with_weaviate_and_gemini.ipynb (91%) diff --git a/examples/Building_with_Weaviate_and_Gemini/docker-compose.yml b/examples/weaviate/docker-compose.yml similarity index 89% rename from examples/Building_with_Weaviate_and_Gemini/docker-compose.yml rename to examples/weaviate/docker-compose.yml index 2b31bc58d..8e6e45beb 100644 --- a/examples/Building_with_Weaviate_and_Gemini/docker-compose.yml +++ b/examples/weaviate/docker-compose.yml @@ -9,7 +9,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.24.8 + image: cr.weaviate.io/semitechnologies/weaviate:1.25.10 ports: - 8080:8080 - 50051:50051 diff --git a/examples/Building_with_Weaviate_and_Gemini/personalized_description_with_weaviate_and_gemini.ipynb b/examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb similarity index 91% rename from examples/Building_with_Weaviate_and_Gemini/personalized_description_with_weaviate_and_gemini.ipynb rename to examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb index 786bd2476..f65b296a4 100644 --- a/examples/Building_with_Weaviate_and_Gemini/personalized_description_with_weaviate_and_gemini.ipynb +++ b/examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb @@ -7,9 +7,9 @@ "id": "b19f6be7" }, "source": [ - "# Personalized Product Descriptions with Weaviate and Gemini\n", + "# Personalized Product Descriptions with Weaviate and the Gemini API\n", "\n", - "Weaviate is an open-source vector database that enables you to build AI-Native applications with Gemini! This notebook has four parts:\n", + "Weaviate is an open-source vector database that enables you to build AI-Native applications with the Gemini API! This notebook has four parts:\n", "1. [Part 1: Connect to Weaviate, Define Schema, and Import Data](#part-1-install-dependencies-and-connect-to-weaviate)\n", "\n", "2. [Part 2: Run Vector Search Queries](#part-2-vector-search)\n", @@ -19,11 +19,11 @@ "4. [Part 4: Personalized Product Descriptions](#part-4-personalization)\n", "\n", "\n", - "In this demo, we will show you how to embed your data, run a semantic search, make a generative call to Gemini and store the output in your vector database, and personalize the description based on the user profile. We are using the Google merch products as our dataset and will generate product descriptions by calling the Gemini API.\n", + "In this demo, you will learn how to embed your data, run a semantic search, make a generative call to the Gemini API and store the output in your vector database, and personalize the description based on the user profile.\n", "\n", "# Use Case\n", "\n", - "We will be working with an e-commerce dataset containing Google merch. We will load the data into the Weaviate vector database and use the semantic search features to retrieve data. Next, we will generate product descriptions and store them back into the database with a vector embedding for retrieval (aka, generative feedback loops). 
Lastly, we will create a small knowledge graph with uniquely generated product descriptions for the buyer personas Alice and Bob.\n", + "You will be working with an e-commerce dataset containing Google merch. You will load the data into the Weaviate vector database and use the semantic search features to retrieve data. Next, you'll generate product descriptions and store them back into the database with a vector embedding for retrieval (aka, generative feedback loops). Lastly, you'll create a small knowledge graph with uniquely generated product descriptions for the buyer personas Alice and Bob.\n", "\n", "### Requirements\n", "1. Weaviate vector database\n", @@ -35,7 +35,7 @@ "### Video\n", "**For an awesome walk through of this demo, check out [this](https://youtu.be/WORgeRAAN-4?si=-WvqNkPn8oCmnLGQ&t=1138) presentation from Google Cloud Next!**\n", "\n", - "[![From RAG to autonomous apps with Weaviate and Gemini on Google Kubernetes Engine](http://i3.ytimg.com/vi/WORgeRAAN-4/hqdefault.jpg)](https://youtu.be/WORgeRAAN-4?si=-WvqNkPn8oCmnLGQ&t=1138)" + "[![From RAG to autonomous apps with Weaviate and Gemini API on Google Kubernetes Engine](http://i3.ytimg.com/vi/WORgeRAAN-4/hqdefault.jpg)](https://youtu.be/WORgeRAAN-4?si=-WvqNkPn8oCmnLGQ&t=1138)" ] }, { @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "iKmPS8v7s_Xc", "metadata": { "id": "iKmPS8v7s_Xc" @@ -194,7 +194,7 @@ "source": [ "client = weaviate.WeaviateClient(\n", " embedded_options=EmbeddedOptions(\n", - " version=\"1.24.8\",\n", + " version=\"1.25.10\",\n", " additional_env_vars={\n", " \"ENABLE_MODULES\": \"text2vec-palm, generative-palm\"\n", " }),\n", @@ -240,7 +240,7 @@ }, "source": [ "### Create schema\n", - "The schema tells Weaviate how you want to store your data. We will have two collections: Products and Personas. Each collection has metadata (properties) and specifies the embedding and language model." + "The schema tells Weaviate how you want to store your data. You will have two collections: Products and Personas. Each collection has metadata (properties) and specifies the embedding and language model." ] }, { @@ -367,7 +367,7 @@ }, "source": [ "#### Upload to Weaviate\n", - "We will use Weaviate's batch import to get the 99 objects into our database" + "This is using Weaviate's batch import to get the 99 objects into our database" ] }, { @@ -472,7 +472,7 @@ }, "source": [ "### Vector Search\n", - "Vector search returns the objects with most similar vectors to that of the query. We will use the `near_text` operator to find objects with the nearest vector to an input text." + "Vector search returns the objects with most similar vectors to that of the query. You will use the `near_text` operator to find objects with the nearest vector to an input text." ] }, { @@ -545,12 +545,12 @@ }, "source": [ "### Autocut\n", - "Rather than hard-coding the limit on the number of objects (seen above), we can use [autocut](https://weaviate.io/developers/weaviate/api/graphql/additional-operators#autocut) to cut off the result set. Autocut limits the number of results returned based on significant variations in the result set's metrics, such as vector distance or score.\n", + "Rather than hard-coding the limit on the number of objects (seen above), the [autocut](https://weaviate.io/developers/weaviate/api/graphql/additional-operators#autocut) feature can be used to cut off the result set. 
Autocut limits the number of results returned based on significant variations in the result set's metrics, such as vector distance or score.\n", "\n", "\n", - "To use autocut, you must specify the `auto_limit` parameter, which will stop returning results after the specified number of variations, or \"jumps,\" is reached.\n", + "To use autocut, you must specify the `auto_limit` parameter, which will stop returning results after the specified number of variations, or \"jumps\" is reached.\n", "\n", - "We will use the same hybrid search query above but use `auto_limit` rather than `limit`. Notice how there are actually 4 objects retrieved in this case, compared to the 3 objects returned in the previous query." + "You will use the same hybrid search query above but use `auto_limit` rather than `limit`. Notice how there are actually 4 objects retrieved in this case, compared to the 3 objects returned in the previous query." ] }, { @@ -585,9 +585,9 @@ }, "source": [ "### Filters\n", - "We can narrow down our results by adding a filter to the query.\n", + "Narrow down the results by adding a filter to the query.\n", "\n", - "We will look for objects where `category` is equal to `drinkware`." + "Find objects where `category` is equal to `drinkware`." ] }, { @@ -624,7 +624,7 @@ "\n", "[Generative Feedback Loops](https://weaviate.io/blog/generative-feedback-loops-with-llms) refers to the process of storing the output from the language model back to the database.\n", "\n", - "We will generate a description for each product in our database using Gemini and save it to the `generated_description` property in the `Products` collection." + "You will generate a description for each product in our database using the Gemini API and save it to the `generated_description` property in the `Products` collection." ] }, { @@ -634,7 +634,7 @@ "id": "rCUXn9q0rDxf" }, "source": [ - "### Connect and configure Gemini model" + "### Connect and configure the Gemini API model" ] }, { @@ -668,7 +668,7 @@ "1. Run a vector search query to find travel jackets\n", " 1. Learn more about autocut (`auto_limit`) [here](https://weaviate.io/developers/weaviate/api/graphql/additional-operators#autocut).\n", "\n", - "2. Grab the returned objects, prompt Gemini with the task and image, store the description in the `generated_description` property" + "2. Grab the returned objects, prompt the Gemini API with the task and image, store the description in the `generated_description` property" ] }, { @@ -711,7 +711,7 @@ "source": [ "### Vector Search on the `generated_description` property\n", "\n", - "Since the product description was saved in our `Products` collection, we can run a vector search query on it." + "Since the product description was saved in our `Products` collection, you can run a vector search query on it." ] }, { @@ -745,9 +745,9 @@ "source": [ "## Part 4: Personalization\n", "\n", - "So far, we've generated product descriptions using Gemini's multi-modal model. In Part 4, we will generate product descriptions tailored to the persona.\n", + "So far, we've generated product descriptions using the Gemini API multi-modal model. In Part 4, you will generate product descriptions tailored to the persona.\n", "\n", - "We will use [cross-references](https://weaviate.io/developers/weaviate/manage-data/cross-references) to establish directional relationships between collections." 
+ "You will use [cross-references](https://weaviate.io/developers/weaviate/manage-data/cross-references) to establish directional relationships between collections." ] }, { From 5bb47836629e70bba2b7b32e6fbac7f79fdc36b5 Mon Sep 17 00:00:00 2001 From: Erika Cardenas <110841617+erika-cardenas@users.noreply.github.com> Date: Thu, 15 Aug 2024 16:02:27 -0400 Subject: [PATCH 04/11] Fix sentence --- .../personalized_description_with_weaviate_and_gemini.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb b/examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb index f65b296a4..4074e3573 100644 --- a/examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb +++ b/examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb @@ -745,7 +745,7 @@ "source": [ "## Part 4: Personalization\n", "\n", - "So far, we've generated product descriptions using the Gemini API multi-modal model. In Part 4, you will generate product descriptions tailored to the persona.\n", + "So far, you've generated product descriptions using the Gemini API multi-modal model. In Part 4, you will generate product descriptions tailored to the persona.\n", "\n", "You will use [cross-references](https://weaviate.io/developers/weaviate/manage-data/cross-references) to establish directional relationships between collections." ] From 90c6b2c498868de8807804211a3b72adad91e1b3 Mon Sep 17 00:00:00 2001 From: Erika Cardenas <110841617+erika-cardenas@users.noreply.github.com> Date: Thu, 15 Aug 2024 16:13:14 -0400 Subject: [PATCH 05/11] rename notebook file --- ...> personalized_description_with_weaviate_and_gemini_api.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/weaviate/{personalized_description_with_weaviate_and_gemini.ipynb => personalized_description_with_weaviate_and_gemini_api.ipynb} (100%) diff --git a/examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb similarity index 100% rename from examples/weaviate/personalized_description_with_weaviate_and_gemini.ipynb rename to examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb From 89599e523c617e8bd301311d4d1338ae440d5d84 Mon Sep 17 00:00:00 2001 From: Erika Cardenas <110841617+erika-cardenas@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:47:40 -0400 Subject: [PATCH 06/11] Factor in edits from Mark --- examples/weaviate/docker-compose.yml | 2 +- ...ription_with_weaviate_and_gemini_api.ipynb | 444 ++++++++++++------ 2 files changed, 301 insertions(+), 145 deletions(-) diff --git a/examples/weaviate/docker-compose.yml b/examples/weaviate/docker-compose.yml index 8e6e45beb..0df7e274c 100644 --- a/examples/weaviate/docker-compose.yml +++ b/examples/weaviate/docker-compose.yml @@ -15,7 +15,7 @@ services: - 50051:50051 restart: on-failure:0 environment: - PALM_APIKEY: 'PALM_APIKEY' + PALM_APIKEY: 'sk-key' QUERY_DEFAULTS_LIMIT: 25 AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' PERSISTENCE_DATA_PATH: '/var/lib/weaviate' diff --git a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb index 4074e3573..dcf9513fc 100644 --- a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb +++ b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb @@ -21,18 +21,21 
@@ "\n", "In this demo, you will learn how to embed your data, run a semantic search, make a generative call to the Gemini API and store the output in your vector database, and personalize the description based on the user profile.\n", "\n", - "# Use Case\n", + "## Use Case\n", "\n", "You will be working with an e-commerce dataset containing Google merch. You will load the data into the Weaviate vector database and use the semantic search features to retrieve data. Next, you'll generate product descriptions and store them back into the database with a vector embedding for retrieval (aka, generative feedback loops). Lastly, you'll create a small knowledge graph with uniquely generated product descriptions for the buyer personas Alice and Bob.\n", "\n", - "### Requirements\n", + "## Requirements\n", + "You will need a running Weaviate cluster and Gemini API key. You'll set up these requirements as you progress through this notebook!\n", + "\n", "1. Weaviate vector database\n", " 1. Serverless\n", " 1. Embedded\n", " 1. Local (Docker)\n", "1. Gemini API key\n", + " 1. Create an API key via [AI Studio](https://aistudio.google.com/)\n", "\n", - "### Video\n", + "## Video\n", "**For an awesome walk through of this demo, check out [this](https://youtu.be/WORgeRAAN-4?si=-WvqNkPn8oCmnLGQ&t=1138) presentation from Google Cloud Next!**\n", "\n", "[![From RAG to autonomous apps with Weaviate and Gemini API on Google Kubernetes Engine](http://i3.ytimg.com/vi/WORgeRAAN-4/hqdefault.jpg)](https://youtu.be/WORgeRAAN-4?si=-WvqNkPn8oCmnLGQ&t=1138)" @@ -58,14 +61,13 @@ "outputs": [], "source": [ "!pip install weaviate-client==4.5.5\n", - "!pip install google-generativeai\n", - "!pip install requests\n", - "!pip install python-dotenv" + "!pip install -U -q google-generativeai\n", + "!pip install requests" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "iKmPS8v7s_Xc", "metadata": { "id": "iKmPS8v7s_Xc" @@ -73,7 +75,7 @@ "outputs": [], "source": [ "import weaviate\n", - "import weaviate.classes.config as wvcc\n", + "from weaviate.classes.config import Configure\n", "from weaviate.embedded import EmbeddedOptions\n", "import weaviate.classes as wvc\n", "from weaviate.classes.config import Property, DataType, ReferenceProperty\n", @@ -81,7 +83,6 @@ "from weaviate.classes.query import QueryReference\n", "\n", "import os\n", - "from dotenv import load_dotenv\n", "import json\n", "import requests\n", "import PIL\n", @@ -89,12 +90,26 @@ "\n", "from PIL import Image\n", "from io import BytesIO\n", + "from IPython.display import Markdown\n", + "\n", + "import google\n", "import google.generativeai as genai\n", + "from google.colab import userdata\n", "\n", "# Convert image links to PIL object\n", "def url_to_pil(url):\n", " response = requests.get(url)\n", - " return Image.open(BytesIO(response.content))" + " return Image.open(BytesIO(response.content))\n", + "\n", + "# display images\n", + "def display_image(url, size=100):\n", + " response = requests.get(url)\n", + " image_data = BytesIO(response.content)\n", + " image = Image.open(image_data)\n", + "\n", + " resized_image = image.resize((size,size))\n", + "\n", + " display(resized_image)" ] }, { @@ -118,9 +133,9 @@ "\n", "You will need to create a Weaviate cluster. There are a few ways to do this:\n", "\n", - "1. [Weaviate Cloud Services](console.weaviate.cloud): Create a sandbox on our managed service. You will need to deploy it in US West, US East, or Australia.\n", + "1. 
[Weaviate Embedded](https://weaviate.io/developers/weaviate/installation/embedded): Run Weaviate in your runtime\n", "\n", - "2. [Weaviate Embedded](https://weaviate.io/developers/weaviate/installation/embedded): Run Weaviate in your runtime (Note: It will disconnect once you stop the terminal.)\n", + "2. [Weaviate Cloud](console.weaviate.cloud): Create a sandbox on our managed service. You will need to deploy it in US West, US East, or Australia.\n", "\n", "3. Local Host: [Docker](https://weaviate.io/developers/weaviate/installation/docker-compose#starter-docker-compose-file) or [Kubernetes](https://weaviate.io/developers/weaviate/installation/kubernetes)\n", "\n", @@ -129,58 +144,37 @@ }, { "cell_type": "markdown", - "id": "1199263a", + "id": "897684f3", "metadata": { - "id": "1199263a" + "id": "897684f3" }, "source": [ - "### Choose **only one** installation option\n", + "#### Weaviate Embedded\n", + "We will default to Weaviate Embedded. This runs Weaviate inside your notebook and is ideal for quick experimentation. \n", "\n", - "Pick one of the three options below to run Weaviate" + "**Note: It will disconnect once you stop the terminal.**" ] }, { "cell_type": "markdown", - "id": "11886426", - "metadata": { - "id": "11886426" - }, + "id": "033eec7e", + "metadata": {}, "source": [ - "#### 1. Weaviate Cloud Service\n", + "**Set up your API key**\n", "\n", - "The first option is the [Weaviate Cloud Service](https://console.weaviate.cloud/), you can connect your notebook to a serverless Weaviate to keep the data persistent in the cloud." + "To run the following cell, your Gemini API key must be stored in a Colab Secret and named `GEMINI_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see the [Authentication](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Authentication.ipynb) quickstart for an example." ] }, { "cell_type": "code", "execution_count": null, - "id": "f984616a", - "metadata": { - "id": "f984616a" - }, + "id": "b2a9dc37", + "metadata": {}, "outputs": [], "source": [ - "load_dotenv()\n", - "\n", - "client = weaviate.connect_to_wcs(\n", - " cluster_url=os.getenv(WCS_DEMO_URL), # Replace with your WCS URL\n", - " auth_credentials=weaviate.auth.AuthApiKey(os.getenv(WCS_DEMO_RO_KEY)), # Replace with your WCS key\n", - " headers={\"X-PaLM-Api-Key\": os.getenv(\"PALM-API-KEY\")}, # Replace with your Gemini API key\n", - ")\n", - "\n", - "print(client.is_ready())" - ] - }, - { - "cell_type": "markdown", - "id": "897684f3", - "metadata": { - "id": "897684f3" - }, - "source": [ - "#### 2. Weaviate Embedded\n", - "\n", - "The second option is Weaviate embedded. This runs Weaviate inside your notebook. Ideal for quick experimentation." + "# Grab Gemini API key \n", + "GEMINI_API_KEY = userdata.get(\"GEMINI_API_KEY\")\n", + "genai.configure(api_key=GEMINI_API_KEY)" ] }, { @@ -199,7 +193,7 @@ " \"ENABLE_MODULES\": \"text2vec-palm, generative-palm\"\n", " }),\n", " additional_headers={\n", - " \"X-PaLM-Api-Key\": 'PALM-API-KEY' # Replace with your Gemini API key\n", + " \"X-Google-Studio-Api-Key\": GEMINI_API_KEY \n", " }\n", ")\n", "\n", @@ -208,28 +202,52 @@ }, { "cell_type": "markdown", - "id": "a1c36425", - "metadata": { - "id": "a1c36425" - }, + "id": "1a554d15", + "metadata": {}, "source": [ - "#### 3. 
Local (Docker)\n", + "#### Other Options: Weaviate Cloud and Local Host" + ] + }, + { + "cell_type": "markdown", + "id": "39aa28eb", + "metadata": {}, + "source": [ + "#### **Weaviate Cloud**\n", + "\n", + "You can connect your notebook to a serverless Weaviate cluster to keep the data persistent in the cloud. You can register [here](https://console.weaviate.cloud/) and create a free 14-day sandbox!\n", + "\n", + "To connect to your WCD cluster:\n", + "```python\n", + "WCD_URL = \"https://sandbox.gcp.weaviate.cloud\"\n", + "WCD_AUTH_KEY = \"sk-key\"\n", + "GEMINI_API_KEY = \"sk-key\"\n", + "\n", + "client = weaviate.connect_to_wcs(\n", + " cluster_url=WCD_URL,\n", + " auth_credentials=weaviate.auth.AuthApiKey(WCD_AUTH_KEY),\n", + " headers={\"X-Google-Studio-Api-Key\": GEMINI_API_KEY},\n", + ")\n", "\n", - "If you like to run Weaviate yourself, you can download the [Docker files](https://weaviate.io/developers/weaviate/installation/docker-compose) and run it locally on your machine or in the cloud. Make sure to include the Google module in the configurator." + "print(client.is_ready())\n", + "```" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "1f042619", - "metadata": { - "id": "1f042619" - }, - "outputs": [], + "cell_type": "markdown", + "id": "e400af40", + "metadata": {}, "source": [ + "#### **Local Host**\n", + "\n", + "If you want to run Weaviate yourself, you can download the [Docker files](https://weaviate.io/developers/weaviate/installation/docker-compose) and run it locally on your machine or in the cloud. There is also a `yaml` file in this folder you can use.\n", + "\n", + "To connect to Weaviate locally:\n", + "```python\n", "client = weaviate.connect_to_local()\n", "\n", - "print(client.is_ready())" + "print(client.is_ready())\n", + "```" ] }, { @@ -240,7 +258,11 @@ }, "source": [ "### Create schema\n", - "The schema tells Weaviate how you want to store your data. You will have two collections: Products and Personas. Each collection has metadata (properties) and specifies the embedding and language model." + "The schema tells Weaviate how you want to store your data. \n", + "\n", + "You will first create two collections: Products and Personas. Each collection has metadata (properties) and specifies the embedding and language model.\n", + "\n", + "In [Part 4](#part-4-personalization), you will create another collection, `Personalized`, that will generate product descriptions based on the persona. " ] }, { @@ -256,11 +278,29 @@ "result = client.collections.delete(\"Products\")\n", "print(result)\n", "result = client.collections.delete(\"Personas\")\n", - "print(result)\n", - "result = client.collections.delete(\"Personalized\")\n", "print(result)" ] }, + { + "cell_type": "markdown", + "id": "e5c9093f", + "metadata": {}, + "source": [ + "Store your Google Cloud project id in a Colab Secret and name it `PROJECT_ID`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f48ee903", + "metadata": {}, + "outputs": [], + "source": [ + "# Set project id \n", + "PROJECT_ID = userdata.get(\"PROJECT_ID\")\n", + "genai.configure(api_key=PROJECT_ID)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -270,20 +310,24 @@ }, "outputs": [], "source": [ + "API_ENDPOINT = \"generativelanguage.googleapis.com\"\n", + "embedding_model = \"embedding-001\"\n", + "generative_model = \"gemini-pro\"\n", + "\n", "# Products Collection\n", "if not client.collections.exists(\"Products\"):\n", " collection = client.collections.create(\n", " name=\"Products\",\n", - " vectorizer_config=wvcc.Configure.Vectorizer.text2vec_palm\n", + " vectorizer_config=Configure.Vectorizer.text2vec_palm\n", " (\n", - " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", - " api_endpoint=\"generativelanguage.googleapis.com\",\n", - " model_id=\"embedding-gecko-001\" # default model. You can switch to another model if desired\n", + " project_id=PROJECT_ID,\n", + " api_endpoint=API_ENDPOINT,\n", + " model_id = embedding_model\n", " ),\n", - " generative_config=wvcc.Configure.Generative.palm(\n", - " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", - " api_endpoint=\"generativelanguage.googleapis.com\",\n", - " model_id=\"gemini-pro-vision\" # You can switch to another model if desired\n", + " generative_config=Configure.Generative.palm(\n", + " project_id=PROJECT_ID,\n", + " api_endpoint=API_ENDPOINT,\n", + " model_id = generative_model\n", " ),\n", " properties=[ # properties for the Products collection\n", " Property(name=\"product_id\", data_type=DataType.TEXT),\n", @@ -300,16 +344,16 @@ "if not client.collections.exists(\"Personas\"):\n", " collection = client.collections.create(\n", " name=\"Personas\",\n", - " vectorizer_config=wvcc.Configure.Vectorizer.text2vec_palm\n", + " vectorizer_config=Configure.Vectorizer.text2vec_palm\n", " (\n", - " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", - " api_endpoint=\"generativelanguage.googleapis.com\",\n", - " model_id=\"embedding-gecko-001\" # default model. You can switch to another model if desired\n", + " project_id=PROJECT_ID,\n", + " api_endpoint=API_ENDPOINT,\n", + " model_id = embedding_model\n", " ),\n", - " generative_config=wvcc.Configure.Generative.palm(\n", - " project_id=\"project-id\", # Only required if you're using Vertex AI. 
Replace with your project id\n", - " api_endpoint=\"generativelanguage.googleapis.com\",\n", - " model_id=\"gemini-pro-vision\" # You can switch to another model if desired\n", + " generative_config=Configure.Generative.palm(\n", + " project_id=PROJECT_ID,\n", + " api_endpoint=API_ENDPOINT,\n", + " model_id = generative_model\n", " ),\n", " properties=[ # properties for the Personas collection\n", " Property(name=\"name\", data_type=DataType.TEXT),\n", @@ -330,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "vo0WckWt_gyq", "metadata": { "id": "vo0WckWt_gyq" @@ -347,15 +391,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "-uxOVFZ6_iA7", "metadata": { "id": "-uxOVFZ6_iA7" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 'id_1',\n", + " 'product_id': 'GGOEGAYC135814',\n", + " 'title': 'Google Badge Tee',\n", + " 'category': 'Apparel Accessories Tops Tees Tshirts',\n", + " 'link': 'https://shop.googlemerchandisestore.com/store/20160512512/assets/items/images/GGOEGXXX1358.jpg',\n", + " 'description': 'A classic crew neck tee made from 100 cotton Its soft and comfortable and features a small Google logo on the chest',\n", + " 'color': \"['Blue']\",\n", + " 'gender': 'Unisex',\n", + " 'brand': 'Google'}" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Print first object\n", - "\n", "data[0]" ] }, @@ -367,7 +429,42 @@ }, "source": [ "#### Upload to Weaviate\n", - "This is using Weaviate's batch import to get the 99 objects into our database" + "\n", + "To make sure everything is set, you will upload only one object and confirm it's in the database. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2762abcc", + "metadata": {}, + "outputs": [], + "source": [ + "products = client.collections.get(\"Products\")\n", + "\n", + "first_object = data[0]\n", + "\n", + "products.data.insert(\n", + " properties={\n", + " \"product_id\": first_object['product_id'],\n", + " \"title\": first_object['title'],\n", + " \"category\": first_object['category'],\n", + " \"link\": first_object['link'],\n", + " \"description\": first_object['description'],\n", + " \"brand\": first_object['brand']\n", + " }\n", + ")\n", + "\n", + "response = products.aggregate.over_all(total_count=True)\n", + "print(response.total_count) # This should output 1" + ] + }, + { + "cell_type": "markdown", + "id": "f7eec0d7", + "metadata": {}, + "source": [ + "Let's import the remainder of our dataset. You will use Weaviate's batch import to get the 98 objects into our database." 
] }, { @@ -381,8 +478,10 @@ "source": [ "products = client.collections.get(\"Products\")\n", "\n", + "remaining_data = data[1:]\n", + "\n", "with products.batch.dynamic() as batch:\n", - " for item in data:\n", + " for item in remaining_data:\n", " batch.add_object(\n", " properties={\n", " \"product_id\": item['product_id'],\n", @@ -392,22 +491,10 @@ " \"description\": item['description'],\n", " \"brand\": item['brand']\n", " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7XWKsV920vje", - "metadata": { - "id": "7XWKsV920vje" - }, - "outputs": [], - "source": [ - "# count how many objects are in the database\n", - "products = client.collections.get(\"Products\")\n", + " )\n", + "\n", "response = products.aggregate.over_all(total_count=True)\n", - "print(response.total_count)" + "print(response.total_count) # this should print 99 " ] }, { @@ -420,7 +507,6 @@ "outputs": [], "source": [ "# print the objects uuid and properties\n", - "\n", "for product in products.iterator():\n", " print(product.uuid, product.properties)" ] @@ -432,9 +518,7 @@ "id": "a8c64f21" }, "source": [ - "From the printed list above, select one `uuid` and paste it in the below cell.\n", - "\n", - "Note: If you run the cell below without grabbing a `uuid`, it will result in an error." + "You will fetch the object by the UUID that was created. It will print out the vector embedding as well!" ] }, { @@ -447,7 +531,7 @@ "outputs": [], "source": [ "product = products.query.fetch_object_by_id(\n", - " \"87e5a137-d943-4863-90df-7eed6415fd58\", # <== paste a new product UUID here after importing\n", + " product.uuid,\n", " include_vector=True\n", ")\n", "\n", @@ -493,7 +577,9 @@ ")\n", "\n", "for product in response.objects:\n", - " print(json.dumps(product.properties, indent=2))" + " print(json.dumps(product.properties, indent=2))\n", + " display_image(product.properties['link'])\n", + " print('===')" ] }, { @@ -534,7 +620,9 @@ ")\n", "\n", "for product in response.objects:\n", - " print(json.dumps(product.properties, indent=2))" + " print(json.dumps(product.properties, indent=2))\n", + " display_image(product.properties['link'])\n", + " print('===')" ] }, { @@ -574,7 +662,9 @@ ")\n", "\n", "for product in response.objects:\n", - " print(json.dumps(product.properties, indent=2))" + " print(json.dumps(product.properties, indent=2))\n", + " display_image(product.properties['link'])\n", + " print('===')" ] }, { @@ -609,7 +699,8 @@ ")\n", "\n", "for product in response.objects:\n", - " print(product.properties)\n", + " print(json.dumps(product.properties, indent=2))\n", + " display_image(product.properties['link'])\n", " print('===')" ] }, @@ -634,7 +725,9 @@ "id": "rCUXn9q0rDxf" }, "source": [ - "### Connect and configure the Gemini API model" + "### Connect and configure the Gemini API model\n", + "\n", + "Make sure you have set your Gemini API key in `GEMINI_API_KEY`. Please confirm this step was done in [Part 1](#part-1-connect-to-weaviate-define-schema-and-import-data)." 
] }, { @@ -646,13 +739,9 @@ }, "outputs": [], "source": [ - "genai.configure(api_key='gemini-api-key') # gemini api key\n", - "\n", - "# Multimodal model\n", - "model_pro_vision = genai.GenerativeModel(model_name='gemini-pro-vision') # multi-modal model (text and image)\n", + "genai.configure(api_key=GEMINI_API_KEY) # gemini api key\n", "\n", - "# LLM\n", - "model_pro = genai.GenerativeModel(model_name='gemini-pro') # text only model" + "gemini_flash_model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest') # this model handles both images and text" ] }, { @@ -665,8 +754,8 @@ "### Generate a description and store it in the `Products` collection\n", "\n", "Steps for the below cell:\n", - "1. Run a vector search query to find travel jackets\n", - " 1. Learn more about autocut (`auto_limit`) [here](https://weaviate.io/developers/weaviate/api/graphql/additional-operators#autocut).\n", + "1. Run a vector search query to find travel jackets \n", + " * Learn more about autocut (`auto_limit`) [here](https://weaviate.io/developers/weaviate/api/graphql/additional-operators#autocut).\n", "\n", "2. Grab the returned objects, prompt the Gemini API with the task and image, store the description in the `generated_description` property" ] @@ -692,9 +781,9 @@ " img_url = product.properties[\"link\"]\n", "\n", " pil_image = url_to_pil(img_url) # convert image to PIL object\n", - " generated_description = model_pro_vision.generate_content([\"Write a short Facebook ad about this product photo.\", pil_image]) # prompt to Gemini\n", + " generated_description = gemini_flash_model.generate_content([\"Write a short Facebook ad about this product photo.\", pil_image]) # prompt to the Gemini API\n", " generated_description = generated_description.text\n", - " print(img_url)\n", + " display_image(product.properties['link'])\n", " print(generated_description)\n", " print('===')\n", "\n", @@ -745,9 +834,28 @@ "source": [ "## Part 4: Personalization\n", "\n", - "So far, you've generated product descriptions using the Gemini API multi-modal model. In Part 4, you will generate product descriptions tailored to the persona.\n", + "So far, you've generated product descriptions using the Gemini API with the `gemini-1.5-flash` model. In Part 4, you will generate product descriptions tailored to the persona.\n", "\n", - "You will use [cross-references](https://weaviate.io/developers/weaviate/manage-data/cross-references) to establish directional relationships between collections." + "You will use [cross-references](https://weaviate.io/developers/weaviate/manage-data/cross-references) to establish directional relationships between the Products and Personas collections." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0473a28", + "metadata": {}, + "outputs": [], + "source": [ + "result = client.collections.delete(\"Personalized\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "ca0c8c28", + "metadata": {}, + "source": [ + "You will use the same `PROJECT_ID`, `API_ENDPOINT`, `embedding_model`, and `generative-model` from [Part 1](#part-1-connect-to-weaviate-define-schema-and-import-data)." 
] }, { @@ -759,21 +867,24 @@ }, "outputs": [], "source": [ + "API_ENDPOINT = \"generativelanguage.googleapis.com\"\n", + "embedding_model = \"embedding-001\"\n", + "generative_model = \"gemini-pro\"\n", + "\n", "# Personalized Collection\n", "\n", "if not client.collections.exists(\"Personalized\"):\n", " collection = client.collections.create(\n", " name=\"Personalized\",\n", - " vectorizer_config=wvcc.Configure.Vectorizer.text2vec_palm\n", - " (\n", - " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", - " api_endpoint=\"generativelanguage.googleapis.com\",\n", - " model_id=\"embedding-gecko-001\" # default model. You can switch to another model if desired\n", + " vectorizer_config=Configure.Vectorizer.text2vec_palm(\n", + " project_id=PROJECT_ID,\n", + " api_endpoint=API_ENDPOINT,\n", + " model_id = embedding_model\n", " ),\n", - " generative_config=wvcc.Configure.Generative.palm(\n", - " project_id=\"project-id\", # Only required if you're using Vertex AI. Replace with your project id\n", - " api_endpoint=\"generativelanguage.googleapis.com\",\n", - " model_id=\"gemini-pro-vision\" # You can switch to another model if desired\n", + " generative_config=Configure.Generative.palm(\n", + " project_id=PROJECT_ID,\n", + " api_endpoint=API_ENDPOINT,\n", + " model_id = generative_model\n", " ),\n", " properties=[\n", " Property(name=\"description\", data_type=DataType.TEXT),\n", @@ -814,7 +925,7 @@ "personas = client.collections.get(\"Personas\")\n", "\n", "for persona in ['Alice', 'Bob']:\n", - " generated_description = model_pro.generate_content([\"Create a fictional buyer persona named \" + persona + \", write a short description about them\"]) # use gemini-pro to generate persona description\n", + " generated_description = gemini_flash_model.generate_content([\"Create a fictional buyer persona named \" + persona + \", write a short description about them\"]) # use gemini-pro to generate persona description\n", " uuid = personas.data.insert({\n", " \"name\": persona,\n", " \"description\": generated_description.text\n", @@ -864,15 +975,15 @@ "source": [ "personalized = client.collections.get(\"Personalized\")\n", "\n", - "product = products.query.fetch_object_by_id(\"87e5a137-d943-4863-90df-7eed6415fd58\") # <== paste a new product UUID here after importing\n", - "print(product.properties['link'])\n", - "print('===')\n", + "product = products.query.fetch_object_by_id(product.uuid)\n", + "display_image(product.properties['link'])\n", "\n", "personas = client.collections.get(\"Personas\")\n", "\n", "for persona in personas.iterator():\n", - " generated_description = model_pro.generate_content([\"Create a product description tailored to the following person, make sure to use the name (\", persona.properties[\"name\"],\") of the persona.\\n\\n\", \"# Product Description\\n\", product.properties[\"description\"], \"# Persona\", persona.properties[\"description\"]]) # generate a description tailored to the persona\n", + " generated_description = gemini_flash_model.generate_content([\"Create a product description tailored to the following person, make sure to use the name (\", persona.properties[\"name\"],\") of the persona.\\n\\n\", \"# Product Description\\n\", product.properties[\"description\"], \"# Persona\", persona.properties[\"description\"]]) # generate a description tailored to the persona\n", " print(generated_description.text)\n", + " print('====')\n", " # Add the personalized description to the `description` property in the 
Personalized collection\n", " new_uuid = personalized.data.insert(\n", " properties={\n", @@ -881,9 +992,7 @@ " \"ofProduct\": product.uuid, # add cross-reference to the Product collection\n", " \"ofPersona\": persona.uuid # add cross-reference to the Persona collection\n", " },\n", - " )\n", - " print(\"New UUID\", new_uuid)\n", - " print('===')" + " )\n" ] }, { @@ -909,10 +1018,9 @@ "\n", "response = personalized.query.fetch_objects(\n", " limit=2,\n", - " include_vector=True,\n", " return_references=[QueryReference(\n", " link_on=\"ofProduct\", # return the title property from the Product collection\n", - " return_properties=[\"title\"]\n", + " return_properties=[\"title\", \"link\"]\n", " ),\n", " QueryReference(\n", " link_on=\"ofPersona\",\n", @@ -927,9 +1035,57 @@ " print(ref_obj.properties)\n", " for ref_obj in item.references[\"ofPersona\"].objects:\n", " print(ref_obj.properties)\n", - " print(item.vector[\"default\"])\n", + " display_image(product.properties['link'])\n", " print(\"===\")" ] + }, + { + "cell_type": "markdown", + "id": "f61bca1b", + "metadata": {}, + "source": [ + "## Notebook Recap" + ] + }, + { + "cell_type": "markdown", + "id": "7f5d9952", + "metadata": {}, + "source": [ + "In this notebook, you learned how to:\n", + "1. Create a Weaviate cluster using Embedded\n", + "2. Define a Weaviate schema and select the embedding and generative model\n", + "3. Connect to the Gemini API\n", + "4. Perform vector and hybrid search with filtering and autocut \n", + "6. Use Generative Feedback Loops to store the output of the language model back to the database for future retrieval\n", + "7. Use cross-references to build relationships between collections" + ] + }, + { + "cell_type": "markdown", + "id": "901aeb49", + "metadata": {}, + "source": [ + "You can learn more about Weaviate through our [documentation](https://weaviate.io/developers/weaviate), and you can find more Weaviate and Google cookbooks [here](https://github.com/weaviate/recipes/tree/main/integrations/cloud-hyperscalers/google)!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "cfba1c28", + "metadata": {}, + "source": [ + "**Authors: Erika Cardenas and Bob Van Luijt** \n", + "\n", + "Connect with us and let us know if you have any questions!\n", + "\n", + "Erika's accounts:\n", + "* [Follow on X](https://x.com/ecardenas300)\n", + "* [Connect on LinkedIn](https://www.linkedin.com/in/erikacardenas300/)\n", + "\n", + "Bob's accounts:\n", + "* [Follow on X](https://x.com/bobvanluijt)\n", + "* [Connect on LinkedIn](https://www.linkedin.com/in/bobvanluijt/)" + ] } ], "metadata": { From 2bd827d2ad72578f5abfd45877a6a6a22217633c Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Wed, 28 Aug 2024 17:36:14 +0800 Subject: [PATCH 07/11] Run nbfmt --- ...ription_with_weaviate_and_gemini_api.ipynb | 78 ++++++++++++------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb index dcf9513fc..feb248caa 100644 --- a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb +++ b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb @@ -158,7 +158,9 @@ { "cell_type": "markdown", "id": "033eec7e", - "metadata": {}, + "metadata": { + "id": "85b8f5569cf6" + }, "source": [ "**Set up your API key**\n", "\n", @@ -169,7 +171,9 @@ "cell_type": "code", "execution_count": null, "id": "b2a9dc37", - "metadata": {}, + "metadata": { + "id": "a4ed6e16c9d3" + }, "outputs": [], "source": [ "# Grab Gemini API key \n", @@ -203,7 +207,9 @@ { "cell_type": "markdown", "id": "1a554d15", - "metadata": {}, + "metadata": { + "id": "7316e3e5e27e" + }, "source": [ "#### Other Options: Weaviate Cloud and Local Host" ] @@ -211,7 +217,9 @@ { "cell_type": "markdown", "id": "39aa28eb", - "metadata": {}, + "metadata": { + "id": "517d2f4247ba" + }, "source": [ "#### **Weaviate Cloud**\n", "\n", @@ -236,7 +244,9 @@ { "cell_type": "markdown", "id": "e400af40", - "metadata": {}, + "metadata": { + "id": "a15f667a78e7" + }, "source": [ "#### **Local Host**\n", "\n", @@ -284,7 +294,9 @@ { "cell_type": "markdown", "id": "e5c9093f", - "metadata": {}, + "metadata": { + "id": "f6c44c0ce91f" + }, "source": [ "Store your Google Cloud project id in a Colab Secret and name it `PROJECT_ID`." ] @@ -293,7 +305,9 @@ "cell_type": "code", "execution_count": null, "id": "f48ee903", - "metadata": {}, + "metadata": { + "id": "a7d08ed6250b" + }, "outputs": [], "source": [ "# Set project id \n", @@ -437,7 +451,9 @@ "cell_type": "code", "execution_count": null, "id": "2762abcc", - "metadata": {}, + "metadata": { + "id": "82c8d2a1a97a" + }, "outputs": [], "source": [ "products = client.collections.get(\"Products\")\n", @@ -462,7 +478,9 @@ { "cell_type": "markdown", "id": "f7eec0d7", - "metadata": {}, + "metadata": { + "id": "ce1fb093e419" + }, "source": [ "Let's import the remainder of our dataset. You will use Weaviate's batch import to get the 98 objects into our database." 
] @@ -843,7 +861,9 @@ "cell_type": "code", "execution_count": null, "id": "b0473a28", - "metadata": {}, + "metadata": { + "id": "fb225b4ffb3c" + }, "outputs": [], "source": [ "result = client.collections.delete(\"Personalized\")\n", @@ -853,7 +873,9 @@ { "cell_type": "markdown", "id": "ca0c8c28", - "metadata": {}, + "metadata": { + "id": "d18b54e461a4" + }, "source": [ "You will use the same `PROJECT_ID`, `API_ENDPOINT`, `embedding_model`, and `generative-model` from [Part 1](#part-1-connect-to-weaviate-define-schema-and-import-data)." ] @@ -1042,7 +1064,9 @@ { "cell_type": "markdown", "id": "f61bca1b", - "metadata": {}, + "metadata": { + "id": "bedda838233a" + }, "source": [ "## Notebook Recap" ] @@ -1050,7 +1074,9 @@ { "cell_type": "markdown", "id": "7f5d9952", - "metadata": {}, + "metadata": { + "id": "e2f8c58b3f96" + }, "source": [ "In this notebook, you learned how to:\n", "1. Create a Weaviate cluster using Embedded\n", @@ -1064,7 +1090,9 @@ { "cell_type": "markdown", "id": "901aeb49", - "metadata": {}, + "metadata": { + "id": "60a6252d6c08" + }, "source": [ "You can learn more about Weaviate through our [documentation](https://weaviate.io/developers/weaviate), and you can find more Weaviate and Google cookbooks [here](https://github.com/weaviate/recipes/tree/main/integrations/cloud-hyperscalers/google)!" ] @@ -1072,7 +1100,9 @@ { "cell_type": "markdown", "id": "cfba1c28", - "metadata": {}, + "metadata": { + "id": "0ddbf9648e50" + }, "source": [ "**Authors: Erika Cardenas and Bob Van Luijt** \n", "\n", @@ -1094,26 +1124,14 @@ "7Wlb0vCDUK3h", "t1Uc93joUOAR" ], - "provenance": [] + "name": "personalized_description_with_weaviate_and_gemini_api.ipynb", + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", - "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" } }, "nbformat": 4, - "nbformat_minor": 5 + "nbformat_minor": 0 } From 5c3291e2cc4160e94144a039666bb65cdc7a6e05 Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Wed, 28 Aug 2024 17:43:41 +0800 Subject: [PATCH 08/11] Add license, buttons and other nblint fixes --- ...ription_with_weaviate_and_gemini_api.ipynb | 57 ++++++++++++++++++- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb index feb248caa..22402b130 100644 --- a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb +++ b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb @@ -1,5 +1,36 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2024 Google LLC." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, { "cell_type": "markdown", "id": "b19f6be7", @@ -7,9 +38,29 @@ "id": "b19f6be7" }, "source": [ - "# Personalized Product Descriptions with Weaviate and the Gemini API\n", - "\n", - "Weaviate is an open-source vector database that enables you to build AI-Native applications with the Gemini API! This notebook has four parts:\n", + "# Personalized Product Descriptions with Weaviate and the Gemini API" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + "
\n", + " Run in Google Colab\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ead11e5e73b" + }, + "source": [ + "Weaviate is an open-source vector database that enables you to build AI-powered applications with the Gemini API! This notebook has four parts:\n", "1. [Part 1: Connect to Weaviate, Define Schema, and Import Data](#part-1-install-dependencies-and-connect-to-weaviate)\n", "\n", "2. [Part 2: Run Vector Search Queries](#part-2-vector-search)\n", From 0f0ea72087437fb686be5f3bf36fe88722e30857 Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Wed, 28 Aug 2024 17:49:18 +0800 Subject: [PATCH 09/11] Update README --- examples/README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/README.md b/examples/README.md index 62b22dea5..c6ec580f9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,7 +1,9 @@ # Gemini API Examples ## Table of contents -This is a colletion of fun examples for the Gemini API. + +This is a collection of fun examples for the Gemini API. + * [Agents and Automatic Function Calling](https://github.com/google-gemini/cookbook/blob/main/examples/Agents_Function_Calling_Barista_Bot.ipynb): Create an agent (Barrista-bot) to take your coffee order. * [Anomaly Detection](https://github.com/google-gemini/cookbook/blob/main/examples/Anomaly_detection_with_embeddings.ipynb): Use embeddings to detect anomalies in your datasets. * [Apollo 11 - long context example](https://github.com/google-gemini/cookbook/blob/main/examples/Apollo_11.ipynb): Search a 400 page transcript from Apollo 11. @@ -20,7 +22,12 @@ This is a colletion of fun examples for the Gemini API. * [Working with Charts, Graphs, and Slide Decks](https://github.com/google-gemini/cookbook/blob/main/examples/Working_with_Charts_Graphs_and_Slide_Decks.ipynb): Gemini models are powerful multimodal LLMs that can process both text and image inputs. This notebook shows how Gemini 1.5 Flash model is capable of extracting data from various images. * [Entity extraction](https://github.com/google-gemini/cookbook/blob/main/examples/Entity_Extraction.ipynb): Use Gemini API to speed up some of your tasks, such as searching through text to extract needed information. Entity extraction with a Gemini model is a simple query, and you can ask it to retrieve its answer in the form that you prefer. -Folders +### Integrations + +* [Personalized Product Descriptions with Weaviate](weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb): Load data into a Weaviate vector DB, build a semantic search system using embeddings from the Gemini API, create a knowledge graph and generate unique product descriptions for personas using the Gemini API and Weaviate. + +### Folders + * [Prompting examples](https://github.com/google-gemini/cookbook/tree/main/examples/prompting): A directory with examples of various prompting techniques. * [JSON Capabilities](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Tuning.ipynb): A directory with guides containing different types of tasks you can do with JSON schemas. * [Automate Google Workspace tasks with the Gemini API](https://github.com/google-gemini/cookbook/tree/main/examples/Apps_script_and_Workspace_codelab): This codelabs shows you how to connect to the Gemini API using Apps Script, and uses the function calling, vision and text capabilities to automate Google Workspace tasks - summarizing a document, analyzing a chart, sending an email and generating some slides directly. All of this is done from a free text input. 
From 23e2ad370e913a6abe6270175a92dd2cf668da4b Mon Sep 17 00:00:00 2001 From: Erika Cardenas <110841617+erika-cardenas@users.noreply.github.com> Date: Wed, 28 Aug 2024 10:22:48 -0400 Subject: [PATCH 10/11] Upgrade Python version and remove project_id --- ...ription_with_weaviate_and_gemini_api.ipynb | 64 +++++++------------ 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb index 22402b130..b2eb291b8 100644 --- a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb +++ b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb @@ -111,14 +111,16 @@ }, "outputs": [], "source": [ - "!pip install weaviate-client==4.5.5\n", + "!sudo apt-get install python3.11\n", + "!pip install weaviate-client==4.7.1\n", "!pip install -U -q google-generativeai\n", - "!pip install requests" + "!pip install requests\n", + "!pip install 'protobuf>=5'" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "iKmPS8v7s_Xc", "metadata": { "id": "iKmPS8v7s_Xc" @@ -342,30 +344,6 @@ "print(result)" ] }, - { - "cell_type": "markdown", - "id": "e5c9093f", - "metadata": { - "id": "f6c44c0ce91f" - }, - "source": [ - "Store your Google Cloud project id in a Colab Secret and name it `PROJECT_ID`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f48ee903", - "metadata": { - "id": "a7d08ed6250b" - }, - "outputs": [], - "source": [ - "# Set project id \n", - "PROJECT_ID = userdata.get(\"PROJECT_ID\")\n", - "genai.configure(api_key=PROJECT_ID)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -375,9 +353,10 @@ }, "outputs": [], "source": [ + "PROJECT_ID = \"\" # leave this empty\n", "API_ENDPOINT = \"generativelanguage.googleapis.com\"\n", - "embedding_model = \"embedding-001\"\n", - "generative_model = \"gemini-pro\"\n", + "embedding_model = \"embedding-001\" # embedding model \n", + "generative_model = \"gemini-pro\" # language model \n", "\n", "# Products Collection\n", "if not client.collections.exists(\"Products\"):\n", @@ -921,16 +900,6 @@ "print(result)" ] }, - { - "cell_type": "markdown", - "id": "ca0c8c28", - "metadata": { - "id": "d18b54e461a4" - }, - "source": [ - "You will use the same `PROJECT_ID`, `API_ENDPOINT`, `embedding_model`, and `generative-model` from [Part 1](#part-1-connect-to-weaviate-define-schema-and-import-data)." 
- ] - }, { "cell_type": "code", "execution_count": null, @@ -940,9 +909,10 @@ }, "outputs": [], "source": [ + "PROJECT_ID = \"\" # leave this empty\n", "API_ENDPOINT = \"generativelanguage.googleapis.com\"\n", - "embedding_model = \"embedding-001\"\n", - "generative_model = \"gemini-pro\"\n", + "embedding_model = \"embedding-001\" # embedding model \n", + "generative_model = \"gemini-pro\" # language mdodel \n", "\n", "# Personalized Collection\n", "\n", @@ -1181,6 +1151,18 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" } }, "nbformat": 4, From e8d69430e8b6b36f142da0d1ca5a9fffd748c227 Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Mon, 2 Sep 2024 16:59:53 +0800 Subject: [PATCH 11/11] nbfmt --- ...ed_description_with_weaviate_and_gemini_api.ipynb | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb index b2eb291b8..d7e74047b 100644 --- a/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb +++ b/examples/weaviate/personalized_description_with_weaviate_and_gemini_api.ipynb @@ -1151,18 +1151,6 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" } }, "nbformat": 4,