Skip to content

Commit

Permalink
#1 - poc RAG to OpenAPI working
Browse files Browse the repository at this point in the history
  • Loading branch information
obriensystems committed Aug 29, 2024
1 parent d9d0ee8 commit 5970f38
Showing 1 changed file with 24 additions and 11 deletions.
35 changes: 24 additions & 11 deletions src/rag/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# --- Credentials -------------------------------------------------------------
# SECURITY: never hardcode API keys in source control (the original set
# OPENAI_API_KEY / LANGCHAIN_API_KEY to literal key strings right here).
# Read them from the environment and fall back to an interactive prompt.
import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Chat model used for answer generation at the end of the RAG chain.
llm = ChatOpenAI(model="gpt-4o-mini")

# Enable LangSmith tracing so chain runs are observable.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")
Expand All @@ -36,7 +36,6 @@
# Sanity-check what the loader returned before chunking.
# NOTE: original used `f"... %s" % (...)` -- a placeholder-free f-string
# combined with %-formatting, which only works by accident; use real f-strings.
print(f"docs content size: {len(docs[0].page_content)}")
print(docs[0].page_content[:500])

# Split the document into overlapping chunks sized for embedding calls;
# the 200-char overlap preserves context across chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200)  # add_start_index=True
splits = text_splitter.split_documents(docs)
Expand All @@ -46,23 +45,37 @@

# Embed each chunk and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retrieve and generate using the relevant snippets of the blog.
# The default retriever uses similarity search with k=4; k is raised to 6
# here to pull in more context per query.
retriever = vectorstore.as_retriever(search_type="similarity",
                                     search_kwargs={"k": 6})

# Smoke-test retrieval before wiring up the full RAG chain.
print(f"retriever: {retriever}")
retrieved_docs = retriever.invoke("What are the approaches to Task Decomposition?")
print(f"vectorstore retrieved: {len(retrieved_docs)}")
print(f"vectorstore retrieved content: {retrieved_docs[0].page_content}")

def format_docs(docs):
    """Concatenate the page contents of *docs*, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Retrieve and generate using the relevant snippets of the blog.
# NOTE(review): this rebinds `retriever` to the default configuration
# (similarity search, k=4), discarding the k=6 retriever created above --
# the RAG chain below therefore uses the default one. Confirm intended.
retriever = vectorstore.as_retriever()  # search_type="similarity", search_kwargs={"k": 6}

# Pull the community RAG prompt ONCE; the original pulled the identical
# prompt from the hub twice, wasting a network round-trip.
prompt = hub.pull("rlm/rag-prompt")

# Render the prompt with filler values to show its final message shape.
example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()
print(f"example_messages: {example_messages[0].content}")

# Compose the pipeline: retrieve -> format context -> prompt -> LLM -> text.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Streaming variant, kept for reference:
# for chunk in rag_chain.stream("What is Task Decomposition?"):
#     print(chunk, end="", flush=True)

retrieved = rag_chain.invoke("What is Task Decomposition?")
print(f"rag_chain retrieved: {len(retrieved)}")
print(f"rag_chain retrieved content: {retrieved}")

0 comments on commit 5970f38

Please sign in to comment.