Skip to content

Commit

Permalink
update tiktoken version, embeddings model is now text-embedding-3-large
Browse files Browse the repository at this point in the history
  • Loading branch information
shlomsh committed Mar 31, 2024
1 parent df31274 commit a3ad618
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 41 deletions.
77 changes: 39 additions & 38 deletions vibraniumdome-shields/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vibraniumdome-shields/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ torch = "2.0.1"
datasets = "^2.15.0"
langchain = "^0.0.339"
faiss-cpu = "^1.7.4"
tiktoken = "^0.5.1"
load-dotenv = "^0.1.0"
termcolor = "^2.3.0"
presidio-analyzer = "^2.2.351"
Expand All @@ -37,6 +36,7 @@ httpx = "^0.25.2"
prometheus-client = "^0.20.0"
vibraniumdome-sdk = "^0.4.0"
openai = "^1.14.3"
tiktoken = "^0.6.0"


[tool.poetry.group.dev.dependencies]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ default_logging_level="INFO"
[vector_db]
collection_name="data"
#vector_db_dir=tempfile.gettempdir()
embedding_model_name="text-embedding-ada-002"
embedding_model_name="text-embedding-3-large"

[embeddings]
cache_embeddings=false
Expand All @@ -29,5 +29,5 @@ semantic_similarity.default_threshold=0.34
# execution_mode_async=true
transformer_model_name="deepset/deberta-v3-base-injection"
refusal_model_name="MoritzLaurer/deberta-v3-large-zeroshot-v1"
#sensetive_info_disc_model_name="en_core_web_lg"
# sensetive_info_disc_model_name="en_core_web_lg"
sensetive_info_disc_model_name="en_core_web_trf"
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, vector_db_dir, index_name, embedding_model_name):
self._vector_store = FAISS
self._embeddings = OpenAIEmbeddings(
chunk_size=16 if os.getenv("OPENAI_API_TYPE") == "azure" else 1000,
model=embedding_model_name
) # chunk_size: 1000 is also the OpenAIEmbeddings default; 16 is the Azure limit

if os.path.exists(self.vector_store_file_path):
Expand Down

0 comments on commit a3ad618

Please sign in to comment.