Skip to content

Commit

Permalink
Merge pull request #57 from oramasearch/feat/rework-field-and-interface
Browse files Browse the repository at this point in the history
Rework field and interface
  • Loading branch information
allevo authored Jan 22, 2025
2 parents 98ca212 + 9b4bde3 commit 93750b2
Show file tree
Hide file tree
Showing 34 changed files with 1,018 additions and 1,916 deletions.
119 changes: 43 additions & 76 deletions config.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -27,86 +27,53 @@
}
},

"embeddings": {
"preload": [],
"ai_server": {
"scheme": "http",
"host": "127.0.0.1",
"port": 50051,
"api_key": "",
"max_connections": 15,

// Providers
"grpc": {
"host": "127.0.0.1",
"port": 50051,
"embeddings": {
"default_model_group": "en",
"dynamically_load_models": false,
"execution_providers": [
"CPUExecutionProvider"
],
"total_threads": 8
},
"hugging_face": {
// The base url for the hugging face model hub
"base_url": "https://huggingface.co",

"user_agent": "Mozilla/5.0 (compatible; RustBot/1.0)",

"connect_timeout": "10s",
"timeout": "3600s",

// where to place the custom models
"cache_path": ".custom_models",
},
"fastembed": {
"cache_dir": ".fastembed_cache",
},

"models": {
// Hugging Face models
"small-model": {
"type": "hugging_face",
"max_input_tokens": 512,
"dimensions": 384,
"real_model_name": "Xenova/gte-small",

"files": {
"onnx_model": "onnx/model_quantized.onnx",
"special_tokens_map": "special_tokens_map.json",
"tokenizer": "tokenizer.json",
"tokenizer_config": "tokenizer_config.json",
"config": "config.json",
},
},

// grpc models
"BGESmall": {
"type": "grpc",
"real_model_name": "BGESmall",
"dimensions": 384,
"LLMs": {
"content_expansion": {
"id": "Qwen/Qwen2.5-3B-Instruct",
"tensor_parallel_size": 1,
"use_cpu": true,
"sampling_params": {
"temperature": 0.2,
"top_p": 0.95,
"max_tokens": 256
}
},

// FastEmbed models
"gte-small": {
"type": "fastembed",
"real_model_name": "Xenova/bge-small-en-v1.5",
"dimensions": 384,
"google_query_translator": {
"id": "Qwen/Qwen2.5-3B-Instruct",
"tensor_parallel_size": 1,
"use_cpu": true,
"sampling_params": {
"temperature": 0.2,
"top_p": 0.95,
"max_tokens": 20
}
},
"gte-base": {
"type": "fastembed",
"real_model_name": "Xenova/bge-base-en-v1.5",
"dimensions": 384,
"answer": {
"id": "Qwen/Qwen2.5-3B-Instruct",
"tensor_parallel_size": 1,
"use_cpu": true,
"sampling_params": {
"temperature": 0,
"top_p": 0.95,
"max_tokens": 2048
}
},
"gte-large": {
"type": "fastembed",
"real_model_name": "Xenova/bge-large-en-v1.5",
"dimensions": 384,
},
"multilingual-e5-small": {
"type": "fastembed",
"real_model_name": "intfloat/multilingual-e5-small",
"dimensions": 384,
},
"multilingual-e5-base": {
"type": "fastembed",
"real_model_name": "intfloat/multilingual-e5-base",
"dimensions": 384,
},
"multilingual-e5-large": {
"type": "fastembed",
"real_model_name": "Qdrant/multilingual-e5-large-onnx",
"dimensions": 384,
},

}
}
},
},
}
59 changes: 59 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# HTTP section: host and port, plus the CORS and Prometheus flags.
http:
  host: 0.0.0.0
  port: 8080
  allow_cors: true
  with_prometheus: true

# Writer side: the output mode and its configuration.
writer_side:
  output: in-memory
  config:
    data_dir: ./.data/writer
    # Maximum number of embeddings that can be held in the queue before the
    # writer blocks.
    # NB: the queued elements are kept in memory, so choose this value with care.
    embedding_queue_limit: 50

# Reader side: the input mode and its configuration.
reader_side:
  input: in-memory
  config:
    data_dir: ./.data/reader

# AI server section: connection settings, followed by the embeddings and LLM
# settings nested under it.
ai_server:
  scheme: http
  host: 127.0.0.1
  port: 50051
  # Left empty here; NOTE(review): avoid committing a real key to this file.
  api_key: ""
  max_connections: 15
  total_threads: 12

  embeddings:
    default_model_group: en
    dynamically_load_models: false
    execution_providers:
      - CPUExecutionProvider
    # Separate from the server-level total_threads above.
    total_threads: 8

  # One entry per LLM task; every entry has the same shape
  # (id, tensor_parallel_size, use_cpu, sampling_params).
  LLMs:
    content_expansion:
      id: "Qwen/Qwen2.5-3B-Instruct"
      tensor_parallel_size: 1
      use_cpu: true
      sampling_params:
        temperature: 0.2
        top_p: 0.95
        max_tokens: 256
    google_query_translator:
      id: "Qwen/Qwen2.5-3B-Instruct"
      tensor_parallel_size: 1
      use_cpu: true
      sampling_params:
        temperature: 0.2
        top_p: 0.95
        max_tokens: 20
    answer:
      id: "Qwen/Qwen2.5-3B-Instruct"
      tensor_parallel_size: 1
      use_cpu: true
      sampling_params:
        temperature: 0
        top_p: 0.95
        max_tokens: 2048
8 changes: 3 additions & 5 deletions embedding-api-test.hurl
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@ collection_id: jsonpath "$[0]"
POST {{base_url}}/v0/collections
{
"id": "{{collection_id}}",
"typed_fields": {
"my_embedding": {
"model_name": "gte-small",
"document_fields": ["content"]
}
"embeddings": {
"model_name": "BGESmall",
"document_fields": ["content"]
}
}
HTTP 201
Expand Down
Loading

0 comments on commit 93750b2

Please sign in to comment.