Merge pull request #57 from oramasearch/feat/rework-field-and-interface

Rework field and interface
oramasearch · Jan 22, 2025 · 93750b2 · 93750b2
2 parents 98ca212 + 9b4bde3
commit 93750b2
Show file tree

Hide file tree

Showing 34 changed files with 1,018 additions and 1,916 deletions.
diff --git a/config.jsonc b/config.jsonc
@@ -27,86 +27,53 @@
         }
     },
 
-    "embeddings": {
-        "preload": [],
+    "ai_server": {
+        "scheme": "http",
+        "host": "127.0.0.1",
+        "port": 50051,
+        "api_key": "",
+        "max_connections": 15,
 
-        // Providers
-        "grpc": {
-            "host": "127.0.0.1",
-            "port": 50051,
+        "embeddings": {
+            "default_model_group": "en",
+            "dynamically_load_models": false,
+            "execution_providers": [
+                "CPUExecutionProvider"
+            ],
+            "total_threads": 8
         },
-        "hugging_face": {
-            // The base url for the hugging face model hub
-            "base_url": "https://huggingface.co",
 
-            "user_agent": "Mozilla/5.0 (compatible; RustBot/1.0)",
-
-            "connect_timeout": "10s",
-            "timeout": "3600s",
-
-            // where to place the custom models
-            "cache_path": ".custom_models",
-        },
-        "fastembed": {
-            "cache_dir": ".fastembed_cache",
-        },
-
-        "models": {
-            // Hugging Face models
-            "small-model": {
-                "type": "hugging_face",
-                "max_input_tokens": 512,
-                "dimensions": 384,
-                "real_model_name": "Xenova/gte-small",
-
-                "files": {
-                    "onnx_model": "onnx/model_quantized.onnx",
-                    "special_tokens_map": "special_tokens_map.json",
-                    "tokenizer": "tokenizer.json",
-                    "tokenizer_config": "tokenizer_config.json",
-                    "config": "config.json",
-                },
-            },
-
-            // grpc models
-            "BGESmall": {
-                "type": "grpc",
-                "real_model_name": "BGESmall",
-                "dimensions": 384,
+        "LLMs": {
+            "content_expansion": {
+                "id": "Qwen/Qwen2.5-3B-Instruct",
+                "tensor_parallel_size": 1,
+                "use_cpu": true,
+                "sampling_params": {
+                    "temperature": 0.2,
+                    "top_p": 0.95,
+                    "max_tokens": 256
+                }
             },
-
-            // FastEmbed models
-            "gte-small": {
-                "type": "fastembed",
-                "real_model_name": "Xenova/bge-small-en-v1.5",
-                "dimensions": 384,
+            "google_query_translator": {
+                "id": "Qwen/Qwen2.5-3B-Instruct",
+                "tensor_parallel_size": 1,
+                "use_cpu": true,
+                "sampling_params": {
+                    "temperature": 0.2,
+                    "top_p": 0.95,
+                    "max_tokens": 20
+                }
             },
-            "gte-base": {
-                "type": "fastembed",
-                "real_model_name": "Xenova/bge-base-en-v1.5",
-                "dimensions": 384,
+            "answer": {
+                "id": "Qwen/Qwen2.5-3B-Instruct",
+                "tensor_parallel_size": 1,
+                "use_cpu": true,
+                "sampling_params": {
+                    "temperature": 0,
+                    "top_p": 0.95,
+                    "max_tokens": 2048
+                }
             },
-            "gte-large": {
-                "type": "fastembed",
-                "real_model_name": "Xenova/bge-large-en-v1.5",
-                "dimensions": 384,
-            },
-            "multilingual-e5-small": {
-                "type": "fastembed",
-                "real_model_name": "intfloat/multilingual-e5-small",
-                "dimensions": 384,
-            },
-            "multilingual-e5-base": {
-                "type": "fastembed",
-                "real_model_name": "intfloat/multilingual-e5-base",
-                "dimensions": 384,
-            },
-            "multilingual-e5-large": {
-                "type": "fastembed",
-                "real_model_name": "Qdrant/multilingual-e5-large-onnx",
-                "dimensions": 384,
-            },
-
-        }
-    }
+        },
+    },
 }
diff --git a/config.yaml b/config.yaml
@@ -0,0 +1,59 @@
+http:
+    host: 0.0.0.0
+    port: 8080
+    allow_cors: true
+    with_prometheus: true
+
+writer_side:
+    output: in-memory
+    config:
+        data_dir: ./.data/writer
+        # Maximum number of embeddings that can wait in the queue before
+        # the writer blocks.
+        # NB: queued elements are held in memory, so choose this value carefully.
+        embedding_queue_limit: 50
+
+reader_side:
+    input: in-memory
+    config:
+        data_dir: ./.data/reader
+
+ai_server:
+    scheme: http
+    host: 127.0.0.1
+    port: 50051
+    api_key: ""
+    max_connections: 15
+    total_threads: 12
+
+    embeddings:
+        default_model_group: en
+        dynamically_load_models: false
+        execution_providers:
+            - CPUExecutionProvider
+        total_threads: 8
+    LLMs:
+        content_expansion:
+            id: "Qwen/Qwen2.5-3B-Instruct"
+            tensor_parallel_size: 1
+            use_cpu: true
+            sampling_params:
+                temperature: 0.2
+                top_p: 0.95
+                max_tokens: 256
+        google_query_translator:
+            id: "Qwen/Qwen2.5-3B-Instruct"
+            tensor_parallel_size: 1
+            use_cpu: true
+            sampling_params:
+                temperature: 0.2
+                top_p: 0.95
+                max_tokens: 20
+        answer:
+            id: "Qwen/Qwen2.5-3B-Instruct"
+            tensor_parallel_size: 1
+            use_cpu: true
+            sampling_params:
+                temperature: 0
+                top_p: 0.95
+                max_tokens: 2048
diff --git a/embedding-api-test.hurl b/embedding-api-test.hurl
@@ -12,11 +12,9 @@ collection_id: jsonpath "$[0]"
 POST {{base_url}}/v0/collections
 {
     "id": "{{collection_id}}",
-    "typed_fields": {
-        "my_embedding": {
-            "model_name": "gte-small",
-            "document_fields": ["content"]
-        }
+    "embeddings": {
+        "model_name": "BGESmall",
+        "document_fields": ["content"]
     }
 }
 HTTP 201