From 3a224f5b666da839b7fb0f3939b99121652f4f76 Mon Sep 17 00:00:00 2001
From: Honglin Cao
Date: Tue, 4 Feb 2025 16:59:56 -0500
Subject: [PATCH] fix after rebasing, now test isn't working

---
 llama_stack/providers/registry/inference.py | 18 +++++++++++++-----
 .../remote/inference/centml/centml.py       |  4 ++++
 llama_stack/templates/centml/build.yaml     |  5 +++--
 llama_stack/templates/centml/run.yaml       | 17 ++++++++++-------
 4 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index 9cb3b11b8e..0587bfbbed 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -216,15 +216,23 @@ def available_providers() -> List[ProviderSpec]:
         remote_provider_spec(
             api=Api.inference,
             adapter=AdapterSpec(
-                adapter_type="centml",
+                adapter_type="runpod",
+                pip_packages=["openai"],
+                module="llama_stack.providers.remote.inference.runpod",
+                config_class=
+                "llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
+            ),
+        ),
+        remote_provider_spec(
+            api=Api.inference,
+            adapter=AdapterSpec(
+                adapter_type="sambanova",
                 pip_packages=[
                     "openai",
                 ],
-                module="llama_stack.providers.remote.inference.centml",
+                module="llama_stack.providers.remote.inference.sambanova",
                 config_class=
-                "llama_stack.providers.remote.inference.centml.CentMLImplConfig",
-                provider_data_validator=
-                "llama_stack.providers.remote.inference.centml.CentMLProviderDataValidator",
+                "llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
             ),
         ),
         remote_provider_spec(
diff --git a/llama_stack/providers/remote/inference/centml/centml.py b/llama_stack/providers/remote/inference/centml/centml.py
index c3798837b5..0ed31aacd6 100644
--- a/llama_stack/providers/remote/inference/centml/centml.py
+++ b/llama_stack/providers/remote/inference/centml/centml.py
@@ -17,6 +17,7 @@
     ChatCompletionRequest,
     ChatCompletionResponse,
     CompletionRequest,
+    CompletionResponse,
     EmbeddingsResponse,
     Inference,
     LogProbConfig,
@@ -25,6 +26,7 @@
     ResponseFormatType,
     SamplingParams,
     ToolChoice,
+    ToolConfig,
     ToolDefinition,
     ToolPromptFormat,
 )
@@ -42,6 +44,7 @@
     process_completion_stream_response,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
+    chat_completion_request_to_prompt,
     completion_request_to_prompt,
     content_has_media,
     interleaved_content_as_str,
@@ -176,6 +179,7 @@ async def chat_completion(
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
+        tool_config: Optional[ToolConfig] = None,
     ) -> AsyncGenerator:
         """
         For "chat completion" style requests.
diff --git a/llama_stack/templates/centml/build.yaml b/llama_stack/templates/centml/build.yaml
index 489b9f8fdf..aa8abd202b 100644
--- a/llama_stack/templates/centml/build.yaml
+++ b/llama_stack/templates/centml/build.yaml
@@ -5,7 +5,7 @@ distribution_spec:
   providers:
     inference:
     - remote::centml
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector
@@ -28,5 +28,6 @@ distribution_spec:
     - remote::brave-search
     - remote::tavily-search
     - inline::code-interpreter
-    - inline::memory-runtime
+    - inline::rag-runtime
+    - remote::model-context-protocol
 image_type: conda
diff --git a/llama_stack/templates/centml/run.yaml b/llama_stack/templates/centml/run.yaml
index 414dd9065f..9008aa8cc2 100644
--- a/llama_stack/templates/centml/run.yaml
+++ b/llama_stack/templates/centml/run.yaml
@@ -6,11 +6,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: centml
@@ -22,7 +22,7 @@ providers:
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -92,8 +92,11 @@ providers:
   - provider_id: code-interpreter
     provider_type: inline::code-interpreter
     config: {}
-  - provider_id: memory-runtime
-    provider_type: inline::memory-runtime
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
     config: {}

 metadata_store:
@@ -116,14 +119,14 @@ models:
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
 tool_groups:
 - toolgroup_id: builtin::websearch
   provider_id: tavily-search
-- toolgroup_id: builtin::memory
-  provider_id: memory-runtime
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
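
Testing notes: a minimal smoke-test sketch for the wiring above. It assumes a
stack server built from the centml template is already listening on
localhost:5000 and that the llama-stack-client package is installed; the
base_url, model id, and file name are illustrative placeholders, not values
taken from this patch.

    # smoke_test.py -- hypothetical; base_url and model_id are assumptions,
    # not values confirmed by this patch.
    from llama_stack_client import LlamaStackClient

    # Point the client at a locally running stack built from the centml template.
    client = LlamaStackClient(base_url="http://localhost:5000")

    # A single non-streaming chat completion is enough to confirm the centml
    # inference provider is reachable again after the rebase.
    response = client.inference.chat_completion(
        model_id="meta-llama/Llama-3.1-8B-Instruct",
        messages=[{"role": "user", "content": "Say hello."}],
        stream=False,
    )
    print(response.completion_message.content)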