Add GPT-4o-mini and Llama 3.3 70B on Stanford Health Care API (#3277)
yifanmai authored Jan 17, 2025
1 parent 1b0202b commit 966b50b
Showing 6 changed files with 163 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/helm/clients/auto_client.py
@@ -71,6 +71,7 @@ def _get_client(self, model_deployment_name: str) -> Client:
                model_deployment.client_spec,
                constant_bindings={
                    "cache_config": cache_config,
+                   "model_name": model_deployment.model_name,
                    "tokenizer_name": model_deployment.tokenizer_name,
                },
                provider_bindings={
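For context: as I read the loader, a constant binding is passed to a client only when the client's `__init__` declares a parameter with the matching name, which is what lets the new `StanfordHealthCareLlamaClient` below receive `model_name` while existing clients are unaffected. A minimal sketch of that signature-based injection pattern (the helper here is hypothetical, not HELM's actual loader):

```python
import inspect
from typing import Any, Callable, Dict


def construct_with_bindings(cls: Callable[..., Any], bindings: Dict[str, Any]) -> Any:
    """Hypothetical sketch: pass a binding only if __init__ declares that parameter."""
    params = inspect.signature(cls.__init__).parameters
    kwargs = {name: value for name, value in bindings.items() if name in params}
    return cls(**kwargs)


class NeedsModelName:
    def __init__(self, model_name: str):
        self.model_name = model_name


class NoModelName:
    def __init__(self):
        pass


# "model_name" is injected only where the constructor asks for it:
a = construct_with_bindings(NeedsModelName, {"model_name": "meta/llama-3.3-70b-instruct"})
b = construct_with_bindings(NoModelName, {"model_name": "meta/llama-3.3-70b-instruct"})
assert a.model_name == "meta/llama-3.3-70b-instruct"
```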
40 changes: 40 additions & 0 deletions src/helm/clients/azure_openai_client.py
@@ -0,0 +1,40 @@
import os
from typing import Dict, Optional

from helm.clients.openai_client import OpenAIClient
from helm.common.cache import CacheConfig
from helm.common.optional_dependencies import handle_module_not_found_error
from helm.proxy.retry import NonRetriableException
from helm.tokenizers.tokenizer import Tokenizer

try:
    from openai import AzureOpenAI
except ModuleNotFoundError as e:
    handle_module_not_found_error(e, ["openai"])


class AzureOpenAIClient(OpenAIClient):
    API_VERSION = "2024-07-01-preview"

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
        api_version: Optional[str] = None,
        default_headers: Optional[Dict[str, str]] = None,
    ):
        # api_key="unused": the parent's OpenAI client is replaced by the Azure client below.
        super().__init__(
            tokenizer=tokenizer, tokenizer_name=tokenizer_name, cache_config=cache_config, api_key="unused"
        )
        azure_endpoint = endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        if not azure_endpoint:
            raise NonRetriableException("Must provide Azure endpoint through credentials.conf or AZURE_OPENAI_ENDPOINT")
        self.client = AzureOpenAI(
            api_key=api_key,
            api_version=api_version or AzureOpenAIClient.API_VERSION,
            azure_endpoint=azure_endpoint,
            default_headers=default_headers,
        )
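A quick illustrative check of the fallback order encoded above (a standalone restatement, not part of the commit): an explicit `endpoint` argument takes precedence over the `AZURE_OPENAI_ENDPOINT` environment variable, and an explicit `api_version` takes precedence over the class default.

```python
import os


def resolve_azure_settings(endpoint=None, api_version=None):
    """Standalone restatement (not HELM code) of the fallback logic above."""
    azure_endpoint = endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
    if not azure_endpoint:
        raise ValueError("Must provide Azure endpoint")
    return azure_endpoint, api_version or "2024-07-01-preview"


os.environ["AZURE_OPENAI_ENDPOINT"] = "https://fallback.openai.azure.com/"
# An explicit endpoint argument wins over the environment variable:
assert resolve_azure_settings(endpoint="https://explicit.example/")[0] == "https://explicit.example/"
# With no explicit api_version, the class default ("2024-07-01-preview") applies:
assert resolve_azure_settings()[1] == "2024-07-01-preview"
```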
54 changes: 54 additions & 0 deletions src/helm/clients/stanfordhealthcare_llama_client.py
@@ -0,0 +1,54 @@
from typing import Optional

from helm.clients.openai_client import OpenAIClient
from helm.common.cache import CacheConfig
from helm.common.optional_dependencies import handle_module_not_found_error
from helm.proxy.retry import NonRetriableException
from helm.tokenizers.tokenizer import Tokenizer

try:
    from openai import OpenAI
except ModuleNotFoundError as e:
    handle_module_not_found_error(e, ["openai"])


class StanfordHealthCareLlamaClient(OpenAIClient):
    """
    Client for accessing Llama models hosted on Stanford Health Care's model API.

    Configure by setting the following in prod_env/credentials.conf:

    ```
    stanfordhealthcareEndpoint: https://your-domain-name/
    stanfordhealthcareApiKey: your-private-key
    ```
    """

    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        model_name: str,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
    ):
        super().__init__(
            tokenizer=tokenizer, tokenizer_name=tokenizer_name, cache_config=cache_config, api_key="unused"
        )
        if not endpoint:
            raise NonRetriableException("Must provide endpoint through credentials.conf")
        if not api_key:
            raise NonRetriableException("Must provide API key through credentials.conf")
        # Guess the base URL path segment from the model name,
        # e.g. "meta/llama-3.3-70b-instruct" -> "llama3370b".
        # Maybe make this configurable instead?
        base_url_part = model_name.split("/")[1].lower().removesuffix("-instruct").replace("-", "").replace(".", "")

        base_url = f"{endpoint.strip('/')}/{base_url_part}/v1"
        self.client = OpenAI(
            api_key="dummy",  # The gateway authenticates via the subscription-key header below.
            base_url=base_url,
            default_headers={StanfordHealthCareLlamaClient.CREDENTIAL_HEADER_NAME: api_key},
        )
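Tracing the base-URL guess above for this commit's actual deployment (`model_name` is the value from `model_deployments.yaml` below; the endpoint is the docstring's placeholder), assuming Python 3.9+ for `str.removesuffix`:

```python
# Step through the derivation in StanfordHealthCareLlamaClient.__init__:
model_name = "meta/llama-3.3-70b-instruct"
endpoint = "https://your-domain-name/"  # placeholder from the docstring

# split -> "llama-3.3-70b-instruct"; removesuffix -> "llama-3.3-70b";
# stripping "-" and "." leaves "llama3370b".
base_url_part = model_name.split("/")[1].lower().removesuffix("-instruct").replace("-", "").replace(".", "")
assert base_url_part == "llama3370b"

base_url = f"{endpoint.strip('/')}/{base_url_part}/v1"
assert base_url == "https://your-domain-name/llama3370b/v1"
```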
42 changes: 42 additions & 0 deletions src/helm/clients/stanfordhealthcare_openai_client.py
@@ -0,0 +1,42 @@
from typing import Optional

from helm.clients.azure_openai_client import AzureOpenAIClient
from helm.common.cache import CacheConfig
from helm.proxy.retry import NonRetriableException
from helm.tokenizers.tokenizer import Tokenizer


class StanfordHealthCareOpenAIClient(AzureOpenAIClient):
    """
    Client for accessing OpenAI models hosted on Stanford Health Care's model API.

    Configure by setting the following in prod_env/credentials.conf:

    ```
    stanfordhealthcareEndpoint: https://your-domain-name/
    stanfordhealthcareApiKey: your-private-key
    ```
    """

    API_VERSION = "2023-05-15"
    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
    ):
        if not api_key:
            raise NonRetriableException("Must provide API key through credentials.conf")
        super().__init__(
            tokenizer=tokenizer,
            tokenizer_name=tokenizer_name,
            cache_config=cache_config,
            api_key="unused",  # The real key is sent in the subscription-key header below.
            endpoint=endpoint,
            api_version=StanfordHealthCareOpenAIClient.API_VERSION,
            default_headers={StanfordHealthCareOpenAIClient.CREDENTIAL_HEADER_NAME: api_key},
        )
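Worth noting: both Stanford Health Care clients follow the same Azure API Management pattern, where the gateway authenticates via an `Ocp-Apim-Subscription-Key` header rather than the SDK-level `api_key`. An illustrative construction with placeholder values (no request is made at construction time):

```python
from openai import AzureOpenAI

# Illustrative only: mirrors the configuration StanfordHealthCareOpenAIClient ends up with.
client = AzureOpenAI(
    api_key="unused",  # placeholder; the gateway ignores the SDK-level key
    api_version="2023-05-15",
    azure_endpoint="https://your-domain-name/",  # placeholder from the docstring
    default_headers={"Ocp-Apim-Subscription-Key": "your-private-key"},  # the real credential
)
```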
16 changes: 16 additions & 0 deletions src/helm/config/model_deployments.yaml
@@ -16,6 +16,22 @@ model_deployments:
    client_spec:
      class_name: "helm.clients.simple_client.SimpleClient"

  # Stanford Health Care
  # Placed earlier in the file to make them non-default
  - name: stanfordhealthcare/gpt-4o-mini-2024-07-18
    model_name: openai/gpt-4o-mini-2024-07-18
    tokenizer_name: openai/o200k_base
    max_sequence_length: 128000
    client_spec:
      class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient"

  - name: stanfordhealthcare/llama-3.3-70b-instruct
    model_name: meta/llama-3.3-70b-instruct
    tokenizer_name: meta/llama-3.3-70b-instruct
    max_sequence_length: 128000
    client_spec:
      class_name: "helm.clients.stanfordhealthcare_llama_client.StanfordHealthCareLlamaClient"

  # Adobe
  - name: adobe/giga-gan
    model_name: adobe/giga-gan
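The `class_name` strings above are resolved dynamically from the YAML. A minimal sketch of that kind of dotted-path resolution (hypothetical helper; HELM's actual loader additionally applies the constant and provider bindings shown in `auto_client.py`):

```python
import importlib


def resolve_class(class_name: str) -> type:
    """Import the module portion of a dotted path, then return the named attribute."""
    module_name, _, attr = class_name.rpartition(".")
    return getattr(importlib.import_module(module_name), attr)


# resolve_class("helm.clients.stanfordhealthcare_llama_client.StanfordHealthCareLlamaClient")
# would return the client class, which is then instantiated with the injected bindings.
```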
11 changes: 10 additions & 1 deletion src/helm/config/model_metadata.yaml
@@ -1701,7 +1701,16 @@ models:

  - name: meta/llama-3.3-70b-instruct-turbo
    display_name: Llama 3.3 Instruct Turbo (70B)
-   description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near-negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+   description: Llama 3.3 Instruct (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near-negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
    creator_organization_name: Meta
    access: open
    num_parameters: 70000000000
    release_date: 2024-12-06
    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: meta/llama-3.3-70b-instruct
    display_name: Llama 3.3 Instruct (70B)
    description: Llama 3.3 Instruct (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
    creator_organization_name: Meta
    access: open
    num_parameters: 70000000000
