Add GPT-4o-mini and Llama 3.3 70B on Stanford Health Care API (#3277)
yifanmai authored Jan 17, 2025
1 parent 1b0202b commit 966b50b
Showing 6 changed files with 163 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/helm/clients/auto_client.py
@@ -71,6 +71,7 @@ def _get_client(self, model_deployment_name: str) -> Client:
                model_deployment.client_spec,
                constant_bindings={
                    "cache_config": cache_config,
+                   "model_name": model_deployment.model_name,
                    "tokenizer_name": model_deployment.tokenizer_name,
                },
                provider_bindings={
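For context: as I read the loader, a constant binding is passed to a client only when the client's `__init__` declares a parameter with the matching name, which is what lets the new `StanfordHealthCareLlamaClient` below receive `model_name` while existing clients are unaffected. A minimal sketch of that signature-based injection pattern (the helper here is hypothetical, not HELM's actual loader):

```python
import inspect
from typing import Any, Callable, Dict


def construct_with_bindings(cls: Callable[..., Any], bindings: Dict[str, Any]) -> Any:
    """Hypothetical sketch: pass a binding only if __init__ declares that parameter."""
    params = inspect.signature(cls.__init__).parameters
    kwargs = {name: value for name, value in bindings.items() if name in params}
    return cls(**kwargs)


class NeedsModelName:
    def __init__(self, model_name: str):
        self.model_name = model_name


class NoModelName:
    def __init__(self):
        pass


# "model_name" is injected only where the constructor asks for it:
a = construct_with_bindings(NeedsModelName, {"model_name": "meta/llama-3.3-70b-instruct"})
b = construct_with_bindings(NoModelName, {"model_name": "meta/llama-3.3-70b-instruct"})
assert a.model_name == "meta/llama-3.3-70b-instruct"
```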
40 changes: 40 additions & 0 deletions src/helm/clients/azure_openai_client.py
@@ -0,0 +1,40 @@
import os
from typing import Dict, Optional

from helm.clients.openai_client import OpenAIClient
from helm.common.cache import CacheConfig
from helm.common.optional_dependencies import handle_module_not_found_error
from helm.proxy.retry import NonRetriableException
from helm.tokenizers.tokenizer import Tokenizer

try:
    from openai import AzureOpenAI
except ModuleNotFoundError as e:
    handle_module_not_found_error(e, ["openai"])


class AzureOpenAIClient(OpenAIClient):
    API_VERSION = "2024-07-01-preview"

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
        api_version: Optional[str] = None,
        default_headers: Optional[Dict[str, str]] = None,
    ):
        # api_key="unused": the parent's OpenAI client is replaced by the Azure client below.
        super().__init__(
            tokenizer=tokenizer, tokenizer_name=tokenizer_name, cache_config=cache_config, api_key="unused"
        )
        azure_endpoint = endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        if not azure_endpoint:
            raise NonRetriableException("Must provide Azure endpoint through credentials.conf or AZURE_OPENAI_ENDPOINT")
        self.client = AzureOpenAI(
            api_key=api_key,
            api_version=api_version or AzureOpenAIClient.API_VERSION,
            azure_endpoint=azure_endpoint,
            default_headers=default_headers,
        )
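A quick illustrative check of the fallback order encoded above (a standalone restatement, not part of the commit): an explicit `endpoint` argument takes precedence over the `AZURE_OPENAI_ENDPOINT` environment variable, and an explicit `api_version` takes precedence over the class default.

```python
import os


def resolve_azure_settings(endpoint=None, api_version=None):
    """Standalone restatement (not HELM code) of the fallback logic above."""
    azure_endpoint = endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
    if not azure_endpoint:
        raise ValueError("Must provide Azure endpoint")
    return azure_endpoint, api_version or "2024-07-01-preview"


os.environ["AZURE_OPENAI_ENDPOINT"] = "https://fallback.openai.azure.com/"
# An explicit endpoint argument wins over the environment variable:
assert resolve_azure_settings(endpoint="https://explicit.example/")[0] == "https://explicit.example/"
# With no explicit api_version, the class default ("2024-07-01-preview") applies:
assert resolve_azure_settings()[1] == "2024-07-01-preview"
```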
54 changes: 54 additions & 0 deletions src/helm/clients/stanfordhealthcare_llama_client.py
@@ -0,0 +1,54 @@
from typing import Optional

from helm.clients.openai_client import OpenAIClient
from helm.common.cache import CacheConfig
from helm.common.optional_dependencies import handle_module_not_found_error
from helm.proxy.retry import NonRetriableException
from helm.tokenizers.tokenizer import Tokenizer

try:
    from openai import OpenAI
except ModuleNotFoundError as e:
    handle_module_not_found_error(e, ["openai"])


class StanfordHealthCareLlamaClient(OpenAIClient):
    """
    Client for accessing Llama models hosted on Stanford Health Care's model API.

    Configure by setting the following in prod_env/credentials.conf:

    ```
    stanfordhealthcareEndpoint: https://your-domain-name/
    stanfordhealthcareApiKey: your-private-key
    ```
    """

    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        model_name: str,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
    ):
        super().__init__(
            tokenizer=tokenizer, tokenizer_name=tokenizer_name, cache_config=cache_config, api_key="unused"
        )
        if not endpoint:
            raise NonRetriableException("Must provide endpoint through credentials.conf")
        if not api_key:
            raise NonRetriableException("Must provide API key through credentials.conf")
        # Guess the base URL path segment from the model name,
        # e.g. "meta/llama-3.3-70b-instruct" -> "llama3370b".
        # Maybe make this configurable instead?
        base_url_part = model_name.split("/")[1].lower().removesuffix("-instruct").replace("-", "").replace(".", "")

        base_url = f"{endpoint.strip('/')}/{base_url_part}/v1"
        self.client = OpenAI(
            api_key="dummy",  # The gateway authenticates via the subscription-key header below.
            base_url=base_url,
            default_headers={StanfordHealthCareLlamaClient.CREDENTIAL_HEADER_NAME: api_key},
        )
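Tracing the base-URL guess above for this commit's actual deployment (`model_name` is the value from `model_deployments.yaml` below; the endpoint is the docstring's placeholder), assuming Python 3.9+ for `str.removesuffix`:

```python
# Step through the derivation in StanfordHealthCareLlamaClient.__init__:
model_name = "meta/llama-3.3-70b-instruct"
endpoint = "https://your-domain-name/"  # placeholder from the docstring

# split -> "llama-3.3-70b-instruct"; removesuffix -> "llama-3.3-70b";
# stripping "-" and "." leaves "llama3370b".
base_url_part = model_name.split("/")[1].lower().removesuffix("-instruct").replace("-", "").replace(".", "")
assert base_url_part == "llama3370b"

base_url = f"{endpoint.strip('/')}/{base_url_part}/v1"
assert base_url == "https://your-domain-name/llama3370b/v1"
```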
42 changes: 42 additions & 0 deletions src/helm/clients/stanfordhealthcare_openai_client.py
@@ -0,0 +1,42 @@
from typing import Optional

from helm.clients.azure_openai_client import AzureOpenAIClient
from helm.common.cache import CacheConfig
from helm.proxy.retry import NonRetriableException
from helm.tokenizers.tokenizer import Tokenizer


class StanfordHealthCareOpenAIClient(AzureOpenAIClient):
    """
    Client for accessing OpenAI models hosted on Stanford Health Care's model API.

    Configure by setting the following in prod_env/credentials.conf:

    ```
    stanfordhealthcareEndpoint: https://your-domain-name/
    stanfordhealthcareApiKey: your-private-key
    ```
    """

    API_VERSION = "2023-05-15"
    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
    ):
        if not api_key:
            raise NonRetriableException("Must provide API key through credentials.conf")
        super().__init__(
            tokenizer=tokenizer,
            tokenizer_name=tokenizer_name,
            cache_config=cache_config,
            api_key="unused",  # The real key is sent in the subscription-key header below.
            endpoint=endpoint,
            api_version=StanfordHealthCareOpenAIClient.API_VERSION,
            default_headers={StanfordHealthCareOpenAIClient.CREDENTIAL_HEADER_NAME: api_key},
        )
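Worth noting: both Stanford Health Care clients follow the same Azure API Management pattern, where the gateway authenticates via an `Ocp-Apim-Subscription-Key` header rather than the SDK-level `api_key`. An illustrative construction with placeholder values (no request is made at construction time):

```python
from openai import AzureOpenAI

# Illustrative only: mirrors the configuration StanfordHealthCareOpenAIClient ends up with.
client = AzureOpenAI(
    api_key="unused",  # placeholder; the gateway ignores the SDK-level key
    api_version="2023-05-15",
    azure_endpoint="https://your-domain-name/",  # placeholder from the docstring
    default_headers={"Ocp-Apim-Subscription-Key": "your-private-key"},  # the real credential
)
```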
16 changes: 16 additions & 0 deletions src/helm/config/model_deployments.yaml
@@ -16,6 +16,22 @@ model_deployments:
    client_spec:
      class_name: "helm.clients.simple_client.SimpleClient"

  # Stanford Health Care
  # Placed earlier in the file to make them non-default
  - name: stanfordhealthcare/gpt-4o-mini-2024-07-18
    model_name: openai/gpt-4o-mini-2024-07-18
    tokenizer_name: openai/o200k_base
    max_sequence_length: 128000
    client_spec:
      class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient"

  - name: stanfordhealthcare/llama-3.3-70b-instruct
    model_name: meta/llama-3.3-70b-instruct
    tokenizer_name: meta/llama-3.3-70b-instruct
    max_sequence_length: 128000
    client_spec:
      class_name: "helm.clients.stanfordhealthcare_llama_client.StanfordHealthCareLlamaClient"

  # Adobe
  - name: adobe/giga-gan
    model_name: adobe/giga-gan
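The `class_name` strings above are resolved dynamically from the YAML. A minimal sketch of that kind of dotted-path resolution (hypothetical helper; HELM's actual loader additionally applies the constant and provider bindings shown in `auto_client.py`):

```python
import importlib


def resolve_class(class_name: str) -> type:
    """Import the module portion of a dotted path, then return the named attribute."""
    module_name, _, attr = class_name.rpartition(".")
    return getattr(importlib.import_module(module_name), attr)


# resolve_class("helm.clients.stanfordhealthcare_llama_client.StanfordHealthCareLlamaClient")
# would return the client class, which is then instantiated with the injected bindings.
```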
11 changes: 10 additions & 1 deletion src/helm/config/model_metadata.yaml
@@ -1701,7 +1701,16 @@ models:

  - name: meta/llama-3.3-70b-instruct-turbo
    display_name: Llama 3.3 Instruct Turbo (70B)
-   description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near-negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+   description: Llama 3.3 Instruct (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near-negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
    creator_organization_name: Meta
    access: open
    num_parameters: 70000000000
    release_date: 2024-12-06
    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: meta/llama-3.3-70b-instruct
    display_name: Llama 3.3 Instruct (70B)
    description: Llama 3.3 Instruct (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
    creator_organization_name: Meta
    access: open
    num_parameters: 70000000000
