Adding functionality to run benchmarks for Amazon Nova models #3251

Merged
6 changes: 3 additions & 3 deletions setup.cfg
@@ -131,9 +131,9 @@ allenai =
ai2-olmo~=0.2

amazon =
boto3~=1.28.57
awscli~=1.29.57
botocore~=1.31.57
boto3~=1.34.131
awscli~=1.32.1
botocore~=1.34.1

anthropic =
anthropic~=0.17,<0.39 # TODO(#3212): Limit anthropic to >=0.39 after resolving #3212.
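These pins are bumped presumably to pick up the Bedrock Converse API, which only ships with recent boto3/botocore releases. A quick sanity check (assumes an AWS region is configured; nothing here actually calls the service):

```python
# Verify the installed boto3/botocore are new enough to expose the Converse API.
import boto3
import botocore

print("boto3", boto3.__version__, "botocore", botocore.__version__)
client = boto3.client("bedrock-runtime", region_name="us-east-1")
assert hasattr(client, "converse"), "botocore is too old for the Bedrock Converse API"
```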
3 changes: 3 additions & 0 deletions src/helm/benchmark/model_metadata_registry.py
@@ -22,6 +22,9 @@
# OpenAI Chat format
OPENAI_CHATGPT_MODEL_TAG: str = "OPENAI_CHATGPT_MODEL_TAG"

# For Amazon Nova models
NOVA_MODEL_TAG: str = "NOVA_MODEL_TAG"

# For Anthropic models
ANTHROPIC_CLAUDE_1_MODEL_TAG: str = "ANTHROPIC_CLAUDE_1_MODEL_TAG"
ANTHROPIC_CLAUDE_2_MODEL_TAG: str = "ANTHROPIC_CLAUDE_2_MODEL_TAG"
27 changes: 27 additions & 0 deletions src/helm/benchmark/run_expander.py
@@ -348,6 +348,33 @@ def expand(self, run_spec: RunSpec) -> List[RunSpec]:
return [run_spec]


class NovaRunExpander(RunExpander):
"""
Custom prompt for Amazon Nova models.
These models need more explicit instructions about following the format.
"""

name = "amazon-nova"

PROMPT = "Do not provide any additional explanation. Follow the format shown in the provided examples strictly."

def __init__(self):
pass

    def expand(self, run_spec: RunSpec) -> List[RunSpec]:
        return [
            replace(
                run_spec,
                adapter_spec=replace(
                    run_spec.adapter_spec,
                    global_prefix=NovaRunExpander.PROMPT + "\n\n",
                ),
            ),
        ]
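To illustrate the expander's effect, here is a self-contained sketch using simplified stand-ins for `RunSpec` and `AdapterSpec` (the real classes in `helm.benchmark` carry many more fields):

```python
# Simplified stand-ins showing what NovaRunExpander does to a run spec.
from dataclasses import dataclass, replace


@dataclass(frozen=True)
class AdapterSpec:
    global_prefix: str = ""


@dataclass(frozen=True)
class RunSpec:
    name: str
    adapter_spec: AdapterSpec


PROMPT = "Do not provide any additional explanation. Follow the format shown in the provided examples strictly."


def expand(run_spec: RunSpec) -> RunSpec:
    # Mirrors NovaRunExpander.expand: set the adapter's global prefix to the instruction.
    return replace(run_spec, adapter_spec=replace(run_spec.adapter_spec, global_prefix=PROMPT + "\n\n"))


spec = expand(RunSpec(name="mmlu:model=amazon/nova-pro-v1:0", adapter_spec=AdapterSpec()))
print(repr(spec.adapter_spec.global_prefix))  # the instruction followed by a blank line
```

Note that the prefix is overwritten rather than prepended, so any `global_prefix` set earlier in the pipeline is discarded.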


class FollowFormatInstructionsRunExpander(RunExpander):
"""Adds more explicit instructions about following the format to prompts.

5 changes: 5 additions & 0 deletions src/helm/benchmark/run_spec_factory.py
@@ -14,6 +14,7 @@
ANTHROPIC_CLAUDE_1_MODEL_TAG,
ANTHROPIC_CLAUDE_2_MODEL_TAG,
ANTHROPIC_CLAUDE_3_MODEL_TAG,
NOVA_MODEL_TAG,
BUGGY_TEMP_0_TAG,
CHATML_MODEL_TAG,
GOOGLE_GEMINI_PRO_VISION_V1_TAG,
@@ -31,6 +32,7 @@
RUN_EXPANDERS,
AnthropicClaude2RunExpander,
AnthropicClaude3RunExpander,
NovaRunExpander,
ChatMLRunExpander,
GlobalPrefixRunExpander,
IDEFICSInstructRunExpander,
@@ -122,6 +124,9 @@ def alter_run_spec(run_spec: RunSpec) -> RunSpec:
chatml_expander = ChatMLRunExpander()
run_spec = singleton(chatml_expander.expand(run_spec))

if NOVA_MODEL_TAG in model.tags:
run_spec = singleton(NovaRunExpander().expand(run_spec))
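For context, each expander's `expand` returns a list of run specs, and `singleton` unwraps it when exactly one is expected. A minimal sketch of the dispatch (`singleton` below is a simplified stand-in for the helper this module imports):

```python
# Sketch of the tag-based dispatch: expand() returns a list; singleton() unwraps it.
def singleton(items):
    assert len(items) == 1, f"expected exactly one item, got {len(items)}"
    return items[0]


model_tags = ["NOVA_MODEL_TAG", "TEXT_MODEL_TAG"]
if "NOVA_MODEL_TAG" in model_tags:
    print("would apply NovaRunExpander")  # run_spec = singleton(NovaRunExpander().expand(run_spec))
```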

# Anthropic Claude 1 and 2 prompts
if ANTHROPIC_CLAUDE_1_MODEL_TAG in model.tags or ANTHROPIC_CLAUDE_2_MODEL_TAG in model.tags:
run_spec = singleton(AnthropicClaude2RunExpander().expand(run_spec))
79 changes: 78 additions & 1 deletion src/helm/clients/bedrock_client.py
@@ -3,11 +3,13 @@
import json
import os
from typing import Any, Dict, List, Mapping, Optional
from datetime import datetime

from helm.common.cache import CacheConfig
from helm.clients.client import CachingClient, truncate_and_tokenize_response_text
from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
from helm.clients.bedrock_utils import get_bedrock_client
from helm.common.hierarchical_logger import htrack_block
from helm.clients.bedrock_utils import get_bedrock_client, get_bedrock_client_v1
from helm.tokenizers.tokenizer import Tokenizer


@@ -96,6 +98,81 @@ def do_it() -> Dict[Any, Any]:
)


class BedrockNovaClient(CachingClient):
    """
    Client for Amazon Nova models on Amazon Bedrock, a fully managed service that provides a selection of
    leading foundation models (FMs) from Amazon and other partner model providers.
    """

def __init__(
self,
cache_config: CacheConfig,
tokenizer: Tokenizer,
tokenizer_name: str,
bedrock_model_id: Optional[str] = None,
assumed_role: Optional[str] = None,
region: Optional[str] = None,
):
super().__init__(cache_config=cache_config)
self.tokenizer = tokenizer
self.tokenizer_name = tokenizer_name
self.bedrock_model_id = bedrock_model_id
self.bedrock_client = get_bedrock_client_v1(
assumed_role=assumed_role or os.environ.get("BEDROCK_ASSUME_ROLE", None),
region=region or os.environ.get("AWS_DEFAULT_REGION", None),
)

    def convert_request_to_raw_request(self, request: Request) -> Dict:
        model_id = request.model.replace("/", ".")
        messages = [{"role": "user", "content": [{"text": request.prompt}]}]

        return {
            "modelId": model_id,
            "inferenceConfig": {
                "temperature": request.temperature,
                "maxTokens": request.max_tokens,
                "topP": request.top_p,
            },
            "messages": messages,
        }

    def make_request(self, request: Request) -> RequestResult:
        raw_request = self.convert_request_to_raw_request(request)
        response = self.bedrock_client.converse(**raw_request)
        completions = self.convert_raw_response_to_completions(response, request)
        # The Converse API does not return a request timestamp, so recover one from the HTTP Date header.
        dt = datetime.strptime(response["ResponseMetadata"]["HTTPHeaders"]["date"], "%a, %d %b %Y %H:%M:%S GMT")

        return RequestResult(
            success=True,
            cached=False,
            request_time=response["metrics"]["latencyMs"] / 1000,  # latencyMs is milliseconds; request_time is seconds
            request_datetime=int(dt.timestamp()),
            completions=completions,
            embedding=[],
        )

def convert_raw_response_to_completions(self, response: Dict, request: Request) -> List[GeneratedOutput]:
completions: List[GeneratedOutput] = []
raw_completion = response["output"]
output_text = raw_completion["message"]["content"][0]["text"]
finish_reason = response["stopReason"]
completion = truncate_and_tokenize_response_text(
output_text.lstrip(), request, self.tokenizer, self.tokenizer_name, finish_reason
)
completions.append(completion)
return completions
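Putting the pieces together, here is a hedged sketch of the Converse round trip this client performs (standalone; assumes AWS credentials with access to the Nova models in the target region; parameter values are illustrative):

```python
# Sketch of the request/response round trip performed by BedrockNovaClient.
import boto3

client = boto3.client("bedrock-runtime", region_name="us-east-1")

raw_request = {
    # HELM model name "amazon/nova-lite-v1:0" becomes Bedrock modelId "amazon.nova-lite-v1:0".
    "modelId": "amazon.nova-lite-v1:0",
    "inferenceConfig": {"temperature": 0.0, "maxTokens": 100, "topP": 1.0},
    "messages": [{"role": "user", "content": [{"text": "What is the capital of France?"}]}],
}

response = client.converse(**raw_request)

# The same fields that make_request / convert_raw_response_to_completions read back:
output_text = response["output"]["message"]["content"][0]["text"]
stop_reason = response["stopReason"]
latency_ms = response["metrics"]["latencyMs"]
print(output_text, stop_reason, latency_ms)
```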


# Amazon Bedrock Client for Titan Models
class BedrockTitanClient(BedrockClient):
_COMPLETION_REASON_TO_FINISH_REASON = {
48 changes: 47 additions & 1 deletion src/helm/clients/bedrock_utils.py
@@ -1,13 +1,14 @@
"""Helper utilities for working with Amazon Bedrock."""

import os
from typing import Optional
from typing import Optional, Dict

from helm.common.hierarchical_logger import hlog
from helm.common.optional_dependencies import handle_module_not_found_error

try:
import boto3
from boto3 import Session
from botocore.config import Config
except ModuleNotFoundError as e:
handle_module_not_found_error(e, ["aws"])
Expand Down Expand Up @@ -70,3 +71,48 @@ def get_bedrock_client(

hlog(f"Amazon Bedrock client successfully created with endpoint {bedrock_client._endpoint}")
return bedrock_client


def get_bedrock_client_v1(
    assumed_role: Optional[str] = None,
    service_name: str = "bedrock-runtime",
    region: Optional[str] = "us-east-1",
    read_timeout: int = 5000,  # botocore Config timeouts are in seconds
    connect_timeout: int = 5000,
    retries: Optional[Dict] = None,
):
    """Create a boto3 client for the Bedrock runtime, optionally assuming an IAM role first."""
    # Avoid a mutable default argument for the retry configuration.
    if retries is None:
        retries = {"max_attempts": 10}

if region is None:
target_region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION"))
else:
target_region = region

boto_config = Config(
read_timeout=read_timeout, connect_timeout=connect_timeout, retries=retries
)

    if target_region is None:
        raise ValueError(
            "AWS region is not set: pass `region` or set the AWS_REGION / AWS_DEFAULT_REGION environment variable."
        )

    if assumed_role:
        session = boto3.Session(region_name=target_region)
        # Assume the role and build the client from its temporary credentials.
        sts = session.client("sts")
        creds = sts.assume_role(RoleArn=str(assumed_role), RoleSessionName="crfm-helm")["Credentials"]
        session = Session(
            aws_access_key_id=creds["AccessKeyId"],
            aws_secret_access_key=creds["SecretAccessKey"],
            aws_session_token=creds["SessionToken"],  # temporary STS credentials require the session token
        )
        return session.client(
            service_name=service_name,
            region_name=target_region,
            config=boto_config,
        )

    # Fall back to the instance role or locally configured AWS credentials.
return boto3.client(
service_name=service_name,
region_name=target_region,
config=boto_config
)
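A usage sketch for the new helper, mirroring how `BedrockNovaClient` constructs its client (assumes either ambient AWS credentials or a role ARN exported as `BEDROCK_ASSUME_ROLE`):

```python
# Construct the Bedrock runtime client the same way BedrockNovaClient does.
import os

from helm.clients.bedrock_utils import get_bedrock_client_v1

client = get_bedrock_client_v1(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE"),  # optional IAM role ARN
    region=os.environ.get("AWS_DEFAULT_REGION", "us-east-1"),
)
print(client.meta.endpoint_url)  # e.g. https://bedrock-runtime.us-east-1.amazonaws.com
```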
1 change: 0 additions & 1 deletion src/helm/clients/huggingface_client.py
@@ -106,7 +106,6 @@ def serve_request(self, raw_request: HuggingFaceRequest) -> Dict:
encoded_input = tokenizer(raw_request["prompt"], return_tensors="pt", return_token_type_ids=False).to(
0 if self.device is None else self.device
)

stopping_criteria: Optional[StoppingCriteriaList] = None
optional_args = {}
if len(raw_request["stop_sequences"]) > 0:
24 changes: 23 additions & 1 deletion src/helm/config/model_deployments.yaml
@@ -104,7 +104,29 @@ model_deployments:
class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"


# Amazon
# Amazon Nova models

- name: amazon/nova-pro-v1:0
model_name: amazon/nova-pro-v1:0
tokenizer_name: huggingface/gpt2
max_sequence_length: 300000
client_spec:
class_name: "helm.clients.bedrock_client.BedrockNovaClient"

- name: amazon/nova-lite-v1:0
model_name: amazon/nova-lite-v1:0
tokenizer_name: huggingface/gpt2
max_sequence_length: 300000
client_spec:
class_name: "helm.clients.bedrock_client.BedrockNovaClient"

- name: amazon/nova-micro-v1:0
model_name: amazon/nova-micro-v1:0
tokenizer_name: huggingface/gpt2
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.bedrock_client.BedrockNovaClient"
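Note that each deployment name doubles as the Bedrock modelId once `BedrockNovaClient.convert_request_to_raw_request` substitutes the separator:

```python
# How HELM deployment names map to Bedrock model ids (replace "/" with ".").
for name in ("amazon/nova-pro-v1:0", "amazon/nova-lite-v1:0", "amazon/nova-micro-v1:0"):
    print(f"{name} -> {name.replace('/', '.')}")
# e.g. amazon/nova-pro-v1:0 -> amazon.nova-pro-v1:0
```

The `huggingface/gpt2` tokenizer here is only an approximation for token counting, presumably because Amazon does not publish a Nova tokenizer.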

# Titan on Amazon Bedrock

- name: amazon/titan-text-lite-v1
28 changes: 27 additions & 1 deletion src/helm/config/model_metadata.yaml
@@ -219,7 +219,33 @@ models:
tags: [TEXT_TO_IMAGE_MODEL_TAG]


# Amazon
# Amazon Nova models
# References for Amazon Nova models:
# https://aws.amazon.com/ai/generative-ai/nova/
- name: amazon/nova-pro-v1:0
display_name: Amazon Nova Pro
  description: Amazon Nova Pro is a highly capable multimodal foundation model from Amazon, balancing accuracy, speed, and cost.
creator_organization_name: Amazon
access: limited
release_date: 2024-12-03
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

- name: amazon/nova-lite-v1:0
display_name: Amazon Nova Lite
  description: Amazon Nova Lite is a low-cost multimodal model optimized for fast processing of image, video, and text inputs.
creator_organization_name: Amazon
access: limited
release_date: 2024-12-03
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

- name: amazon/nova-micro-v1:0
display_name: Amazon Nova Micro
  description: Amazon Nova Micro is a text-only model optimized for the lowest latency and cost.
creator_organization_name: Amazon
access: limited
release_date: 2024-12-03
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

# Titan Models
# References for Amazon Titan models:
# - https://aws.amazon.com/bedrock/titan/