stanford-crfm · yifanmai · Jan 20, 2025 · Dec 5, 2024
diff --git a/src/helm/clients/vertexai_client.py b/src/helm/clients/vertexai_client.py
@@ -56,17 +56,16 @@ def _get_safety_settings_for_preset(
         raise ValueError(f"Unknown safety_settings_preset: {safety_settings_preset}")
 
 
-def _get_model_name_for_request(request: Request) -> str:
-    # We have to strip "-safety-" suffixes from model names because they are not part of the Vertex AI model name
-    # TODO: Clean up this hack
-    return request.model_engine.split("-safety-")[0]
-
-
 class VertexAIClient(CachingClient, ABC):
     """Client for Vertex AI models"""
 
     def __init__(
-        self, cache_config: CacheConfig, project_id: str, location: str, safety_settings_preset: Optional[str] = None
+        self,
+        cache_config: CacheConfig,
+        project_id: str,
+        location: str,
+        safety_settings_preset: Optional[str] = None,
+        vertexai_model: Optional[str] = None,
     ) -> None:
         super().__init__(cache_config=cache_config)
         self.project_id = project_id
@@ -75,8 +74,15 @@ def __init__(
         self.safety_settings_preset = safety_settings_preset
         self.safety_settings = _get_safety_settings_for_preset(safety_settings_preset)
 
+        self.vertexai_model = vertexai_model
+
         vertexai.init(project=self.project_id, location=self.location)
 
+    def _get_model_name_for_request(self, request: Request) -> str:
+        if self.vertexai_model is not None:  # a model name passed to the constructor takes precedence
+            return self.vertexai_model
+        return request.model_engine  # no override configured: use the request's model engine as-is
+
     def make_cache_key_with_safety_settings_preset(self, raw_request: Mapping, request: Request) -> Mapping:
         """Construct the key for the cache using the raw request.
 
@@ -119,7 +125,7 @@ def make_request(self, request: Request) -> RequestResult:
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = _get_model_name_for_request(request)
+        model_name: str = self._get_model_name_for_request(request)
 
         try:
 
@@ -233,7 +239,7 @@ def make_request(self, request: Request) -> RequestResult:
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = _get_model_name_for_request(request)
+        model_name: str = self._get_model_name_for_request(request)
         model = self.get_model(model_name)
 
         try:
@@ -375,7 +381,7 @@ def _make_multimodal_request(self, request: Request) -> RequestResult:
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = _get_model_name_for_request(request)
+        model_name: str = self._get_model_name_for_request(request)
         model = self.get_model(model_name)
 
         request_time = 0

diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml
@@ -552,6 +552,7 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
       args:
+        vertexai_model: gemini-1.5-pro-001
         safety_settings_preset: default
 
   - name: google/gemini-1.5-pro-001-safety-block-none
@@ -562,6 +563,7 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
       args:
+        vertexai_model: gemini-1.5-pro-001
         safety_settings_preset: block_none
 
   - name: google/gemini-1.5-flash-001-safety-default
@@ -572,6 +574,7 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
       args:
+        vertexai_model: gemini-1.5-flash-001
         safety_settings_preset: default
 
   - name: google/gemini-1.5-flash-001-safety-block-none
@@ -582,6 +585,7 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
       args:
+        vertexai_model: gemini-1.5-flash-001
         safety_settings_preset: block_none
 
   - name: google/gemini-1.5-pro-002
@@ -608,6 +612,33 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
 
+  - name: google/llama-3.1-8b-instruct
+    model_name: meta/llama-3.1-8b-instruct
+    tokenizer_name: meta/llama-3.1-8b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: publishers/meta/models/llama-3.1-8b-instruct-maas
+
+  - name: google/llama-3.1-70b-instruct
+    model_name: meta/llama-3.1-70b-instruct
+    tokenizer_name: meta/llama-3.1-8b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: publishers/meta/models/llama-3.1-70b-instruct-maas
+
+  - name: google/llama-3.1-405b-instruct
+    model_name: meta/llama-3.1-405b-instruct
+    tokenizer_name: meta/llama-3.1-8b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: publishers/meta/models/llama-3.1-405b-instruct-maas
+
   ## Gemma
   - name: together/gemma-2b
     model_name: google/gemma-2b

diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml
@@ -1645,6 +1645,33 @@ models:
     release_date: 2024-07-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: meta/llama-3.1-8b-instruct
+    display_name: Llama 3.1 Instruct (8B)
+    description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.1-70b-instruct
+    display_name: Llama 3.1 Instruct (70B)
+    description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.1-405b-instruct
+    display_name: Llama 3.1 Instruct (405B)
+    description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 405000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: meta/llama-3.1-8b-instruct-turbo
     display_name: Llama 3.1 Instruct Turbo (8B)
     description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
@@ -1701,7 +1728,7 @@ models:
 
   - name: meta/llama-3.3-70b-instruct-turbo
     display_name: Llama 3.3 Instruct Turbo (70B)
-    description: Llama 3.3 Instruct (70B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
     creator_organization_name: Meta
     access: open
     num_parameters: 70000000000
@@ -1710,7 +1737,7 @@ models:
 
   - name: meta/llama-3.3-70b-instruct
     display_name: Llama 3.3 Instruct (70B)
-    description: Llama 3.3 Instruct (70B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
+    description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
     creator_organization_name: Meta
     access: open
     num_parameters: 70000000000

diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml
@@ -339,6 +339,14 @@ tokenizer_configs:
     prefix_token: "<|begin_of_text|>"
     end_of_text_token: "<|end_of_text|>"
 
+  - name: meta/llama-3.1-8b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
   - name: meta/llama-3.2-3b-instruct
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"