Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Granite 3.1 model family (IBM) #3261

Merged
merged 11 commits into from
Jan 13, 2025
80 changes: 80 additions & 0 deletions src/helm/config/model_deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2948,3 +2948,83 @@ model_deployments:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: maritaca-ai/sabia-7b

# Granite-3.1-8b-base
- name: huggingface/granite-3.1-8b-base
model_name: ibm-granite/granite-3.1-8b-base
tokenizer_name: ibm-granite/granite-3.1-8b-base
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base

# Granite-3.1-8b-instruct
- name: huggingface/granite-3.1-8b-instruct
model_name: ibm-granite/granite-3.1-8b-instruct
tokenizer_name: ibm-granite/granite-3.1-8b-instruct
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct

# Granite-3.1-2b-instruct
- name: huggingface/granite-3.1-2b-instruct
model_name: ibm-granite/granite-3.1-2b-instruct
tokenizer_name: ibm-granite/granite-3.1-2b-instruct
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct

# Granite-3.1-2b-base
- name: huggingface/granite-3.1-2b-base
model_name: ibm-granite/granite-3.1-2b-base
tokenizer_name: ibm-granite/granite-3.1-2b-base
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base

# Granite-3.1-3b-a800m-instruct
- name: huggingface/granite-3.1-3b-a800m-instruct
model_name: ibm-granite/granite-3.1-3b-a800m-instruct
tokenizer_name: ibm-granite/granite-3.1-3b-a800m-instruct
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct

# Granite-3.1-3b-a800m-base
- name: huggingface/granite-3.1-3b-a800m-base
model_name: ibm-granite/granite-3.1-3b-a800m-base
tokenizer_name: ibm-granite/granite-3.1-3b-a800m-base
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base

# Granite-3.1-1b-a400m-instruct
- name: huggingface/granite-3.1-1b-a400m-instruct
model_name: ibm-granite/granite-3.1-1b-a400m-instruct
tokenizer_name: ibm-granite/granite-3.1-1b-a400m-instruct
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct

# Granite-3.1-1b-a400m-base
- name: huggingface/granite-3.1-1b-a400m-base
model_name: ibm-granite/granite-3.1-1b-a400m-base
tokenizer_name: ibm-granite/granite-3.1-1b-a400m-base
max_sequence_length: 128000
client_spec:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base
79 changes: 79 additions & 0 deletions src/helm/config/model_metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3584,3 +3584,82 @@ models:
release_date: 2023-11-08
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-8b-base
- name: ibm-granite/granite-3.1-8b-base
display_name: Granite 3.1 - 8B - Base
description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
creator_organization_name: IBM-GRANITE
access: open
num_parameters: 8170000000
release_date: 2024-12-18
tags: [TEXT_MODEL_TAG]

# Granite-3.1-8b-instruct
- name: ibm-granite/granite-3.1-8b-instruct
display_name: Granite 3.1 - 8B - Instruct
description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
creator_organization_name: IBM-GRANITE
access: open
num_parameters: 8170000000
release_date: 2024-12-18
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-2b-instruct
- name: ibm-granite/granite-3.1-2b-instruct
display_name: Granite 3.1 - 2B - Instruct
description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
creator_organization_name: IBM-GRANITE
access: open
num_parameters: 2530000000
release_date: 2024-12-18
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-2b-base
- name: ibm-granite/granite-3.1-2b-base
display_name: Granite 3.1 - 2B - Base
description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
creator_organization_name: IBM-GRANITE
access: open
num_parameters: 2530000000
release_date: 2024-12-18
tags: [TEXT_MODEL_TAG]

# Granite-3.1-3b-a800m-instruct
- name: ibm-granite/granite-3.1-3b-a800m-instruct
display_name: Granite 3.1 - 3B - A800M - Instruct
description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
creator_organization_name: IBM-GRANITE
access: open
num_parameters: 3300000000
release_date: 2024-12-18
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-3b-a800m-base
- name: ibm-granite/granite-3.1-3b-a800m-base
display_name: Granite 3.1 - 3B - A800M - Base
description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
creator_organization_name: IBM-GRANITE
access: open
num_parameters: 3300000000
release_date: 2024-12-18
tags: [TEXT_MODEL_TAG]

# Granite-3.1-1b-a400m-instruct
- name: ibm-granite/granite-3.1-1b-a400m-instruct
display_name: Granite 3.1 - 1B - A400M - Instruct
description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
creator_organization_name: IBM-GRANITE
access: open
num_parameters: 1330000000
release_date: 2024-12-18
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-1b-a400m-base
- name: ibm-granite/granite-3.1-1b-a400m-base
display_name: Granite 3.1 - 1B - A400M - Base
description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
creator_organization_name: IBM-GRANITE
access: open
num_parameters: 1330000000
release_date: 2024-12-18
tags: [TEXT_MODEL_TAG]
74 changes: 73 additions & 1 deletion src/helm/config/tokenizer_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -763,4 +763,76 @@ tokenizer_configs:
args:
pretrained_model_name_or_path: maritaca-ai/sabia-7b
end_of_text_token: "</s>"
prefix_token: "<s>"
prefix_token: "<s>"

# Granite-3.1-8b-base
- name: ibm-granite/granite-3.1-8b-base
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base
prefix_token: ""
end_of_text_token: "<|endoftext|>"

# Granite-3.1-8b-instruct
- name: ibm-granite/granite-3.1-8b-instruct
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct
prefix_token: ""
end_of_text_token: "<|endoftext|>"

# Granite-3.1-2b-instruct
- name: ibm-granite/granite-3.1-2b-instruct
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct
prefix_token: ""
end_of_text_token: ""

# Granite-3.1-2b-base
- name: ibm-granite/granite-3.1-2b-base
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base
prefix_token: ""
end_of_text_token: ""

# Granite-3.1-3b-a800m-instruct
- name: ibm-granite/granite-3.1-3b-a800m-instruct
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct
prefix_token: ""
end_of_text_token: ""

# Granite-3.1-3b-a800m-base
- name: ibm-granite/granite-3.1-3b-a800m-base
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base
prefix_token: ""
end_of_text_token: ""

# Granite-3.1-1b-a400m-instruct
- name: ibm-granite/granite-3.1-1b-a400m-instruct
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct
prefix_token: ""
end_of_text_token: ""

# Granite-3.1-1b-a400m-base
- name: ibm-granite/granite-3.1-1b-a400m-base
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base
prefix_token: ""
end_of_text_token: ""
Loading