From 23f81ee1b634f0632fb3721b36f53da2c70d97ed Mon Sep 17 00:00:00 2001 From: Yifan Mai Date: Fri, 31 Jan 2025 16:18:12 -0800 Subject: [PATCH] Add Deepseek-R1 model --- src/helm/config/model_deployments.yaml | 11 ++++++++++- src/helm/config/model_metadata.yaml | 10 ++++++++++ src/helm/config/tokenizer_configs.yaml | 6 ++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index a4f3ab06ca..9c102c5f5c 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -431,7 +431,16 @@ model_deployments: - name: together/deepseek-v3 model_name: deepseek-ai/deepseek-v3 tokenizer_name: deepseek-ai/deepseek-v3 - max_sequence_length: 131072 + max_sequence_length: 16384 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + disable_logprobs: True + + - name: together/deepseek-r1 + model_name: deepseek-ai/deepseek-r1 + tokenizer_name: deepseek-ai/deepseek-r1 + max_sequence_length: 32768 client_spec: class_name: "helm.clients.together_client.TogetherChatClient" args: diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index e4f2603cec..5562e966fe 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -798,6 +798,16 @@ models: release_date: 2024-12-24 tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + - name: deepseek-ai/deepseek-r1 + display_name: DeepSeek R1 + description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incorporates multi-stage training and cold-start data before RL. 
([paper](https://arxiv.org/abs/2501.12948)) + creator_organization_name: DeepSeek + access: open + # NOTE: The total size of DeepSeek-R1 model on HuggingFace is 685B + num_parameters: 685000000000 + release_date: 2025-01-20 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + # EleutherAI - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together. display_name: GPT-J (6B) diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml index e7a5548cc4..d47d40db35 100644 --- a/src/helm/config/tokenizer_configs.yaml +++ b/src/helm/config/tokenizer_configs.yaml @@ -175,6 +175,12 @@ tokenizer_configs: end_of_text_token: "<|end▁of▁sentence|>" prefix_token: "<|begin▁of▁sentence|>" + - name: deepseek-ai/deepseek-r1 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|end▁of▁sentence|>" + prefix_token: "<|begin▁of▁sentence|>" + # EleutherAI - name: EleutherAI/gpt-j-6B tokenizer_spec: