Skip to content

Commit

Permalink
use older tgi version
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Dec 16, 2024
1 parent bd54289 commit 3b0138b
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 1 deletion.
1 change: 1 addition & 0 deletions examples/cuda_tgi_llama.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ backend:
cuda_graphs: 0 # remove for better perf but bigger memory footprint
no_weights: true
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+ image: ghcr.io/huggingface/text-generation-inference:2.4.1

scenario:
input_shapes:
Expand Down
2 changes: 1 addition & 1 deletion tests/configs/_no_weights_.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ hydra:
mode: MULTIRUN
sweeper:
params:
- backend.no_weights: true
+ backend.no_weights: true,false
1 change: 1 addition & 0 deletions tests/configs/cpu_inference_py_txi_gpt2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ name: cpu_inference_py_txi_gpt2

backend:
cuda_graphs: 0
+ image: ghcr.io/huggingface/text-generation-inference:2.4.1
1 change: 1 addition & 0 deletions tests/configs/cuda_inference_py_txi_gpt2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ name: cuda_inference_py_txi_gpt2

backend:
cuda_graphs: 0
+ image: ghcr.io/huggingface/text-generation-inference:2.4.1

0 comments on commit 3b0138b

Please sign in to comment.