Skip to content

Commit

Permalink
Expose max_batch_size as envvar for TGI entrypoint
Browse files Browse the repository at this point in the history
  • Loading branch information
mfuntowicz committed Apr 17, 2024
1 parent 30153d5 commit 2eb609e
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions text-generation-inference/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ else
export TGI_MAX_CONCURRENT_REQUESTS 4
fi

# Forward TGI_MAX_BATCH_SIZE from the environment, defaulting to 1.
if [[ -n "${TGI_MAX_BATCH_SIZE}" ]]; then
  export TGI_MAX_BATCH_SIZE="${TGI_MAX_BATCH_SIZE}"
else
  # Fix: `export TGI_MAX_BATCH_SIZE 1` does not assign 1 — it tries to export
  # a second variable literally named "1", which is an invalid identifier,
  # leaving TGI_MAX_BATCH_SIZE unset. Use the `VAR=value` form instead.
  export TGI_MAX_BATCH_SIZE=1
fi

if [[ -n "${TGI_MAX_INPUT_LENGTH}" ]]; then
export TGI_MAX_INPUT_LENGTH="${TGI_MAX_INPUT_LENGTH}"
else
Expand All @@ -48,6 +54,7 @@ fi

# Launch TGI with the limits resolved above.
# Fix: the --max-concurrent-requests and --max-batch-size lines were missing
# the trailing `\` line-continuation, which ended the command early and left
# the remaining `--max-*` lines to run as separate (invalid) commands.
# Expansions are quoted defensively; the command continues on the next line.
text-generation-launcher --port 8080 \
  --max-concurrent-requests "${TGI_MAX_CONCURRENT_REQUESTS}" \
  --max-batch-size "${TGI_MAX_BATCH_SIZE}" \
  --max-input-length "${TGI_MAX_INPUT_LENGTH}" \
  --max-total-tokens "${TGI_MAX_TOTAL_TOKENS}" \
  --max-batch-prefill-tokens "${TGI_MAX_BATCH_PREFILL_TOKENS}" \
Expand Down

0 comments on commit 2eb609e

Please sign in to comment.