diff --git a/text-generation-inference/entrypoint.sh b/text-generation-inference/entrypoint.sh
index 97e12b3a..9cba11f8 100644
--- a/text-generation-inference/entrypoint.sh
+++ b/text-generation-inference/entrypoint.sh
@@ -11,25 +11,25 @@ export MODEL_ID="${MODEL_ID}"
 if [[ -n "${TGI_MAX_CONCURRENT_REQUESTS}" ]]; then
     export TGI_MAX_CONCURRENT_REQUESTS="${TGI_MAX_CONCURRENT_REQUESTS}"
 else
-    export TGI_MAX_CONCURRENT_REQUESTS 4
+    export TGI_MAX_CONCURRENT_REQUESTS=4
 fi
 
 if [[ -n "${TGI_MAX_BATCH_SIZE}" ]]; then
     export TGI_MAX_BATCH_SIZE="${TGI_MAX_BATCH_SIZE}"
 else
-    export TGI_MAX_BATCH_SIZE 1
+    export TGI_MAX_BATCH_SIZE=1
 fi
 
 if [[ -n "${TGI_MAX_INPUT_TOKENS}" ]]; then
     export TGI_MAX_INPUT_TOKENS="${TGI_MAX_INPUT_TOKENS}"
 else
-    export TGI_MAX_INPUT_TOKENS 128
+    export TGI_MAX_INPUT_TOKENS=32
 fi
 
 if [[ -n "${TGI_MAX_TOTAL_TOKENS}" ]]; then
     export TGI_MAX_TOTAL_TOKENS="${TGI_MAX_TOTAL_TOKENS}"
 else
-    export TGI_MAX_TOTAL_TOKENS 256
+    export TGI_MAX_TOTAL_TOKENS=64
 fi
 
 TGI_MAX_BATCH_PREFILL_TOKENS=$(( TGI_MAX_BATCH_SIZE*TGI_MAX_INPUT_TOKENS ))