# docker-compose-70b.yml
version: '3.6'
services:
  llama-gpt-api-70b:
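    # Presumably the prebuilt image can be used instead of building locally:
    # uncomment the image line below and comment out the build block.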
    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
    build:
      context: ./api
      dockerfile: 70B.Dockerfile
    restart: on-failure
    environment:
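      # Container-internal path of the quantized 70B chat model; the api image
      # is assumed to place the weights under /models (not verified here).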
      MODEL: '/models/llama-2-70b-chat.bin'
      # Llama 2 70B uses grouped-query attention with a grouping factor of 8,
      # whereas 7B and 13B use 1. Currently, it's not possible to change this
      # with --n_gqa via llama-cpp-python in run.sh, so we expose it as an
      # environment variable.
      # See: https://github.com/abetlen/llama-cpp-python/issues/528
      # and: https://github.com/facebookresearch/llama/issues/407
      N_GQA: '8'
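      # mlock pins the model's memory so it can't be swapped out; this needs
      # the IPC_LOCK capability granted under cap_add below.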
      USE_MLOCK: 1
    cap_add:
      - IPC_LOCK
  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
    ports:
      - 3000:3000
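    # With this mapping the chat UI should be reachable at http://localhost:3000
    # on the Docker host.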
    restart: on-failure
    environment:
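      # The UI talks to the local API through the OpenAI-compatible interface;
      # the key is a placeholder and is presumably never validated by the
      # llama-cpp-python server.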
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://llama-gpt-api-70b:8000'
      - 'DEFAULT_MODEL=/models/llama-2-70b-chat.bin'
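      # Hold the UI until the API container answers on port 8000; loading the
      # 70B weights can take a while, hence the generous 600-second timeout.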
      - 'WAIT_HOSTS=llama-gpt-api-70b:8000'
      - 'WAIT_TIMEOUT=600'
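
# A minimal usage sketch (assuming this file sits in the repo root next to the
# ./api build context):
#
#   docker compose -f docker-compose-70b.yml up --build
#
# Once both containers are up, open http://localhost:3000 to chat with the
# 70B model.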