Skip to content

Commit

Permalink
feat(text-extraction): add optional text extraction configuration (#69)
Browse files Browse the repository at this point in the history
Signed-off-by: Radek Ježek <radek.jezek@ibm.com>
  • Loading branch information
jezekra1 authored Jan 8, 2025
1 parent 383ce28 commit f6ce1f6
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 8 deletions.
27 changes: 27 additions & 0 deletions bee-stack.sh
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,17 @@ configure_openai() {
write_env OPENAI_API_KEY
}

configure_text_extraction() {
  # Appends the settings for a docling-backed text extraction setup to the
  # env file being assembled.
  # Globals: TMP_ENV_FILE (read) - path of the file that is appended to.
  # Outputs: three KEY=VALUE lines appended to TMP_ENV_FILE.
  local flags_json='{"Knowledge":true,"Files":true,"TextExtraction":true,"FunctionTools":true,"Observe":true,"Projects":true}'

  {
    # The JSON value is wrapped in literal single quotes inside the env file.
    printf "FEATURE_FLAGS='%s'\n" "$flags_json"
    printf '%s\n' 'TEXT_EXTRACTION_ENABLED=true' 'EXTRACTION_BACKEND=docling'
  } >> "$TMP_ENV_FILE"
}

configure_no_text_extraction() {
  # Appends the settings used when docling text extraction is declined:
  # the Knowledge and TextExtraction flags are off and the backend is wdu.
  # Globals: TMP_ENV_FILE (read) - path of the file that is appended to.
  # Outputs: two KEY=VALUE lines appended to TMP_ENV_FILE.
  local flags_json='{"Knowledge":false,"Files":true,"TextExtraction":false,"FunctionTools":true,"Observe":true,"Projects":true}'

  {
    # The JSON value is wrapped in literal single quotes inside the env file.
    printf "FEATURE_FLAGS='%s'\n" "$flags_json"
    printf '%s\n' 'EXTRACTION_BACKEND=wdu'
  } >> "$TMP_ENV_FILE"
}

setup() {
printf "🐝 Welcome to the bee-stack! You're just a few questions away from building agents!\n(Press ^C to exit)\n\n"
rm -f "$TMP_ENV_FILE"
Expand All @@ -188,6 +199,15 @@ setup() {
[[ $SELECTED_OPT == 'watsonx' ]] && configure_watsonx
[[ $SELECTED_OPT == 'openai' ]] && configure_openai

text_extraction_enabled=$(ask_yes_no \
"Do you want to enable docling text extraction? ⚠️ Requires >= 15GB of RAM **CONFIGURED** for the container runtime ⚠️"
)
if [[ $text_extraction_enabled == 'yes' ]]; then
configure_text_extraction
else
configure_no_text_extraction
fi

if [ -f ".env" ]; then
[ "$(ask_yes_no ".env file already exists. Do you want to override it?")" = 'no' ] && exit 1
if [ -n "$(${RUNTIME} compose ps -aq)" ]; then
Expand All @@ -206,17 +226,24 @@ start_stack() {
fi

${RUNTIME} compose --profile all up -d

if grep -q TEXT_EXTRACTION_ENABLED=true .env; then
${RUNTIME} compose --profile text-extraction up -d
fi

printf "Done. You can visit the UI at ${BLUE}http://localhost:3000${NC}\n"
}

stop_stack() {
  # Runs `compose down` once per compose profile (all, infra, text-extraction),
  # in that order.
  # Globals: RUNTIME (read) - container runtime command; it is expanded
  #          unquoted, so a multi-word value is split into command + arguments.
  local profile
  for profile in all infra text-extraction; do
    ${RUNTIME} compose --profile "$profile" down
  done
}

clean_stack() {
  # Runs `compose down --volumes` once per compose profile (all, infra,
  # text-extraction), then deletes the local tmp directory and recreates the
  # empty code-interpreter storage directory inside it.
  # Globals: RUNTIME (read) - container runtime command; it is expanded
  #          unquoted, so a multi-word value is split into command + arguments.
  # Paths are relative to the current working directory.
  local profile
  for profile in all infra text-extraction; do
    ${RUNTIME} compose --profile "$profile" down --volumes
  done

  rm -rf tmp
  mkdir -p ./tmp/code-interpreter-storage
}
Expand Down
27 changes: 19 additions & 8 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@ services:
--import ./dist/opentelemetry.js \
./dist/server.js
'
env_file:
- .env
environment:
EXTRACTION_BACKEND: wdu

env_file: [ .env ]
environment: &api-env
MONGODB_URL: mongodb://mongo:27017?directConnection=true
MONGODB_DATABASE_NAME: bee-api
REDIS_URL: redis://redis:6379/0
Expand Down Expand Up @@ -55,7 +52,7 @@ services:
MILVUS_PASSWORD: password
MILVUS_DATABASE_NAME: default

RUN_BULLMQ_WORKERS: runs,runs-cleanup,vectorStores-cleanup,vectorStores-fileProcessor,files-extraction-node
RUN_BULLMQ_WORKERS: runs,runs-cleanup,vectorStores-cleanup,vectorStores-fileProcessor,files-extraction-node,files-extraction-python

# Used to encrypt/decrypt values
# You can use `$ openssl rand -base64 32` to generate new one
Expand All @@ -82,16 +79,30 @@ services:
- "4000:4000"
profiles: [ all ]

# Service built from the bee-api-workers-python docling image; it is assigned
# only to the `text-extraction` profile, so it is not part of the `all` profile.
bee-api-python-workers:
image: icr.io/i-am-bee/bee-api-workers-python:0.0.21-docling
env_file: [ .env ]
environment:
# Merge in the mapping anchored as `api-env`, then add the docling settings below.
<<: *api-env
DOCLING_DO_TABLE_STRUCTURE: false
DOCLING_PDF_DO_OCR: false
healthcheck:
# Probe the /health endpoint on port 8080 from inside the container.
test: "wget --quiet -O - http://127.0.0.1:8080/health"
interval: 3s
retries: 20
start_period: 5s
profiles: [ text-extraction ]

bee-ui:
depends_on:
bee-api: { condition: service_healthy }
image: icr.io/i-am-bee/bee-ui-local:0.0.21
image: icr.io/i-am-bee/bee-ui-local:0.0.22
env_file: [ .env ]
environment:
NEXTAUTH_URL: http://localhost:3000
NEXTAUTH_SECRET: top-secret
NEXT_PUBLIC_USERCONTENT_SITE_URL: http://localhost:5173
API_URL: http://bee-api:4000

DUMMY_JWT_TOKEN: "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0IiwicHJlZmVycmVkX3VzZXJuYW1lIjoiVGVzdCBVc2VyIiwiZW1haWwiOiJ0ZXN0QGVtYWlsLmNvbSIsImlhdCI6MTUxNjIzOTAyMiwiaXNzIjoiaHR0cHM6Ly9sb2NhbGhvc3QiLCJhdWQiOiJiZWUtdGVzdCJ9.vwkGnl7lBbzJYk6BtoW3VoA3mnNJVI-nDQU8aK7zOH-rkf2pn5cn6CKwpq7enDInIXro8WtBLNZP8Nr8GQIZKahICuP3YrPRmzv7YIW8LuXKnx1hycg5OAtj0OtQi5FYwwCxTYW9pBF2it7XwQSBcW7yYsOrvgs7jVhThCOsavX0YiAROxZIhk1idZT4Pl3egfUI_dy9iBxcn7xocTnos-94wqJNt8oCVgB8ynj75yJFHJbiQ-9Tym_V3LcMHoEyv67Jzie8KugCgdpuF6EbQqcyfYJ83q5jJpR2LiuWMuGsNSbjjDY-f1vCSMo9L9-R8KFrDylT_BzLvRBswOzW7A"
profiles: [ all ]
ports:
Expand Down

0 comments on commit f6ce1f6

Please sign in to comment.