feat(Dockerfile): update base image to slim and add build args for versioning

feat(server): implement server functionalities with routers for health, detect, and search

feat(pyproject.toml): add new dependencies for image processing, settings management, and testing

feat(server): implement object detection and classification models for image analysis and search
shba007 committed Dec 18, 2024
1 parent 0b6a1f9 commit 5a788d6
Showing 16 changed files with 796 additions and 58 deletions.
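The application entry point, server/main.py, is not among the hunks rendered below, but the Dockerfile CMD and the router prefixes suggest how the health, detect, search, and scan routers come together. The following is a minimal sketch under that assumption, not the file from this commit:

# server/main.py: hypothetical wiring; the real file is not shown on this page
from fastapi import FastAPI

from .routers import detect, health, scan, search  # assumed module layout under server/routers/

app = FastAPI()

# Each router declares its own prefix and tags, so no extra arguments are needed here.
app.include_router(health.router)
app.include_router(detect.router)
app.include_router(search.router)
app.include_router(scan.router)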
21 changes: 13 additions & 8 deletions Dockerfile
@@ -6,25 +6,30 @@ ENV UV_COMPILE_BYTECODE=1
ENV UV_LINK_MODE=copy

RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --no-install-project --no-dev
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --no-install-project --no-dev

ADD . /app

WORKDIR /app

RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-dev
uv sync --frozen --no-dev

FROM python:3.12-slim-bookworm AS runner
FROM python:3.12-slim AS runner

COPY --from=builder /app /app
ARG VERSION
ARG BUILD_TIME

WORKDIR /app
ENV PYTHON_ENV=production

ENV PATH="/app/.venv/bin:$PATH"

COPY --from=builder /app /app

WORKDIR /app

EXPOSE 8000

CMD ["fastapi", "run", "app/main.py", "--host", "0.0.0.0", "--port", "8000"]
CMD ["fastapi", "run", "server/main.py", "--host", "0.0.0.0", "--port", "8000"]
8 changes: 7 additions & 1 deletion pyproject.toml
@@ -11,16 +11,22 @@ dependencies = [
"tensorflow-cpu>=2.18.0",
"weaviate-client>=3.26.7,<4.0.0",
"firebase-admin>=6.6.0",
"numpy>=2.0.2",
"opencv-contrib-python-headless>=4.10.0.84",
"pydantic-settings>=2.7.0",
"scipy>=1.14.1",
]

[dependency-groups]
dev = [
"changelog-gen>=0.13.5",
"pytest>=8.3.4",
"ruff>=0.8.3",
"taskipy>=1.14.1",
]

[tool.taskipy.tasks]
dev = "fastapi dev app/main.py"
dev = "fastapi dev server/main.py"
lint = "ruff check --fix"
format = "ruff format"
docker-build = "docker build --build-arg VERSION_TAG=dev -t unai-api-fastapi:dev ."
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions app/routers/detect.py → server/routers/detect.py
@@ -8,7 +8,7 @@
from PIL import Image
import tensorflow as tf

from ..utils.helpers import Data, save_file, upload_file
from ..utils.helpers_obj import Data, save_file, upload_file
from ..utils.models import Detector

router = APIRouter(
@@ -47,7 +47,7 @@ async def detect(background_tasks: BackgroundTasks, request: ImageRequest):
    background_tasks.add_task(upload_file, file_path, f"images/{id}.jpg")

    data = Data("temp-1")
    data.images = {} if data.images == None else data.images
    data.images = {} if data.images is None else data.images
    data.images[id] = image

    img = data.get_images()
3 changes: 2 additions & 1 deletion app/routers/health.py → server/routers/health.py
@@ -6,6 +6,7 @@
    responses={404: {"description": "Not found"}},
)


@router.get("/")
async def get_health():
return {"status": "OK", "version":"dev"}
return {"status": "OK", "version": "dev"}
65 changes: 65 additions & 0 deletions server/routers/scan.py
@@ -0,0 +1,65 @@
from fastapi import APIRouter, HTTPException
import base64

from utils.scan import (
    align_crop,
    align_inputs,
    detect_markers,
    detect_qr,
    extract_data,
    highlight,
)

router = APIRouter(
    prefix="/scan",
    tags=["scan"],
    dependencies=[],
    responses={404: {"description": "Not found"}},
)


@router.post("/")
async def scan(images: list[str]):
    meta_data = []
    cropped_images = []
    for image in images:
        image = base64.b64decode(image.split(",")[1])

        markers = detect_markers(image)
        cropped_image = align_crop(image, markers)
        cropped_images.append(cropped_image)
        meta_data.append(detect_qr(cropped_image))

    if not all(item["scale"] == meta_data[0]["scale"] for item in meta_data):
        raise HTTPException(status_code=409, detail="Pages are not of a same scale")

    total_choice_indexes = set()
    for data in meta_data:
        start = data["choice"]["start"]
        count = data["choice"]["count"]

        choice_indexes = list(range(start, start + count))
        total_choice_indexes.update(choice_indexes)

    if not set(range(1, meta_data[0]["choice"]["total"])).issubset(
        total_choice_indexes
    ):
        raise HTTPException(status_code=400, detail="Insufficient number of pages")

    highlights = []
    choices = []
    for index, cropped_image in enumerate(cropped_images):
        # print(meta_data)
        option_count = meta_data[index]["option"]
        start = meta_data[index]["choice"]["start"]
        choice_count = meta_data[index]["choice"]["count"]
        inputs = align_inputs(cropped_image, option_count, start, choice_count)
        # print(inputs)
        choices.extend(extract_data(cropped_image, inputs))
        highlights.append(highlight(cropped_image, option_count, inputs, choices))

    # print(choices)
    return {
        "data": {"name": meta_data[0]["scale"], "choices": choices},
        "highlights": highlights,
    }
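The new scan route takes a JSON array of base64 data-URL strings and responds with the decoded choices plus highlight overlays. A rough client call might look like this; httpx, the file names, and the local URL are assumptions, not part of the diff:

# Hypothetical client for POST /scan/; assumes the API is running on localhost:8000
import base64
import httpx


def to_data_url(path: str) -> str:
    # The route splits on "," and base64-decodes the second half,
    # so a data-URL-style prefix is expected.
    with open(path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode()
    return f"data:image/jpeg;base64,{encoded}"


pages = [to_data_url("page-1.jpg"), to_data_url("page-2.jpg")]
response = httpx.post("http://127.0.0.1:8000/scan/", json=pages, timeout=60.0)
response.raise_for_status()
result = response.json()
print(result["data"]["name"], len(result["data"]["choices"]))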
17 changes: 11 additions & 6 deletions app/routers/search.py → server/routers/search.py
@@ -8,7 +8,7 @@
import weaviate as Weaviate
import meilisearch as Meilisearch

from ..utils.helpers import Data
from ..utils.helpers_obj import Data
from ..utils.models import OneShotClassifier

router = APIRouter(
@@ -20,16 +20,17 @@
classifier = OneShotClassifier()

WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_URL = "127.0.0.1" if WEAVIATE_URL == None else WEAVIATE_URL
WEAVIATE_URL = "127.0.0.1" if WEAVIATE_URL is None else WEAVIATE_URL

MEILISEARCH_URL = os.getenv("MEILISEARCH_URL")
MEILISEARCH_URL = "127.0.0.1" if MEILISEARCH_URL == None else MEILISEARCH_URL
MEILISEARCH_URL = "127.0.0.1" if MEILISEARCH_URL is None else MEILISEARCH_URL
MEILISEARCH_API_KEY = os.getenv("MEILISEARCH_SECRET")
MEILISEARCH_API_KEY = "" if MEILISEARCH_API_KEY == None else MEILISEARCH_API_KEY
MEILISEARCH_API_KEY = "" if MEILISEARCH_API_KEY is None else MEILISEARCH_API_KEY

weaviate = None
meilisearch = None


class Box(BaseModel):
    x: float
    y: float
@@ -62,8 +63,12 @@ def format(data):
@router.post("/")
async def search(request: ImageRequest):
    try:
        weaviate = Weaviate.Client(url=WEAVIATE_URL) if weaviate == None else weaviate
        meilisearch = Meilisearch.Client(MEILISEARCH_URL, MEILISEARCH_API_KEY) if meilisearch == None else meilisearch
        weaviate = Weaviate.Client(url=WEAVIATE_URL) if weaviate is None else weaviate
        meilisearch = (
            Meilisearch.Client(MEILISEARCH_URL, MEILISEARCH_API_KEY)
            if meilisearch is None
            else meilisearch
        )

        id = request.id
        file_path = f"assets/images/{id}.jpg"
Empty file added server/utils/__init__.py
72 changes: 72 additions & 0 deletions server/utils/helper_omr.py
@@ -0,0 +1,72 @@
import numpy as np
import cv2


def is_circle_inside(circle_center):
    # from markers 3,5,11,9
    boundary = [
        [70.0, 390.5],
        [2306.0, 390.5],
        [2306.0, 3294.0],
        [70.0, 3294.0],
    ]

    x, y = circle_center
    x_min, y_min = boundary[0]
    x_max, y_max = boundary[2]

    if x_min <= x <= x_max and y_min <= y <= y_max:
        return True
    else:
        return False


def choice_generator(option, index, total):
    factor = 4
    index = index - 1
    unit = 15
    x = 55
    y = 100

    while index < total:
        if index % 40 == 0 and index != 0:
            x += 110
            y = 100
        elif index % 5 == 0 and index != 0:
            y += 15

        y += unit

        choices = None
        if option == 2:
            choices = [
                {"value": 1, "chord": [(x) * factor, (y) * factor]},
                {"value": 0, "chord": [(x + unit) * factor, (y) * factor]},
            ]
        elif option == 5:
            choices = [
                {"value": 0, "chord": [(x) * factor, (y) * factor]},
                {"value": 1, "chord": [(x + 1 * unit) * factor, (y) * factor]},
                {"value": 2, "chord": [(x + 2 * unit) * factor, (y) * factor]},
                {"value": 3, "chord": [(x + 3 * unit) * factor, (y) * factor]},
                {"value": 4, "chord": [(x + 4 * unit) * factor, (y) * factor]},
            ]

        yield {"index": index + 1, "choices": choices}

        index += 1


def calculate_bw_ratio(image):
    # Threshold the image to get binary image with white pixels
    _, binary = cv2.threshold(image, 250, 255, cv2.THRESH_BINARY)

    # Count the white pixels
    num_white_pixels = np.count_nonzero(binary == 255)

    # Calculate the ratio of white pixels to total pixels
    height, width = binary.shape
    num_pixels = width * height
    white_ratio = num_white_pixels / num_pixels

    return white_ratio
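helper_omr.py keeps the sheet geometry in one place: choice_generator walks a fixed grid and yields bubble centres (already scaled by factor 4), is_circle_inside rejects points outside the marker boundary, and calculate_bw_ratio measures how white a cropped patch is. A small driver sketch, where the file name, window size, and fill threshold are assumptions:

# Hypothetical driver for the OMR helpers; threshold and window size are guesses
import cv2

from server.utils.helper_omr import calculate_bw_ratio, choice_generator, is_circle_inside

sheet = cv2.imread("cropped-page.jpg", cv2.IMREAD_GRAYSCALE)

for item in choice_generator(option=5, index=1, total=10):
    for choice in item["choices"]:
        x, y = choice["chord"]
        if not is_circle_inside((x, y)):
            continue
        # Sample a window around the bubble centre; a darker patch
        # (lower white ratio) suggests the bubble has been filled in.
        window = sheet[y - 20 : y + 20, x - 20 : x + 20]
        if calculate_bw_ratio(window) < 0.5:
            print(f"question {item['index']}: marked option {choice['value']}")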
22 changes: 11 additions & 11 deletions app/utils/helpers.py → server/utils/helpers_obj.py
@@ -10,11 +10,11 @@
from PIL import Image, ImageOps, ExifTags

PRESET = os.getenv("PRESET")
PRESET = "deploy" if PRESET == None else PRESET
PRESET = "deploy" if PRESET is None else PRESET


STORAGE_BUCKET = os.getenv("STORAGE_BUCKET")
STORAGE_BUCKET = "" if STORAGE_BUCKET == None else STORAGE_BUCKET
STORAGE_BUCKET = "" if STORAGE_BUCKET is None else STORAGE_BUCKET

FIREBASE_CONFIG = ""

@@ -49,7 +49,7 @@ def upload_file(source_file_location: str, dest_file_location: str | None = None
if PRESET != "deploy":
return

if dest_file_location == None:
if dest_file_location is None:
dest_file_location = source_file_location

blob = bucket.blob(dest_file_location)
@@ -63,7 +63,7 @@ def download_file(source_file_location: str, dest_file_location: str | None = No
    if file_exists_check(dest_file_location):
        return

    if dest_file_location == None:
    if dest_file_location is None:
        dest_file_location = source_file_location

    blob = bucket.blob(source_file_location)
@@ -286,7 +286,7 @@ def __pipeline__(self, info, type, resize_dim, return_annotations):
else (single_crop, annotation)
)

if return_annotations == True:
if return_annotations is True:
yield {
"id": info["id"],
"photography": info["photography"],
Expand All @@ -310,7 +310,7 @@ def __pipeline__(self, info, type, resize_dim, return_annotations):
else (self.images[info["id"]], annotations)
)

if return_annotations == True:
if return_annotations is True:
yield {
"id": info["id"],
"photography": info["photography"],
@@ -346,12 +346,12 @@ def __img_pipeline__(self, input, type, resize_dim, return_annotations):
else (single_crop, annotation)
)

if return_annotations == True:
if return_annotations is True:
yield {"id": id, "image": single_crop, "bboxes": annotations}
else:
yield {"id": id, "image": single_crop}
else:
if self.annotations == None:
if self.annotations is None:
annotations = None
image = (
resize(self.images[id], resize_dim)
Expand All @@ -366,7 +366,7 @@ def __img_pipeline__(self, input, type, resize_dim, return_annotations):
else (self.images[id], annotations)
)

if return_annotations == True:
if return_annotations is True:
yield {"id": id, "image": image, "bboxes": annotations}
else:
yield {"id": id, "image": image}
@@ -383,15 +383,15 @@ def get_image(self, select, type="full", resize_dim=None, return_annotations=Fal
def get_images(
self, select=None, type="full", resize_dim=None, return_annotations=False
):
if self.meta == None:
if self.meta is None:
lst = map(
lambda x: self.__img_pipeline__(
x, type, resize_dim, return_annotations
),
self.images.items(),
)
else:
if select == None:
if select is None:
# print("Id", self.id)
filtered_images = self.meta["images"]
elif select == "face":