From 40a90c040171fdd5b609780a6833d7f303713dc1 Mon Sep 17 00:00:00 2001 From: Caleb John <45307388+calebjohn24@users.noreply.github.com> Date: Thu, 9 Jan 2025 02:22:54 -0800 Subject: [PATCH] Add torch model and GPU support. (#195) * Setup TorchVL * fix build, setup cpu, gpu, cloud variants * remove torch files * update .gitignore * update tests * fix torchVL class and add streaming tests * format * remove space in torch max tokens * format * Add black install * fix cloud test * isort imports * fix test build ci * Remove build py, update readme * update readme * Add back torch deps * update readme * update readme * update readme --- .github/workflows/python-client-tests.yml | 4 +- .gitignore | 1 + clients/python/README.md | 26 ++++- clients/python/build.py | 93 --------------- clients/python/moondream/__init__.py | 15 ++- clients/python/moondream/torch_vl.py | 110 ++++++++++++++++++ clients/python/pyproject.toml | 55 +++++---- clients/python/tests/test_api_inference.py | 2 - clients/python/tests/test_local_inference.py | 3 +- .../tests/test_local_torch_inference.py | 84 +++++++++++++ moondream/torch/sample.py | 1 - 11 files changed, 271 insertions(+), 123 deletions(-) delete mode 100644 clients/python/build.py create mode 100644 clients/python/moondream/torch_vl.py create mode 100644 clients/python/tests/test_local_torch_inference.py diff --git a/.github/workflows/python-client-tests.yml b/.github/workflows/python-client-tests.yml index 5e50ad96..d63ff7d2 100644 --- a/.github/workflows/python-client-tests.yml +++ b/.github/workflows/python-client-tests.yml @@ -40,12 +40,12 @@ jobs: - name: Install dependencies working-directory: ./clients/python run: | - python -m pip install --upgrade pip poetry install --all-extras - name: Format code working-directory: ./clients/python run: | + poetry run pip install black poetry run black tests/test_local_inference.py --check - name: Run tests @@ -54,4 +54,4 @@ jobs: MOONDREAM_API_KEY: ${{ secrets.MOONDREAM_API_KEY }} run: | poetry run pip install pytest pytest-asyncio - poetry run pytest tests/test_*.py -v + poetry run pytest tests/test_api_inference.py -v diff --git a/.gitignore b/.gitignore index 098f5493..c1b5e0bf 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ data /pyproject.toml poetry.lock dist +clients/python/moondream/torch diff --git a/clients/python/README.md b/clients/python/README.md index 422095c0..df0c3f08 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -17,7 +17,23 @@ local inference and cloud-based API access. 
Install the package from PyPI:
 
 ```bash
-pip install moondream==0.0.5
+pip install moondream==0.0.6
+```
+
+To install the CPU dependencies for local inference, run:
+
+```bash
+pip install "moondream[cpu]"
+```
+
+To install the GPU dependencies for local inference, run:
+
+```bash
+# From the root of the moondream repo, copy the torch implementation into the client package
+cp -r moondream/torch clients/python/moondream/torch
+
+# Install the GPU dependencies
+pip install "moondream[gpu]"
 ```
 
 ## Quick Start
@@ -160,7 +176,13 @@ All methods return typed dictionaries:
 - CUDA (GPU) and MPS (Apple Silicon) support coming soon
 - For optimal performance with GPU/MPS, use the PyTorch implementation for now
 
+## Development Notes
+
+- Copy the torch implementation from the root moondream repo into the client's `moondream/torch` directory
+- Run `poetry install --extras "gpu"` to install the GPU dependencies
+- Run `poetry install --extras "cpu"` to install the CPU dependencies
+
 ## Links
 
 - [Website](https://moondream.ai/)
-- [Demo](https://moondream.ai/playground)
+- [Demo](https://moondream.ai/playground)
\ No newline at end of file
diff --git a/clients/python/build.py b/clients/python/build.py
deleted file mode 100644
index 460a7e52..00000000
--- a/clients/python/build.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import toml
-import sys
-import shutil
-from pathlib import Path
-
-BASE_CONFIG = {
-    "tool": {
-        "poetry": {
-            "version": "0.0.2",
-            "description": "Python client library for moondream",
-            "authors": ["vik "],
-            "readme": "README.md",
-            # Explicitly declare where to find the package source
-            "packages": [{"include": "moondream", "from": "."}],  # for GPU variant
-            "dependencies": {
-                "python": "^3.10",
-                "pillow": "^10.4.0",
-                "numpy": "^2.1.2",
-                "tokenizers": "^0.20.1",
-            },
-            "scripts": {"moondream": "moondream.cli:main"},
-        },
-        "pyright": {
-            "venvPath": ".",
-            "venv": ".venv",
-            "reportMissingParameterType": False,
-        },
-    },
-    "build-system": {
-        "requires": ["poetry-core"],
-        "build-backend": "poetry.core.masonry.api",
-    },
-}
-
-
-def build(variant):
-    config = BASE_CONFIG.copy()
-    src_dir = Path("moondream")
-
-    if variant == "gpu":
-        package_name = "moondream-gpu"
-        config["tool"]["poetry"]["name"] = package_name
-        config["tool"]["poetry"]["dependencies"]["onnxruntime-gpu"] = "^1.20.0"
-
-        # Create package directory
-        target_dir = Path(package_name)
-        if target_dir.exists():
-            shutil.rmtree(target_dir)
-
-        # Create directory and copy files
-        target_dir.mkdir(exist_ok=True)
-
-        # Ensure __init__.py exists (fix the filename)
-        # init_file = target_dir / "__init__.py"
-        # if not init_file.exists():
-        #     init_file.touch()
-
-        # Copy all python files
-        for py_file in src_dir.glob("*.py"):
-            shutil.copy2(py_file, target_dir)
-
-    elif variant == "cpu":
-        package_name = "moondream"
-        config["tool"]["poetry"]["name"] = package_name
-        config["tool"]["poetry"]["dependencies"]["onnxruntime"] = "^1.19.2"
-        config["tool"]["poetry"]["packages"] = [{"include": "moondream", "from": "."}]
-
-    else:
-        print(f"Unknown variant: {variant}")
-        print("Usage: python build.py [cpu|gpu]")
-        sys.exit(1)
-
-    import copy
-
-    config = copy.deepcopy(config)
-
-    # Write the configuration
-    toml_content = toml.dumps(config)
-    print("Generated pyproject.toml content:")
-    print(toml_content)  # Debug output
-    Path("pyproject.toml").write_text(toml_content)
-
-    print(f"Built configuration for {variant} variant")
-    return package_name
-
-
-if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        print("Usage: python build.py [cpu|gpu]")
sys.exit(1) - - package_name = build(sys.argv[1]) - print(f"Successfully created {package_name} configuration") diff --git a/clients/python/moondream/__init__.py b/clients/python/moondream/__init__.py index feb01791..46bd3235 100644 --- a/clients/python/moondream/__init__.py +++ b/clients/python/moondream/__init__.py @@ -1,7 +1,6 @@ from typing import Optional from .cloud_vl import CloudVL -from .onnx_vl import OnnxVL from .types import VLM DEFAULT_API_URL = "https://api.moondream.ai/v1" @@ -14,7 +13,19 @@ def vl( api_url: Optional[str] = None, ) -> VLM: if model: - return OnnxVL.from_path(model) + model_filetype = model.split(".")[-1] + if model_filetype == "safetensors": + from .torch_vl import TorchVL + + return TorchVL(model=model) + elif model_filetype == "mf": + from .onnx_vl import OnnxVL + + return OnnxVL.from_path(model) + + raise ValueError( + "Unsupported model filetype. Please use a .safetensors model for GPU use or .mf model for CPU use." + ) if api_key: if not api_url: diff --git a/clients/python/moondream/torch_vl.py b/clients/python/moondream/torch_vl.py new file mode 100644 index 00000000..31e653fa --- /dev/null +++ b/clients/python/moondream/torch_vl.py @@ -0,0 +1,110 @@ +from typing import Literal, Optional, Union + +import torch +from PIL import Image + +from .torch.moondream import MoondreamConfig, MoondreamModel +from .torch.weights import load_weights_into_model +from .types import ( + VLM, + Base64EncodedImage, + CaptionOutput, + DetectOutput, + EncodedImage, + PointOutput, + QueryOutput, + SamplingSettings, +) +from .version import __version__ + + +class TorchVL(VLM): + def __init__( + self, + *, + model: str, + ): + config = MoondreamConfig() + self.model = MoondreamModel(config) + load_weights_into_model(model, self.model) + self.model.eval() + # Move model to the appropriate device + if torch.cuda.is_available(): + self.device = "cuda" + elif torch.backends.mps.is_available(): + self.device = "mps" + else: + self.device = "cpu" + self.model.to(self.device) + + def encode_image( + self, image: Union[Image.Image, EncodedImage] + ) -> Base64EncodedImage: + if isinstance(image, EncodedImage): + assert type(image) == Base64EncodedImage + return image + + if not self.model: + raise ValueError("No local model loaded") + + return self.model.encode_image(image) + + def caption( + self, + image: Union[Image.Image, EncodedImage], + length: Literal["normal", "short"] = "normal", + stream: bool = False, + settings: Optional[SamplingSettings] = None, + ) -> CaptionOutput: + if not self.model: + raise ValueError("No local model loaded") + + encoded_image = ( + self.model.encode_image(image) if isinstance(image, Image.Image) else image + ) + return self.model.caption( + encoded_image, length=length, stream=stream, settings=settings + ) + + def query( + self, + image: Union[Image.Image, EncodedImage], + question: str, + stream: bool = False, + settings: Optional[SamplingSettings] = None, + ) -> QueryOutput: + if not self.model: + raise ValueError("No local model loaded") + + encoded_image = ( + self.model.encode_image(image) if isinstance(image, Image.Image) else image + ) + return self.model.query( + encoded_image, question, stream=stream, settings=settings + ) + + def detect( + self, + image: Union[Image.Image, EncodedImage], + object: str, + ) -> DetectOutput: + if not self.model: + raise ValueError("No local model loaded") + + encoded_image = ( + self.model.encode_image(image) if isinstance(image, Image.Image) else image + ) + return self.model.detect(encoded_image, object) 
+
+    def point(
+        self,
+        image: Union[Image.Image, EncodedImage],
+        object: str,
+    ) -> PointOutput:
+        if not self.model:
+            raise ValueError("No local model loaded")
+
+        encoded_image = (
+            self.model.encode_image(image) if isinstance(image, Image.Image) else image
+        )
+        return self.model.point(encoded_image, object)
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index 635be1ea..700b119c 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -1,32 +1,47 @@
+[build-system]
+requires = [ "poetry-core",]
+build-backend = "poetry.core.masonry.api"
+
 [tool.poetry]
 name = "moondream"
-version = "0.0.5"
+version = "0.0.6"
 description = "Python client library for moondream"
-authors = ["vik "]
+authors = [ "M87 Labs ",]
 readme = "README.md"
+[[tool.poetry.packages]]
+include = "moondream"
+from = "."
+
+[tool.pyright]
+venvPath = "."
+venv = ".venv"
+reportMissingParameterType = false
 
 [tool.poetry.dependencies]
 python = "^3.10"
 pillow = "^10.4.0"
-onnxruntime = "^1.19.2"
 numpy = "^2.1.2"
-onnx = "^1.17.0"
-tokenizers = "^0.20.1"
+onnxruntime = { version = ">=1.19.2", optional = true }
+tokenizers = { version = ">=0.20.1", optional = true }
+torch = { version = ">=2.5.0", optional = true }
+safetensors = { version = ">=0.4.2", optional = true }
+einops = { version = ">=0.7.0", optional = true }
+pyvips-binary = { version = ">=8.16.0", optional = true }
+pyvips = { version = ">=2.2.1", optional = true }
+
+[tool.poetry.extras]
+cpu = [
+    "onnxruntime",
+    "tokenizers"
+]
+gpu = [
+    "torch",
+    "safetensors",
+    "einops",
+    "pyvips-binary",
+    "pyvips",
+    "tokenizers"
+]
 
 [tool.poetry.scripts]
 moondream = "moondream.cli:main"
-
-[tool.poetry.group.dev.dependencies]
-pytest = "^8.3.4"
-pytest-asyncio = "^0.25.1"
-requests = "^2.32.3"
-black = "^24.10.0"
-
-[tool.pyright]
-venvPath = "."
-venv = ".venv"
-reportMissingParameterType = false
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
diff --git a/clients/python/tests/test_api_inference.py b/clients/python/tests/test_api_inference.py
index 36ba51d0..bcc58dcc 100644
--- a/clients/python/tests/test_api_inference.py
+++ b/clients/python/tests/test_api_inference.py
@@ -28,7 +28,6 @@ def test_api_initialization(model):
     assert isinstance(model, md.cloud_vl.CloudVL)
 
 
-@pytest.mark.skip(reason="API returning 502 errors, needs investigation")
 def test_image_captioning(model, test_image):
     # Test normal length caption
     result = model.caption(test_image, length="normal")
@@ -56,7 +55,6 @@ def test_streaming_caption(model, test_image):
     assert len(caption) > 0
 
 
-@pytest.mark.skip(reason="API returning 502 errors, needs investigation")
 def test_query_answering(model, test_image):
     # Test basic question answering
     result = model.query(test_image, "What is in this image?")
diff --git a/clients/python/tests/test_local_inference.py b/clients/python/tests/test_local_inference.py
index d0da8d10..9ae4a24b 100644
--- a/clients/python/tests/test_local_inference.py
+++ b/clients/python/tests/test_local_inference.py
@@ -109,6 +109,7 @@ def test_invalid_caption_length(model, test_image):
 def test_invalid_model_path():
     with pytest.raises(
-        ValueError, match="Model path is invalid or file does not exist"
+        ValueError,
+        match="Unsupported model filetype.
Please use a .safetensors model for GPU use or .mf model for CPU use.",
     ):
         md.vl(model="invalid/path/to/model.bin")
diff --git a/clients/python/tests/test_local_torch_inference.py b/clients/python/tests/test_local_torch_inference.py
new file mode 100644
index 00000000..d42f0b76
--- /dev/null
+++ b/clients/python/tests/test_local_torch_inference.py
@@ -0,0 +1,84 @@
+import os
+import pytest
+from PIL import Image
+import moondream as md
+
+MODEL_PATH = os.environ.get("MOONDREAM_MODEL_PATH", "moondream-01-08-2025.safetensors")  # weights path; env var override instead of a hardcoded developer path
+TEST_IMAGE_PATH = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
+    "assets",
+    "demo-1.jpg",
+)
+
+
+@pytest.fixture
+def model():
+    return md.vl(model=MODEL_PATH)
+
+
+@pytest.fixture
+def test_image():
+    return Image.open(TEST_IMAGE_PATH)
+
+
+def test_image_captioning(model, test_image):
+    # Test normal length caption
+    result = model.caption(test_image, length="normal")
+    assert "caption" in result
+    assert isinstance(result["caption"], str)
+    assert len(result["caption"]) > 0
+
+    # Test short length caption
+    result = model.caption(test_image, length="short")
+    assert "caption" in result
+    assert isinstance(result["caption"], str)
+    assert len(result["caption"]) > 0
+
+    # Test streaming caption
+    result = model.caption(test_image, stream=True)
+    assert "caption" in result
+
+    # Test that we can iterate over the stream
+    num_chunks = 0
+    caption = ""
+    for chunk in result["caption"]:
+        assert isinstance(chunk, str)
+        caption += chunk
+        num_chunks += 1
+
+    assert len(caption) > 0
+    assert num_chunks > 1
+
+
+def test_query(model, test_image):
+    result = model.query(test_image, "What is in this image?")
+    assert "answer" in result
+    assert isinstance(result["answer"], str)
+    assert len(result["answer"]) > 0
+
+    # Test streaming query
+    result = model.query(test_image, "What is in this image?", stream=True)
+    assert "answer" in result
+
+    # Test that we can iterate over the stream
+    num_chunks = 0
+    answer = ""
+    for chunk in result["answer"]:
+        assert isinstance(chunk, str)
+        answer += chunk
+        num_chunks += 1
+
+    assert num_chunks > 1
+    assert len(answer) > 0
+
+
+def test_detect(model, test_image):
+    result = model.detect(test_image, "person")
+    assert "objects" in result
+    assert isinstance(result["objects"], list)
+
+
+def test_point(model, test_image):
+    result = model.point(test_image, "face")
+    assert "points" in result
+    assert isinstance(result["points"], list)
diff --git a/moondream/torch/sample.py b/moondream/torch/sample.py
index 54ca4133..327356f6 100644
--- a/moondream/torch/sample.py
+++ b/moondream/torch/sample.py
@@ -45,7 +45,6 @@
     if not args.benchmark:
         encoded_image = model.encode_image(image)
 
-        print("Caption: short")
         for t in model.caption(encoded_image, "short", stream=True)["caption"]:
             print(t, end="", flush=True)
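
Usage sketch of the dispatch this patch adds in `clients/python/moondream/__init__.py`: `md.vl()` now routes `.safetensors` paths to the new torch backend (`TorchVL`, which selects CUDA, MPS, or CPU automatically) and `.mf` paths to the existing ONNX backend (`OnnxVL`). This is a minimal sketch; the weight and image file names below are placeholders, not files shipped with the package.

```python
import moondream as md
from PIL import Image

# A .safetensors path loads the torch backend added in this patch;
# TorchVL moves the model to cuda, mps, or cpu on its own. (Placeholder file name.)
torch_model = md.vl(model="moondream-01-08-2025.safetensors")

# A .mf path keeps the existing ONNX CPU backend. (Placeholder file name.)
onnx_model = md.vl(model="moondream-latest.mf")

image = Image.open("demo-1.jpg")

# Non-streaming calls return the finished string.
print(torch_model.caption(image, length="short")["caption"])
print(torch_model.query(image, "What is in this image?")["answer"])

# stream=True yields the output incrementally, as the new tests exercise.
for chunk in torch_model.caption(image, stream=True)["caption"]:
    print(chunk, end="", flush=True)

# Any other extension raises ValueError("Unsupported model filetype. ...").
```

The same calls work against either backend, since both `TorchVL` and `OnnxVL` implement the shared `VLM` interface.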