albumentations-team · ternaus · May 16, 2024 · May 16, 2024 · May 16, 2024 · May 16, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -53,7 +53,7 @@ repos:
   #   hooks:
   #     - id: markdownlint
   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "2.0.3"
+    rev: "2.1.1"
     hooks:
       - id: pyproject-fmt
         additional_dependencies: ["tomli"]

diff --git a/README.md b/README.md
@@ -54,7 +54,7 @@ multiplied_image = multiply(img, multiplier)
 
 Albucore provides significant performance improvements for image processing tasks. Here are some benchmark results comparing Albucore with OpenCV and Numpy:
 
-For more detailed benchmark results, including other configurations and data types, refer to the [Benchmark.md](Benchmark.md) in the repository.
+For more detailed benchmark results, including other configurations and data types, refer to the [benchmark results](benchmark/results/) directory in the repository.
 
 ## License
 

diff --git a/albucore/__init__.py b/albucore/__init__.py
@@ -1,3 +1,3 @@
-__version__ = "0.0.2"
+__version__ = "0.0.3"
 
 from .functions import *
diff --git a/albucore/functions.py b/albucore/functions.py
@@ -4,55 +4,52 @@
 import numpy as np
 
 from albucore.utils import (
+    MAX_OPENCV_WORKING_CHANNELS,
     MAX_VALUES_BY_DTYPE,
+    NPDTYPE_TO_OPENCV_DTYPE,
     clip,
     clipped,
-    is_grayscale_image,
-    maybe_process_in_chunks,
+    get_num_channels,
     preserve_channel_dim,
 )
 
 
 @clipped
-def _multiply_non_uint8_optimized(img: np.ndarray, multiplier: Union[Sequence[float], float]) -> np.ndarray:
-    img = img.astype(np.float32)
-    return np.multiply(img, multiplier)
+def _multiply_non_uint_optimized(img: np.ndarray, multiplier: Union[Sequence[float], float]) -> np.ndarray:
+    if isinstance(multiplier, float) or get_num_channels(img) > MAX_OPENCV_WORKING_CHANNELS or img.dtype == np.uint32:
+        return np.multiply(img, multiplier)
+    return cv2.multiply(img, multiplier, dtype=NPDTYPE_TO_OPENCV_DTYPE[img.dtype])
 
 
 @preserve_channel_dim
-def _multiply_uint8_optimized(img: np.ndarray, multiplier: Union[Sequence[float], float]) -> np.ndarray:
-    max_value = MAX_VALUES_BY_DTYPE[np.uint8]
-    if isinstance(multiplier, float):
-        lut = np.arange(0, max_value + 1, dtype=np.float32)
-        lut *= multiplier
-        lut = clip(lut, np.uint8)
-        func = maybe_process_in_chunks(cv2.LUT, lut=lut)
-        return func(img)
+def _multiply_uint_optimized(img: np.ndarray, multiplier: Union[Sequence[float], float]) -> np.ndarray:
+    dtype = img.dtype
+    max_value = MAX_VALUES_BY_DTYPE[dtype]
 
-    if is_grayscale_image(img):
-        multiplier = multiplier[0]
+    if isinstance(multiplier, float):
         lut = np.arange(0, max_value + 1, dtype=np.float32)
         lut *= multiplier
-        lut = clip(lut, np.uint8)
-        func = maybe_process_in_chunks(cv2.LUT, lut=lut)
-        return func(img)
+        lut = clip(lut, dtype)
+        return cv2.LUT(img, lut)
 
     num_channels = img.shape[-1]
+
     lut = [np.arange(0, max_value + 1, dtype=np.float32)] * num_channels
     lut = np.stack(lut, axis=-1)
 
     lut *= multiplier
-    lut = clip(lut, np.uint8)
+    lut = clip(lut, dtype)
 
-    images = []
-    for i in range(num_channels):
-        func = maybe_process_in_chunks(cv2.LUT, lut=lut[:, i])
-        images.append(func(img[:, :, i]))
+    images = [cv2.LUT(img[:, :, i], lut[:, i]) for i in range(num_channels)]
     return np.stack(images, axis=-1)
 
 
 def multiply(img: np.ndarray, multiplier: Union[Sequence[float], float]) -> np.ndarray:
-    if img.dtype == np.uint8:
-        return _multiply_uint8_optimized(img, multiplier)
+    num_channels = get_num_channels(img)
+    if num_channels == 1 and isinstance(multiplier, Sequence):
+        multiplier = multiplier[0]
+
+    if img.dtype == np.uint8 and num_channels <= MAX_OPENCV_WORKING_CHANNELS:
+        return _multiply_uint_optimized(img, multiplier)
 
-    return _multiply_non_uint8_optimized(img, multiplier)
+    return _multiply_non_uint_optimized(img, multiplier)
diff --git a/albucore/utils.py b/albucore/utils.py
@@ -10,28 +10,33 @@
 FOUR = 4
 TWO = 2
 
+MAX_OPENCV_WORKING_CHANNELS = 4
+
 P = ParamSpec("P")
 
 MAX_VALUES_BY_DTYPE = {
     np.dtype("uint8"): 255,
     np.dtype("uint16"): 65535,
     np.dtype("uint32"): 4294967295,
+    np.dtype("float16"): 1.0,
     np.dtype("float32"): 1.0,
+    np.dtype("float64"): 1.0,
     np.uint8: 255,
     np.uint16: 65535,
     np.uint32: 4294967295,
+    np.float16: 1.0,
     np.float32: 1.0,
+    np.float64: 1.0,
 }
 
 NPDTYPE_TO_OPENCV_DTYPE = {
     np.uint8: cv2.CV_8U,
     np.uint16: cv2.CV_16U,
-    np.int32: cv2.CV_32S,
     np.float32: cv2.CV_32F,
     np.float64: cv2.CV_64F,
     np.dtype("uint8"): cv2.CV_8U,
     np.dtype("uint16"): cv2.CV_16U,
-    np.dtype("int32"): cv2.CV_32S,
+    np.dtype("float16"): cv2.CV_16F,
     np.dtype("float32"): cv2.CV_32F,
     np.dtype("float64"): cv2.CV_64F,
 }
@@ -57,7 +62,7 @@ def maybe_process_in_chunks(
     @wraps(process_fn)
     def __process_fn(img: np.ndarray) -> np.ndarray:
         num_channels = get_num_channels(img)
-        if num_channels > FOUR:
+        if num_channels > MAX_OPENCV_WORKING_CHANNELS:
             chunks = []
             for index in range(0, num_channels, 4):
                 if num_channels - index == TWO:

diff --git a/benchmark.sh b/benchmark.sh
@@ -1,16 +1,16 @@
 #!/bin/bash
 
 # Define the array of channel values
-channels=(1 3 7)
+channels=(1 3 5)
 
 # Define the array of image types
-types=("float32" "uint8")
+types=("uint8" "float32")
 
 # Loop over each channel
 for ch in "${channels[@]}"; do
     # Nested loop over each image type
     for type in "${types[@]}"; do
         # Command to run your program, e.g., a Python script
-        python -m benchmark.benchmark --num_channels $ch --img_type $type --markdown -n 1000 --show-std -r 10
+        python -m benchmark.benchmark --num_channels $ch --img_type $type --markdown -n 1000 --show-std -r 5
     done
 done
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
@@ -3,17 +3,18 @@
 import os
 import random
 import sys
-from collections import defaultdict
 from importlib.metadata import PackageNotFoundError, version
+from pathlib import Path
 from timeit import Timer
-from typing import Any, Dict, List, Optional
+from typing import Dict, List, Optional
 
 import cv2
 import numpy as np
 import pandas as pd
+from tqdm import tqdm
 
 import albucore
-from albucore.utils import MAX_VALUES_BY_DTYPE, NPDTYPE_TO_OPENCV_DTYPE
+from albucore.utils import MAX_VALUES_BY_DTYPE, NPDTYPE_TO_OPENCV_DTYPE, clip
 from benchmark.utils import MarkdownGenerator, format_results, get_markdown_table
 
 cv2.setNumThreads(0)
@@ -36,7 +37,13 @@ def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Augmentation libraries performance benchmark")
     parser.add_argument("-n", "--num_images", default=10, type=int, help="number of images to test")
     parser.add_argument("-c", "--num_channels", default=3, type=int, help="number of channels in the images")
-    parser.add_argument("-t", "--img_type", choices=["float32", "uint8"], type=str, help="image type for benchmarking")
+    parser.add_argument(
+        "-t",
+        "--img_type",
+        choices=["float32", "float64", "uint8", "uint16"],
+        type=str,
+        help="image type for benchmarking",
+    )
     parser.add_argument("-r", "--runs", default=5, type=int, metavar="N", help="number of runs for each benchmark")
     parser.add_argument(
         "--show-std", dest="show_std", action="store_true", help="show standard deviation for benchmark runs"
@@ -68,10 +75,10 @@ def albucore(self, img: np.ndarray) -> np.ndarray:
         return self.albucore_transform(img)
 
     def opencv(self, img: np.ndarray) -> np.ndarray:
-        return self.opencv_transform(img)
+        return clip(self.opencv_transform(img), img.dtype)
 
     def numpy(self, img: np.ndarray) -> np.ndarray:
-        return self.numpy_transform(img)
+        return clip(self.numpy_transform(img), img.dtype)
 
     def is_supported_by(self, library: str) -> bool:
         library_attr_map = {"albucore": "albucore_transform", "opencv": "opencv_transform", "numpy": "numpy_transform"}
@@ -109,8 +116,7 @@ def albucore_transform(self, img: np.ndarray) -> np.ndarray:
         return albucore.multiply(img, self.multiplier)
 
     def numpy_transform(self, img: np.ndarray) -> np.ndarray:
-        result = img * self.multiplier
-        return np.clip(result, 0, MAX_VALUES_BY_DTYPE[img.dtype]).astype(img.dtype)
+        return img * self.multiplier
 
     def opencv_transform(self, img: np.ndarray) -> Optional[np.ndarray]:
         return cv2.multiply(img, self.multiplier, dtype=NPDTYPE_TO_OPENCV_DTYPE[img.dtype])
@@ -124,60 +130,70 @@ def albucore_transform(self, img: np.ndarray) -> np.ndarray:
     def numpy_transform(self, img: np.ndarray) -> np.ndarray:
         multiplier = np.array([1.5] * self.num_channels)
 
-        result = img * multiplier
-        return np.clip(result, 0, MAX_VALUES_BY_DTYPE[img.dtype]).astype(img.dtype)
+        return img * multiplier
 
     def opencv_transform(self, img: np.ndarray) -> np.ndarray:
         multiplier = np.array([1.5] * self.num_channels)
         return cv2.multiply(img, multiplier, dtype=NPDTYPE_TO_OPENCV_DTYPE[img.dtype])
 
 
+def get_images(num_images: int, height: int, width: int, num_channels: int, dtype: str) -> List[np.ndarray]:
+    if dtype in {"float32", "float64"}:
+        return [rng.random((height, width, num_channels), dtype=np.dtype(dtype)) for _ in range(num_images)]
+    if dtype in {"uint8", "uint16"}:
+        return [
+            rng.integers(0, MAX_VALUES_BY_DTYPE[np.dtype(dtype)] + 1, (height, width, num_channels), dtype=dtype)
+            for _ in range(num_images)
+        ]
+    raise ValueError(f"Invalid image type {dtype}")
+
+
 def main() -> None:
     args = parse_args()
     package_versions = get_package_versions()
 
     num_channels = args.num_channels
     num_images = args.num_images
 
-    height, width = 256, 256
+    height, width = 512, 512
 
     if args.print_package_versions:
         print(get_markdown_table(package_versions))
 
-    images_per_second: Dict[str, Dict[str, Any]] = defaultdict(dict)
-
-    if args.img_type == "float32":
-        # Using the new Generator to create float32 images
-        imgs = [rng.random((height, width, num_channels), dtype=np.float32) for _ in range(num_images)]
-    elif args.img_type == "uint8":
-        # Using the new Generator to create uint8 images
-        imgs = [rng.integers(0, 256, (height, width, num_channels), dtype=np.uint8) for _ in range(num_images)]
-    else:
-        raise ValueError("Invalid image type")
+    imgs = get_images(num_images, height, width, num_channels, args.img_type)
 
     benchmark_class_names = [MultiplyConstant, MultiplyVector]
 
     libraries = DEFAULT_BENCHMARKING_LIBRARIES
 
-    for benchmark_class in benchmark_class_names:
-        shuffled_libraries = copy.deepcopy(libraries)
-        random.shuffle(shuffled_libraries)
+    images_per_second = {lib: {} for lib in libraries}
+    to_skip = {lib: {} for lib in libraries}
 
+    for benchmark_class in tqdm(benchmark_class_names, desc="Running benchmarks"):
         benchmark = benchmark_class(num_channels)
 
-        for library in shuffled_libraries:
-            if benchmark.is_supported_by(library):
-                timer = Timer(lambda: benchmark.run(library, imgs))
-                try:
-                    run_times = timer.repeat(number=1, repeat=args.runs)
-                    benchmark_images_per_second = [1 / (run_time / num_images) for run_time in run_times]
-                except Exception as e:
-                    print(f"Error running benchmark for {library}: {e!s}")
-                    benchmark_images_per_second = None  # Handling cases where `run` returns None
-            else:
-                benchmark_images_per_second = None
-
-            images_per_second[library][str(benchmark)] = benchmark_images_per_second
+        for library in libraries:
+            images_per_second[library][str(benchmark)] = []
+
+        for _ in range(args.runs):
+            shuffled_libraries = copy.deepcopy(libraries)
+            random.shuffle(shuffled_libraries)
+
+            for library in shuffled_libraries:
+                if benchmark.is_supported_by(library) and not to_skip[library].get(str(benchmark), False):
+                    timer = Timer(lambda lib=library: benchmark.run(lib, imgs))
+                    try:
+                        run_times = timer.repeat(number=1, repeat=1)
+                        benchmark_images_per_second = [1 / (run_time / num_images) for run_time in run_times]
+                    except Exception as e:
+                        print(f"Error running benchmark for {library}: {e}")
+                        # Record a single None for this run (appended once below) and skip this library on later runs.
+                        benchmark_images_per_second = [None]
+                        to_skip[library][str(benchmark)] = True
+                else:
+                    benchmark_images_per_second = [None]
+
+                images_per_second[library][str(benchmark)].extend(benchmark_images_per_second)
 
     pd.set_option("display.width", 1000)
     df = pd.DataFrame.from_dict(images_per_second)
@@ -189,14 +205,12 @@ def main() -> None:
     df = df[DEFAULT_BENCHMARKING_LIBRARIES]
 
     if args.markdown:
-        print()
-        print(
-            f"## Benchmark results for {num_images} images of {args.img_type} "
-            f"type with ({height}, {width}, {num_channels})"
-        )
-        print()
+        results_dir = Path(__file__).parent / "results"
+        results_dir.mkdir(parents=True, exist_ok=True)
+        file_path = results_dir / f"{args.img_type}_{num_channels}.md"
         markdown_generator = MarkdownGenerator(df, package_versions, num_images)
-        markdown_generator.print_markdown_table()
+        markdown_generator.save_markdown_table(file_path)
+        print(f"Benchmark results saved to {file_path}")
 
 
 if __name__ == "__main__":

diff --git a/benchmark/results/float32_1.md b/benchmark/results/float32_1.md
@@ -0,0 +1,4 @@
+|                |albucore<br><small>0.0.1</small>|opencv<br><small>4.9.0.80</small>|numpy<br><small>1.24.4</small>|
+|----------------|--------------------------------|---------------------------------|------------------------------|
+|MultiplyConstant|**562 ± 27**                    |**517 ± 63**                     |**580 ± 37**                  |
+|MultiplyVector  |**466 ± 16**                    |**470 ± 44**                     |**486 ± 28**                  |
diff --git a/benchmark/results/float32_3.md b/benchmark/results/float32_3.md
@@ -0,0 +1,4 @@
+|                |albucore<br><small>0.0.1</small>|opencv<br><small>4.9.0.80</small>|numpy<br><small>1.24.4</small>|
+|----------------|--------------------------------|---------------------------------|------------------------------|
+|MultiplyConstant|**759 ± 134**                   |497 ± 101                        |**716 ± 244**                 |
+|MultiplyVector  |**508 ± 93**                    |**543 ± 99**                     |264 ± 12                      |
diff --git a/benchmark/results/float32_5.md b/benchmark/results/float32_5.md
@@ -0,0 +1,4 @@
+|                |albucore<br><small>0.0.1</small>|opencv<br><small>4.9.0.80</small>|numpy<br><small>1.24.4</small>|
+|----------------|--------------------------------|---------------------------------|------------------------------|
+|MultiplyConstant|**208 ± 60**                    |-                                |**221 ± 29**                  |
+|MultiplyVector  |**59 ± 55**                     |-                                |**56 ± 46**                   |
diff --git a/benchmark/results/uint8_1.md b/benchmark/results/uint8_1.md
@@ -0,0 +1,4 @@
+|                |albucore<br><small>0.0.1</small>|opencv<br><small>4.9.0.80</small>|numpy<br><small>1.24.4</small>|
+|----------------|--------------------------------|---------------------------------|------------------------------|
+|MultiplyConstant|**3082 ± 1030**                 |1471 ± 219                       |1203 ± 363                    |
+|MultiplyVector  |**2763 ± 385**                  |1449 ± 225                       |1204 ± 108                    |
diff --git a/benchmark/results/uint8_3.md b/benchmark/results/uint8_3.md
@@ -0,0 +1,4 @@
+|                |albucore<br><small>0.0.1</small>|opencv<br><small>4.9.0.80</small>|numpy<br><small>1.24.4</small>|
+|----------------|--------------------------------|---------------------------------|------------------------------|
+|MultiplyConstant|**504 ± 51**                    |331 ± 60                         |274 ± 28                      |
+|MultiplyVector  |**859 ± 350**                   |**770 ± 201**                    |283 ± 82                      |
diff --git a/benchmark/results/uint8_5.md b/benchmark/results/uint8_5.md
@@ -0,0 +1,4 @@
+|                |albucore<br><small>0.0.1</small>|opencv<br><small>4.9.0.80</small>|numpy<br><small>1.24.4</small>|
+|----------------|--------------------------------|---------------------------------|------------------------------|
+|MultiplyConstant|**318 ± 123**                   |-                                |**333 ± 110**                 |
+|MultiplyVector  |**175 ± 52**                    |-                                |**160 ± 46**                  |