Merge branch 'develop' into zm/fix-dataset-transforms

zhiltsov-max · web-flow · commit b7d527efc45a · 2024-06-05T19:31:56.000+03:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -62,13 +62,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/cvat-ai/datumaro/pull/17>)
 - Annotation matching algorithm in `datumaro.components.operations.match_segments()`
   (<https://github.com/cvat-ai/datumaro/pull/30>)
+- Automatic detection of the `is_crowd` parameter is disabled in
+  `segment_iou()`; a separate function argument is added instead
+  (turned off by default)
+  (<https://github.com/cvat-ai/datumaro/pull/41>)
 
 ### Deprecated
 - `--save-images` is replaced with `--save-media` in CLI and converter API
   (<https://github.com/openvinotoolkit/datumaro/pull/539>)
 - \[API\] `image`, `point_cloud` and `related_images` of `DatasetItem` are
   replaced with `media` and `media_as(type)` members and c-tor parameters
   (<https://github.com/openvinotoolkit/datumaro/pull/539>)
+- \[API\] `datumaro.util.annotation_util._get_bbox()` is renamed to `get_bbox()`
+  (<https://github.com/cvat-ai/datumaro/pull/41>)
 
 ### Removed
 - TBD
@@ -86,6 +92,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/cvat-ai/datumaro/pull/34>)
 - Added missing `PointCloud` media type in the datumaro module namespace
   (<https://github.com/cvat-ai/datumaro/pull/34>)
+- Incorrect computation of binary mask bbox (missed 1 pixel of the size)
+  (<https://github.com/cvat-ai/datumaro/pull/41>)
 - `Dataset.get()` could ignore existing transforms in the dataset
   (<https://github.com/cvat-ai/datumaro/pull/45>)
 
diff --git a/datumaro/util/annotation_util.py b/datumaro/util/annotation_util.py
@@ -1,18 +1,34 @@
 # Copyright (C) 2020-2021 Intel Corporation
+# Copyright (C) 2024 CVAT.ai Corporation
 #
 # SPDX-License-Identifier: MIT
 
+import warnings
 from itertools import groupby
 from typing import Callable, Dict, Iterable, NewType, Optional, Sequence, Tuple, Union
 
 import numpy as np
 from typing_extensions import Literal
 
-from datumaro.components.annotation import AnnotationType, LabelCategories, Mask, RleMask, _Shape
+from datumaro.components.annotation import (
+    Annotation,
+    AnnotationType,
+    LabelCategories,
+    Mask,
+    RleMask,
+    _Shape,
+)
 from datumaro.util.mask_tools import mask_to_rle
 
+BboxCoords = Tuple[float, float, float, float]
+"A tuple of bounding box coordinates, (x, y, w, h)"
+
+Shape = NewType("Shape", _Shape)
+
+SpatialAnnotation = Union[Shape, Mask]
 
-def find_instances(instance_anns):
+
+def find_instances(instance_anns: Sequence[Annotation]) -> Sequence[Sequence[Annotation]]:
     instance_anns = sorted(instance_anns, key=lambda a: a.group)
     ann_groups = []
     for g_id, group in groupby(instance_anns, lambda a: a.group):
@@ -24,22 +40,22 @@ def find_instances(instance_anns):
     return ann_groups
 
 
-def find_group_leader(group):
+def find_group_leader(group: Sequence[SpatialAnnotation]) -> SpatialAnnotation:
     return max(group, key=lambda x: x.get_area())
 
 
-BboxCoords = Tuple[float, float, float, float]
-Shape = NewType("Shape", _Shape)
-SpatialAnnotation = Union[Shape, Mask]
-
+def get_bbox(ann: Union[Sequence, BboxCoords, SpatialAnnotation]) -> BboxCoords:
+    "A utility function to get a bbox of the bbox-like annotation"
 
-def _get_bbox(ann: Union[Sequence, SpatialAnnotation]) -> BboxCoords:
-    if isinstance(ann, (_Shape, Mask)):
+    if hasattr(ann, "get_bbox"):
         return ann.get_bbox()
     elif hasattr(ann, "__len__") and len(ann) == 4:
         return ann
     else:
-        raise ValueError("The value of type '%s' can't be treated as a " "bounding box" % type(ann))
+        raise ValueError("The value of type '%s' can't be treated as a bounding box" % type(ann))
+
+
+_deprecated_get_bbox = get_bbox  # backward compatibility
 
 
 def max_bbox(annotations: Iterable[Union[BboxCoords, SpatialAnnotation]]) -> BboxCoords:
@@ -50,7 +66,7 @@ def max_bbox(annotations: Iterable[Union[BboxCoords, SpatialAnnotation]]) -> Bbo
       bbox (tuple): (x, y, w, h)
     """
 
-    boxes = [_get_bbox(ann) for ann in annotations]
+    boxes = [get_bbox(ann) for ann in annotations]
     x0 = min((b[0] for b in boxes), default=0)
     y0 = min((b[1] for b in boxes), default=0)
     x1 = max((b[0] + b[2] for b in boxes), default=0)
@@ -67,7 +83,7 @@ def mean_bbox(annotations: Iterable[Union[BboxCoords, SpatialAnnotation]]) -> Bb
     """
 
     le = len(annotations)
-    boxes = [_get_bbox(ann) for ann in annotations]
+    boxes = [get_bbox(ann) for ann in annotations]
     mlb = sum(b[0] for b in boxes) / le
     mtb = sum(b[1] for b in boxes) / le
     mrb = sum(b[0] + b[2] for b in boxes) / le
@@ -101,12 +117,15 @@ def nms(segments, iou_thresh=0.5):
     return predictions
 
 
-def bbox_iou(a, b) -> Union[Literal[-1], float]:
+def bbox_iou(
+    a: Union[SpatialAnnotation, BboxCoords],
+    b: Union[SpatialAnnotation, BboxCoords],
+) -> Union[Literal[-1], float]:
     """
     IoU computations for simple cases with bounding boxes
     """
-    bbox_a = _get_bbox(a)
-    bbox_b = _get_bbox(b)
+    bbox_a = get_bbox(a)
+    bbox_b = get_bbox(b)
 
     aX, aY, aW, aH = bbox_a
     bX, bY, bW, bH = bbox_b
@@ -127,23 +146,39 @@ def bbox_iou(a, b) -> Union[Literal[-1], float]:
     return intersection / union
 
 
-def segment_iou(a, b):
+def segment_iou(
+    gt_ann: SpatialAnnotation,
+    ds_ann: SpatialAnnotation,
+    *,
+    is_crowd: Union[bool, str] = False,
+) -> float:
     """
     Generic IoU computation with masks, polygons, and boxes.
-    Returns -1 if no intersection, [0; 1] otherwise
+
+    Parameters:
+        is_crowd - bool or GT annotation attribute name - if true, consider
+            the GT annotation a crowd, so that the DS annotation is excluded
+            from the denominator of the IoU formula, i.e. it becomes I / GT area.
+            This is useful if you want to check a specific object to be within a crowd,
+            where the crowd of objects is annotated by a single GT mask.
+
+    Returns: -1 if no intersection, [0; 1] otherwise
     """
     from pycocotools import mask as mask_utils
 
-    a_bbox = list(a.get_bbox())
-    b_bbox = list(b.get_bbox())
+    gt_bbox = list(gt_ann.get_bbox())
+    ds_bbox = list(ds_ann.get_bbox())
+
+    if isinstance(is_crowd, str):
+        is_crowd = gt_ann.attributes.get(is_crowd, False) is True
 
-    is_bbox = AnnotationType.bbox in [a.type, b.type]
+    is_bbox = AnnotationType.bbox in [gt_ann.type, ds_ann.type]
     if is_bbox:
-        a = [a_bbox]
-        b = [b_bbox]
+        gt_ann = [gt_bbox]
+        ds_ann = [ds_bbox]
     else:
-        w = max(a_bbox[0] + a_bbox[2], b_bbox[0] + b_bbox[2])
-        h = max(a_bbox[1] + a_bbox[3], b_bbox[1] + b_bbox[3])
+        w = max(gt_bbox[0] + gt_bbox[2], ds_bbox[0] + ds_bbox[2])
+        h = max(gt_bbox[1] + gt_bbox[3], ds_bbox[1] + ds_bbox[3])
 
         def _to_rle(ann):
             if ann.type == AnnotationType.polygon:
@@ -153,11 +188,12 @@ def _to_rle(ann):
             elif ann.type == AnnotationType.mask:
                 return mask_utils.frPyObjects([mask_to_rle(ann.image)], h, w)
             else:
-                raise TypeError("Unexpected arguments: %s, %s" % (a, b))
+                raise TypeError("Unexpected arguments: %s, %s" % (gt_ann, ds_ann))
 
-        a = _to_rle(a)
-        b = _to_rle(b)
-    return float(mask_utils.iou(a, b, [not is_bbox]).item())
+        gt_ann = _to_rle(gt_ann)
+        ds_ann = _to_rle(ds_ann)
+
+    return float(mask_utils.iou(gt_ann, ds_ann, [is_crowd]).item())
 
 
 def PDJ(a, b, eps=None, ratio=0.05, bbox=None):
@@ -270,7 +306,7 @@ def make_label_id_mapping(
     Returns:
 
     |   map_id (callable): src id -> dst id
-    |   id_mapping (dict): src id -> dst i
+    |   id_mapping (dict): src id -> dst id
     |   src_labels (dict): src id -> src label
     |   dst_labels (dict): dst id -> dst label
     """
@@ -286,3 +322,13 @@ def map_id(src_id):
         return id_mapping.get(src_id, fallback)
 
     return map_id, id_mapping, source_labels, target_labels
+
+
+def __getattr__(name: str):
+    if name == "_get_bbox":  # compare by value: "is" on a str literal tests object identity
+        warnings.warn(
+            "_get_bbox() is deprecated, please use get_bbox() instead", category=DeprecationWarning
+        )
+        return _deprecated_get_bbox
+    # Only reached for names missing from the module, so report them as missing (PEP 562)
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py
@@ -367,9 +367,13 @@ def rles_to_mask(rles: Sequence[Union[CompressedRle, Polygon]], width, height) -
 def find_mask_bbox(mask: BinaryMask) -> BboxCoords:
     cols = np.any(mask, axis=0)
     rows = np.any(mask, axis=1)
+    has_pixels = np.any(cols)
+    if not has_pixels:
+        return BboxCoords(0, 0, 0, 0)
+
     x0, x1 = np.where(cols)[0][[0, -1]]
     y0, y1 = np.where(rows)[0][[0, -1]]
-    return BboxCoords(x0, y0, x1 - x0, y1 - y0)
+    return BboxCoords(x0, y0, x1 - x0 + 1, y1 - y0 + 1)
 
 
 def merge_masks(
diff --git a/tests/test_annotation_util.py b/tests/test_annotation_util.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2024 CVAT.ai Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import numpy as np
+import pytest
+
+from datumaro.components.annotation import Bbox, Mask, Polygon
+from datumaro.util.annotation_util import SpatialAnnotation, get_bbox, segment_iou
+
+from .requirements import Requirements, mark_requirement
+
+
+class SegmentIouTest:
+    @pytest.mark.parametrize(
+        "a, b, expected_iou",
+        [
+            (Bbox(0, 0, 2, 2), Bbox(0, 0, 2, 1), 0.5),  # nested
+            (Bbox(0, 0, 2, 2), Bbox(1, 0, 2, 2), 1 / 3),  # partially intersecting
+            (Bbox(0, 0, 2, 2), Polygon([0, 0, 0, 1, 1, 1, 1, 0]), 0.25),
+            (Polygon([0, 0, 0, 2, 2, 2, 2, 0]), Polygon([1, 0, 3, 0, 3, 2, 1, 2]), 1 / 3),
+            (Bbox(0, 0, 2, 2), Mask(np.array([[0, 1, 1], [0, 1, 1]])), 1 / 3),
+            (Mask(np.array([[1, 1, 0], [1, 1, 0]])), Mask(np.array([[0, 1, 1], [0, 1, 1]])), 1 / 3),
+            (Polygon([0, 0, 0, 2, 2, 2, 2, 0]), Mask(np.array([[0, 1, 1], [0, 1, 1]])), 1 / 3),
+        ],
+    )
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_segment_iou_can_match_shapes(
+        self, a: SpatialAnnotation, b: SpatialAnnotation, expected_iou: float
+    ):
+        assert expected_iou == segment_iou(a, b)
+
+    @pytest.mark.parametrize(
+        "a, b, expected_iou",
+        [
+            (Bbox(0, 0, 2, 2), Bbox(0, 0, 2, 1), 0.5),  # nested
+            (Bbox(0, 0, 2, 2), Bbox(1, 0, 2, 2), 0.5),  # partially intersecting
+            (Bbox(0, 0, 2, 2), Polygon([0, 0, 0, 1, 1, 1, 1, 0]), 0.25),
+            (Polygon([0, 0, 0, 2, 2, 2, 2, 0]), Polygon([1, 0, 3, 0, 3, 2, 1, 2]), 0.5),
+            (Bbox(0, 0, 2, 2), Mask(np.array([[0, 1, 1], [0, 1, 1]])), 0.5),
+            (Mask(np.array([[1, 1, 0], [1, 1, 0]])), Mask(np.array([[0, 1, 1], [0, 1, 1]])), 0.5),
+            (Polygon([0, 0, 0, 2, 2, 2, 2, 0]), Mask(np.array([[0, 1, 1], [0, 1, 1]])), 0.5),
+        ],
+    )
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_segment_iou_can_match_shapes_as_crowd(
+        self, a: SpatialAnnotation, b: SpatialAnnotation, expected_iou: float
+    ):
+        # In this mode, intersection is divided by the GT object area
+        assert expected_iou == segment_iou(a, b, is_crowd=True)
+
+    @pytest.mark.parametrize(
+        "a, b, expected_iou",
+        [
+            (Bbox(0, 0, 2, 2, attributes={"is_crowd": True}), Bbox(1, 0, 2, 2), 0.5),
+        ],
+    )
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_segment_iou_can_get_is_crowd_from_attribute(
+        self, a: SpatialAnnotation, b: SpatialAnnotation, expected_iou: float
+    ):
+        # In this mode, intersection is divided by the GT object area
+        assert expected_iou == segment_iou(a, b, is_crowd="is_crowd")
+
+
+@pytest.mark.parametrize(
+    "obj, expected_bbox",
+    [
+        ((0, 1, 3, 4), (0, 1, 3, 4)),
+        (Bbox(0, 0, 2, 2), (0, 0, 2, 2)),
+        (Polygon([0, 0, 0, 1, 1, 1, 1, 0]), (0, 0, 1, 1)),  # polygons don't include the last pixel
+        (Polygon([1, 0, 3, 0, 3, 2, 1, 2]), (1, 0, 2, 2)),
+        (Mask(np.array([[0, 1, 1], [0, 1, 1]])), (1, 0, 2, 2)),
+    ],
+)
+@mark_requirement(Requirements.DATUM_GENERAL_REQ)
+def test_can_get_bbox(obj, expected_bbox):
+    assert expected_bbox == tuple(get_bbox(obj))
diff --git a/tests/test_masks.py b/tests/test_masks.py
@@ -1,9 +1,11 @@
 from unittest import TestCase
 
 import numpy as np
+import pytest
 
 import datumaro.util.mask_tools as mask_tools
 from datumaro.components.annotation import CompiledMask
+from datumaro.util.annotation_util import BboxCoords
 
 from .requirements import Requirements, mark_requirement
 
@@ -461,3 +463,16 @@ def test_can_decode_compiled_mask(self):
         labels = compiled_mask.get_instance_labels()
 
         self.assertEqual({instance_idx: class_idx}, labels)
+
+
+class MaskTest:
+    @pytest.mark.parametrize(
+        "mask, expected_bbox",
+        [
+            (np.array([[0, 1, 1], [0, 1, 1]]), [1, 0, 2, 2]),
+            (np.array([[0, 0, 0], [0, 0, 0]]), [0, 0, 0, 0]),
+        ],
+    )
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_find_mask_bbox(self, mask: mask_tools.BinaryMask, expected_bbox: BboxCoords):
+        assert tuple(expected_bbox) == mask_tools.find_mask_bbox(mask)
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
@@ -352,7 +352,7 @@ def test_shapes_to_boxes(self):
                     id=1,
                     media=Image(data=np.zeros((5, 5, 3))),
                     annotations=[
-                        Bbox(0, 0, 4, 4, id=1),
+                        Bbox(0, 0, 5, 5, id=1),
                         Bbox(1, 1, 3, 3, id=2),
                         Bbox(1, 1, 1, 1, id=3),
                         Bbox(2, 2, 2, 2, id=4),
diff --git a/tests/test_validator.py b/tests/test_validator.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2021 Intel Corporation
+# Copyright (C) 2024 CVAT.ai Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -967,17 +968,17 @@ def test_validate_annotations_segmentation(self):
             report_types = [r["anomaly_type"] for r in actual_reports]
             count_by_type = Counter(report_types)
 
-            self.assertEqual(len(actual_reports), 24)
+            self.assertEqual(len(actual_reports), 25)
             self.assertEqual(count_by_type["ImbalancedDistInLabel"], 0)
-            self.assertEqual(count_by_type["ImbalancedDistInAttribute"], 13)
+            self.assertEqual(count_by_type["ImbalancedDistInAttribute"], 14)
             self.assertEqual(count_by_type["MissingAnnotation"], 1)
             self.assertEqual(count_by_type["UndefinedLabel"], 2)
             self.assertEqual(count_by_type["FewSamplesInAttribute"], 4)
             self.assertEqual(count_by_type["UndefinedAttribute"], 4)
 
         with self.subTest("Test of summary", i=2):
             actual_summary = actual_results["summary"]
-            expected_summary = {"errors": 6, "warnings": 18}
+            expected_summary = {"errors": 6, "warnings": 19}
 
             self.assertEqual(actual_summary, expected_summary)