Merge branch 'develop'

pythonlessons · Mar 21, 2024 · 4bc9edc · 4bc9edc
2 parents 42aa5c8 + b4181cc
commit 4bc9edc
Show file tree

Hide file tree

Showing 10 changed files with 170 additions and 29 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,12 @@
+## [1.2.4] - 2024-03-21
+### Added
+- Added `RandomElasticTransform` to `mltu.augmentors` to work with `Image` objects
+- Added `xyxy_abs` to `mltu.annotations.detections.Detection` object to return absolute bounding boxes
+
+### Changes
+- Changed `ImageShowCV2` transformer in `mltu.transformers` to display images when running with multiple threads
+
+
 ## [1.2.3] - 2024-03-17
 ### Added
 - Added Tutorial how to run YOLOv8 pretrained Object Detection model `Tutorials.11_Yolov8.README.md`

diff --git a/Tutorials/02_captcha_to_text/train.py b/Tutorials/02_captcha_to_text/train.py
@@ -86,11 +86,11 @@ def download_and_unzip(url, extract_to="Datasets"):
 os.makedirs(configs.model_path, exist_ok=True)
 
 # Define callbacks
-earlystopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1)
+earlystopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1, mode="min")
 checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
 trainLogger = TrainLogger(configs.model_path)
 tb_callback = TensorBoard(f"{configs.model_path}/logs", update_freq=1)
-reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode="auto")
+reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode="min")
 model2onnx = Model2onnx(f"{configs.model_path}/model.h5")
 
 # Train the model

diff --git a/Tutorials/11_Yolov8/README.md b/Tutorials/11_Yolov8/README.md
@@ -105,7 +105,6 @@ onnx.save(onnx_model, "yolov8m.onnx")
 ## Run the YOLOv8 ONNX model with ONNX Runtime:
 ```python
 import cv2
-from ultralytics.engine.model import Model as BaseModel
 from mltu.torch.yolo.detectors.onnx_detector import Detector as OnnxDetector
 
 input_width, input_height = 640, 640

diff --git a/mltu/__init__.py b/mltu/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "1.2.3"
+__version__ = "1.2.4"
 
 from .annotations.images import Image
 from .annotations.images import CVImage

diff --git a/mltu/annotations/detections.py b/mltu/annotations/detections.py
@@ -82,6 +82,10 @@ def xywh(self, xywh: np.ndarray):
     @property
     def xyxy(self):
         return self._xyxy
+
+    @property
+    def xyxy_abs(self):
+        return (self.xyxy * np.array([self.width, self.height, self.width, self.height])).astype(int)
 
     @staticmethod
     def xywh2xyxy(xywh: np.ndarray):
@@ -275,6 +279,9 @@ def validate(self):
         if isinstance(self.labels, list):
             self.labels = {i: label for i, label in enumerate(self.labels)}
 
+        if not self.labels:
+            self.labels = {k: v for k, v in enumerate(sorted(set([detection.label for detection in self.detections])))}
+
     def applyToFrame(self, image: np.ndarray, **kwargs: dict) -> np.ndarray:
         """ Draw the detections on the image """
         for detection in self.detections:

diff --git a/mltu/augmentors.py b/mltu/augmentors.py
@@ -22,6 +22,7 @@
 - RandomMosaic
 - RandomZoom
 - RandomColorMode
+- RandomElasticTransform
 
 Implemented audio augmentors:
 - RandomAudioNoise
@@ -494,14 +495,14 @@ def __init__(
         self, 
         random_chance: float = 0.5,
         log_level: int = logging.INFO,
-        augment_annotation: bool = False,
+        augment_annotation: bool = True,
         ) -> None:
         """ Randomly mirror image
         
         Args:
             random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
             log_level (int): Log level for the augmentor. Defaults to logging.INFO.
-            augment_annotation (bool): Whether to augment the annotation. Defaults to False.
+            augment_annotation (bool): Whether to augment the annotation. Defaults to True.
         """
         super(RandomMirror, self).__init__(random_chance, log_level, augment_annotation)
 
@@ -534,14 +535,14 @@ def __init__(
         self, 
         random_chance: float = 0.5,
         log_level: int = logging.INFO,
-        augment_annotation: bool = False,
+        augment_annotation: bool = True,
         ) -> None:
         """ Randomly mirror image
         
         Args:
             random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
             log_level (int): Log level for the augmentor. Defaults to logging.INFO.
-            augment_annotation (bool): Whether to augment the annotation. Defaults to False.
+            augment_annotation (bool): Whether to augment the annotation. Defaults to True.
         """
         super(RandomFlip, self).__init__(random_chance, log_level, augment_annotation)
 
@@ -839,6 +840,110 @@ def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image,
         return image, annotation
 
 
+class RandomElasticTransform(Augmentor):
+    """ Randomly apply elastic transform to an image
+    
+    Attributes:
+        random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
+        alpha_range (tuple): Tuple of 2 floats, setting bounds for random alpha value. Defaults to (0, 0.1).
+        sigma_range (tuple): Tuple of 2 floats, setting bounds for random sigma value. Defaults to (0.01, 0.02).
+        log_level (int): Log level for the augmentor. Defaults to logging.INFO.
+        augment_annotation (bool): Whether to augment the annotation. Defaults to True.
+    """
+    def __init__(
+        self, 
+        random_chance: float = 0.5,
+        alpha_range: tuple = (0, 0.1),
+        sigma_range: tuple = (0.01, 0.02),
+        log_level: int = logging.INFO,
+        augment_annotation: bool = True,
+        ) -> None:
+        super(RandomElasticTransform, self).__init__(random_chance, log_level, augment_annotation)
+        self.alpha_range = alpha_range
+        self.sigma_range = sigma_range
+
+    @staticmethod
+    def elastic_transform(image: np.ndarray, alpha: float, sigma: float) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """ Apply elastic transform to an image
+
+        Args:
+            image (np.ndarray): Image to be used for elastic transform
+            alpha (float): Alpha value for elastic transform
+            sigma (float): Sigma value for elastic transform
+
+        Returns:
+            remap_fn (np.ndarray): Elastic transformed image
+            dx (np.ndarray): X-axis displacement
+            dy (np.ndarray): Y-axis displacement
+        """
+        height, width, channels = image.shape
+        dx = np.random.rand(height, width).astype(np.float32) * 2 - 1
+        dy = np.random.rand(height, width).astype(np.float32) * 2 - 1
+
+        cv2.GaussianBlur(dx, (0, 0), sigma, dst=dx)
+        cv2.GaussianBlur(dy, (0, 0), sigma, dst=dy)
+
+        dx *= alpha
+        dy *= alpha
+
+        x, y = np.meshgrid(np.arange(width), np.arange(height))
+
+        map_x = np.float32(x + dx)
+        map_y = np.float32(y + dy)
+
+        remap_fn = cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)
+
+        return remap_fn, dx, dy
+
+    @randomness_decorator
+    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
+        """ Randomly apply elastic transform to an image
+
+        Args:
+            image (Image): Image to be used for elastic transform
+            annotation (typing.Any): Annotation to be used for elastic transform
+
+        Returns:
+            image (Image): Elastic transformed image
+            annotation (typing.Any): Elastic transformed annotation if necessary
+        """
+        alpha = image.width * np.random.uniform(*self.alpha_range)
+        sigma = image.width * np.random.uniform(*self.sigma_range)
+        new_image, dx, dy = self.elastic_transform(image.numpy(), alpha, sigma)
+        image.update(new_image)
+
+        if isinstance(annotation, Detections) and self._augment_annotation:
+            detections = []
+            for detection in annotation:
+                x_min, y_min, x_max, y_max = detection.xyxy_abs
+                new_x_min = min(max(0, x_min + dx[y_min, x_min]), image.width - 1)
+                new_y_min = min(max(0, y_min + dy[y_min, x_min]), image.height - 1)
+                new_x_max = min(max(0, x_max + dx[y_max, x_max]), image.width - 1)
+                new_y_max = min(max(0, y_max + dy[y_max, x_max]), image.height - 1)
+                detections.append(
+                    Detection(
+                        [new_x_min, new_y_min, new_x_max, new_y_max],
+                        label=detection.label, 
+                        labels=detection.labels,
+                        confidence=detection.confidence, 
+                        image_path=detection.image_path, 
+                        width=image.width, 
+                        height=image.height,
+                        relative=False,
+                        bbox_type = BboxType.XYXY
+                    )
+                )
+
+            annotation = Detections(
+                labels=annotation.labels,
+                width=image.width,
+                height=image.height,
+                detections=detections
+            )
+
+        return image, annotation
+
+
 class RandomAudioNoise(Augmentor):
     """ Randomly add noise to audio
 

diff --git a/mltu/tensorflow/metrics.py b/mltu/tensorflow/metrics.py
@@ -55,7 +55,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.cer_accumulator.assign_add(tf.reduce_sum(distance))
 
         # Increment the batch_counter by the batch size
-        self.batch_counter.assign_add(len(y_true))
+        self.batch_counter.assign_add(input_shape[0])
 
         # Calculate the number of wrong words in batch and add to wer_accumulator variable
         self.wer_accumulator.assign_add(tf.reduce_sum(tf.cast(tf.not_equal(distance, 0), tf.float32)))
@@ -146,7 +146,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.cer_accumulator.assign_add(tf.reduce_sum(distance))
 
         # Increment the batch_counter by the batch size
-        self.batch_counter.assign_add(len(y_true))
+        self.batch_counter.assign_add(input_shape[0])
 
     def result(self):
         """ Computes and returns the metric result.
@@ -253,7 +253,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.wer_accumulator.assign_add(tf.reduce_sum(tf.cast(distance, tf.float32)))
 
         # Increment the batch_counter by the batch size
-        self.batch_counter.assign_add(len(y_true))
+        self.batch_counter.assign_add(input_shape[0])
 
     def result(self):
         """Computes and returns the metric result.

diff --git a/mltu/torch/yolo/annotation.py b/mltu/torch/yolo/annotation.py
@@ -10,9 +10,10 @@ class VOCAnnotationReader:
     def __init__(self, labels: dict, images_path: str=None):
         self.labels = labels
         self.images_path = images_path
+        self.dataset_found_labels = {}
 
     @staticmethod
-    def readFromVOC(voc_annotation_path: str, labels: dict, images_path: str=None) -> Detections:
+    def readFromVOC(voc_annotation_path: str, labels: dict={}, images_path: str=None) -> Detections:
         annotation_path = Path(voc_annotation_path)
         tree = ET.parse(voc_annotation_path)
         root = tree.getroot()
@@ -49,7 +50,7 @@ def readFromVOC(voc_annotation_path: str, labels: dict, images_path: str=None) -
         image_path = os.path.join(images_path, annotation_dict['filename'])
         dets = []
         for obj in annotation_dict['objects']:
-            if obj['name'] not in labels.values():
+            if labels and obj['name'] not in labels.values():
                 print(f"Label {obj['name']} not found in labels")
                 continue
 

diff --git a/mltu/torch/yolo/train_yolo.py b/mltu/torch/yolo/train_yolo.py
@@ -5,7 +5,7 @@
 from mltu.annotations.images import CVImage
 from mltu.transformers import ImageResizer, ImageShowCV2, ImageNormalizer
 from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen, \
-    RandomMirror, RandomFlip, RandomGaussianBlur, RandomSaltAndPepper, RandomDropBlock, RandomMosaic
+    RandomMirror, RandomFlip, RandomGaussianBlur, RandomSaltAndPepper, RandomDropBlock, RandomMosaic, RandomElasticTransform
 from mltu.torch.model import Model
 from mltu.torch.dataProvider import DataProvider
 from mltu.torch.yolo.annotation import VOCAnnotationReader
@@ -21,6 +21,7 @@
 
 annotations_path = "Datasets/car-plate-detection/annotations"
 
+# Create a dataset from the annotations: a list of [image path, annotation path] pairs (the image path is left as None here)
 dataset = [[None, os.path.join(annotations_path, f)] for f in os.listdir(annotations_path)]
 
 # Make sure torch can see GPU device, it is not recommended to train with CPU
@@ -49,9 +50,6 @@
     numpy=False,
 )
 
-# for b in data_provider:
-#     pass
-
 # split the dataset into train and test
 train_data_provider, val_data_provider = data_provider.split(0.9, shuffle=False)
 
@@ -62,20 +60,14 @@
     RandomSharpen(),
     RandomMirror(),
     RandomFlip(),
+    RandomElasticTransform(),
     RandomGaussianBlur(),
     RandomSaltAndPepper(),
     RandomRotate(angle=10),
     RandomDropBlock(),
     RandomMosaic(),
 ]
 
-# for batch in train_data_provider:
-#     pass
-    # print(batch)
-    # break
-
-
-
 base_model = BaseModel("yolov8n.pt")
 # Create a YOLO model
 model = DetectionModel('yolov8n.yaml', nc=len(labels))

diff --git a/mltu/transformers.py b/mltu/transformers.py
@@ -1,6 +1,9 @@
 import cv2
+import time
+import queue
 import typing
 import logging
+import threading
 import importlib
 import numpy as np
 
@@ -344,6 +347,26 @@ def __init__(
         super(ImageShowCV2, self).__init__(log_level=log_level)
         self.verbose = verbose
         self.name = name
+        self.thread_started = False
+
+    def init_thread(self):
+        if not self.thread_started:
+            self.thread_started = True
+            self.image_queue = queue.Queue()
+
+            # Start a separate thread to display the images, so that the main loop can run in multiple threads
+            self.thread = threading.Thread(target=self._display_images)
+            self.thread.start()
+
+    def _display_images(self) -> None:
+        """ Display images in a continuous loop """
+        while True:
+            image, label = self.image_queue.get()
+            if isinstance(label, Image):
+                cv2.imshow(self.name + "Label", label.numpy())
+            cv2.imshow(self.name, image.numpy())
+            cv2.waitKey(0)
+            cv2.destroyAllWindows()
 
     def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typing.Any]:
         """ Show image for visual inspection
@@ -356,6 +379,9 @@ def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typin
             data (np.ndarray): Image data
             label (np.ndarray): Label data (unchanged)
         """
+        # Start cv2 image display thread
+        self.init_thread()
+
         if self.verbose:
             if isinstance(label, (str, int, float)):
                 self.logger.info(f"Label: {label}")
@@ -365,10 +391,12 @@ def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typin
                 img = detection.applyToFrame(np.asarray(image.numpy()))
                 image.update(img)
 
-        cv2.imshow(self.name, image.numpy())
-        if isinstance(label, Image):
-            cv2.imshow(self.name+"Label", label.numpy())
-        cv2.waitKey(0)
-        cv2.destroyAllWindows()
+        # Wait until the display thread has taken the previous image,
+        # polling every 0.5 s while the queue is not empty
+        while not self.image_queue.empty():
+            time.sleep(0.5)
+
+        # Add image to display queue
+        self.image_queue.put((image, label))
 
         return image, label