Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
pythonlessons committed Mar 21, 2024
2 parents 42aa5c8 + b4181cc commit 4bc9edc
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 29 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## [1.2.4] - 2024-03-21
### Added
- Added `RandomElasticTransform` to `mltu.augmentors` to work with `Image` objects
- Added `xyxy_abs` to `mltu.annotations.detections.Detection` object to return absolute bounding boxes

### Changed
- Changed `ImageShowCV2` transformer in `mltu.transformers` to display images when running with multiple threads


## [1.2.3] - 2024-03-17
### Added
- Added a tutorial on how to run a pretrained YOLOv8 object detection model: `Tutorials.11_Yolov8.README.md`
Expand Down
4 changes: 2 additions & 2 deletions Tutorials/02_captcha_to_text/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ def download_and_unzip(url, extract_to="Datasets"):
os.makedirs(configs.model_path, exist_ok=True)

# Define callbacks
earlystopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1)
earlystopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1, mode="min")
checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
trainLogger = TrainLogger(configs.model_path)
tb_callback = TensorBoard(f"{configs.model_path}/logs", update_freq=1)
reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode="auto")
reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode="min")
model2onnx = Model2onnx(f"{configs.model_path}/model.h5")

# Train the model
Expand Down
1 change: 0 additions & 1 deletion Tutorials/11_Yolov8/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ onnx.save(onnx_model, "yolov8m.onnx")
## Run the YOLOv8 ONNX model with ONNX Runtime:
```python
import cv2
from ultralytics.engine.model import Model as BaseModel
from mltu.torch.yolo.detectors.onnx_detector import Detector as OnnxDetector

input_width, input_height = 640, 640
Expand Down
2 changes: 1 addition & 1 deletion mltu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.2.3"
__version__ = "1.2.4"

from .annotations.images import Image
from .annotations.images import CVImage
Expand Down
7 changes: 7 additions & 0 deletions mltu/annotations/detections.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ def xywh(self, xywh: np.ndarray):
@property
def xyxy(self):
return self._xyxy

@property
def xyxy_abs(self):
    """ Bounding box in absolute coordinates

    Multiplies ``self.xyxy`` element-wise by ``[width, height, width, height]``
    and casts the result to ``int`` (fractional parts are truncated, not rounded).

    Returns:
        np.ndarray: Integer array ``[x_min, y_min, x_max, y_max]``
    """
    # x coordinates scale with the width, y coordinates with the height
    scale = np.array([self.width, self.height] * 2)
    return (self.xyxy * scale).astype(int)

@staticmethod
def xywh2xyxy(xywh: np.ndarray):
Expand Down Expand Up @@ -275,6 +279,9 @@ def validate(self):
if isinstance(self.labels, list):
self.labels = {i: label for i, label in enumerate(self.labels)}

if not self.labels:
self.labels = {k: v for k, v in enumerate(sorted(set([detection.label for detection in self.detections])))}

def applyToFrame(self, image: np.ndarray, **kwargs: dict) -> np.ndarray:
""" Draw the detections on the image """
for detection in self.detections:
Expand Down
113 changes: 109 additions & 4 deletions mltu/augmentors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
- RandomMosaic
- RandomZoom
- RandomColorMode
- RandomElasticTransform
Implemented audio augmentors:
- RandomAudioNoise
Expand Down Expand Up @@ -494,14 +495,14 @@ def __init__(
self,
random_chance: float = 0.5,
log_level: int = logging.INFO,
augment_annotation: bool = False,
augment_annotation: bool = True,
) -> None:
""" Randomly mirror image
Args:
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
log_level (int): Log level for the augmentor. Defaults to logging.INFO.
augment_annotation (bool): Whether to augment the annotation. Defaults to False.
augment_annotation (bool): Whether to augment the annotation. Defaults to True.
"""
super(RandomMirror, self).__init__(random_chance, log_level, augment_annotation)

Expand Down Expand Up @@ -534,14 +535,14 @@ def __init__(
self,
random_chance: float = 0.5,
log_level: int = logging.INFO,
augment_annotation: bool = False,
augment_annotation: bool = True,
) -> None:
""" Randomly mirror image
Args:
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
log_level (int): Log level for the augmentor. Defaults to logging.INFO.
augment_annotation (bool): Whether to augment the annotation. Defaults to False.
augment_annotation (bool): Whether to augment the annotation. Defaults to True.
"""
super(RandomFlip, self).__init__(random_chance, log_level, augment_annotation)

Expand Down Expand Up @@ -839,6 +840,110 @@ def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image,
return image, annotation


class RandomElasticTransform(Augmentor):
    """ Randomly apply elastic transform to an image

    Attributes:
        random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
        alpha_range (tuple): Tuple of 2 floats, setting bounds for random alpha value. Defaults to (0, 0.1).
        sigma_range (tuple): Tuple of 2 floats, setting bounds for random sigma value. Defaults to (0.01, 0.02).
        log_level (int): Log level for the augmentor. Defaults to logging.INFO.
        augment_annotation (bool): Whether to augment the annotation. Defaults to True.
    """
    def __init__(
        self,
        random_chance: float = 0.5,
        alpha_range: tuple = (0, 0.1),
        sigma_range: tuple = (0.01, 0.02),
        log_level: int = logging.INFO,
        augment_annotation: bool = True,
    ) -> None:
        super(RandomElasticTransform, self).__init__(random_chance, log_level, augment_annotation)
        self.alpha_range = alpha_range
        self.sigma_range = sigma_range

    @staticmethod
    def elastic_transform(image: np.ndarray, alpha: float, sigma: float) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """ Apply elastic transform to an image

        Args:
            image (np.ndarray): Image to be used for elastic transform, either (height, width)
                or (height, width, channels)
            alpha (float): Scale applied to the blurred random displacement fields
            sigma (float): Gaussian blur sigma used to smooth the random displacement fields

        Returns:
            remap_fn (np.ndarray): Elastic transformed image
            dx (np.ndarray): X-axis displacement, shape (height, width)
            dy (np.ndarray): Y-axis displacement, shape (height, width)
        """
        # Only the spatial dimensions are needed; slicing keeps 2-D (grayscale) images working
        height, width = image.shape[:2]

        # Random displacement fields in the range [-1, 1)
        dx = np.random.rand(height, width).astype(np.float32) * 2 - 1
        dy = np.random.rand(height, width).astype(np.float32) * 2 - 1

        # Smooth the fields in place (kernel size (0, 0) lets OpenCV derive it from sigma)
        cv2.GaussianBlur(dx, (0, 0), sigma, dst=dx)
        cv2.GaussianBlur(dy, (0, 0), sigma, dst=dy)

        dx *= alpha
        dy *= alpha

        x, y = np.meshgrid(np.arange(width), np.arange(height))

        # Every output pixel (x, y) is sampled from the input at (x + dx, y + dy)
        map_x = np.float32(x + dx)
        map_y = np.float32(y + dy)

        remap_fn = cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)

        return remap_fn, dx, dy

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Randomly apply elastic transform to an image

        Args:
            image (Image): Image to be used for elastic transform
            annotation (typing.Any): Annotation to be used for elastic transform

        Returns:
            image (Image): Elastic transformed image
            annotation (typing.Any): Elastic transformed annotation if necessary
        """
        # alpha and sigma are drawn as fractions of the image width
        alpha = image.width * np.random.uniform(*self.alpha_range)
        sigma = image.width * np.random.uniform(*self.sigma_range)
        new_image, dx, dy = self.elastic_transform(image.numpy(), alpha, sigma)
        image.update(new_image)

        if isinstance(annotation, Detections) and self._augment_annotation:
            last_row, last_col = dx.shape[0] - 1, dx.shape[1] - 1
            detections = []
            for detection in annotation:
                x_min, y_min, x_max, y_max = detection.xyxy_abs

                # Absolute corners may equal width/height (box touching the border) or be
                # negative; clamp the lookup indices so they always address a valid pixel
                col_min, col_max = (int(v) for v in np.clip((x_min, x_max), 0, last_col))
                row_min, row_max = (int(v) for v in np.clip((y_min, y_max), 0, last_row))

                # NOTE(review): cv2.remap samples the source at (x + dx, y + dy), so image content
                # appears to move by roughly -dx/-dy; corners are shifted by +dx/+dy here as in the
                # original implementation - confirm the intended direction
                new_x_min = min(max(0, x_min + dx[row_min, col_min]), image.width - 1)
                new_y_min = min(max(0, y_min + dy[row_min, col_min]), image.height - 1)
                new_x_max = min(max(0, x_max + dx[row_max, col_max]), image.width - 1)
                new_y_max = min(max(0, y_max + dy[row_max, col_max]), image.height - 1)
                detections.append(
                    Detection(
                        [new_x_min, new_y_min, new_x_max, new_y_max],
                        label=detection.label,
                        labels=detection.labels,
                        confidence=detection.confidence,
                        image_path=detection.image_path,
                        width=image.width,
                        height=image.height,
                        relative=False,
                        bbox_type=BboxType.XYXY
                    )
                )

            annotation = Detections(
                labels=annotation.labels,
                width=image.width,
                height=image.height,
                detections=detections
            )

        return image, annotation


class RandomAudioNoise(Augmentor):
""" Randomly add noise to audio
Expand Down
6 changes: 3 additions & 3 deletions mltu/tensorflow/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
self.cer_accumulator.assign_add(tf.reduce_sum(distance))

# Increment the batch_counter by the batch size
self.batch_counter.assign_add(len(y_true))
self.batch_counter.assign_add(input_shape[0])

# Calculate the number of wrong words in batch and add to wer_accumulator variable
self.wer_accumulator.assign_add(tf.reduce_sum(tf.cast(tf.not_equal(distance, 0), tf.float32)))
Expand Down Expand Up @@ -146,7 +146,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
self.cer_accumulator.assign_add(tf.reduce_sum(distance))

# Increment the batch_counter by the batch size
self.batch_counter.assign_add(len(y_true))
self.batch_counter.assign_add(input_shape[0])

def result(self):
""" Computes and returns the metric result.
Expand Down Expand Up @@ -253,7 +253,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
self.wer_accumulator.assign_add(tf.reduce_sum(tf.cast(distance, tf.float32)))

# Increment the batch_counter by the batch size
self.batch_counter.assign_add(len(y_true))
self.batch_counter.assign_add(input_shape[0])

def result(self):
"""Computes and returns the metric result.
Expand Down
5 changes: 3 additions & 2 deletions mltu/torch/yolo/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ class VOCAnnotationReader:
def __init__(self, labels: dict, images_path: str=None):
self.labels = labels
self.images_path = images_path
self.dataset_found_labels = {}

@staticmethod
def readFromVOC(voc_annotation_path: str, labels: dict, images_path: str=None) -> Detections:
def readFromVOC(voc_annotation_path: str, labels: dict={}, images_path: str=None) -> Detections:
annotation_path = Path(voc_annotation_path)
tree = ET.parse(voc_annotation_path)
root = tree.getroot()
Expand Down Expand Up @@ -49,7 +50,7 @@ def readFromVOC(voc_annotation_path: str, labels: dict, images_path: str=None) -
image_path = os.path.join(images_path, annotation_dict['filename'])
dets = []
for obj in annotation_dict['objects']:
if obj['name'] not in labels.values():
if labels and obj['name'] not in labels.values():
print(f"Label {obj['name']} not found in labels")
continue

Expand Down
14 changes: 3 additions & 11 deletions mltu/torch/yolo/train_yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from mltu.annotations.images import CVImage
from mltu.transformers import ImageResizer, ImageShowCV2, ImageNormalizer
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen, \
RandomMirror, RandomFlip, RandomGaussianBlur, RandomSaltAndPepper, RandomDropBlock, RandomMosaic
RandomMirror, RandomFlip, RandomGaussianBlur, RandomSaltAndPepper, RandomDropBlock, RandomMosaic, RandomElasticTransform
from mltu.torch.model import Model
from mltu.torch.dataProvider import DataProvider
from mltu.torch.yolo.annotation import VOCAnnotationReader
Expand All @@ -21,6 +21,7 @@

annotations_path = "Datasets/car-plate-detection/annotations"

# Create a dataset from the annotations, the dataset is a list of lists where each list contains the [image path, annotation path]
dataset = [[None, os.path.join(annotations_path, f)] for f in os.listdir(annotations_path)]

# Make sure torch can see GPU device, it is not recommended to train with CPU
Expand Down Expand Up @@ -49,9 +50,6 @@
numpy=False,
)

# for b in data_provider:
# pass

# split the dataset into train and test
train_data_provider, val_data_provider = data_provider.split(0.9, shuffle=False)

Expand All @@ -62,20 +60,14 @@
RandomSharpen(),
RandomMirror(),
RandomFlip(),
RandomElasticTransform(),
RandomGaussianBlur(),
RandomSaltAndPepper(),
RandomRotate(angle=10),
RandomDropBlock(),
RandomMosaic(),
]

# for batch in train_data_provider:
# pass
# print(batch)
# break



base_model = BaseModel("yolov8n.pt")
# Create a YOLO model
model = DetectionModel('yolov8n.yaml', nc=len(labels))
Expand Down
38 changes: 33 additions & 5 deletions mltu/transformers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import cv2
import time
import queue
import typing
import logging
import threading
import importlib
import numpy as np

Expand Down Expand Up @@ -344,6 +347,26 @@ def __init__(
super(ImageShowCV2, self).__init__(log_level=log_level)
self.verbose = verbose
self.name = name
self.thread_started = False

def init_thread(self):
    """ Lazily start the background thread that displays queued images

    Does nothing when the display thread was already started. On the first call it
    creates ``self.image_queue`` and starts ``self.thread`` running ``self._display_images``.
    """
    if self.thread_started:
        return

    # Create the queue before raising the flag, so a caller that observes
    # thread_started == True never finds image_queue missing
    self.image_queue = queue.Queue()

    # Start a new thread to display the images, so that the main loop could run in multiple threads.
    # The display loop never returns, so the thread is a daemon: otherwise it would
    # keep the interpreter alive forever after the main program has finished
    self.thread = threading.Thread(target=self._display_images, daemon=True)
    self.thread_started = True
    self.thread.start()

def _display_images(self) -> None:
    """ Endlessly take (image, label) pairs from the queue and show them with OpenCV

    For every queued pair the label is shown first in a "<name>Label" window (only when
    it is an ``Image``), then the image in the "<name>" window. The loop then blocks on
    a key press and closes all windows before fetching the next pair.
    """
    while True:
        frame, annotation = self.image_queue.get()

        # Collect the windows in display order: optional label window first, then the image
        windows = []
        if isinstance(annotation, Image):
            windows.append((self.name + "Label", annotation))
        windows.append((self.name, frame))

        for title, picture in windows:
            cv2.imshow(title, picture.numpy())

        # Wait for a key press before closing the windows
        cv2.waitKey(0)
        cv2.destroyAllWindows()

def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typing.Any]:
""" Show image for visual inspection
Expand All @@ -356,6 +379,9 @@ def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typin
data (np.ndarray): Image data
label (np.ndarray): Label data (unchanged)
"""
# Start cv2 image display thread
self.init_thread()

if self.verbose:
if isinstance(label, (str, int, float)):
self.logger.info(f"Label: {label}")
Expand All @@ -365,10 +391,12 @@ def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typin
img = detection.applyToFrame(np.asarray(image.numpy()))
image.update(img)

cv2.imshow(self.name, image.numpy())
if isinstance(label, Image):
cv2.imshow(self.name+"Label", label.numpy())
cv2.waitKey(0)
cv2.destroyAllWindows()
# Add image to display queue
# Sleep if queue is not empty
while not self.image_queue.empty():
time.sleep(0.5)

# Add image to display queue
self.image_queue.put((image, label))

return image, label

0 comments on commit 4bc9edc

Please sign in to comment.