Skip to content

Commit

Permalink
feat(data): Mimic detectron2 ResizeShortestEdge
Browse files Browse the repository at this point in the history
Added because albumentations does not support it natively. The code may have some bugs.
  • Loading branch information
charitarthchugh committed Nov 26, 2024
1 parent 275e8f0 commit acaaa44
Showing 1 changed file with 83 additions and 0 deletions.
83 changes: 83 additions & 0 deletions src/lightningsparseinst/utils/transforms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import albumentations as A
import cv2
import numpy as np


class ResizeShortestEdge(A.DualTransform):
    """Shortest-edge resize followed by a stretch to a fixed square.

    Loosely modelled on detectron2's ``ResizeShortestEdge``. Each call:

    1. samples a target size for the shortest edge from ``shortest_max_size``;
    2. resizes the image, keeping its aspect ratio, so the shortest edge
       matches that target, shrinking further if the longest edge would
       exceed ``largest_max_size``;
    3. resizes the result to ``largest_max_size x largest_max_size`` so every
       output has the same shape. This last step does NOT preserve the
       aspect ratio.

    Masks are resized with nearest-neighbour interpolation so label values are
    never blended. Bounding boxes are passed through unchanged.
    Keypoints are not handled by this class.

    Attributes:
        shortest_max_size (list[int]): Candidate sizes for the shortest edge;
            one is drawn at random per call.
        largest_max_size (int): Upper bound for the longest edge of the
            intermediate resize, and the side length of the square output.
        interpolation: OpenCV interpolation flag used for images.
    """

    def __init__(
        self,
        shortest_max_size: list[int] | int,
        largest_max_size: int,
        interpolation=cv2.INTER_LINEAR,
        always_apply=True,
        p=1.0,
    ):
        """Store the resize configuration.

        Args:
            shortest_max_size: A single size or a sequence of candidate sizes
                for the shortest edge.
            largest_max_size: Maximum longest-edge size / output side length.
            interpolation: OpenCV interpolation flag for images.
            always_apply: Forwarded to the albumentations base class.
            p: Probability of applying the transform.

        Raises:
            ValueError: If ``shortest_max_size`` is an empty sequence.
        """
        # Keywords, not positionals: the order of these two base-class
        # parameters is not the same in every albumentations release.
        super().__init__(always_apply=always_apply, p=p)

        # NumPy integers are not ``int`` instances; without this check a scalar
        # such as ``np.int64(640)`` would reach ``np.random.choice`` and be
        # interpreted as "sample from arange(640)".
        if isinstance(shortest_max_size, (int, np.integer)):
            self.shortest_max_size = [int(shortest_max_size)]
        else:
            self.shortest_max_size = list(shortest_max_size)
            if not self.shortest_max_size:
                raise ValueError("shortest_max_size must contain at least one size")

        self.largest_max_size = largest_max_size
        self.interpolation = interpolation

    @staticmethod
    def _aspect_preserving_shape(height, width, shortest_max_size, largest_max_size):
        """Return ``(new_h, new_w)`` for the aspect-ratio-preserving resize."""
        scale = shortest_max_size / min(height, width)
        if height < width:
            new_h, new_w = shortest_max_size, int(scale * width)
        else:
            new_h, new_w = int(scale * height), shortest_max_size

        # Shrink again if the longest edge overshoots the allowed maximum.
        longest = max(new_h, new_w)
        if longest > largest_max_size:
            shrink = largest_max_size / longest
            new_h = max(1, int(new_h * shrink))
            new_w = max(1, int(new_w * shrink))
        return new_h, new_w

    def apply(self, img, shortest_max_size=0, largest_max_size=0, interpolation=None, **params):
        """Resize ``img`` and return a ``largest_max_size``-sided square array.

        Args:
            img: Array whose first two axes are height and width.
            shortest_max_size: Target shortest-edge size sampled by ``get_params``.
            largest_max_size: Longest-edge cap and output side length.
            interpolation: OpenCV interpolation flag; falls back to
                ``self.interpolation`` when ``None``.
            **params: Extra parameters supplied by albumentations (unused).
        """
        if interpolation is None:
            interpolation = self.interpolation

        height, width = img.shape[:2]
        new_h, new_w = self._aspect_preserving_shape(height, width, shortest_max_size, largest_max_size)

        # cv2.resize takes the target size as (width, height).
        img_resized = cv2.resize(img, (new_w, new_h), interpolation=interpolation)

        # Ensure consistent output size (stretches to a square).
        return cv2.resize(img_resized, (largest_max_size, largest_max_size), interpolation=interpolation)

    def apply_to_mask(self, mask, *args, **params):
        """Resize a mask like the image, but with nearest-neighbour sampling."""
        # Interpolating between label ids would invent classes that do not exist.
        params["interpolation"] = cv2.INTER_NEAREST
        return self.apply(mask, *args, **params)

    def apply_to_bbox(self, bbox, **params):
        """Return ``bbox`` unchanged."""
        # albumentations hands boxes over in normalized coordinates, which a
        # pure resize does not alter (same approach as its built-in ``Resize``).
        return bbox

    def apply_to_bboxes(self, bboxes, *args, **params):
        """Return ``bboxes`` unchanged (see ``apply_to_bbox``)."""
        return bboxes

    def get_params(self):
        """Sample the per-call shortest-edge target and pass the edge cap along."""
        return {
            # Cast so downstream code sees a plain Python int, not np.int64.
            "shortest_max_size": int(np.random.choice(self.shortest_max_size)),
            "largest_max_size": self.largest_max_size,
        }

    def get_transform_init_args_names(self):
        """Names of the constructor arguments albumentations should serialize."""
        return ("shortest_max_size", "largest_max_size", "interpolation")


def _demo():
    """Show the first image of the "coco-2017" dataset before and after resizing.

    Manual smoke test only: it needs the optional ``fiftyone`` and ``Pillow``
    packages, a FiftyOne dataset named "coco-2017", and a desktop image viewer.
    """
    # Imported here so that importing this module does not require them.
    import fiftyone as fo
    from PIL import Image

    dataset = fo.load_dataset("coco-2017")
    # Only the first sample is needed; avoid fetching every filepath.
    image_path = dataset.first().filepath

    # ``convert`` returns a fully loaded copy, so the file can be closed right away.
    with Image.open(image_path) as handle:
        original = handle.convert("RGB")
    original.show()

    pipeline = A.Compose([ResizeShortestEdge([416, 448, 480, 512, 544, 576, 608, 640], 853)])
    resized = pipeline(image=np.asarray(original))["image"]

    Image.fromarray(resized).show()


if __name__ == "__main__":
    _demo()

0 comments on commit acaaa44

Please sign in to comment.