# data_loader.py

import torch
import numpy as np

from torch.utils.data import Dataset, Sampler
from torchvision import transforms, utils

from pycocotools.coco import COCO

import skimage.io
import skimage.transform
import skimage

import sys, os
import random


class CocoDataset(Dataset):
    """COCO detection dataset yielding ``{'img': ..., 'annot': ...}`` samples.

    ``img`` is a float32 array scaled to [0, 1] with channels last, and
    ``annot`` is an (N, 5) float array whose rows are
    ``[x1, y1, x2, y2, label]``. ``label`` is a contiguous index assigned in
    ascending order of COCO category id.
    """

    def __init__(self, ds_path, ds_name='train2017', transform=None):
        """
        Args:
            ds_path (string): COCO root directory. It must contain
                ``annotations/instances_<ds_name>.json`` and a ``<ds_name>``
                folder holding the images.
            ds_name (string): Name of the split to load.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.path = ds_path
        self.name = ds_name
        self.transform = transform

        annotation_file = os.path.join(self.path, 'annotations', 'instances_' + self.name + '.json')
        self.coco = COCO(annotation_file)
        self.image_ids = self.coco.getImgIds()
        print("TRAIN IMAGES:", len(self.image_ids))

        self.load_classes()

    def load_classes(self):
        """Build the name/label/COCO-id lookup tables from the annotation file."""
        # Sorting by COCO id makes the label numbering deterministic.
        categories = sorted(self.coco.loadCats(self.coco.getCatIds()), key=lambda c: c['id'])
        print("TRAIN CATEGORIES:", len(categories))

        self.classes = {}              # name -> label
        self.coco_labels = {}          # label -> COCO category id
        self.coco_labels_inverse = {}  # COCO category id -> label
        for c in categories:
            label = len(self.classes)
            self.coco_labels[label] = c['id']
            self.coco_labels_inverse[c['id']] = label
            self.classes[c['name']] = label

        # also load the reverse (label -> name)
        self.labels = {label: name for name, label in self.classes.items()}

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return the (optionally transformed) sample at position ``idx``."""
        sample = {
            'img': self._load_image(idx),  # (H, W, C)
            'annot': self._load_annotations(idx),
        }
        if self.transform:
            sample = self.transform(sample)

        return sample

    def _load_image(self, image_idx):
        """Load an image from disk using the file name stored in COCO.

        Returns:
            float32 array with values divided by 255. Grayscale images are
            expanded to three channels and an alpha channel is discarded.
        """
        # Image info: {file_name: _, height: _, etc.}
        image_info = self.coco.loadImgs(self.image_ids[image_idx])[0]

        # Retrieve image from folder
        path = os.path.join(self.path, self.name, image_info['file_name'])
        img = skimage.io.imread(path)

        if img.ndim == 2:
            img = skimage.color.gray2rgb(img)
        elif img.ndim == 3 and img.shape[2] == 4:
            # Drop alpha: the rest of this file assumes exactly 3 channels.
            img = img[..., :3]

        return img.astype(np.float32) / 255.

    def _load_annotations(self, image_idx):
        """Load non-crowd boxes as an (N, 5) array of [x1, y1, x2, y2, label].

        Returns an array of shape (0, 5) when the image has no usable
        annotation.
        """
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_idx], iscrowd=False)

        # Some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return np.zeros((0, 5))

        rows = []
        for a in self.coco.loadAnns(annotations_ids):
            x, y, w, h = a['bbox']

            # some annotations have basically no width / height, skip them
            if w < 1 or h < 1:
                continue

            # COCO stores [x, y, w, h]; convert to corner form [x1, y1, x2, y2].
            rows.append([x, y, x + w, y + h, self.coco_label__to__label(a['category_id'])])

        # reshape keeps the (0, 5) shape when every annotation was skipped.
        return np.array(rows, dtype=np.float64).reshape(-1, 5)

    def coco_label__to__label(self, coco_label):
        """Map a COCO category id to the contiguous label index."""
        return self.coco_labels_inverse[coco_label]

    def label__to__coco_label(self, label):
        """Map a contiguous label index back to the COCO category id."""
        return self.coco_labels[label]

    def image_aspect_ratio(self, image_idx):
        """Return width / height of an image, read from the COCO metadata."""
        image = self.coco.loadImgs(self.image_ids[image_idx])[0]
        return float(image['width']) / float(image['height'])
    

# ====================== COLLATE & SAMPLER =======================
def collater(data):
    """
    Merges a list of samples into one zero-padded batch.

    Every image is copied into the top-left corner of a zero tensor whose
    first two dimensions are the per-batch maxima, so the bottom-right
    corner is filled with zeros. Annotation tensors are padded with rows
    of -1 up to the largest box count in the batch (at least one row).

    Converts input:
        [{img, annot, scale}, ...] => {'img': [N, C, H, W], 'annot': [N, M, 5], 'scale': [scale, ...]}
    """
    images = [sample['img'] for sample in data]
    boxes = [sample['annot'] for sample in data]
    scale_factors = [sample['scale'] for sample in data]

    n_samples = len(images)

    # Images still vary in size: take the largest extent along each of the
    # two leading (spatial) dimensions.
    dim0_max = max(int(im.shape[0]) for im in images)
    dim1_max = max(int(im.shape[1]) for im in images)

    batch = torch.zeros(n_samples, dim0_max, dim1_max, 3)
    for i, im in enumerate(images):
        batch[i, :int(im.shape[0]), :int(im.shape[1]), :] = im

    # The number of annotations also varies from image to image; keep at
    # least one (all -1) row so the result never has an empty dimension.
    most_boxes = max(b.shape[0] for b in boxes)
    annot_padded = torch.ones((n_samples, max(most_boxes, 1), 5)) * -1
    for i, b in enumerate(boxes):
        count = b.shape[0]
        if count > 0:
            annot_padded[i, :count, :] = b

    # Channels-last -> channels-first: [N, C, H, W]
    batch = batch.permute(0, 3, 1, 2)

    return {'img': batch, 'annot': annot_padded, 'scale': scale_factors}

class AspectRatioSampler(Sampler):
    """Sampler yielding whole batches of indices with similar aspect ratio.

    Indices are sorted by ``data_source.image_aspect_ratio`` and cut into
    consecutive groups of ``batch_size``. Each iteration yields the groups
    (lists of indices) in a freshly shuffled order; the membership of each
    group is fixed at construction time.

    Args:
        data_source: Sized object exposing ``image_aspect_ratio(index)``.
        batch_size (int): Number of indices per group.
        drop_last (bool): If True, an incomplete final group is discarded.
            If False, it is completed by wrapping around to the indices with
            the smallest aspect ratio.
    """

    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        # Shuffle batch order only; images inside a batch stay together.
        random.shuffle(self.groups)
        yield from self.groups

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        """Return the list of index groups, one group per batch."""
        # determine the order of the images
        order = sorted(range(len(self.data_source)), key=self.data_source.image_aspect_ratio)
        total = len(order)

        # divide into groups, one group = one batch; the modulo wraps the
        # final group around to the start so it is always full
        groups = [
            [order[x % total] for x in range(start, start + self.batch_size)]
            for start in range(0, total, self.batch_size)
        ]

        # Honour drop_last so the number of yielded batches matches __len__.
        if self.drop_last and total % self.batch_size:
            groups.pop()

        return groups

    
# =========================== TRANSFORMATIONS ====================================

class Resizer(object):
    """Rescales a sample so its sides fit [min_side, max_side], then zero-pads.

    The image is resized so that its shorter side equals ``min_side``, unless
    that would push the longer side above ``max_side``, in which case the
    longer side is set to ``max_side`` instead. The result is placed in the
    top-left corner of a zero array whose two spatial dimensions are
    multiples of 32.
    """

    def __call__(self, sample, min_side=608, max_side=1024):
        """
        Args:
            sample (dict): ``'img'`` is an (H, W, C) NumPy array and
                ``'annot'`` an (N, 5) NumPy array whose first four columns
                are box coordinates in pixels.
            min_side (int): Target length of the shorter image side.
            max_side (int): Upper bound for the longer image side.

        Returns:
            dict: ``'img'`` (float32 tensor, padded), ``'annot'`` (tensor with
            the first four columns multiplied by the scale) and ``'scale'``
            (the applied resize factor). The input arrays are not modified.
        """
        image, annots = sample['img'], sample['annot']

        rows, cols, _ = image.shape

        # rescale the image so the smallest side is min_side
        scale = min_side / min(rows, cols)

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)
        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = skimage.transform.resize(image, (int(round(rows * scale)), int(round(cols * scale))))
        rows, cols, cns = image.shape

        # NOTE(review): a side that is already a multiple of 32 still gets a
        # full extra 32 pixels of padding; kept as-is to preserve output shapes.
        pad_rows = 32 - rows % 32
        pad_cols = 32 - cols % 32

        new_image = np.zeros((rows + pad_rows, cols + pad_cols, cns), dtype=np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        # Scale a copy so the caller's annotation array is left untouched.
        scaled_annots = annots.copy()
        scaled_annots[:, :4] *= scale

        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(scaled_annots), 'scale': scale}


class Augmenter(object):
    """Random horizontal flip of an image and its box annotations."""

    def __call__(self, sample, flip_x=1.):
        """
        Args:
            sample (dict): ``'img'`` is an (H, W, C) NumPy array and
                ``'annot'`` an (N, 5) NumPy array with rows starting
                ``[x1, y1, x2, y2, ...]``.
            flip_x (float): Probability of flipping. With the default of 1.
                every sample is flipped, since ``np.random.rand()`` is
                always below 1.

        Returns:
            dict: The input ``sample`` itself when no flip happens. Otherwise
            a new dict with the flipped ``'img'`` and ``'annot'``; any other
            keys are carried over and the input arrays are not modified.
        """
        if np.random.rand() >= flip_x:
            return sample

        image, annots = sample['img'], sample['annot']

        # Materialise the mirrored image: a negative-stride view cannot be
        # handed to torch.from_numpy later on.
        image = np.ascontiguousarray(image[:, ::-1, :])
        cols = image.shape[1]

        # Mirror the x-coordinates (cols - x); x1 and x2 swap roles so that
        # x1 <= x2 still holds. Work on a copy to leave the input untouched.
        flipped = annots.copy()
        flipped[:, 0] = cols - annots[:, 2]
        flipped[:, 2] = cols - annots[:, 0]

        result = dict(sample)
        result['img'] = image
        result['annot'] = flipped
        return result
    
class Normalizer(object):
    """Per-channel normalization: ``(image - mean) / std``.

    Args:
        mean (sequence of float, optional): Per-channel mean. Defaults to
            ``[0.485, 0.456, 0.406]`` (presumably the ImageNet RGB statistics).
        std (sequence of float, optional): Per-channel standard deviation.
            Defaults to ``[0.229, 0.224, 0.225]``.
    """

    def __init__(self, mean=None, std=None):
        # `is None` rather than `== None`: the latter compares element-wise
        # and raises when an ndarray is passed.
        self.mean = [0.485, 0.456, 0.406] if mean is None else mean
        self.std = [0.229, 0.224, 0.225] if std is None else std

    def __call__(self, sample):
        """
        Input:
            - sample : dict whose 'img' is a NumPy array with channels on the
              last axis (mean/std broadcast against that axis) and whose
              'annot' is passed through unchanged
        Returns:
            - dict: copy of the sample with 'img' replaced by the normalized
              image; any additional keys are carried over
        """
        image, annots = sample['img'], sample['annot']

        result = dict(sample)
        result['img'] = (image.astype(np.float32) - self.mean) / self.std
        result['annot'] = annots
        return result