evaluation.py

# THIS FILE SHOULD NOT BE CHANGED

# Description:
# This file should be used for computing scores for each of the project topics
# Usage: python evaluation.py <project_code> <path_2_output_predictions>

import sys
import os
from skimage import io, color
from sklearn.metrics import f1_score
from sklearn.metrics import average_precision_score
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
import pandas as pd
import numpy as np


############## DET ##########################################################################
def iou_bboxs(bbox1, bbox2):
    # input bboxes
    b1_x_min, b1_y_min, b1_x_max, b1_y_max = bbox1
    b2_x_min, b2_y_min, b2_x_max, b2_y_max = bbox2
    # intersection area
    intersection_width = min(b1_x_max, b2_x_max) - max(b1_x_min, b2_x_min)
    intersection_height = min(b1_y_max, b2_y_max) - max(b1_y_min, b2_y_min)
    # if no intersection
    if intersection_width <= 0 or intersection_height <= 0:
        return 0

    intersection_area = intersection_width * intersection_height

    # union area
    box1_area = (b1_x_max - b1_x_min) * (b1_y_max - b1_y_min)
    box2_area = (b2_x_max - b2_x_min) * (b2_y_max - b2_y_min)

    union_area = box1_area + box2_area - intersection_area

    # calculate IoU
    iou = intersection_area / union_area
    return iou


def map_ref_pred(ref_boxes, pred_boxes, mapping, reverse_mapping, all_confs, epsilon=0.5):
    '''
	Match the reference boxes to the predictions and vice versa.
	Boxes are only matched if they have IoU over epsilon.
	'''
    for ref_idx, ref_box in ref_boxes.iterrows():
        ref_name = f'{ref_box["filename"]}_{ref_box["object_id"]}'
        ref_bbox = [ref_box['xmin'], ref_box['ymin'], ref_box['xmax'], ref_box['ymax']]
        mapping[ref_name] = []
        for pred_idx, pred_box in pred_boxes.iterrows():
            pred_name = f'{pred_box["filename"]}_{pred_idx}'
            if pred_name not in reverse_mapping.keys():
                reverse_mapping[pred_name] = []
            pred_bbox = [pred_box['xmin'], pred_box['ymin'], pred_box['xmax'], pred_box['ymax']]
            conf = pred_box['confidence']
            if pred_name not in all_confs.keys():
                all_confs[pred_name] = conf
            iou_score = iou_bboxs(ref_bbox, pred_bbox)
            if iou_score >= epsilon:
                mapping[ref_name].append((pred_name, iou_score, conf, iou_score))
                reverse_mapping[pred_name].append(ref_name)


# Don't return anything, we update both mappings in place

def process_mappings(mapping, reverse_mapping, all_confs):
    '''
	Calcualte thresholds for FPs and FNs for the prec-recall curve.
	A reference detection is a FN at a threshold higher than the confidence of its highest confidence match.
	A detection is a FP at a threshold where it is no longer the highest-IoU detection for at least one reference box.
	'''
    FN_thresholds = []
    for k in mapping.keys():
        matches = mapping[k]
        if len(matches) == 0:
            FN_thresh = -1.0  # If there is no match, it's always a FN
        else:
            FN_thresh = np.max(
                [match[2] for match in matches])  # As soon as the first match appears, it is no longer a FN
        FN_thresholds.append(FN_thresh)
    FN_thresholds.append(-1.0)  # Add an extra threshold that will never be reached

    FN_thresholds = np.array(FN_thresholds)
    FN_thresholds = FN_thresholds[np.argsort(-FN_thresholds)]

    FP_thresholds = []
    for k in reverse_mapping.keys():
        FP_thresh = all_confs[k]  # If there's no match, it's always a FP
        matches = reverse_mapping[k]
        if len(matches) != 0:
            for match in matches:
                ref_matches = mapping[match]
                ordering = list(np.argsort([-rm[1] for rm in ref_matches]))  # Sort the matches by descending IoU
                curr_elem = [rm[0] for rm in ref_matches].index(k)
                rank = ordering.index(curr_elem)  # Where is the current element in the ordering?
                if rank == 0:
                    FP_thresh = -1.0  # If this is the best match for any ref box, then it's never a FP
                    break
                else:
                    all_match_confs = [ref_matches[ordering[ridx]][2] for ridx in np.arange(rank - 1, -1,
                                                                                            -1)]  # Find the confidences of all the matches with higher IoU
                    min_match_conf = np.min(all_match_confs)
                    if min_match_conf > all_confs[
                        k]:  # There is a better match that appears before this one. For this ref box, it's a FP.
                        pass
                    else:
                        FP_thresh = min(FP_thresh,
                                        min_match_conf)  # This match is the first until another one appears later. What is the lowest confidence for which this is still not a FP?
        FP_thresholds.append(FP_thresh)
    FP_thresholds.append(-1.0)  # Add an extra threshold that will never be reached
    FP_thresholds = np.array(FP_thresholds)
    FP_thresholds = FP_thresholds[np.argsort(-FP_thresholds)]
    return FN_thresholds, FP_thresholds


def compute_avg_precision(project_code, folder_ref, folder_pred):
    ref_files = sorted([f for f in os.listdir(folder_ref)])
    pred_files = sorted([f for f in os.listdir(folder_pred)])

    mapping, reverse_mapping = {}, {}
    all_confs = {}
    confidences = []
    for ref_file_name, pred_file_name in zip(ref_files, pred_files):
        pred_boxes = pd.read_csv(os.path.join(folder_pred, pred_file_name))
        ref_boxes = pd.read_csv(os.path.join(folder_ref, ref_file_name))
        for pred_idx, pred_box in pred_boxes.iterrows():
            confidences.append(pred_box['confidence'])
        map_ref_pred(ref_boxes, pred_boxes, mapping, reverse_mapping, all_confs, 0.5)
    FN_thresholds, FP_thresholds = process_mappings(mapping, reverse_mapping, all_confs)
    ref_num = len(mapping.keys())
    confidences.append(-1.0)  # Add an extra threshold that will never be reached
    confidences = np.array(confidences)
    confidences = confidences[np.argsort(-confidences)]  # Sort the confidences in descending order
    pred_thresholds = np.flip(np.unique(confidences[:-1]))
    FP_num, TP_num, pred_num = 0, 0, 0
    precisions, recalls = [1], [0]
    for thresh in pred_thresholds:
        while thresh <= confidences[pred_num]:
            pred_num += 1
        while thresh <= FP_thresholds[FP_num]:
            FP_num += 1
        while thresh <= FN_thresholds[TP_num]:
            TP_num += 1
        prec = (pred_num - FP_num) / pred_num
        rec = TP_num / ref_num
        precisions.append(prec)
        recalls.append(rec)
    precisions.extend([0, 0])
    precisions = np.array(precisions)
    recalls.extend([recalls[-1], 1.0])
    recalls = np.array(recalls)
    # AUC = np.sum((recalls[1:]-recalls[:-1])*precisions[1:])

    interp_prec = np.flip(np.maximum.accumulate(np.flip(precisions)))  # Use the interpolated prec-rec curve
    interp_rec = recalls
    AUC2 = np.sum((interp_rec[1:] - interp_rec[:-1]) * interp_prec[1:])

    return AUC2


###########  SEG ###################################################################
label_dict = {
    'void': (0, 0, 0),
    'flat': (128, 64, 128),
    'construction': (70, 70, 70),
    'object': (153, 153, 153),
    'nature': (107, 142, 35),
    'sky': (70, 130, 180),
    'human': (220, 20, 60),
    'vehicle': (0, 0, 142)
}


def labmask_to_onehot_pixel_counts(mask, label_dict):
    # prepare empty list
    onehot_labels = []
    # class_pixel_counts = {label: 0 for label in label_dict}
    # SOLUTION
    # iterate over all classes defined in the label_dict
    for key in label_dict:
        rgb = label_dict[key]
        # prepare empty binary mask
        class_mask = np.zeros((mask.shape[:2]), dtype=bool)

        # make binary mask of positions given by all rgb values
        # corresponding to a specific class
        label_pos = np.equal(mask, rgb)
        # add label positions to binary mask
        class_mask = np.logical_or(class_mask, np.all(label_pos, axis=-1))

        # add binary mask of the respective class to the list
        onehot_labels.append(class_mask)
    # class_pixel_counts[key] = np.sum(class_mask)
    # END OF SOLUTION
    # return class_pixel_counts
    return onehot_labels


def labmask_to_onehot_pixel_counts_alternative(mask, label_dict):
    # prepare empty list
    onehot_labels = []
    # class_pixel_counts = {label: 0 for label in label_dict}
    # SOLUTION
    # iterate over all classes defined in the label_dict
    for key in label_dict:
        rgb = label_dict[key]
        # prepare empty binary mask

        # make binary mask of positions given by all rgb values
        # corresponding to a specific class
        label_pos = np.equal(mask, rgb)
        # add label positions to binary mask

        # add binary mask of the respective class to the list
        onehot_labels.append(np.all(label_pos, axis=-1))
    # class_pixel_counts[key] = np.sum(class_mask)
    # END OF SOLUTION
    # return class_pixel_counts
    return onehot_labels


def multiply_by_weights(class_wise_scores):
    # weights for the whole dataset (including the secret)
    weights = [0.105, 0.387, 0.217, 0.018, 0.152, 0.035, 0.012, 0.073]  # 8 classes

    # multiply
    result = [a * b for a, b in zip(class_wise_scores, weights)]

    # calculate the mean
    mean = sum(result)
    return mean


def classwise_iou_mask_4_img_pair(img_ref, img_pre):
    onehot_ref = labmask_to_onehot_pixel_counts(img_ref, label_dict)
    onehot_pre = labmask_to_onehot_pixel_counts(img_pre, label_dict)

    classwise_scores = []
    for idx, class_key in enumerate(label_dict):
        mask_ref = onehot_ref[idx]
        mask_pre = onehot_pre[idx]

        intersection = np.logical_and(mask_ref, mask_pre)
        union = np.logical_or(mask_ref, mask_pre)
        if np.sum(union) != 0:
            iou = np.sum(intersection) / np.sum(union)
        else:
            iou = None
        classwise_scores.append(iou)
    return classwise_scores


def compute_avg_iou_mask(project_code, folder_ref, folder_pre):
    # reading the images
    ref_files = sorted([f for f in os.listdir(folder_ref)])
    pre_files = sorted([f for f in os.listdir(folder_pre)])

    score_global = [0., 0., 0., 0., 0., 0., 0., 0.]
    n_class_img = [0, 0, 0, 0, 0, 0, 0, 0]
    for ref_file, pre_file in zip(ref_files, pre_files):
        ref_img = io.imread(os.path.join(folder_ref, ref_file))
        pre_img = io.imread(os.path.join(folder_pre, pre_file))

        score_class_wise_pair = classwise_iou_mask_4_img_pair(ref_img, pre_img)
        score_global = [a + b if a is not None else b for a, b in zip(score_class_wise_pair, score_global)]
        n_class_img = [b + 1 if a is not None else b for a, b in zip(score_class_wise_pair, n_class_img)]

    score_global = [a / n_cl for a, n_cl in zip(score_global, n_class_img)]
    print(f'class-wise scores = {score_global}')
    final_score = multiply_by_weights(score_global)
    return final_score


############ CLA ###################################################################
class_dict = {
    'bus': 0,
    'car': 1,
    'light': 2,
    'sign': 3,
    'truck': 4,
    'vegetation': 5
}


# classes = ('bus',  'car',  'light', 'sign',  'truck' , 'vegetation')


def f1_score_4_two_lists(class_list_true, class_list_pred):
    score = f1_score(class_list_true, class_list_pred, average='micro')
    return score


def get_csv_file_name(folder_path):
    if os.path.exists(folder_path):
        for filename in os.listdir(folder_path):
            if filename.endswith('.csv'):
                file_path = os.path.join(folder_path, filename)
                return file_path

        print(f'CSV file is not found in folder {folder_path}')
        return -1
    else:
        print("Folder not found.")
        return -1


def compute_f1score4_classes(project_code, path_ref, path_pre):
    # reading the csvs
    # format is the following ['filename', 'class_id']
    df_ref = pd.read_csv(get_csv_file_name(path_ref))
    df_pre = pd.read_csv(get_csv_file_name(path_pre))

    # sort the dataframe by 'img_name'
    df_ref.sort_values(by='filename', inplace=True)
    df_pre.sort_values(by='filename', inplace=True)

    # form a list consisting only of class_ids
    reference_class_id_list = df_ref['class_id'].tolist()
    predicted_class_id_list = df_pre['class_id'].tolist()

    return f1_score_4_two_lists(reference_class_id_list, predicted_class_id_list)


###########  COL, SUP  #####################################################################
def dssim(img_ref, img_pred):
    ssim_none = ssim(img_ref, img_pred, channel_axis=2, data_range=255)
    dssim_score = 1 - ssim_none
    return dssim_score


def psnr(img_ref, img_pred):
    psnr_none = psnr(img_ref, img_pred)
    return psnr_none


def compute_simple_score_images(project_code, folder_ref, folder_pre):
    # reading the images
    ref_files = sorted([f for f in os.listdir(folder_ref)])
    pre_files = sorted([f for f in os.listdir(folder_pre)])

    scores = []
    for ref_file, pre_file in zip(ref_files, pre_files):
        # Open images
        ref_img = io.imread(os.path.join(folder_ref, ref_file))
        pre_img = io.imread(os.path.join(folder_pre, pre_file))

        if project_code == 'COL':
            score_pair = dssim(ref_img, pre_img)
        elif project_code == 'SUP':
            score_pair = psnr(ref_img, pre_img)
        # print(f'Img {ref_file}: score pair = {score_pair}')
        scores.append(score_pair)
    return sum(scores) / len(scores)


# #### code below should not be changed ############################################################################
def get_arguments():
    if len(sys.argv) != 4:
        print("Usage: python evaluation.py <project_code> <path_2_reference> <path_2_output_predictions>")
        sys.exit(1)

    try:
        project_code = sys.argv[1]
        path_2_reference = sys.argv[2]
        path_2_output_predictions = sys.argv[3]
    except Exception as e:
        print(e)
        sys.exit(1)
    return project_code, path_2_reference, path_2_output_predictions


if __name__ == "__main__":
    project_type, path_2_ground_truth, path_2_predictions = get_arguments()
    if project_type == 'DET':
        score = compute_avg_precision(project_type, path_2_ground_truth, path_2_predictions)
    elif project_type == 'CLA':
        score = compute_f1score4_classes(project_type, path_2_ground_truth, path_2_predictions)
    elif project_type == 'SEG':
        score = compute_avg_iou_mask(project_type, path_2_ground_truth, path_2_predictions)
    elif project_type == 'COL' or project_type == 'SUP':
        score = compute_simple_score_images(project_type, path_2_ground_truth, path_2_predictions)

    print(f'Final score for {project_type} project: {score}')