# utils.py
import torch
import torchvision
import numpy as np
from skimage.measure import ransac
from scipy.spatial import cKDTree
from skimage.transform import AffineTransform
#from sklearn.decomposition import PCA
#from Training.delf import Delf
#import pickle
# image pyramid scales (powers of sqrt(2), from 2 down to 0.25) used for multi-scale extraction
SCALES = [2, 1.4142, 1, 0.7071, 0.5, 0.3536, 0.25]
# receptive field parameters per target layer: (rf_size, rf_stride, padding)
RF_VALUES = {"layer3": (267, 16, 133), "layer4": (427, 32, 213)}

KD_TREE_DISTANCE_THRESHOLD = 4.5  # TODO: value still has to be evaluated empirically
RANSAC_MIN_SAMPLES = 3
RANSAC_NUM_TRIALS = 1000
RANSAC_RESIDUAL_THRESHOLD = 20


# TODO: will be moved to the experiment once the model and dataloaders are available

"""
def extract_features(self, stage, pca=None):
    model = Delf(10, "retrieval", "../Experiments/variable target layer/keypoints/5db43e8d_dbb65c50.pth").cuda()
    model.eval()
    all_features = None
    all_boxes = None
    feature_list = []
    with torch.no_grad():
        for image in range(5):  # placeholder loop; the dataloader goes here
            # TODO think about limiting the image size range. This works if all images are 2000x2000, but might use
            #  more memory if sizes vary; much larger images also need more than 8 GB of VRAM.
            #  Larger scales could also be restricted for big images.
            data = torch.rand((1, 3, 1000, 1000)).cuda()  # n x c x h x w
            label = 0
            filename = "blub"
            
            all_scale_features = None
            all_scale_scores = None
            all_scale_boxes = None
            for scale in SCALES:
                # get features and scores from model
                features, attention_scores = model.single_scale_retrieval(scale, data)
                # the features are not needed on the GPU anymore, so detach and move them to the CPU to save memory
                features = features.detach().cpu()
                # calculate the receptive boxes in original scale
                receptive_boxes = get_receptive_boxes(features.size(2), features.size(3), scale)
                # flatten h,w dimensions so we can append values from different scales
                # spatial information can be inferred from the receptive boxes
                features = features.view(features.size(1), -1).t()
                attention_scores = attention_scores.view(-1)
                receptive_boxes = receptive_boxes.view(-1, 4)
                # accumulate the values of all scales
                if all_scale_features is None:
                    all_scale_features = features
                else:
                    all_scale_features = torch.cat((all_scale_features, features), dim=0)
                if all_scale_scores is None:
                    all_scale_scores = attention_scores
                else:
                    all_scale_scores = torch.cat((all_scale_scores, attention_scores), dim=0)
                if all_scale_boxes is None:
                    all_scale_boxes = receptive_boxes
                else:
                    all_scale_boxes = torch.cat((all_scale_boxes, receptive_boxes), dim=0)
            # perform NMS based on scores and receptive boxes, then keep the top 1000 scoring boxes
            keep = get_top_k_index(all_scale_scores, all_scale_boxes.cuda(), iou_threshold=0.8, k=1000)
            # index select the best boxes
            # we do not need the values in cuda anymore
            all_scale_boxes = all_scale_boxes[keep].detach().cpu()
            # could probably just ignore scores at this point
            # all_scale_scores = all_scale_scores[keep].detach().cpu()
            all_scale_features = all_scale_features[keep]
            
            # note: this single call overwrites the results of the manual multi-scale loop above
            all_scale_features, all_scale_boxes = model.extract_features(data)
            # for PCA we can simply stitch all features together; for retrieval we need some kind of
            # reference to which image each feature belongs
            extracted_data = (all_scale_features, all_scale_boxes, label, filename)
            feature_list.append(extracted_data)
            if all_features is None:
                all_features = all_scale_features
            else:
                all_features = torch.cat((all_features, all_scale_features), dim=0)
            if all_boxes is None:
                all_boxes = all_scale_boxes
            else:
                all_boxes = torch.cat((all_boxes, all_scale_boxes), dim=0)
        # TODO the tf repo says to L2-normalize the features before PCA
        print(feature_list)
        print(len(feature_list))
        # convert features to numpy for pca
        all_features = all_features.numpy()
        # fit pca
        delf_pca = PCA(n_components=40, whiten=True)
        delf_pca.fit(all_features)
        pickle.dump(delf_pca, open("pca.pkl", "wb"))
        print(f"pca componenets {delf_pca.components_} explained var ratio {delf_pca.explained_variance_ratio_}")
        # test application of pca
        test_sample = torch.rand(size=(1,1024)).numpy()
        print(delf_pca.transform(test_sample))
        print("load")
        l_pca = pickle.load(open("pca.pkl", "rb"))
        print(l_pca.transform(test_sample))
        # TODO the tf repo says to L2-normalize the features after PCA as well
"""

def score_match(index_features, query_features, index_locations, query_locations):
    """
    Matches query features against index features with a KD-tree, geometrically verifies the
    matches with RANSAC, and returns the number of inliers as the match score.
    """
    index_tree = cKDTree(index_features)
    distances, indices = index_tree.query(
        query_features, distance_upper_bound=KD_TREE_DISTANCE_THRESHOLD, n_jobs=-1)
    # filter out query features with no close neighbour; the KD-tree returns the number of index
    # entries as the index when no match is found within the threshold
    cleaned_query_locations = np.array([
        query_locations[i] for i in range(query_locations.shape[0])
        if indices[i] != index_locations.shape[0]
    ])
    cleaned_index_locations = np.array([
        index_locations[indices[i]] for i in range(query_locations.shape[0])
        if indices[i] != index_locations.shape[0]
    ])
    if cleaned_query_locations.shape[0] <= RANSAC_MIN_SAMPLES:
        return 0
    # perform geometric verification using RANSAC
    # RANSAC currently takes ~35 times longer than the KD-tree query
    model_robust, inliers = ransac(
        (cleaned_index_locations, cleaned_query_locations),
        AffineTransform,
        min_samples=RANSAC_MIN_SAMPLES,
        residual_threshold=RANSAC_RESIDUAL_THRESHOLD,
        max_trials=RANSAC_NUM_TRIALS)
    if model_robust is None:
        return 0
    return int(inliers.sum())
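

# A minimal usage sketch for score_match with random placeholder data (hypothetical shapes:
# (n, d) float feature arrays and (n, 2) keypoint location arrays); with random features the
# KD-tree usually finds no neighbours within the threshold, so the score is typically 0.
def _demo_score_match():
    rng = np.random.default_rng(0)
    index_features = rng.normal(size=(100, 40)).astype(np.float32)
    query_features = rng.normal(size=(80, 40)).astype(np.float32)
    index_locations = rng.uniform(0, 1000, size=(100, 2))
    query_locations = rng.uniform(0, 1000, size=(80, 2))
    print(score_match(index_features, query_features, index_locations, query_locations))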


def get_receptive_boxes(height, width, scale, target_layer="layer3"):
    """
    calculates the receptive boxes for a feature map, based on its height and width and the RF parameters of the
    target layer. Rescales them to original scale before returning
    :param height:
    :param width:
    :param scale:
    :param target_layer:
    :return:
    """
    rf_size, rf_stride, padding = RF_VALUES[target_layer]
    offsets = torch.tensor([-padding, -padding, -padding + rf_size - 1, -padding + rf_size - 1], dtype=torch.float)
    boxes = get_coords(height, width) * rf_stride + offsets
    boxes = boxes / scale
    return boxes
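

# A quick sanity-check sketch for get_receptive_boxes and get_receptive_centers on a hypothetical
# 4x4 layer3 feature map at scale 1.0: each box should span rf_size - 1 = 266 pixels per side.
def _demo_receptive_boxes():
    boxes = get_receptive_boxes(4, 4, scale=1.0).view(-1, 4)
    print(boxes.shape)                          # torch.Size([16, 4])
    print((boxes[0, 2] - boxes[0, 0]).item())   # 266.0
    print(get_receptive_centers(boxes)[0])      # tensor([0., 0.]), center of the top-left box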


def get_receptive_centers(boxes):
    """
    Computes the center point (x, y) of each receptive box given as (x_min, y_min, x_max, y_max).
    """
    min_xy = boxes.index_select(dim=1, index=torch.LongTensor([0, 1]))
    max_xy = boxes.index_select(dim=1, index=torch.LongTensor([2, 3]))
    return (min_xy + max_xy) / 2


def get_coords(h, w):
    """
    Builds a coordinate system based on height and width on the input, with values for x_min, y_min, x_max, y_max
    :param h:
    :param w:
    :return:
    """
    grid_y, grid_x = torch.meshgrid([torch.arange(h), torch.arange(w)])
    coord = torch.stack([grid_x, grid_y, grid_x, grid_y], dim=2)
    return coord
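

# A small illustration of get_coords (hypothetical 2x3 feature map): position (y=1, x=2) holds
# (x, y, x, y) = (2, 1, 2, 1), which get_receptive_boxes turns into a box via stride and offsets.
def _demo_get_coords():
    coord = get_coords(2, 3)
    print(coord.shape)   # torch.Size([2, 3, 4])
    print(coord[1, 2])   # tensor([2, 1, 2, 1])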


def get_top_k_index(scores, boxes, iou_threshold, k):
    """
    Applies NMS then selects top k scoring remaining boxes
    :param scores:
    :param boxes:
    :param iou_threshold:
    :param k:
    :return:
    """
    keep = torchvision.ops.nms(boxes, scores, iou_threshold)
    keep = keep[:k]
    return keep
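

# A minimal sketch of get_top_k_index on made-up boxes: two heavily overlapping boxes and one
# separate box; NMS suppresses the lower-scoring overlap before the top-k cut is applied.
def _demo_top_k_index():
    boxes = torch.tensor([[0., 0., 10., 10.],
                          [1., 1., 11., 11.],
                          [50., 50., 60., 60.]])
    scores = torch.tensor([0.9, 0.8, 0.7])
    keep = get_top_k_index(scores, boxes, iou_threshold=0.5, k=2)
    print(keep)  # tensor([0, 2])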