from Training.delf import Delf
from Training.dataloader import get_data_loaders, get_path_data_loader
from sklearn.decomposition import PCA
from pathlib import Path
from Training.utils import score_match
import torch
import numpy as np
import uuid
import time
import json
import pickle

CENTER_CROP_SIZE_FINETUNING = 250
RANDOM_CROP_SITE_FINETUNING = 224
CENTER_CROP_SIZE_KEYPOINT = 900
RANDOM_CROP_SITE_KEYPOINT = 720

PCA_COMPONENTS = 40

BATCH_PRINT_FREQ = 20

class ExperimentManager:
    """
    ExperimentManager: Controls the various stages of the experiments. Parameters can be provided as single
    values or as dictionaries keyed by stage name. If they are single values, they will be used in all
    applicable stages. The stages must be provided as a set. Possible entries are "finetuning", "keypoints",
    "pca" and "retrieval".
    """
    def __init__(self, experiment_name, stages, dataset, validation_split=0.2, batch_size=8, num_workers=4, epochs=30,
                 learning_rate=0.008, learning_rate_gamma=0.5, learning_rate_step_size=10, weight_decay=0.0001,
                 load_from=None, target_layer="layer3", use_l2_normalization=True):
        """
        Validates all parameters, prepares the experiment directory structure and immediately runs the
        requested training stages ("finetuning" and/or "keypoints").
        :param experiment_name: name of the directory (below "Experiments") that receives all results
        :param stages: set with entries out of {"finetuning", "keypoints", "pca", "retrieval"}
        :param dataset: dataset root path, single value or dict per stage
        :param validation_split: fraction of the data used for validation (between 0 and 1)
        :param batch_size: samples per training batch
        :param num_workers: worker processes for the data loaders
        :param epochs: number of training epochs
        :param learning_rate: initial SGD learning rate
        :param learning_rate_gamma: multiplicative learning rate decay factor
        :param learning_rate_step_size: number of epochs between learning rate decay steps
        :param weight_decay: SGD weight decay
        :param load_from: optional dict mapping stage names to model paths to load parameters from
        :param target_layer: backbone layer features are taken from ("layer3" or "layer4")
        :param use_l2_normalization: whether the model l2-normalizes extracted features
        """
        print("Preparing experiment:")
        # check if desired stages are in valid format
        assert isinstance(stages, set), f"stages must be provided as set! E.g. {'finetuning', 'keypoints'}"
        assert stages.issubset({'finetuning', 'keypoints', 'pca', 'retrieval'}),\
            "stages must be subset of {'finetuning', 'keypoints', 'pca', 'retrieval'}"
        self.stages = stages
        print(f"Experiments contains following stages: {self.stages}")
        # crop sizes differ between the finetuning and keypoint training stages
        self.center_crop_size = {"finetuning": CENTER_CROP_SIZE_FINETUNING, "keypoints": CENTER_CROP_SIZE_KEYPOINT}
        self.random_crop_size = {"finetuning": RANDOM_CROP_SITE_FINETUNING, "keypoints": RANDOM_CROP_SITE_KEYPOINT}
        # set the cuda device
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        # create directory for the experiment
        self.experiment_path = Path.joinpath(Path("Experiments"), Path(experiment_name))
        if not self.experiment_path.is_dir():
            self.experiment_path.mkdir(parents=True)
        print(f"Results will be saved at {self.experiment_path}")
        # check if the provided parameters are in valid format and convert to dict if given as single value
        print("Checking validity of your input parameters!")
        self.validation_split = self.check_and_parse_parameter(parameter=validation_split,
                                                               parameter_name="validation_split",
                                                               required_type=float,
                                                               required_in_stages={"finetuning", "keypoints"},
                                                               min_val=0,
                                                               max_val=1
                                                               )
        self.batch_size = self.check_and_parse_parameter(parameter=batch_size,
                                                         parameter_name="batch_size",
                                                         required_type=int,
                                                         required_in_stages={"finetuning", "keypoints"},
                                                         min_val=1
                                                         )
        self.num_workers = self.check_and_parse_parameter(parameter=num_workers,
                                                          parameter_name="num_workers",
                                                          required_type=int,
                                                          required_in_stages={"finetuning", "keypoints"},
                                                          min_val=1,
                                                          )
        self.epochs = self.check_and_parse_parameter(parameter=epochs,
                                                     parameter_name="epochs",
                                                     required_type=int,
                                                     required_in_stages={"finetuning", "keypoints"},
                                                     min_val=1
                                                     )
        self.learning_rate = self.check_and_parse_parameter(parameter=learning_rate,
                                                            parameter_name="learning_rate",
                                                            required_type=float,
                                                            required_in_stages={"finetuning", "keypoints"},
                                                            min_val=0.001
                                                            )
        self.learning_rate_gamma = self.check_and_parse_parameter(parameter=learning_rate_gamma,
                                                                  parameter_name="learning_rate_gamma",
                                                                  required_type=float,
                                                                  required_in_stages={"finetuning", "keypoints"},
                                                                  min_val=0.1,
                                                                  max_val=1.0
                                                                  )
        self.learning_rate_step_size = self.check_and_parse_parameter(parameter=learning_rate_step_size,
                                                                      parameter_name="learning_rate_step_size",
                                                                      required_type=int,
                                                                      required_in_stages={"finetuning", "keypoints"},
                                                                      min_val=1
                                                                      )
        self.weight_decay = self.check_and_parse_parameter(parameter=weight_decay,
                                                           parameter_name="weight_decay",
                                                           required_type=float,
                                                           required_in_stages={"finetuning", "keypoints"},
                                                           min_val=0.0
                                                           )
        # these two parameters must be identical for every stage of one experiment
        self.target_layer = check_experiment_wide_parameter(target_layer, "target_layer", str, ["layer3", "layer4"])
        self.use_l2_normalization = check_experiment_wide_parameter(use_l2_normalization, "use_l2_normalization",
                                                                    bool, [True, False])
        self.dataset = self.check_and_parse_parameter(parameter=dataset,
                                                      parameter_name="dataset",
                                                      required_type=str,
                                                      required_in_stages={"finetuning", "keypoints", "pca", "retrieval"}
                                                      )
        # convert dataset path strings to proper paths
        self.dataset = {stage: Path(self.dataset[stage]) for stage in self.dataset.keys()}
        # ensure dataset paths exist
        for stage in self.dataset.keys():
            assert self.dataset[stage].is_dir(), f"dataset directory {self.dataset[stage]} does not exist"
        print("All dataset directories found!")
        # create load and save directories for the created/ used models. Automatically infer paths if multiple stages
        # are trained in sequence
        self.load_paths, self.save_paths = self.get_load_and_save_paths(load_from)
        # TODO if retrieval in self.stages and pca not in self.stages: assert pca file is available

        # run the training stages right away; pca/retrieval are triggered explicitly by the caller
        if "finetuning" in self.stages:
            self.train_model("finetuning")
        if "keypoints" in self.stages:
            self.train_model("keypoints")

    def get_load_and_save_paths(self, load_from):
        """
        Uses load_from parameter and information on previously run stages to find load paths for needed models.
        Also determines the save paths for the models after completing a training phase.
        :param load_from: optional dict mapping stage names to model paths to load parameters from
        :return: load_paths, save_paths
        """
        # determine selected stages that need a model to load parameters from
        required_load_paths = self.stages.intersection({"keypoints", "pca", "retrieval"})
        # create or load load_paths container
        if load_from is not None:
            assert isinstance(load_from, dict), "if load from is defined it must be given as a dict"
            load_paths = {stage: Path(load_from[stage]) for stage in load_from.keys()}
        else:
            load_paths = {}
        stage_base_path = {}
        model_save_paths = {}
        for stage in self.stages:
            # create base directory for current stage
            base_path = self.experiment_path / stage
            if not base_path.is_dir():
                base_path.mkdir()
            stage_base_path[stage] = base_path
        if "finetuning" in self.stages:
            # models get a unique identifier as a name base
            model_save_paths["finetuning"] = stage_base_path["finetuning"] / f"{str(uuid.uuid4())[:8]}.pth"
            load_paths["finetuning"] = None
            # if we continue with keypoints training we will use the model just created during finetuning
            if "keypoints" in required_load_paths:
                assert "keypoints" not in load_paths.keys(),\
                    "running finetuning and keypoints in the same experiment assumes usage of the newly created" \
                    " finetune model! Do not provide a load path for keypoint training!"
                load_paths["keypoints"] = model_save_paths["finetuning"]
        if "keypoints" in self.stages:
            model_name = load_paths["keypoints"].stem
            # for the keypoint training the model name is simply extended, so we know what model it belongs to
            model_save_paths["keypoints"] = stage_base_path["keypoints"] / \
                f"{model_name}_{str(uuid.uuid4())[:8]}.pth"
            # pca and retrieval must reuse this model if run in the same experiment
            if "pca" in required_load_paths:
                assert "pca" not in load_paths.keys(), "running keypoints and pca in the same experiment assumes" \
                                                       " usage of the newly created keypoint model! Do not provide a " \
                                                       "load path for keypoint training!"
                load_paths["pca"] = model_save_paths["keypoints"]
            if "retrieval" in required_load_paths:
                assert "retrieval" not in load_paths.keys(), "running keypoints and retrieval in the same experiment" \
                                                             " assumes usage of the newly created retrieval model!" \
                                                             " Do not provide a load path for keypoint training!"
                load_paths["retrieval"] = model_save_paths["keypoints"]
        if "pca" in self.stages:
            # pca keeps the model name with a different file extension as the result should be deterministic
            model_name = load_paths["pca"].stem
            model_save_paths["pca"] = stage_base_path["pca"] / f"{model_name}.pca"
            if "retrieval" in required_load_paths:
                assert load_paths["pca"] == load_paths["retrieval"], "pca and retrieval must load from the same" \
                                                                     " keypoints model when done in the same " \
                                                                     "experiment!"
        # check if all required load_paths are given or inferred at this point
        if "keypoints" in required_load_paths and "finetuning" not in self.stages:
            assert load_paths["keypoints"].is_file(), f"could not find finetune model at {load_paths['keypoints']}"
        if "pca" in required_load_paths and "keypoints" not in self.stages:
            assert load_paths["pca"].is_file(), f"could not find keypoints model at {load_paths['pca']}"
        if "retrieval" in required_load_paths and "keypoints" not in self.stages:
            assert load_paths["retrieval"].is_file(), f"could not find keypoints model at {load_paths['retrieval']}"
        print("Load and save paths are valid!")
        for stage in load_paths.keys():
            print(f"In stage {stage} loading model from {load_paths[stage]}")
        for stage in model_save_paths.keys():
            print(f"In stage {stage} saving model to {model_save_paths[stage]}")
        return load_paths, model_save_paths

    def check_and_parse_parameter(self, parameter, parameter_name, required_type, required_in_stages,
                                  min_val=None, max_val=None):
        """
        Checks if a parameter is of the correct type and in bounds. Creates a dict if a single value is given.
        :param parameter: single value or dict keyed by stage name
        :param parameter_name: name used in error and info messages
        :param required_type: type every value must have
        :param required_in_stages: stages for which the parameter must be available
        :param min_val: optional inclusive lower bound
        :param max_val: optional inclusive upper bound
        :return: dict mapping each required stage to its value
        """
        # only stages that are part of this experiment actually need the parameter
        required_in_stages = self.stages.intersection(required_in_stages)
        if isinstance(parameter, dict):
            assert required_in_stages.issubset(parameter.keys()),\
                f"{parameter_name} must be provided for {required_in_stages} but was only given for {parameter.keys()}"
        else:
            parameter = {stage: parameter for stage in required_in_stages}
        assert all([isinstance(parameter_instance, required_type) for parameter_instance in parameter.values()]), \
            f"{parameter_name} must be {required_type} or dict of {required_type}"
        if min_val is not None:
            assert all([val >= min_val for val in parameter.values()]),\
                f"{parameter_name} must be larger or equal to {min_val}"
        if max_val is not None:
            assert all([val <= max_val for val in parameter.values()]),\
                f"{parameter_name} must be smaller or equal to {max_val}"
        print(f"Values for {parameter_name} are in valid format")
        for stage in parameter.keys():
            print(f"Using {parameter[stage]} at stage {stage}")
        return parameter

    def train_model(self, stage):
        """
        Runs the full training loop for one stage: trains and validates for the configured number of epochs,
        checkpoints the model with the lowest validation loss and writes a json log of the run.
        :param stage: "finetuning" or "keypoints"
        """
        print("Creating data loaders")
        training_loader, validation_loader = get_data_loaders(self.dataset[stage], self.validation_split[stage],
                                                              self.batch_size[stage], self.num_workers[stage],
                                                              self.center_crop_size[stage],
                                                              self.random_crop_size[stage])
        print(f"Creating {stage}-model")
        model = Delf(len(training_loader.dataset.dataset.classes), stage, self.load_paths[stage],
                     target_layer=self.target_layer, use_l2_normalization=self.use_l2_normalization).to(self.device)
        print(f"Starting {stage} training.")
        min_validation_loss = np.inf
        criterion = torch.nn.CrossEntropyLoss()
        # only optimize the parameters that are trainable in this stage
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                    lr=self.learning_rate[stage],
                                    weight_decay=self.weight_decay[stage])
        learning_rate_scheduler = \
            torch.optim.lr_scheduler.StepLR(optimizer=optimizer,
                                            step_size=self.learning_rate_step_size[stage],
                                            gamma=self.learning_rate_gamma[stage])
        model_path = self.save_paths[stage]
        start = time.time()
        val_accuracy_log = []
        val_loss_log = []
        for epoch in range(self.epochs[stage]):
            # training
            model.train()
            running_loss = 0.0
            for i, batch in enumerate(training_loader, 0):
                # get the inputs; data is a list of [inputs, labels]
                inputs, labels = batch[0].to(self.device), batch[1].to(self.device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward + backward + optimize
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                # print statistics
                running_loss += loss.item()
                if i % BATCH_PRINT_FREQ == BATCH_PRINT_FREQ-1:  # print every BATCH_PRINT_FREQ
                    print(f'[{epoch}: {i + 1}/{len(training_loader)}] loss: {running_loss/BATCH_PRINT_FREQ:.3f}'
                          f' learning rate: {learning_rate_scheduler.get_lr()[0]}'
                          f' total time: {format_time(time.time()-start)}')
                    running_loss = 0.0
            # validation
            model.eval()
            print("Running Validation")
            with torch.no_grad():
                average_validation_loss = 0.0
                total = 0
                correct = 0
                running_total = 0
                running_correct = 0
                running_loss = 0.0
                for i, batch in enumerate(validation_loader, 0):
                    inputs, labels = batch[0].to(self.device), batch[1].to(self.device)
                    outputs = model(inputs)
                    predicted = torch.argmax(outputs.data, 1)
                    # .item() keeps the accumulators as plain python floats instead of tensors
                    batch_validation_loss = criterion(outputs, labels).item()
                    batch_correct = (predicted == labels).sum().item()

                    running_total += labels.size(0)
                    running_correct += batch_correct
                    running_loss += batch_validation_loss

                    if i % BATCH_PRINT_FREQ == BATCH_PRINT_FREQ-1:
                        print(f'[{epoch}: {i + 1}/{len(validation_loader)}] loss: {running_loss / BATCH_PRINT_FREQ:.3f}'
                              f' batch accuracy: {running_correct/running_total:.2f}'
                              f' total time: {format_time(time.time()-start)}')
                        running_total = 0
                        running_correct = 0
                        running_loss = 0.0
                    total += labels.size(0)
                    correct += batch_correct
                    average_validation_loss += batch_validation_loss

                average_validation_loss /= (len(validation_loader))
                accuracy = correct/total
                val_accuracy_log.append(accuracy)
                val_loss_log.append(average_validation_loss)
                print(f"Validation complete. Average validation loss: {average_validation_loss:.3f}"
                      f" Validation accuracy: {accuracy:.2f}")
            # check for validation loss improvement
            if average_validation_loss < min_validation_loss:
                min_validation_loss = average_validation_loss
                print(f"new best epoch! saving model to {model_path}")
                # create checkpoint of model with lowest validation loss so far
                torch.save(model.get_state_for_saving(), model_path)
            # learning rate scheduler is slightly buggy in this version of torch but should not matter for training.
            learning_rate_scheduler.step()
        # persist the run configuration and validation history next to the model checkpoint
        log_path = Path.joinpath(model_path.parent, Path(f"{model_path.stem}_log.json"))
        log = {"Experiment": str(self.experiment_path.stem),
               "Stage": stage,
               "Dataset": str(self.dataset[stage]),
               "Learning_Rate": self.learning_rate[stage],
               "Learning_Rate_Gamma": self.learning_rate_gamma[stage],
               "Learning_Rate_Step_Size": self.learning_rate_step_size[stage],
               "Weight_Decay": self.weight_decay[stage],
               "Epochs": self.epochs[stage],
               "Validation_Accuracy": val_accuracy_log,
               "Validation_Loss": val_loss_log
               }
        with log_path.open('w', encoding='utf-8') as json_file:
            json.dump(log, json_file, ensure_ascii=False)
        print(f"Completed {stage} training!")

    def perform_retrieval(self, model_path, index_dataset, query_dataset, pca_load=None, pca_dataset=None,
                          pca_save=None):
        """
        Extracts features for the index and query sets and scores every query image against every index image.
        The PCA matrix is either loaded from disk (pca_load), fitted on a dedicated dataset (pca_dataset) or,
        as a fallback, fitted on the index set itself.
        :param model_path: path of the keypoint model to load in retrieval mode
        :param index_dataset: path of the dataset used to build the index
        :param query_dataset: path of the dataset containing the query images
        :param pca_load: optional path of a pickled, already-fitted PCA object
        :param pca_dataset: optional path of a dataset to fit the PCA on
        :param pca_save: path the newly fitted PCA is saved to (only used with pca_dataset)
        """
        # load model in retrieval mode
        model = Delf(None, "retrieval", model_path).to(self.device)
        model.eval()
        # used to check if pca has been provided or is calculated on dedicated data
        use_index_for_pca = False
        pca = None
        with torch.no_grad():
            if pca_load:
                # load pca from pickle file; context manager closes the handle again
                pca_load = Path(pca_load)
                with pca_load.open("rb") as pca_file:
                    pca = pickle.load(pca_file)
                print(f"loaded pca matrix from {pca_load}")
            elif pca_dataset:
                print(f"calculating pca matrix on {pca_dataset}")
                accumulated_features = None
                pca_dataset = Path(pca_dataset)
                pca_data_loader = get_path_data_loader(pca_dataset)
                # iterate over dataset
                for ind, batch in enumerate(pca_data_loader):
                    # get img tensor
                    image_input = batch[0].to(self.device)
                    # get features, ignore boxes
                    features, _ = model.extract_features(image_input)
                    # accumulate all features in one container
                    if accumulated_features is None:
                        accumulated_features = features
                    else:
                        accumulated_features = torch.cat((accumulated_features, features), dim=0)
                    print(f"[{ind+1}/{len(pca_data_loader)}] gathering features for pca:"
                          f" {accumulated_features.size()[0]} features gathered")
                # Delf recommends normalizing features before pca
                accumulated_features = torch.nn.functional.normalize(accumulated_features, p=2, dim=1)
                # scikit pca needs a cpu numpy array as input (tensors may live on the gpu)
                accumulated_features = accumulated_features.cpu().numpy()
                # calculate pca matrix and save to file
                pca = calculate_pca(accumulated_features, Path(pca_save))
            else:
                # if we do not have a pca matrix yet, use the index set
                print(f"calculating pca matrix on {index_dataset}")
                use_index_for_pca = True
            # container for all the gathered index information
            index_extraction_list = []
            if use_index_for_pca:
                # accumulator for pca
                accumulated_features = None
            # iterate over dataset
            index_dataset = Path(index_dataset)
            index_data_loader = get_path_data_loader(index_dataset)
            for ind, batch in enumerate(index_data_loader):
                # get all image info from loader
                image_input, label, path = batch
                image_input = image_input.to(self.device)
                # get features and rf info from model
                features, rf_centers = model.extract_features(image_input)
                # Delf says normalize before pca
                features = torch.nn.functional.normalize(features, p=2, dim=1)
                if pca is not None:
                    # if we already have pca calculated we can apply it now and save space;
                    # move to cpu first so .numpy() also works for cuda tensors
                    features = pca.transform(features.cpu().numpy())
                # create data entry for the image
                index_extraction_list.append((Path(path[0]).stem, label, features, rf_centers.cpu().numpy()))
                if use_index_for_pca:
                    # if we still have to calculate pca: accumulate features
                    if accumulated_features is None:
                        accumulated_features = features
                    else:
                        accumulated_features = torch.cat((accumulated_features, features), dim=0)
                print(f"[{ind + 1}/{len(index_data_loader)}] gathering index features")
            if use_index_for_pca:
                # calculate pca and apply it to already extracted features if we have not done so already
                pca = calculate_pca(accumulated_features.cpu().numpy(), Path("pca.pkl"))
                index_extraction_list = [(entry[0], entry[1], pca.transform(entry[2].cpu().numpy()), entry[3])
                                         for entry in index_extraction_list]

            query_dataset = Path(query_dataset)
            query_data_loader = get_path_data_loader(query_dataset)
            for ind, batch in enumerate(query_data_loader):
                # get all image info from loader
                image_input, label, path = batch
                # TODO(review): filename is never used below; presumably meant for the result print
                filename = Path(path[0]).stem
                image_input = image_input.to(self.device)
                # get features and rf info from model
                features, rf_centers = model.extract_features(image_input)
                # Delf says normalize before pca
                features = torch.nn.functional.normalize(features, p=2, dim=1)
                # apply pca
                features = pca.transform(features.cpu().numpy())
                rf_centers = rf_centers.cpu().numpy()
                now = time.time()
                # score the query against every index entry via geometric verification
                for index_representation in index_extraction_list:
                    inliers = score_match(index_representation[2], features, index_representation[3], rf_centers)
                    print(f"query image (unknown), index image{index_representation[0]} inliers {inliers}")
                print(time.time()-now)
def calculate_pca(data, save_path=None):
    """
    Fits a whitened PCA with PCA_COMPONENTS components on the given feature matrix and optionally
    pickles the fitted object to disk.
    :param data: 2d array (n_samples x n_features) of descriptors
    :param save_path: optional Path the fitted PCA object is pickled to
    :return: the fitted sklearn PCA object
    """
    pca = PCA(n_components=PCA_COMPONENTS, whiten=True)
    pca.fit(data)
    print(f"calculated pca matrix. Explained variance is {sum(pca.explained_variance_ratio_):.2f}"
          f" over {pca.n_components} components")
    if save_path:
        print(f"saving pca data to {save_path}")
        # context manager guarantees the file handle is flushed and closed
        with save_path.open("wb") as pca_file:
            pickle.dump(pca, pca_file)
    return pca

def format_time(seconds):
    """
    Formats a duration given in seconds as "[HH:MM:SS]".
    :param seconds: elapsed time in seconds; fractional parts are truncated
    :return: the formatted duration string
    """
    minutes, seconds = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f"[{hours:02d}:{minutes:02d}:{seconds:02d}]"

def check_experiment_wide_parameter(parameter, parameter_name, required_type, allowed_values=None):
    """
    Validates a parameter that must be identical across all stages of the experiment. A dict whose
    values all agree is collapsed to that single value.
    :param parameter: single value, or dict of per-stage values that must all be equal
    :param parameter_name: name used in error and info messages
    :param required_type: type the value must have
    :param allowed_values: optional collection of permitted values
    :return: the validated single value
    """
    if isinstance(parameter, dict):
        assert parameter, f"Value of {parameter_name} was empty dict. This is an experiment wide parameter," \
                          f" provide a single values of type {required_type} instead"
        expected_value = next(iter(parameter.values()))
        # every stage must carry the exact same value, otherwise the parameter is not experiment wide
        assert all(value == expected_value for value in parameter.values()), f"Value for {parameter_name} " \
                                                                             f"must be same for all stages," \
                                                                             f" but was not. Provide single " \
                                                                             f"value instead"
        print(f"{parameter_name} should be given as single value and was converted accordingly")
        parameter = expected_value
    assert isinstance(parameter, required_type), f"{parameter_name} must be of type {required_type}"
    if allowed_values is not None:
        assert parameter in allowed_values, f"Value for {parameter_name} must be in {allowed_values}"
    print(f"Using {parameter} for {parameter_name}")
    return parameter


torch.backends.cudnn.benchmark = True

# guard the experiment run so importing this module does not trigger a full retrieval pass
if __name__ == "__main__":
    exp = ExperimentManager("variable_target_layer", {"retrieval"}, "../../Datasets/Oxford/index", load_from={"retrieval":"../Experiments/variable target layer/keypoints/5db43e8d_dbb65c50.pth"}).perform_retrieval("../Experiments/variable target layer/keypoints/5db43e8d_dbb65c50.pth", "../../Datasets/Oxford/index", "../../Datasets/Oxford/query", pca_load="pca.pkl")
    #exp = ExperimentManager("variable target layer", {"finetuning","keypoints"}, "../Datasets/Landmarks", epochs=1)
    #exp = ExperimentManager("variable target layer", {"keypoints"}, "../Datasets/Landmarks", epochs=1, load_from={"keypoints":"Experiments/variable target layer/finetuning/5db43e8d.pth"})