- put retrieval into experiment manager structure

keypoint training is weirdly performing much worse now?

Todos: matching parameters (thresholds, trials, etc.) need estimation
process output information
parent d6469a27
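A first pass at the matching-parameter estimation noted in the todo could be a plain validation sweep. A minimal sketch, assuming a scoring callback (evaluate_at is hypothetical; nothing like it exists in this commit):

# hypothetical: evaluate_at(threshold) runs matching on a labelled validation
# split at the given KD-tree distance cutoff and returns a quality score
def estimate_distance_threshold(evaluate_at, candidates=(2.0, 3.0, 4.5, 6.0)):
    scores = {t: evaluate_at(t) for t in candidates}
    return max(scores, key=scores.get)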
from Training.delf import Delf
from Training.dataloader import get_data_loaders, get_path_data_loader
from sklearn.decomposition import PCA
from pathlib import Path
from Training.utils import score_match, visualize_match
from Training.utils import score_match, visualize_match, calculate_pca, format_time
import torch
import numpy as np
import uuid
@@ -13,7 +12,6 @@ CENTER_CROP_SIZE_FINETUNING = 250
RANDOM_CROP_SITE_FINETUNING = 224
CENTER_CROP_SIZE_KEYPOINT = 900
RANDOM_CROP_SITE_KEYPOINT = 720
PCA_COMPONENTS = 40
BATCH_PRINT_FREQ = 20
@@ -25,12 +23,12 @@ class ExperimentManager:
"""
def __init__(self, experiment_name, stages, dataset, validation_split=0.2, batch_size=8, num_workers=4, epochs=30,
learning_rate=0.008, learning_rate_gamma=0.5, learning_rate_step_size=10, weight_decay=0.0001,
load_from=None, target_layer="layer3", use_l2_normalization=True):
load_from=None, target_layer="layer3", use_l2_normalization=True, pca_dataset=None, pca_load=None):
print("Preparing experiment:")
# check if desired stages are in valid format
assert isinstance(stages, set), "stages must be provided as a set, e.g. {'finetuning', 'keypoints'}"
assert stages.issubset({'finetuning', 'keypoints', 'pca', 'retrieval'}),\
"stages must be subset of {'finetuning', 'keypoints', 'pca', 'retrieval'}"
assert stages.issubset({'finetuning', 'keypoints', 'retrieval'}),\
"stages must be subset of {'finetuning', 'keypoints', 'retrieval'}"
self.stages = stages
print(f"Experiments contains following stages: {self.stages}")
self.center_crop_size = {"finetuning": CENTER_CROP_SIZE_FINETUNING, "keypoints": CENTER_CROP_SIZE_KEYPOINT}
@@ -38,7 +36,7 @@ class ExperimentManager:
# set the cuda device
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# create directory for the experiment
self.experiment_path = Path.joinpath(Path("Experiments"), Path(experiment_name))
self.experiment_path = Path.joinpath(Path("../Experiments"), Path(experiment_name))
if not self.experiment_path.is_dir():
self.experiment_path.mkdir(parents=True)
print(f"Results will be saved at {self.experiment_path}")
@@ -60,7 +58,7 @@ class ExperimentManager:
self.num_workers = self.check_and_parse_parameter(parameter=num_workers,
parameter_name="num_workers",
required_type=int,
required_in_stages={"finetuning", "keypoints"},
required_in_stages={"finetuning", "keypoints", "retrieval"},
min_val=1,
)
self.epochs = self.check_and_parse_parameter(parameter=epochs,
@@ -100,13 +98,33 @@ class ExperimentManager:
self.dataset = self.check_and_parse_parameter(parameter=dataset,
parameter_name="dataset",
required_type=str,
required_in_stages={"finetuning", "keypoints", "pca", "retrieval"}
required_in_stages={"finetuning", "keypoints", "retrieval"}
)
self.pca_dataset = pca_dataset
self.pca_load = pca_load
if "retrieval" in self.stages:
if self.pca_dataset is not None:
self.pca_dataset = Path(pca_dataset)
assert self.pca_dataset.is_dir(), f"Dataset for pca calculation at {self.pca_dataset} not found!"
print(f"PCA is calculated on dedicated dataset at {self.pca_dataset}")
assert self.pca_load is None, "pca_dataset is set so pca_load should be None!"
elif pca_load is not None:
self.pca_load = Path(pca_load)
assert self.pca_load.is_file(), f"Did not find pca file at {self.pca_load}!"
print(f"Using pre-calculated pca matrix: {self.pca_load}")
else:
print(f"PCA is calculated on index part of retrieval dataset at {self.dataset['retrieval']}")
# convert dataset path strings to proper paths
self.dataset = {stage: Path(self.dataset[stage]) for stage in self.dataset.keys()}
# ensure dataset paths exist
for stage in self.dataset.keys():
assert self.dataset[stage].is_dir()
assert self.dataset[stage].is_dir(), f"Dataset {self.dataset[stage]} for {stage} not found!"
if stage == "retrieval":
assert self.dataset[stage].joinpath(Path("index")).is_dir(), "Dataset for retrieval must contain an index subdirectory!"
assert self.dataset[stage].joinpath(Path("query")).is_dir(), "Dataset for retrieval must contain a query subdirectory!"
print("All dataset directories found!")
# create load and save directories for the created/ used models. Automatically infer paths if multiple stages
# are trained in sequence
@@ -117,6 +135,8 @@ class ExperimentManager:
self.train_model("finetuning")
if "keypoints" in self.stages:
self.train_model("keypoints")
if "retrieval" in self.stages:
self.perform_retrieval()
def get_load_and_save_paths(self, load_from):
"""
@@ -126,7 +146,7 @@ class ExperimentManager:
:return: load_paths, save_paths
"""
# determine selected stages that need a model to load parameters from
required_load_paths = self.stages.intersection({"keypoints", "pca", "retrieval"})
required_load_paths = self.stages.intersection({"keypoints", "retrieval"})
# create or load load_paths container
if load_from is not None:
assert isinstance(load_from, dict), "if load from is defined it must be given as a dict"
@@ -157,30 +177,16 @@ class ExperimentManager:
# for the keypoint training the model name is simply extended, so we know which model it belongs to
model_save_paths["keypoints"] = Path.joinpath(stage_base_path["keypoints"],
Path(f"{model_name}_{uuid.uuid4().__str__()[:8]}.pth"))
# pca and retrieval must reuse this model if run in the same experiment
if "pca" in required_load_paths:
assert "pca" not in load_paths.keys(), "running keypoints and pca in the same experiment assumes" \
" usage of the newly created keypoint model! Do not provide a " \
"load path for keypoint training!"
load_paths["pca"] = model_save_paths["keypoints"]
# retrieval must reuse this model if run in the same experiment
if "retrieval" in required_load_paths:
assert "retrieval" not in load_paths.keys(), "running keypoints and retrieval in the same experiment" \
" assumes usage of the newly created retrieval model!" \
" Do not provide a load path for keypoint training!"
load_paths["retrieval"] = model_save_paths["keypoints"]
if "pca" in self.stages:
# pca keeps the model name with a different file extension as the result should be deterministic
model_name = load_paths["pca"].stem
model_save_paths["pca"] = Path.joinpath(stage_base_path["pca"], Path(f"{model_name}.pca"))
if "retrieval" in required_load_paths:
assert load_paths["pca"] == load_paths["retrieval"], "pca and retrieval must load from the same" \
" keypoints model when done in the same " \
"experiment!"
# check if all required load_paths are given or inferred at this point
if "keypoints" in required_load_paths and "finetuning" not in self.stages:
assert load_paths["keypoints"].is_file(), f"could not find finetune model at {load_paths['keypoints']}"
if "pca" in required_load_paths and "keypoints" not in self.stages:
assert load_paths["pca"].is_file(), f"could not find keypoints model at {load_paths['pca']}"
if "retrieval" in required_load_paths and "keypoints" not in self.stages:
assert load_paths["retrieval"].is_file(), f"could not find keypoints model at {load_paths['retrieval']}"
print("Load and save paths are valid!")
@@ -327,24 +333,27 @@ class ExperimentManager:
json.dump(log, json_file, ensure_ascii=False)
print(f"Completed {stage} training!")
def perform_retrieval(self, model_path, index_dataset, query_dataset, pca_load=None, pca_dataset=None, pca_save=None):
def perform_retrieval(self):
# load model in retrieval mode
model = Delf(None, "retrieval", model_path).to(self.device)
model = Delf(None, "retrieval", self.load_paths["retrieval"]).to(self.device)
model.eval()
# create dataset paths
index_dataset = self.dataset["retrieval"].joinpath(Path("index"))
query_dataset = self.dataset["retrieval"].joinpath(Path("query"))
# create pca save path
stage_path = self.experiment_path.joinpath(Path("retrieval"))
# used to check if pca has been provided or is calculated on dedicated data
use_index_for_pca = False
pca = None
with torch.no_grad():
if pca_load:
if self.pca_load:
# load pca from pickle file
pca_load = Path(pca_load)
pca = pickle.load(pca_load.open("rb"))
print(f"loaded pca matrix from {pca_load}")
elif pca_dataset:
print(f"calculating pca matrix on {pca_dataset}")
pca = pickle.load(self.pca_load.open("rb"))
print(f"loaded pca matrix from {self.pca_load}")
elif self.pca_dataset:
print(f"calculating pca matrix on {self.pca_dataset}")
accumulated_features = None
pca_dataset = Path(pca_dataset)
pca_data_loader = get_path_data_loader(pca_dataset)
pca_data_loader = get_path_data_loader(self.pca_dataset, num_workers=self.num_workers["retrieval"])
# iterate over dataset
for ind, batch in enumerate(pca_data_loader):
# get img tensor
@@ -363,7 +372,9 @@ class ExperimentManager:
# scikit-learn PCA needs a numpy array as input
accumulated_features = accumulated_features.numpy()
# calculate pca matrix and save to file
pca = calculate_pca(accumulated_features, Path(pca_save))
# filename is keypoint_model_dataset.pca
pca = calculate_pca(accumulated_features, stage_path.joinpath(Path(
f"{self.load_paths['retrieval'].stem}_{self.pca_dataset.stem}.pca")))
else:
# if we do not have a pca matrix yet, use the index set
print(f"calculating pca matrix on {index_dataset}")
@@ -375,7 +386,7 @@ class ExperimentManager:
accumulated_features = None
# iterate over dataset
index_dataset = Path(index_dataset)
index_data_loader = get_path_data_loader(index_dataset)
index_data_loader = get_path_data_loader(index_dataset, num_workers=self.num_workers["retrieval"])
for ind, batch in enumerate(index_data_loader):
# get all image info from loader
image_input, label, path = batch
@@ -398,11 +409,12 @@ class ExperimentManager:
print(f"[{ind + 1}/{len(index_data_loader)}] gathering index features")
if use_index_for_pca:
# calculate pca and apply it to already extracted features if we have not done so already
pca = calculate_pca(accumulated_features, Path("pca.pkl"))
pca = calculate_pca(accumulated_features, stage_path.joinpath(
f"{self.load_paths['retrieval'].stem}_{self.dataset['retrieval'].stem}.pca"))
index_extraction_list = [(entry[0], entry[1], pca.transform(entry[2].numpy()), entry[3]) for entry in index_extraction_list]
query_dataset = Path(query_dataset)
query_data_loader = get_path_data_loader(query_dataset)
query_data_loader = get_path_data_loader(query_dataset, num_workers=self.num_workers["retrieval"])
for ind, batch in enumerate(query_data_loader):
# get all image info from loader
image_input, label, path = batch
@@ -434,23 +446,6 @@ class ExperimentManager:
visualize_match(matching_results[i][3], path[0], matching_results[i][1], matching_results[i][2], matching_results[i][0])
def calculate_pca(data, save_path=None):
pca = PCA(n_components=PCA_COMPONENTS, whiten=True)
pca.fit(data)
print(f"calculated pca matrix. Explained variance is {sum(pca.explained_variance_ratio_):.2f}"
f" over {pca.n_components} components")
if save_path:
print(f"saving pca data to {save_path}")
pickle.dump(pca, save_path.open("wb"))
return pca
def format_time(seconds):
minutes, seconds = divmod(int(seconds), 60)
hours, minutes = divmod(minutes, 60)
return f"[{hours:02d}:{minutes:02d}:{seconds:02d}]"
def check_experiment_wide_parameter(parameter, parameter_name, required_type, allowed_values=None):
if isinstance(parameter, dict):
assert parameter, f"Value of {parameter_name} was empty dict. This is an experiment wide parameter," \
@@ -470,6 +465,7 @@ def check_experiment_wide_parameter(parameter, parameter_name, required_type, al
torch.backends.cudnn.benchmark = True
exp = ExperimentManager("variable_target_layer", {"retrieval"}, "../../Datasets/Oxford/index", load_from={"retrieval":"../Experiments/variable target layer/keypoints/5db43e8d_dbb65c50.pth"}).perform_retrieval("../Experiments/variable target layer/keypoints/5db43e8d_dbb65c50.pth", "../../Datasets/Oxford/index", "../../Datasets/Oxford/query", pca_load="pca.pkl")
#exp = ExperimentManager("variable target layer", {"finetuning","keypoints"}, "../Datasets/Landmarks", epochs=1)
#exp = ExperimentManager("variable target layer", {"keypoints"}, "../Datasets/Landmarks", epochs=1, load_from={"keypoints":"Experiments/variable target layer/finetuning/5db43e8d.pth"})
#exp = ExperimentManager("variable target layer", {"retrieval"}, "../../Datasets/Oxford/index", load_from={"retrieval":"../Experiments/variable target layer/keypoints/5db43e8d_dbb65c50.pth"}).perform_retrieval("../Experiments/variable target layer/keypoints/5db43e8d_dbb65c50.pth", "../../Datasets/Oxford/index", "../../Datasets/Oxford/query", pca_load="pca.pkl")
#exp = ExperimentManager("all_stages", {"finetuning","keypoints","retrieval"}, {"finetuning": "../../Datasets/Landmarks", "keypoints": "../../Datasets/Landmarks", "retrieval": "../../Datasets/Oxford"}, epochs=5)
#exp = ExperimentManager("all_stages", {"keypoints","retrieval"}, {"finetuning": "../../Datasets/Landmarks", "keypoints":"../../Datasets/Landmarks", "retrieval": "../../Datasets/Oxford"}, epochs=1, load_from={"keypoints":"../Experiments/all_stages/keypoints/c4ad755b_9c629910.pth"})
exp = ExperimentManager("variable target layer", {"keypoints"}, "../../Datasets/Landmarks", epochs=1, load_from={"keypoints":"../Experiments/variable target layer/finetuning/5db43e8d.pth"})
import torch
import torchvision
import pickle
import numpy as np
from sklearn.decomposition import PCA
from skimage.measure import ransac
from scipy.spatial import cKDTree
from skimage.transform import AffineTransform
@@ -11,6 +13,8 @@ import matplotlib.pyplot as plt
SCALES = [2, 1.4142, 1, 0.7071, 0.5, 0.3536, 0.25]
RF_VALUES = {"layer3": (267, 16, 133), "layer4": (427, 32, 213)}
PCA_COMPONENTS = 40
KD_TREE_DISTANCE_THRESHOLD = 4.5 # has to be evaluated
RANSAC_MIN_SAMPLES = 3
RANSAC_NUM_TRAILS = 1000
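# Sketch of how these constants are intended to interact (an assumed illustration,
# not necessarily the exact score_match implementation): putative correspondences
# come from a KD-tree lookup with a hard distance cutoff and are then verified
# geometrically with RANSAC over an affine model. Inputs are numpy arrays;
# residual_threshold is an assumed value and also needs estimation.
def match_sketch(index_descriptors, index_points, query_descriptors, query_points):
    tree = cKDTree(index_descriptors)
    # neighbors beyond the cutoff return distance inf and an out-of-range index
    distances, indices = tree.query(query_descriptors, distance_upper_bound=KD_TREE_DISTANCE_THRESHOLD)
    valid = indices < index_descriptors.shape[0]
    src, dst = query_points[valid], index_points[indices[valid]]
    if src.shape[0] < RANSAC_MIN_SAMPLES:
        return 0  # too few putative matches to fit a model
    model, inliers = ransac((src, dst), AffineTransform,
                            min_samples=RANSAC_MIN_SAMPLES,
                            residual_threshold=10.0,
                            max_trials=RANSAC_NUM_TRAILS)
    return 0 if inliers is None else int(inliers.sum())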
@@ -136,3 +140,20 @@ def visualize_match(index_image_path, query_image_path, index_points, query_poin
plot_matches(ax, index_image, query_image, index_points, query_points, match_pairs)
plt.savefig(filepath, dpi=300)
plt.close(fig)
def calculate_pca(data, save_path=None):
pca = PCA(n_components=PCA_COMPONENTS, whiten=True)
pca.fit(data)
print(f"calculated pca matrix. Explained variance is {sum(pca.explained_variance_ratio_):.2f}"
f" over {pca.n_components} components")
if save_path:
print(f"saving pca data to {save_path}")
pickle.dump(pca, save_path.open("wb"))
return pca
def format_time(seconds):
minutes, seconds = divmod(int(seconds), 60)
hours, minutes = divmod(minutes, 60)
return f"[{hours:02d}:{minutes:02d}:{seconds:02d}]"