- Added option to log PCA statistics

TODO:
- Process output information
parent 8c4fb566
......@@ -24,7 +24,8 @@ class ExperimentManager:
"""
def __init__(self, experiment_name, stages, dataset, validation_split=0.2, batch_size=8, num_workers=4, epochs=30,
learning_rate=0.008, learning_rate_gamma=0.5, learning_rate_step_size=10, weight_decay=0.0001,
load_from=None, target_layer="layer3", use_l2_normalization=True, pca_dataset=None, pca_load=None):
load_from=None, target_layer="layer3", use_l2_normalization=True, pca_dataset=None, pca_load=None,
pca_log=False):
print(f"Current working directory is {Path.cwd()}")
print(f"Running on CUDA:{torch.cuda.is_available()}")
print("Preparing experiment:")
......@@ -105,6 +106,7 @@ class ExperimentManager:
)
self.pca_dataset = pca_dataset
self.pca_load = pca_load
self.pca_log = pca_log
if "retrieval" in self.stages:
if self.pca_dataset is not None:
self.pca_dataset = Path(pca_dataset)
......@@ -132,7 +134,6 @@ class ExperimentManager:
# create load and save directories for the created/ used models. Automatically infer paths if multiple stages
# are trained in sequence
self.load_paths, self.save_paths = self.get_load_and_save_paths(load_from)
# TODO if retrieval in self.stages and pca not in self.stages: assert pca file is available
if "finetuning" in self.stages:
self.train_model("finetuning")
......@@ -380,7 +381,7 @@ class ExperimentManager:
# calculate pca matrix and save to file
# filename is keypoint_model_dataset.pca
pca = calculate_pca(accumulated_features, stage_path.joinpath(Path(
f"{self.load_paths['retrieval'].stem}_{self.pca_dataset.stem}.pca")))
f"{self.load_paths['retrieval'].stem}_{self.pca_dataset.stem}.pca")), self.pca_log)
else:
# if we do not have a pca matrix yet, use the index set
print(f"calculating pca matrix on {index_dataset}")
......@@ -416,7 +417,7 @@ class ExperimentManager:
if use_index_for_pca:
# calculate pca and apply it to already extracted features if we have not done so already
pca = calculate_pca(accumulated_features, stage_path.joinpath(
f"{self.load_paths['retrieval'].stem}_{self.dataset['retrieval'].stem}.pca"))
f"{self.load_paths['retrieval'].stem}_{self.dataset['retrieval'].stem}.pca"), self.pca_log)
index_extraction_list = [(entry[0], entry[1], pca.transform(entry[2].numpy()), entry[3]) for entry in index_extraction_list]
query_dataset = Path(query_dataset)
......@@ -472,10 +473,11 @@ def check_experiment_wide_parameter(parameter, parameter_name, required_type, al
def fire_experiment(experiment_name, stages, dataset, validation_split=0.2, batch_size=8, num_workers=4, epochs=30,
                    learning_rate=0.008, learning_rate_gamma=0.5, learning_rate_step_size=10, weight_decay=0.0001,
                    load_from=None, target_layer="layer3", use_l2_normalization=True, pca_dataset=None, pca_load=None,
                    pca_log=False):
    """Command-line entry point: build and run an ExperimentManager.

    All arguments are forwarded positionally to ExperimentManager.__init__,
    which starts the requested experiment stages on construction; see that
    method for the meaning of each parameter. ``pca_log`` enables writing a
    JSON file with PCA statistics next to the pickled PCA matrix.
    """
    # ExperimentManager runs the experiment from its constructor, so no
    # return value is needed here.
    ExperimentManager(experiment_name, stages, dataset, validation_split, batch_size, num_workers, epochs,
                      learning_rate, learning_rate_gamma, learning_rate_step_size, weight_decay, load_from,
                      target_layer, use_l2_normalization, pca_dataset, pca_load, pca_log)
#torch.backends.cudnn.benchmark = True
#exp = ExperimentManager("30_epoch_run", {"finetuning","keypoints","retrieval"}, {"finetuning": "../Datasets/Landmarks", "keypoints": "../../Datasets/Landmarks", "retrieval": "../../Datasets/Oxford"}, epochs=30)
......
import torch
import torchvision
import pickle
import json
import numpy as np
from sklearn.decomposition import PCA
from skimage.measure import ransac
......@@ -142,14 +143,24 @@ def visualize_match(index_image_path, query_image_path, index_points, query_poin
plt.close(fig)
def calculate_pca(data, save_path=None, log_pca=False):
    """Fit a whitened PCA with ``PCA_COMPONENTS`` components on ``data``.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Feature matrix the PCA is fitted on.
    save_path : pathlib.Path, optional
        If given, the fitted PCA object is pickled to this path. The
        statistics log (when enabled) is written next to it with the
        same stem and a ``.log`` suffix.
    log_pca : bool, optional
        If True, additionally fit an unrestricted PCA and write its full
        eigenvalue spectrum plus summary statistics as JSON. Requires
        ``save_path`` so the log filename can be derived from it.

    Returns
    -------
    sklearn.decomposition.PCA
        The fitted, whitened PCA restricted to ``PCA_COMPONENTS`` components.
    """
    pca = PCA(n_components=PCA_COMPONENTS, whiten=True)
    pca.fit(data)
    explained_variance = sum(pca.explained_variance_ratio_)
    print(f"calculated pca matrix. Explained variance is {explained_variance:.2f}"
          f" over {pca.n_components} components")
    if save_path:
        print(f"saving pca data to {save_path}")
        # Use a context manager so the file handle is closed deterministically
        # (the previous open().dump() pattern leaked the handle).
        with save_path.open("wb") as pca_file:
            pickle.dump(pca, pca_file)
    if log_pca:
        if save_path is None:
            # Without a save path there is nowhere to derive the log
            # filename from; warn instead of crashing on Path(None).
            print("pca_log requested but no save_path given; skipping log file")
        else:
            print("creating pca log file")
            # Fit a second, unrestricted PCA so the log contains the full
            # eigenvalue spectrum, not only the first PCA_COMPONENTS values.
            pca_logging = PCA(whiten=True)
            pca_logging.fit(data)
            with Path(save_path).with_suffix(".log").open('w', encoding='utf-8') as json_file:
                log = {"num_components": PCA_COMPONENTS,
                       "explained_var_ratio": explained_variance,
                       "eigenvalues": pca_logging.explained_variance_.tolist()}
                json.dump(log, json_file, ensure_ascii=False)
    return pca
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment