Wrote a short script to test cKDTree performance with different feature dimensionalities.

Todos:
process output information
parent 3ac20708
import numpy as np
from scipy.spatial import cKDTree
from sklearn.preprocessing import normalize
import time
import matplotlib.pyplot as plt
# Feature dimensionalities to benchmark: 40, 80, ..., 720 in steps of 40.
DIM = list(range(40, 721, 40))
# Number of index/query sample pairs that are timed per dimensionality.
NUM_SAMPLES = 200
# When True the tree query is asked to use all available workers (-1),
# otherwise a single worker is used.
MULTITHREAD = True
def _parallel_query_kwargs(workers):
    """Return the keyword dict that sets the parallelism of ``cKDTree.query``.

    SciPy 1.6 renamed the ``n_jobs`` keyword of ``cKDTree.query`` to
    ``workers`` and later releases dropped ``n_jobs`` altogether.  A tiny
    probe query is used to find out which spelling the installed SciPy
    accepts, so the benchmark runs on both old and new versions.
    """
    probe_points = np.zeros((1, 1))
    probe_tree = cKDTree(probe_points)
    try:
        probe_tree.query(probe_points, workers=1)
    except TypeError:
        # Pre-1.6 SciPy: only the legacy keyword exists.
        return {"n_jobs": workers}
    return {"workers": workers}


def benchmark_kd():
    """Benchmark cKDTree build + query time for every dimensionality in DIM.

    For each feature dimensionality, NUM_SAMPLES pairs of random,
    row-normalised point sets (1000 points each) are generated.  For every
    pair a tree is built on the index set and queried with the query set
    using the default query settings.  The average wall time per pair is
    printed and collected, and finally the averages are plotted against the
    dimensionality and saved to an image file whose name depends on
    MULTITHREAD.

    Returns:
        None.  Results are reported via stdout and the saved figure.
    """
    points_per_sample = 1000
    # -1 asks SciPy to use all available workers.
    workers = -1 if MULTITHREAD else 1
    # Resolve the keyword once, outside the timed region.
    query_kwargs = _parallel_query_kwargs(workers)

    runtimes = []
    for feature_depth in DIM:
        flat_shape = (NUM_SAMPLES * points_per_sample, feature_depth)
        sample_shape = (NUM_SAMPLES, points_per_sample, feature_depth)

        # Normalise each point (row) before splitting the flat array into
        # per-sample chunks.
        index_data = normalize(np.random.random(flat_shape)).reshape(sample_shape)
        query_data = normalize(np.random.random(flat_shape)).reshape(sample_shape)
        print(index_data.shape)

        # perf_counter is monotonic and high-resolution, unlike time.time().
        bench_start = time.perf_counter()
        for ind in range(NUM_SAMPLES):
            # Tree construction is included in the timing; the original
            # author considered the build time negligible.
            tree = cKDTree(index_data[ind])
            tree.query(query_data[ind], **query_kwargs)
        # Read the clock once so the printed and the stored value agree.
        avg_runtime = (time.perf_counter() - bench_start) / NUM_SAMPLES

        print(f"avg_kd_time: {avg_runtime}")
        runtimes.append(avg_runtime)

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.plot(np.array(DIM), np.array(runtimes), marker='o', markersize=4, alpha=0.65)
    ax.set_xlabel("Dimensions")
    ax.set_ylabel("Average kd-matching runtime")
    ax.set_title("KD-Tree Performance With Varying Featuredimensions, 1000 Featurepoints")
    plt.savefig(f"kd_perf{'multi_core' if MULTITHREAD else 'single_core'}", dpi=200, bbox_inches='tight')
    plt.close(fig)
# Run the benchmark only when this file is executed as a script, so that
# importing the module does not start the long-running measurement.
if __name__ == "__main__":
    benchmark_kd()
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment