masterthesis/codes/node-ranking/plot_with_crawler.py

#!/usr/bin/env python3
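"""Plot node-ranking results for the simulations with injected crawlers.

Reads per-timestamp JSON snapshots from
./data_with_crawler/<n_crawlers>_crawlers/<prc>_edges/<unix-timestamp>.json
and writes SensorRank, PageRank and in/out plots to the matching directory
under ./plot_with_crawler/.
"""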
import matplotlib.pyplot as plt
from datetime import datetime
import glob
import json
import os

import reduce_edges  # kept from the earlier edge-reduction sweeps (see main())


def load_json(path):
    with open(path, 'r') as f:
        return json.load(f)
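

# Each snapshot file is expected to map an algorithm key ('sr' for SensorRank,
# 'pr' for PageRank) to the metrics read below: avg_rank, avg_in, known_rank,
# known_in and known_out. This is inferred from the accesses in the plotting
# functions; there is no explicit schema.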
def plot_in_out(n_crawlers, prc, data):
    times = []
    avg_in = []
    known_in = []
    known_out = []
    # in/out values are identical for both algorithms, so pick one arbitrarily
    algo = 'sr'
    for when, d in sorted(data.items(), key=lambda kv: kv[0]):
        times.append(when)
        avg_in.append(d[algo]['avg_in'])
        known_in.append(d[algo]['known_in'])
        known_out.append(d[algo]['known_out'])
    fig, ax = plt.subplots()
    a = 'SensorRank' if algo == 'sr' else 'PageRank'
    ax.set_ylabel(f'{a}')
    ax.plot(times, avg_in, label='Avg. In')
    ax.plot(times, known_in, label='Known In')
    ax.plot(times, known_out, label='Known Out')
    title = f'In and out after adding {n_crawlers} crawlers with {prc * 100:.0f}% edges'
    ax.set_title(title)
    fig.autofmt_xdate()
    fig.legend()
    out_dir = f'./plot_with_crawler/{n_crawlers:03d}_crawlers/{prc:.02f}_edges'
    os.makedirs(out_dir, exist_ok=True)  # savefig fails if the directory is missing
    plt.savefig(f'{out_dir}/in_out.png')
    plt.close(fig)  # free the figure; this runs once per parameter combination
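

# Plot the average rank against the rank of the 'known' nodes (presumably the
# injected crawlers) over time for a single algorithm ('sr' or 'pr').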
def plot(n_crawlers, prc, algo, data):
    times = []
    avg_rank = []
    a_avg_in = []
    known_rank = []
    known_in = []
    known_out = []
    for when, d in sorted(data.items(), key=lambda kv: kv[0]):
        times.append(when)
        avg_rank.append(d[algo]['avg_rank'])
        a_avg_in.append(d[algo]['avg_in'])
        known_rank.append(d[algo]['known_rank'])
        known_in.append(d[algo]['known_in'])
        known_out.append(d[algo]['known_out'])
    fig, ax = plt.subplots()
    a = 'SensorRank' if algo == 'sr' else 'PageRank'
    ax.set_ylabel(f'{a}')
    ax.plot(times, avg_rank, label='Avg. Rank')
    ax.plot(times, known_rank, label='Known Rank')
    title = f'{a} with {n_crawlers} crawlers and {prc * 100:.0f}% edges each'
    ax.set_title(title)
    # Optional second axis with the edge counts, kept from an earlier version:
    # ax2 = ax.twinx()
    # ax2.set_ylabel('Edges')
    # ax2.plot(times, known_in, label='Known In', color='red')
    # ax2.plot(times, known_out, label='Known Out', color='green')
    # ax2.plot(times, a_avg_in, label='Avg. In', color='violet')
    fig.autofmt_xdate()
    fig.legend()
    out_dir = f'./plot_with_crawler/{n_crawlers:03d}_crawlers/{prc:.02f}_edges'
    os.makedirs(out_dir, exist_ok=True)
    plt.savefig(f'{out_dir}/{algo}.png')
    plt.close(fig)
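

# Sweep all simulated crawler counts and edge percentages, load every JSON
# snapshot per combination, and render the SensorRank, PageRank and in/out
# plots.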
def main():
    # earlier versions swept reduce_edges.percentages (plus 1.0 and 1.2 as
    # added percentages); the crawler experiment uses these fixed values
    for n_crawlers in [5, 10, 20, 50]:
        for prc in [0.1, 0.5, 0.9]:
            data = {}
            for file in glob.glob(f'./data_with_crawler/{n_crawlers:03d}_crawlers/{prc:.02f}_edges/*.json'):
                # the filename (without '.json') is the snapshot's Unix timestamp
                when = datetime.fromtimestamp(float(file.split('/')[-1][:-5]))
                print(f'{n_crawlers=}, {prc=:.02f}, {when=}')
                data[when] = load_json(file)
            plot(n_crawlers, prc, 'sr', data)
            plot(n_crawlers, prc, 'pr', data)
            plot_in_out(n_crawlers, prc, data)


if __name__ == '__main__':
    main()