105 lines
2.5 KiB
Python
105 lines
2.5 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
import matplotlib.pyplot as plt
|
||
|
import rank_with_crawlers
|
||
|
import statistics
|
||
|
import multiprocessing
|
||
|
from random import sample
|
||
|
import glob
|
||
|
import reduce_edges
|
||
|
import rank_with_churn
|
||
|
from datetime import datetime
|
||
|
import json
|
||
|
from node_ranking import (
|
||
|
# rank as rank_nr,
|
||
|
page_rank,
|
||
|
sensor_rank,
|
||
|
find_rank,
|
||
|
parse_csv,
|
||
|
csv_loader,
|
||
|
build_graph,
|
||
|
Node,
|
||
|
RankedNode,
|
||
|
)
|
||
|
|
||
|
|
||
|
def analyze(g):
    """Summarise the rank distribution of graph *g*.

    Returns a dict with the rank of the known (crawler) node plus the
    mean and sample standard deviation over all node ranks.
    """
    known = rank_with_crawlers.find_known(g, rank_with_churn.KNOWN)
    # avg_r = rank_with_churn.avg_without_known(g)
    # avg_in = rank_with_churn.avg_in(g)
    # kn_in = known_in(g, known)
    # kn_out = known_out(g, known)
    ranks = [node.rank for node in g.nodes()]
    return {
        'known_rank': known.rank,
        # 'known_in': kn_in,
        # 'known_out': kn_out,
        # 'avg_rank': avg_r,
        # 'avg_in': avg_in,
        'mean': statistics.mean(ranks),
        'stdev': statistics.stdev(ranks),
    }
def perform(path):
    """Rank one edge snapshot and return ``{output_path: summaries}``.

    The snapshot's timestamp is recovered from the file name (last path
    component with its 4-character ".txt" suffix stripped).  Each ranking
    scheme is applied twice before summarising.
    """
    stamp = path.split('/')[-1][:-4]
    when = datetime.fromtimestamp(float(stamp))
    print(f'{when=}, {path=}')
    graph = build_graph(reduce_edges.load_data(path))
    # two iterations of each ranking scheme
    res_sr = analyze(sensor_rank(sensor_rank(graph)))
    res_pr = analyze(page_rank(page_rank(graph)))
    out_path = f'./mean_and_deriv/{when.timestamp()}.json'
    return {
        out_path: {
            'sr': res_sr,
            'pr': res_pr,
        }
    }
def load_json(path):
    """Parse and return the JSON document stored at *path*."""
    with open(path, 'r') as handle:
        return json.load(handle)


# def plot(path):
#     data = load_json(path)
#     when = datetime.fromtimestamp(float(path.split('/')[-1][:-5]))
def main_plot():
    """Plot mean sensor-rank over time with stdev error bars.

    Reads every summary JSON under ./mean_and_deriv/ (file names are
    UNIX timestamps with a 5-character ".json" suffix) and shows a
    time-ordered errorbar chart of the sensor-rank statistics.
    """
    by_time = {}
    for path in glob.glob('./mean_and_deriv/*.json'):
        stamp = float(path.split('/')[-1][:-5])
        by_time[datetime.fromtimestamp(stamp)] = load_json(path)

    times, means_sr, stder_sr = [], [], []
    # dict keys are unique, so sorting the keys matches sorting items by key
    for when in sorted(by_time):
        summary = by_time[when]['sr']
        times.append(when)
        means_sr.append(summary['mean'])
        stder_sr.append(summary['stdev'])

    plt.errorbar(times, means_sr, stder_sr)
    plt.show()
def main():
    """Recompute rank summaries for every edge snapshot and save them.

    Fans perform() out over an 8-worker process pool, then writes each
    returned result dict to the JSON path it names.
    """
    snapshots = glob.glob('./edges/*.txt')

    with multiprocessing.Pool(processes=8) as pool:
        results = pool.map(perform, snapshots)

    for mapping in results:
        for out_path, payload in mapping.items():
            with open(out_path, 'w') as sink:
                json.dump(payload, sink)
# Entry point: plots previously computed summaries from ./mean_and_deriv/.
# NOTE(review): main() — which recomputes those summaries via a process
# pool — is defined but never invoked here; confirm that is intentional.
if __name__ == '__main__':
    main_plot()