masterthesis/codes/node-ranking/mean_and_deriv.py
Valentin Brandl 0d216a53ff Foo
2022-04-19 18:38:01 +02:00

105 lines
2.5 KiB
Python

#!/usr/bin/env python3
import matplotlib.pyplot as plt
import rank_with_crawlers
import statistics
import multiprocessing
from random import sample
import glob
import reduce_edges
import rank_with_churn
from datetime import datetime
import json
from node_ranking import (
# rank as rank_nr,
page_rank,
sensor_rank,
find_rank,
parse_csv,
csv_loader,
build_graph,
Node,
RankedNode,
)
def analyze(g):
    """Summarize the rank distribution of an already-ranked graph.

    Looks up the known node (rank_with_churn.KNOWN) in *g* and returns a
    dict with its rank plus the mean and standard deviation over the
    ranks of all nodes in the graph.
    """
    known = rank_with_crawlers.find_known(g, rank_with_churn.KNOWN)
    # collect every node's rank once; used for both statistics below
    ranks = [node.rank for node in g.nodes()]
    return {
        'known_rank': known.rank,
        'mean': statistics.mean(ranks),
        'stdev': statistics.stdev(ranks),
    }
def perform(path):
    """Rank the edge snapshot stored at *path* and analyze the result.

    The filename is expected to be ``<unix-timestamp>.txt``; the returned
    dict maps the derived JSON output path to the SensorRank ('sr') and
    PageRank ('pr') analysis results for that snapshot.
    """
    # strip the 4-char ".txt" suffix; the stem is a unix timestamp
    when = datetime.fromtimestamp(float(path.split('/')[-1][:-4]))
    print(f'{when=}, {path=}')
    edges = reduce_edges.load_data(path)
    g = build_graph(edges)
    # two iterations of each ranking algorithm, as in the original runs
    g_sr = sensor_rank(sensor_rank(g))
    g_pr = page_rank(page_rank(g))
    res_sr = analyze(g_sr)
    res_pr = analyze(g_pr)
    # use a distinct name instead of shadowing the input parameter
    out_path = f'./mean_and_deriv/{when.timestamp()}.json'
    return {
        out_path: {
            'sr': res_sr,
            'pr': res_pr,
        }
    }
def load_json(path):
    """Read the file at *path* and return its parsed JSON content."""
    with open(path, 'r') as handle:
        raw = handle.read()
    return json.loads(raw)
# def plot(path):
# data = load_json(path)
# when = datetime.fromtimestamp(float(path.split('/')[-1][:-5]))
def main_plot():
    """Plot the mean SensorRank over time with stddev error bars."""
    data = {}
    for file in glob.glob('./mean_and_deriv/*.json'):
        # filename (minus the 5-char ".json" suffix) is a unix timestamp
        ts = float(file.split('/')[-1][:-5])
        data[datetime.fromtimestamp(ts)] = load_json(file)

    times = []
    means_sr = []
    stder_sr = []
    for when in sorted(data):
        entry = data[when]
        times.append(when)
        means_sr.append(entry['sr']['mean'])
        stder_sr.append(entry['sr']['stdev'])

    plt.errorbar(times, means_sr, stder_sr)
    plt.show()
def main():
    """Rank every edge snapshot in parallel and persist results as JSON.

    Each worker runs ``perform`` on one ``./edges/*.txt`` file; the
    returned {output_path: result} mappings are then written to disk.
    """
    # glob already returns a list — no need for a manual append loop
    params = glob.glob('./edges/*.txt')
    with multiprocessing.Pool(processes=8) as pool:
        l_path_data = pool.map(perform, params)
    for path_data in l_path_data:
        for out_path, data in path_data.items():
            with open(out_path, 'w') as f:
                json.dump(data, f)
if __name__ == '__main__':
    # NOTE(review): entry point only plots existing results; run main()
    # first to (re)generate the ./mean_and_deriv/*.json data files.
    main_plot()