#!/usr/bin/env python3
import glob
|
||
|
import json
|
||
|
import asyncio
|
||
|
import os
|
||
|
import numpy as np
|
||
|
from random import sample, seed
|
||
|
from collections import defaultdict
|
||
|
from datetime import datetime
|
||
|
from functools import reduce
|
||
|
from node_ranking import (
|
||
|
page_rank,
|
||
|
sensor_rank,
|
||
|
find_rank,
|
||
|
parse_csv,
|
||
|
csv_loader,
|
||
|
build_graph,
|
||
|
Node,
|
||
|
RankedNode,
|
||
|
)
|
||
|
|
||
|
def background(f):
    """Decorator: run *f* on the event loop's default thread-pool executor.

    The wrapped callable returns an awaitable Future that resolves to the
    result of ``f(*args, **kwargs)``.
    """
    def wrapped(*args, **kwargs):
        # run_in_executor() forwards positional arguments only, so the original
        # `(None, f, *args, **kwargs)` call raised TypeError whenever keyword
        # arguments were supplied. Close over both args and kwargs instead.
        return asyncio.get_event_loop().run_in_executor(
            None, lambda: f(*args, **kwargs)
        )

    return wrapped
def load_data(path):
    """Parse the edge file at *path* into a list of edge records.

    Returns one ``parse_csv()`` result per input line, in file order.
    """
    # Iterate the file object directly instead of materializing the whole
    # file with readlines() first.
    with open(path, 'r') as f:
        return [parse_csv(line) for line in f]
# Rank assigned to every node before the ranking iterations start.
INITIAL_RANK = 0.5
# NOTE(review): appears unused here — do_work() derives the edge count from
# its percent_edges parameter; confirm no other module reads this.
COUNT_NEW_EDGES = 50
# The known sensor node whose detectability the experiment measures.
KNOWN = RankedNode(Node('34.204.196.211', 9796), INITIAL_RANK)
# Number of page_rank() passes applied per experiment.
PR_ITERATIONS = 5
# Fraction of the known node's incoming edges to delete.
REMOVE_EDGES = 0.5
# NOTE(review): default edge multiplier; main() sweeps its own list of
# percentages instead of using this value.
PERCENT_EDGES = 2.5
def avg_rank(g):
    """Return the mean ``rank`` over all nodes of graph *g*.

    Returns 0.0 for a graph with no nodes (the original raised
    ZeroDivisionError in that case).
    """
    nodes = list(g.nodes())
    if not nodes:
        return 0.0
    return sum(n.rank for n in nodes) / len(nodes)
def avg_without_known(g):
    """Return the mean ``rank`` over all nodes of *g* except the KNOWN node.

    Returns 0.0 when no other nodes exist (the original raised
    ZeroDivisionError for a graph containing only the known node).
    """
    others = [n for n in g.nodes() if n.node != KNOWN.node]
    if not others:
        return 0.0
    return sum(n.rank for n in others) / len(others)
def avg_in(g):
    """Return the mean in-degree over all nodes of *g* except the KNOWN node.

    Returns 0.0 when no other nodes exist (the original raised
    ZeroDivisionError in that case).
    """
    others = [n for n in g.nodes() if n.node != KNOWN.node]
    if not others:
        return 0.0
    return sum(len(list(g.predecessors(n))) for n in others) / len(others)
def avg_out(g):
    """Return the mean out-degree over all nodes of *g* except the KNOWN node.

    Returns 0.0 when no other nodes exist (the original raised
    ZeroDivisionError in that case).
    """
    others = [n for n in g.nodes() if n.node != KNOWN.node]
    if not others:
        return 0.0
    return sum(len(list(g.successors(n))) for n in others) / len(others)
def known_rank(g):
    """Return the rank of the KNOWN node within *g*, or None if absent."""
    return next((n.rank for n in g.nodes() if n.node == KNOWN.node), None)
# @background
# NOTE(review): the decorator above is commented out, so do_work() runs
# synchronously; main() relies on that by calling it in a plain loop.
def do_work(path, initial_rank, percent_edges, remove_edges, known):
    """Run one sensor-detectability experiment on a single edge snapshot.

    Loads the edge list at *path*, builds a graph seeded with *initial_rank*,
    removes a *remove_edges* fraction of the edges pointing into *known*,
    adds outgoing edges from *known* sized by *percent_edges* relative to its
    remaining in-degree, then ranks the graph with page_rank and sensor_rank
    and dumps the collected metrics as JSON under
    percent/<initial_rank>/<percent_edges>/<timestamp>.json.
    """
    print(f'starting {path} {initial_rank} {percent_edges} {known}')

    # Snapshot time is encoded in the file name ".../<unix-ts>.txt";
    # [:-4] strips the ".txt" suffix.
    when = datetime.fromtimestamp(float(path.split('/')[-1][:-4]))
    edges = load_data(path)
    g = build_graph(edges, initial_rank=initial_rank)

    # Randomly drop a remove_edges fraction of the edges pointing INTO the
    # known node.
    edges = list(filter(lambda e: e[1] == known, g.edges()))
    for_removal = sample(edges, int(len(edges) * remove_edges))
    print(f'removing {len(for_removal)} incoming edges')
    for edge in for_removal:
        g.remove_edge(edge[0], edge[1])

    # In-degree of the known node after the removal above.
    n_known_in = len(list(filter(lambda e: e[1] == known, g.edges())))

    a_in = avg_in(g)
    a_out = avg_out(g)

    # Add outgoing edges from the known node, proportional to its remaining
    # in-degree.
    count_new_edges = int(n_known_in * percent_edges)
    print(f'adding {count_new_edges} outgoing edges')
    # NOTE(review): sample() raises ValueError if count_new_edges exceeds the
    # node count, and may pick the known node itself (self-loop) — confirm
    # both are acceptable for these snapshots.
    candidates = sample(list(g.nodes()), count_new_edges)
    for node in candidates:
        g.add_edge(known, node)

    n_known_out = len(list(filter(lambda e: e[0] == known, g.edges())))

    # page ranking
    for _ in range(PR_ITERATIONS):
        g = page_rank(g)

    pr_avg = avg_rank(g)
    pr_avg_filtered = avg_without_known(g)
    pr_known = known_rank(g)
    # Median rank. NOTE(review): np.percentile does not require pre-sorted
    # input; the sorted() call is redundant but harmless.
    pr_perc_50 = np.percentile(np.array(sorted(map(lambda n: n.rank, g))), 50)

    # Re-rank with sensor_rank and collect the same metrics.
    sr_g = sensor_rank(g)
    sr_avg = avg_rank(sr_g)
    sr_avg_filtered = avg_without_known(sr_g)
    sr_known = known_rank(sr_g)
    sr_perc_50 = np.percentile(np.array(sorted(map(lambda n: n.rank, sr_g))), 50)

    data = {
        'sr_avg': sr_avg,
        'sr_avg_filtered': sr_avg_filtered,
        'sr_known': sr_known,
        'sr_perc_50': sr_perc_50,
        'pr_avg': pr_avg,
        'pr_avg_filtered': pr_avg_filtered,
        'pr_known': pr_known,
        'pr_perc_50': pr_perc_50,
        'known_in': n_known_in,
        'known_out': n_known_out,
        'avg_in': a_in,
        'avg_out': a_out,
    }

    # One JSON result file per snapshot, bucketed by experiment parameters.
    base_path = f'percent/{initial_rank}/{percent_edges}'
    os.makedirs(base_path, exist_ok=True)
    file = f'{base_path}/{when.timestamp()}.json'
    with open(file, 'w') as f:
        json.dump(data, f)

    print(f'finished {path} {initial_rank} {percent_edges} {known}')
def main():
    """Sweep every edge snapshot across a range of percent_edges values."""
    percentages = (0.5, 0.75, 0.8, 1.0, 1.5, 2.0, 2.5)
    for snapshot in glob.glob('./edges/*.txt'):
        for percentage in percentages:
            do_work(snapshot, INITIAL_RANK, percentage, REMOVE_EDGES, KNOWN)
# Script entry point: run the full experiment sweep.
if __name__ == '__main__':
    main()