#!/usr/bin/env python3
"""Rank experiments on edge snapshots.

Builds a directed graph from CSV edge-list snapshots, perturbs the edges of a
known node, runs PageRank and SensorRank, and records summary statistics as
JSON.
"""
import asyncio
import glob
import json
import os
from datetime import datetime
from random import sample

import numpy as np

from node_ranking import (
    page_rank,
    sensor_rank,
    parse_csv,
    build_graph,
    Node,
    RankedNode,
)


def background(f):
    """Run `f` on the default executor (unused; see `# @background` below).

    Note: asyncio.get_event_loop() is deprecated outside a running event
    loop since Python 3.10.
    """
    def wrapped(*args, **kwargs):
        return asyncio.get_event_loop().run_in_executor(None, f, *args, **kwargs)
    return wrapped


def load_data(path):
    """Parse one edge per line from a CSV snapshot file."""
    with open(path, 'r') as f:
        return [parse_csv(line) for line in f]


INITIAL_RANK = 0.5    # rank assigned to every node when the graph is built
COUNT_NEW_EDGES = 50  # currently unused
KNOWN = RankedNode(Node('34.204.196.211', 9796), INITIAL_RANK)  # the known node under study
PR_ITERATIONS = 5     # PageRank iterations per run
REMOVE_EDGES = 0.5    # fraction of the known node's incoming edges to drop
PERCENT_EDGES = 2.5   # currently unused; the sweep in main() supplies its own values


def avg_rank(g):
    """Mean rank over all nodes, including the known node."""
    nodes = list(g.nodes())
    return sum(n.rank for n in nodes) / len(nodes)


def avg_without_known(g):
    """Mean rank over all nodes except the known node."""
    nodes = [n for n in g.nodes() if n.node != KNOWN.node]
    return sum(n.rank for n in nodes) / len(nodes)


def avg_in(g):
    """Mean in-degree over all nodes except the known node."""
    nodes = [n for n in g.nodes() if n.node != KNOWN.node]
    return sum(len(list(g.predecessors(n))) for n in nodes) / len(nodes)


def avg_out(g):
    """Mean out-degree over all nodes except the known node."""
    nodes = [n for n in g.nodes() if n.node != KNOWN.node]
    return sum(len(list(g.successors(n))) for n in nodes) / len(nodes)


def known_out(g):
    """Out-degree of the known node."""
    nodes = [n for n in g.nodes() if n.node == KNOWN.node]
    assert len(nodes) == 1
    return len(list(g.successors(nodes[0])))


def known_in(g):
    """In-degree of the known node."""
    nodes = [n for n in g.nodes() if n.node == KNOWN.node]
    assert len(nodes) == 1
    return len(list(g.predecessors(nodes[0])))


def known_rank(g):
    """Rank of the known node, or None if it is not in the graph."""
    for n in g.nodes():
        if n.node == KNOWN.node:
            return n.rank
    return None
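# ---------------------------------------------------------------------------
# Experiment (do_work below): perturb the known node's edges and measure the
# effect on its PageRank/SensorRank relative to the rest of the graph:
#   1. build the graph from one edge snapshot,
#   2. remove a random fraction (`remove_edges`) of the known node's
#      incoming edges,
#   3. add outgoing edges from the known node to random targets
#      (`percent_edges` times the remaining in-degree),
#   4. run PageRank and SensorRank and dump summary statistics to JSON.
# ---------------------------------------------------------------------------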
# @background
def do_work(path, initial_rank, percent_edges, remove_edges, known):
    print(f'starting {path} {initial_rank} {percent_edges} {known}')
    # the file name is a unix timestamp plus a '.txt' extension
    when = datetime.fromtimestamp(float(os.path.basename(path)[:-4]))
    edges = load_data(path)
    g = build_graph(edges, initial_rank=initial_rank)

    # step 2: drop a random fraction of the known node's incoming edges
    incoming = [e for e in g.edges() if e[1] == known]
    for_removal = sample(incoming, int(len(incoming) * remove_edges))
    print(f'removing {len(for_removal)} incoming edges')
    for src, dst in for_removal:
        g.remove_edge(src, dst)
    n_known_in = len([e for e in g.edges() if e[1] == known])

    a_in = avg_in(g)
    a_out = avg_out(g)

    # step 3: add outgoing edges from the known node to random targets;
    # sample() draws from all nodes, so it may pick the known node itself
    # or a target that is already connected
    count_new_edges = int(n_known_in * percent_edges)
    print(f'adding {count_new_edges} outgoing edges')
    candidates = sample(list(g.nodes()), count_new_edges)
    for node in candidates:
        g.add_edge(known, node)
    n_known_out = len([e for e in g.edges() if e[0] == known])

    # step 4a: iterate PageRank, then collect statistics
    for _ in range(PR_ITERATIONS):
        g = page_rank(g)
    pr_avg = avg_rank(g)
    pr_avg_filtered = avg_without_known(g)
    pr_known = known_rank(g)
    pr_perc_50 = float(np.percentile([n.rank for n in g.nodes()], 50))

    # step 4b: SensorRank, computed on the PageRanked graph
    sr_g = sensor_rank(g)
    sr_avg = avg_rank(sr_g)
    sr_avg_filtered = avg_without_known(sr_g)
    sr_known = known_rank(sr_g)
    sr_perc_50 = float(np.percentile([n.rank for n in sr_g.nodes()], 50))

    data = {
        'sr_avg': sr_avg,
        'sr_avg_filtered': sr_avg_filtered,
        'sr_known': sr_known,
        'sr_perc_50': sr_perc_50,
        'pr_avg': pr_avg,
        'pr_avg_filtered': pr_avg_filtered,
        'pr_known': pr_known,
        'pr_perc_50': pr_perc_50,
        'known_in': n_known_in,
        'known_out': n_known_out,
        'avg_in': a_in,
        'avg_out': a_out,
    }
    base_path = f'percent/{initial_rank}/{percent_edges}'
    os.makedirs(base_path, exist_ok=True)
    out_file = f'{base_path}/{when.timestamp()}.json'
    with open(out_file, 'w') as f:
        json.dump(data, f)
    print(f'finished {path} {initial_rank} {percent_edges} {known}')


def main():
    data = {}
    initial_rank = 0.5
    for path in glob.glob('./edges/*.txt'):
        when = datetime.fromtimestamp(float(os.path.basename(path)[:-4]))
        print(when)
        edges = load_data(path)
        g = build_graph(edges, initial_rank=initial_rank)
        data[when.timestamp()] = {
            'avg_in': avg_in(g),
            'avg_out': avg_out(g),
            'known_in': known_in(g),
            'known_out': known_out(g),
            'number_of_nodes': len(g),
        }
        # sweep over edge ratios; disabled by default
        # for perc in [0.5, 0.75, 0.8, 1.0, 1.5, 2.0, 2.5]:
        #     do_work(path, INITIAL_RANK, perc, REMOVE_EDGES, KNOWN)
    with open('./tmp_plot.json', 'w') as f:
        json.dump(data, f)


if __name__ == '__main__':
    main()
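
# ---------------------------------------------------------------------------
# Optional helper, a minimal sketch (not called anywhere): aggregate the
# per-snapshot JSON files that do_work() writes under percent/<rank>/<perc>/
# and count in how many runs the known node ranked above the graph median.
# The default directory below is just one instance of do_work's output
# layout (initial_rank=0.5, percent_edges=2.5).
# ---------------------------------------------------------------------------
def summarize_results(base_path='percent/0.5/2.5'):
    above_median = 0
    runs = 0
    for result in glob.glob(f'{base_path}/*.json'):
        with open(result, 'r') as f:
            stats = json.load(f)
        runs += 1
        # 'sr_known' and 'sr_perc_50' are keys written by do_work() above
        if stats['sr_known'] > stats['sr_perc_50']:
            above_median += 1
    return above_median, runs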