#!/usr/bin/env python3
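"""Churn experiment for PageRank and SensorRank.

For every churn size in ``list_churned_peers`` and every time bucket of edges,
the script adds edges from a fixed known node to randomly sampled peers, runs
``page_rank`` and ``sensor_rank`` for ``iterations`` rounds, and writes average
and maximum ranks, rank percentiles, and out-degree statistics to files under
``./churn_rank/``.
"""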

from collections import defaultdict
from datetime import datetime
from functools import reduce
from random import sample, seed

import numpy as np

from node_ranking import (
    page_rank,
    sensor_rank,
    find_rank,
    parse_csv,
    csv_loader,
    build_graph,
    Node,
    RankedNode,
)


def load_data(path):
    """Parse the CSV at ``path`` and bucket the edge records by timestamp."""
    data = defaultdict(list)
    with open(path, 'r') as f:
        for line in f:
            # The first column is a timestamp; appending '00' presumably
            # completes a truncated UTC offset (e.g. '+01' -> '+0100') so
            # that '%z' can parse it.
            when = datetime.strptime(line.split(',')[0] + '00', '%Y-%m-%d %H:%M:%S%z')
            data[when].append(
                parse_csv(line, source_ip_index=1, source_port_index=2, dest_ip_index=3, dest_port_index=4)
            )
    return data


# list_churned_peers = [20, 30, 40, 50, 60, 70, 80, 90, 100]
list_churned_peers = [3, 5, 10]
iterations = 5


def main():
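    """Run the churn experiment for every churn size and time bucket."""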
    # random but reproducible
    seed(1337)
    initial_rank = 0.5
    data = load_data('./part-dist-edges.csv')
    for churned_peers in list_churned_peers:
        print(f'loaded data. {len(data.keys())} buckets')
        for bucket, edges in data.items():
            edges = list(edges)
            print(f'bucket: {bucket}')
            print(f'edges: {len(edges)}')
            known = Node('34.204.196.211', 9796)
            known_r = RankedNode(known, initial_rank)
            g = build_graph(edges, initial_rank=initial_rank)

            # cnt = 0
            # destinations = []
            # for node in g:
            #     destinations.append(node)
            #     cnt += 1
            #     if cnt >= churned_peers:
            #         break

            # Simulate churn: connect the known node to `churned_peers`
            # randomly sampled peers.
            destinations = sample(list(g.nodes()), churned_peers)
            print(f'!!!!!! adding destinations: {destinations}')
            for node in destinations:
                g.add_edge(known_r, node)
                # g.add_edge(node, known_r)

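            # Out-degree statistics for the augmented graph.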
            count_map = {}
            for node in g:
                count_map[node] = len(list(g.successors(node)))

            sum_out = 0
            for v in count_map.values():
                sum_out += v

            min_out = min(count_map.items(), key=lambda kv: kv[1])
            max_out = max(count_map.items(), key=lambda kv: kv[1])

            nodes = len(g)
            initial_rank = 0.5

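            # One output file per churn size and time bucket.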
            with open(f'./churn_rank/{churned_peers}_edges_{bucket.timestamp()}.txt', 'w') as out:
                table = []
                pr_data = []
                sr_data = []
                for iteration in range(iterations):
                    g = page_rank(g)
                    avg_pr = reduce(lambda acc, n: acc + n.rank, g, 0.) / nodes
                    # max_pr = max(g, key=lambda n: n.rank).rank
                    max_pr = find_rank(g)
                    percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
                    perc = np.percentile(np.array(sorted(map(lambda n: n.rank, g))), percentiles)
                    # pr_data.append(f'PR: InitialRank: {initial_rank}, Iteration: {iteration}')
                    # a is the rank value at the b-th percentile.
                    for a, b in zip(perc, percentiles):
                        pr_data.append(f'PR{iteration}({initial_rank}): Percentile: {a}: {b}')
                    # print(f'InitialRank: {initial_rank}, Iteration: {iteration}, Avg. PR: {avg_pr}')
                    # print(f'InitialRank: {initial_rank}, Iteration: {iteration}, max PR: {max_pr}')

                    sr = sensor_rank(g)
                    avg_sr = reduce(lambda acc, n: acc + n.rank, sr, 0.) / nodes
                    # max_sr = max(sr, key=lambda n: n.rank).rank
                    max_sr = find_rank(sr)
                    perc = np.percentile(np.array(sorted(map(lambda n: n.rank, sr))), percentiles)
                    # sr_data.append(f'SR: InitialRank: {initial_rank}, Iteration: {iteration}')
                    for a, b in zip(perc, percentiles):
                        sr_data.append(f'SR{iteration}({initial_rank}): Percentile: {a}: {b}')
                    # print(f'InitialRank: {initial_rank}, Iteration: {iteration}, Avg. SR: {avg_sr}')
                    # print(f'InitialRank: {initial_rank}, Iteration: {iteration}, max SR: {max_sr}')

                    # LaTeX table row: iteration, avg/max PageRank, avg/max SensorRank.
                    table.append(f'{iteration+1} & {avg_pr:.8f} & {max_pr:.8f} & {avg_sr:.8f} & {max_sr:.8f} \\\\')
                    # out.write(f'{iteration+1} & {avg_pr:.8f} & {max_pr:.8f} & {avg_sr:.8f} & {max_sr:.8f} \\\\\n')
                    # with open(f'./{initial_rank}_{iteration+1}_pr.txt', 'w') as f:
                    #     for n in g:
                    #         f.write(f'{n.rank}' + '\n')
                    # with open(f'./{initial_rank}_{iteration+1}_sr.txt', 'w') as f:
                    #     for n in sr:
                    #         f.write(f'{n.rank}' + '\n')

                avg_out = float(sum_out) / len(count_map.keys())
                known_out = count_map[known_r]
                print(f'\tavg_out: {avg_out}')
                print(f'\tmin_out: {min_out}')
                print(f'\tmax_out: {max_out}')
                print(f'\tknown_out: {known} {known_out}')
                out.write(f'bucket: {bucket}\n')
                out.write(f'\tavg_out: {avg_out}\n')
                out.write(f'\tmin_out: {min_out}\n')
                out.write(f'\tmax_out: {max_out}\n')
                out.write(f'\tknown_out: {known} {known_out}\n')

                print()
                print()
                out.write('\n')
                out.write('\n')
                for row in table:
                    print(row)
                    out.write(f'{row}\n')

                print()
                print()
                out.write('\n')
                out.write('\n')
                for row in sr_data:
                    print(row)
                    out.write(f'{row}\n')

                print()
                print()
                out.write('\n')
                out.write('\n')
                for row in pr_data:
                    print(row)
                    out.write(f'{row}\n')

            print()
            print()
            print()
            print()


if __name__ == "__main__":
    main()