#!/usr/bin/env python3
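"""Inject simulated crawler nodes into captured peer-graph edge snapshots.

For every edge list in ./edges/, this script builds the peer graph, adds a
varying number of crawler nodes that each connect to a fraction of the
existing peers, and writes the resulting edge lists to ./edges_with_crawler/.
"""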

import glob
import multiprocessing
import os
from datetime import datetime
from random import sample

import rank_with_churn
from node_ranking import (
    RankedNode,
    Node,
    # page_rank,
    # sensor_rank,
    # find_rank,
    parse_csv,
    # csv_loader,
    build_graph,
)


def load_data(path):
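    """Read an edge CSV file and parse each line with parse_csv."""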
    data = []
    with open(path, 'r') as f:
        for line in f:
            data.append(parse_csv(line))

    return data


def edges_from(g, node):
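    """Return the edges of g whose destination is node (unused in this script)."""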
    return list(filter(lambda e: e[1] == node, g.edges()))


def create_crawlers(graph, n_crawlers, n_peers):
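    """Create n_crawlers crawler nodes, connect each to n_peers randomly sampled peers, and return them."""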
    # Exclude the KNOWN node defined in rank_with_churn from the peer candidates.
    nodes = list(filter(lambda n: n.node != rank_with_churn.KNOWN.node, graph.nodes()))
    crawlers = []
    for i in range(n_crawlers):
        # Crawlers get synthetic placeholder addresses 0.0.0.1, 0.0.0.2, ... on port 1337.
        ip = f'0.0.0.{i+1}'
        crawler = RankedNode(Node(ip, 1337), rank_with_churn.INITIAL_RANK)
        crawlers.append(crawler)
        candidates = sample(nodes, n_peers)
        for candidate in candidates:
            graph.add_edge(crawler, candidate)
    return crawlers


def add_crawlers(when, edges, n_crawlers, prc_edges):
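    """Build the peer graph from edges, inject crawlers, and return a mapping of output path to CSV rows."""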
    print(f'{n_crawlers=:03d} {prc_edges=:.02f} {when=}')
    g = build_graph(edges, initial_rank=rank_with_churn.INITIAL_RANK)
    n_nodes = len(list(g.nodes()))
    # Each crawler is connected to prc_edges * n_nodes peers.
    create_crawlers(g, n_crawlers, int(n_nodes * prc_edges))
    path = f'./edges_with_crawler/{n_crawlers:03d}_crawlers/{prc_edges:.02f}_edges/{when.timestamp()}.txt'
    data = []
    for s, d in g.edges():
        data.append(f'{s.node.ip},{s.node.port},{d.node.ip},{d.node.port}\n')
    return {path: data}


def wrapper_add_crawlers(params):
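    """Adapter for Pool.map, which passes each parameter list as a single argument."""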
    return add_crawlers(*params)


# Only referenced by the commented-out remove_edges call in main().
percentages = [0.0, 0.3, 0.5, 0.75]
# percentages = [0.0]


def main():
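    """Build one parameter set per (snapshot, crawler count, peer percentage) combination and process them in parallel."""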
    params = []
    for file in glob.glob('./edges/*.txt'):
        edges = load_data(file)
        print(f'loaded edges for {file}')
        # The file name (without .txt) is the snapshot's Unix timestamp.
        when = datetime.fromtimestamp(float(file.split('/')[-1][:-4]))
        for crawlers in [5, 10, 20, 50]:
            for prc_edges in [0.1, 0.5, 0.9]:
                params.append([when, edges, crawlers, prc_edges])
    print(f'created {len(params)} params')
    # add_crawlers(when, edges.copy(), crawlers, prc_edges)
    # for percentage in percentages:
    #     remove_edges(file, percentage)
    with multiprocessing.Pool(processes=8) as pool:
        l_path_data = pool.map(wrapper_add_crawlers, params)
        for path_data in l_path_data:
            for path, rows in path_data.items():
                # Make sure the nested output directory exists before writing.
                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, 'w') as f:
                    for row in rows:
                        f.write(row)


if __name__ == '__main__':
    main()