masterthesis/codes/node-ranking/place_crawlers.py

#!/usr/bin/env python3
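"""Place synthetic crawler nodes into crawled network snapshots.

For every edge list under ./edges/, build the peer graph, add a configurable
number of crawler nodes that each connect to a percentage of the existing
peers, and write the augmented edge lists (CSV rows of the form
src_ip,src_port,dst_ip,dst_port) to ./edges_with_crawler/.
"""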
import json
import multiprocessing
import os
from datetime import datetime
from random import sample, seed
import rank_with_churn
import glob
from node_ranking import (
    RankedNode,
    Node,
    # page_rank,
    # sensor_rank,
    # find_rank,
    parse_csv,
    # csv_loader,
    build_graph,
)
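
# Parse one crawl snapshot file into a list of edges (one parse_csv call per line).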
def load_data(path):
    data = []
    with open(path, 'r') as f:
        for line in f.readlines():
            data.append(parse_csv(line))
    return data
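
# Despite the name, this returns the edges pointing *to* node (e[1] == node);
# it is not used elsewhere in this script.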
def edges_from(g, node):
    return list(filter(lambda e: e[1] == node, g.edges()))
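
# Add n_crawlers synthetic crawler nodes (IPs 0.0.0.1, 0.0.0.2, ... on port 1337)
# to the graph; each crawler gets outgoing edges to n_peers randomly sampled
# existing nodes. The well-known node from rank_with_churn is excluded as a candidate.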
def create_crawlers(graph, n_crawlers, n_peers):
    nodes = list(filter(lambda n: n.node != rank_with_churn.KNOWN.node, graph.nodes()))
    crawlers = []
    for i in range(n_crawlers):
        ip = f'0.0.0.{i+1}'
        crawler = RankedNode(Node(ip, 1337), rank_with_churn.INITIAL_RANK)
        crawlers.append(crawler)
        candidates = sample(nodes, n_peers)
        for candidate in candidates:
            graph.add_edge(crawler, candidate)
    return crawlers
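
# Build the graph for one snapshot, inject the crawlers and return a mapping from
# the output path to the resulting CSV edge rows (src_ip,src_port,dst_ip,dst_port).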
def add_crawlers(when, edges, n_crawlers, prc_edges):
    print(f'{n_crawlers=:03d} {prc_edges=:.02f} {when=}')
    g = build_graph(edges, initial_rank=rank_with_churn.INITIAL_RANK)
    n_nodes = len(list(g.nodes()))
    create_crawlers(g, n_crawlers, int(n_nodes * prc_edges))
    path = f'./edges_with_crawler/{n_crawlers:03d}_crawlers/{prc_edges:.02f}_edges/{when.timestamp()}.txt'
    data = []
    for s, d in g.edges():
        data.append(f'{s.node.ip},{s.node.port},{d.node.ip},{d.node.port}\n')
    return {path: data}
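
# Unpack one parameter list so add_crawlers can be used with multiprocessing.Pool.map.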
def wrapper_add_crawlers(params):
    return add_crawlers(params[0], params[1], params[2], params[3])
percentages = [0.0, 0.3, 0.5, 0.75]
# percentages = [0.0]
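
# Build one parameter set per snapshot, crawler count and edge percentage,
# process them in parallel with a worker pool and write the augmented edge lists.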
def main():
    params = []
    for file in glob.glob('./edges/*.txt'):
        edges = load_data(file)
        print(f'loaded edges for {file}')
        when = datetime.fromtimestamp(float(file.split('/')[-1][:-4]))
        for crawlers in [5, 10, 20, 50]:
            for prc_edges in [0.1, 0.5, 0.9]:
                params.append([when, edges, crawlers, prc_edges])
    print(f'created {len(params)} params')
    # add_crawlers(when, edges.copy(), crawlers, prc_edges)
    # for percentage in percentages:
    #     remove_edges(file, percentage)
    with multiprocessing.Pool(processes=8) as pool:
        l_path_data = pool.map(wrapper_add_crawlers, params)
    for path_data in l_path_data:
        for path, rows in path_data.items():
            # Ensure the nested output directory exists before writing.
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, 'w') as f:
                for row in rows:
                    f.write(row)
if __name__ == '__main__':
    main()