masterthesis/codes/node-ranking/reduce_edges.py

80 lines
2.0 KiB
Python
Raw Normal View History

2022-04-19 18:38:01 +02:00
#!/usr/bin/env python3
2022-04-21 12:15:45 +02:00
import os
import multiprocessing
2022-04-19 18:38:01 +02:00
from datetime import datetime
from random import sample, seed
import rank_with_churn
import glob
from node_ranking import (
# page_rank,
# sensor_rank,
# find_rank,
parse_csv,
# csv_loader,
build_graph,
# Node,
# RankedNode,
)
2022-04-21 12:15:45 +02:00
def open_mkdir(path, mode):
    """Open *path* with *mode*, creating missing parent directories first.

    Args:
        path: File path to open; may be a bare file name with no directory.
        mode: Mode string passed straight through to the built-in ``open``.

    Returns:
        The open file object. The caller is responsible for closing it
        (typically by using it in a ``with`` statement).
    """
    parent = os.path.dirname(path)
    # A bare file name has an empty directory part, and os.makedirs('')
    # raises FileNotFoundError, so only create directories when there are any.
    if parent:
        os.makedirs(parent, exist_ok=True)
    return open(path, mode)
2022-04-19 18:38:01 +02:00
def load_data(path):
    """Read the file at *path* and parse each of its lines with ``parse_csv``.

    Returns a list holding one ``parse_csv`` result per line, in file order.
    """
    with open(path, 'r') as handle:
        return [parse_csv(row) for row in handle]
def edges_from(g, node):
    """Return a list of the edges of *g* whose second endpoint equals *node*.

    Edges are taken from ``g.edges()`` and kept in the order it yields them.
    """
    return [edge for edge in g.edges() if edge[1] == node]
2022-04-21 12:15:45 +02:00
def remove_edges(path, edges, percentage):
    """Drop a random share of the edges ending at the known node and save the graph.

    A graph is built from *edges*; of the edges whose second endpoint is
    ``rank_with_churn.KNOWN``, ``int(count * percentage)`` are picked at random
    and removed. The remaining edges are written, one per line, as
    ``src_ip,src_port,dst_ip,dst_port`` to
    ``./edges_reduced/<percentage>/<timestamp>.txt``.

    Args:
        path: Path of the source snapshot; its file name without the extension
            must be a Unix timestamp parseable by ``float``.
        edges: Parsed edge rows accepted by ``build_graph``.
        percentage: Fraction of the selected edges to remove.

    Returns:
        A status string of the form ``'done: when=..., percentage=...'``.

    Raises:
        ValueError: If the file name is not numeric, or if *percentage* makes
            the sample size negative or larger than the number of candidates.
    """
    # Derive the snapshot time from the file name using os.path so that
    # OS-native path separators are handled as well as '/'.
    stem = os.path.splitext(os.path.basename(path))[0]
    when = datetime.fromtimestamp(float(stem))
    log = f'{when=}, {percentage=}'
    print(log)

    g = build_graph(edges)
    candidates = edges_from(g, rank_with_churn.KNOWN)
    # NOTE: the sample is not seeded here, so the selection differs per run.
    for_removal = sample(candidates, int(len(candidates) * percentage))
    for src, dst in for_removal:
        g.remove_edge(src, dst)

    out_path = f'./edges_reduced/{percentage:.02f}/{when.timestamp()}.txt'
    with open_mkdir(out_path, 'w') as f:
        f.writelines(
            f'{s.node.ip},{s.node.port},{d.node.ip},{d.node.port}\n'
            for s, d in g.edges()
        )
    return f'done: {log}'
2022-04-19 18:38:01 +02:00
2022-04-21 12:15:45 +02:00
def work(params):
    """Pool worker: unpack one parameter triple and run ``remove_edges`` on it.

    Args:
        params: A ``[path, edges, percentage]`` sequence.

    Returns:
        The status string produced by ``remove_edges``, so that the caller of
        ``pool.map`` receives it instead of ``None``.
    """
    path, edges, percentage = params
    return remove_edges(path, edges, percentage)
# Fractions of the candidate edges to remove; main() schedules one
# remove_edges() run per input file for each value in this list.
percentages = [0.0, 0.1, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9]
2022-04-19 18:38:01 +02:00
# percentages = [0.0]
def main():
    """Reduce every snapshot in ``./edges`` at every configured percentage.

    Each ``./edges/*.txt`` file is loaded once and paired with every entry of
    ``percentages``; the resulting jobs are run through ``work`` on a pool of
    8 processes, and each value the workers return is printed.
    """
    params = []
    for file in glob.glob('./edges/*.txt'):
        # Load each file a single time and reuse the rows for all percentages.
        loaded = load_data(file)
        params.extend([file, loaded, fraction] for fraction in percentages)
    print('created params')

    with multiprocessing.Pool(processes=8) as pool:
        results = pool.map(work, params)
    for result in results:
        print(result)
2022-04-19 18:38:01 +02:00
# Run the reduction only when this file is executed as a script.
if __name__ == '__main__':
    main()