masterthesis/codes/node-ranking/split_dist_edges.py

45 lines
1.1 KiB
Python
Raw Normal View History

2022-04-15 17:21:44 +02:00
#!/usr/bin/env python3
import numpy as np
from random import sample, seed
from collections import defaultdict
from datetime import datetime
from functools import reduce
from node_ranking import (
page_rank,
sensor_rank,
find_rank,
parse_csv,
csv_loader,
build_graph,
Node,
RankedNode,
)
def load_data(path):
    """Group the edges of a CSV file by the timestamp in their first column.

    Every non-blank line is split on commas; column 0 is parsed as the
    timestamp and the whole line is handed to ``parse_csv`` with columns 1-4
    as source IP, source port, destination IP and destination port.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``defaultdict(list)`` mapping each parsed ``datetime`` to the values
        returned by ``parse_csv`` for the lines carrying that timestamp, in
        file order.

    Raises:
        ValueError: If the first field of a non-blank line does not match the
            expected timestamp format.
    """
    data = defaultdict(list)
    with open(path, 'r') as f:
        # Iterate the file lazily instead of materialising every line with
        # readlines(); the input may be large.
        for line in f:
            # Blank lines (e.g. a trailing empty line) carry no edge and
            # would otherwise make the timestamp parsing raise.
            if not line.strip():
                continue
            timestamp_field = line.partition(',')[0]
            # '00' is appended so that %z can parse the offset -- presumably
            # the file stores the UTC offset as hours only (e.g. "+02");
            # confirm against the data.
            when = datetime.strptime(timestamp_field + '00', '%Y-%m-%d %H:%M:%S%z')
            data[when].append(
                parse_csv(line, source_ip_index=1, source_port_index=2, dest_ip_index=3, dest_port_index=4)
            )
    return data
def main():
    """Split ``./part-dist-edges.csv`` into one edge file per timestamp.

    Loads the edges grouped by timestamp via ``load_data`` and writes each
    group to ``./edges/<unix timestamp>.txt``, one
    ``source_ip,source_port,destination_ip,destination_port`` row per edge.
    Progress messages are printed to stdout.
    """
    import os  # local import: only needed to prepare the output directory

    data = load_data('./part-dist-edges.csv')
    print('loaded data')
    # open(..., 'w') does not create missing parent directories, so make sure
    # the output directory exists before dumping any file.
    os.makedirs('./edges', exist_ok=True)
    for when, edges in data.items():
        print(f'dumping for {when}')
        with open(f'./edges/{int(when.timestamp())}.txt', 'w') as f:
            # Hand all rows to the buffered writer in one call.
            f.writelines(
                f'{edge.source.ip},{edge.source.port},{edge.destination.ip},{edge.destination.port}\n'
                for edge in edges
            )
# Run the split only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()