#!/usr/bin/env python3
"""Report per-bucket out-degree statistics for a timestamped edge list.

Edges are read from a CSV file, grouped by the timestamp in their first
column, turned into one graph per timestamp bucket, and summarised
(average / minimum / maximum out-degree plus the out-degree of one fixed
node). Each summary is printed and written to ``./avg_out/<timestamp>.txt``.
"""

import os
from collections import defaultdict
from datetime import datetime

from node_ranking import (
    parse_csv,
    csv_loader,
    build_graph,
    Node,
)

# Input edge list and output directory, relative to the working directory.
EDGES_PATH = './part-dist-edges.csv'
OUTPUT_DIR = './avg_out'

# strptime pattern applied to the first CSV column after '00' is appended.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S%z'


def load_data(path):
    """Group the edges of a CSV file by the timestamp in their first column.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``defaultdict(list)`` mapping each parsed ``datetime`` to the list
        of ``parse_csv`` results for the lines carrying that timestamp.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line does not start with a timestamp that
            matches ``TIMESTAMP_FORMAT`` once '00' has been appended.
    """
    data = defaultdict(list)
    with open(path, 'r') as f:
        # Iterate the file lazily instead of materialising every line first.
        for line in f:
            # A blank (e.g. trailing) line carries no edge and would make
            # strptime fail, so skip it.
            if not line.strip():
                continue
            # '00' is appended before parsing; presumably the column ends in
            # an hour-only UTC offset such as '+00' that %z cannot read
            # as-is -- TODO confirm against the data file.
            when = datetime.strptime(
                line.split(',')[0] + '00', TIMESTAMP_FORMAT
            )
            data[when].append(
                parse_csv(
                    line,
                    source_ip_index=1,
                    source_port_index=2,
                    dest_ip_index=3,
                    dest_port_index=4,
                )
            )
    return data


def main():
    """Print and persist out-degree statistics for every timestamp bucket.

    For each bucket a graph is built from its edges and the number of
    successors of every graph node is counted. The average, the minimum and
    the maximum of those counts, and the count of one fixed node, are printed
    and written to ``<OUTPUT_DIR>/<bucket.timestamp()>.txt``.
    """
    data = load_data(EDGES_PATH)
    print(f'loaded data. {len(data)} buckets')

    # The report files are opened in this directory; create it up front so a
    # fresh checkout does not fail with FileNotFoundError.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # The node of interest is the same for every bucket, so build it once.
    known = Node('34.204.196.211', 9796)

    for bucket, edges in data.items():
        print(f'bucket: {bucket}')
        print(f'edges: {len(edges)}')

        g = build_graph(edges)
        # Out-degree per node, keyed by the graph node's ``.node`` attribute.
        count_map = {node.node: len(list(g.successors(node))) for node in g}

        if not count_map:
            # min()/max() and the average are undefined without nodes.
            print('\tno nodes in graph, skipping')
            continue

        min_out = min(count_map.items(), key=lambda kv: kv[1])
        max_out = max(count_map.items(), key=lambda kv: kv[1])
        avg_out = sum(count_map.values()) / len(count_map)
        # None when the known node does not occur in this bucket; a plain
        # index lookup would abort the whole run with KeyError.
        known_out = count_map.get(known)

        stats = [
            f'\tavg_out: {avg_out}',
            f'\tmin_out: {min_out}',
            f'\tmax_out: {max_out}',
            f'\tknown_out: {known} {known_out}',
        ]
        for stat in stats:
            print(stat)

        out_path = os.path.join(OUTPUT_DIR, f'{bucket.timestamp()}.txt')
        with open(out_path, 'w') as out:
            out.write(f'bucket: {bucket}\n')
            out.writelines(f'{stat}\n' for stat in stats)


if __name__ == "__main__":
    main()