#!/usr/bin/env python3
"""Partition IP addresses across crawlers by MD5 hash modulo the crawler count.

The per-crawler bucket sizes are handed to ``variance.variance``.
"""
from collections import defaultdict
import hashlib

import matplotlib.pyplot as plt
import numpy as np

import variance

# An IPv4 address can also be turned into an integer directly:
# int(ipaddress.IPv4Address("192.168.0.1"))


def load_ips(path):
    """Read one entry per line from *path*.

    Surrounding whitespace (including the line terminator) is stripped and
    empty lines are skipped, so the same address always yields the same
    string regardless of line endings or a missing final newline.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of the stripped, non-empty lines in file order.
    """
    with open(path, 'r') as f:
        return [ip for line in f if (ip := line.strip())]


def partition(crawlers, ips):
    """Assign every IP to one crawler based on the MD5 hash of the IP string.

    The 128-bit digest is read as a big-endian integer and reduced modulo
    ``len(crawlers)`` to pick the crawler.

    Args:
        crawlers: Non-empty sequence of hashable crawler identifiers.
        ips: Iterable of IP address strings.

    Returns:
        A ``defaultdict(list)`` mapping crawler -> list of IPs assigned to it.
        Crawlers that received no IP have no key.

    Raises:
        ValueError: If *crawlers* is empty.
    """
    if not crawlers:
        raise ValueError('crawlers must not be empty')

    mod = len(crawlers)
    result = defaultdict(list)
    for ip in ips:
        # MD5 is used purely as a spreading function, not for security.
        digest = hashlib.md5(ip.encode('utf-8'), usedforsecurity=False).digest()
        pos = int.from_bytes(digest, 'big') % mod
        result[crawlers[pos]].append(ip)
    return result


def main():
    """Partition the IPs in ``./random.csv`` and report the bucket sizes."""
    # ips = load_ips('./ips.csv')
    ips = load_ips('./random.csv')

    # Earlier runs also used 2, 4, 6 and 10 crawlers.
    for crawler_count in [100]:
        cs = [f'c{i:02d}' for i in range(crawler_count)]
        print(f'{len(cs)} crawlers')
        part = partition(cs, ips)

        # Count per crawler in ``cs`` so that crawlers which received no IP
        # contribute a 0 instead of being silently left out.
        counts = [len(part.get(c, ())) for c in cs]
        variance.variance(counts)

        # Disabled plotting code, kept for reference:
        #
        # ind = np.arange(len(cs))
        # fig, ax = plt.subplots()
        # ax.invert_yaxis()
        # ax.set_ylabel('Crawlers')
        # ax.set_xlabel('Peers')
        # ax.set_title(f'IP Partitioning for {len(cs)} crawlers')
        # ax.set_yticks(ind)
        # ns = []
        # for c, i in sorted(part.items(), key=lambda kv: kv[0]):
        #     print(f'\t{c}: {len(i)}')
        #     ns.append(len(i))
        # hbars = ax.barh(ind, ns)
        # ax.bar_label(hbars, label_type='center')
        # # plt.show()
        # plt.savefig(f'./ip_part_c{len(cs):02}.png')


if __name__ == '__main__':
    main()