2022-04-15 17:21:44 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
from collections import defaultdict
|
|
|
|
import hashlib
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
import numpy as np
|
2022-04-19 18:38:01 +02:00
|
|
|
import variance
|
2022-04-15 17:21:44 +02:00
|
|
|
|
|
|
|
# int(ipaddress.IPv4Address("192.168.0.1"))
|
|
|
|
|
|
|
|
def load_ips(path):
    """Read the entries of a text file, one per line.

    Each line is treated as a single entry (presumably an IP address --
    the file contents are not validated here). Surrounding whitespace,
    including the trailing line terminator, is stripped so callers get the
    bare text rather than e.g. ``'10.0.0.1\\n'``; lines that are empty after
    stripping are skipped.

    Args:
        path: Path of the file to read.

    Returns:
        A list of non-empty, stripped lines in file order.
    """
    with open(path, 'r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        return [entry for entry in stripped if entry]
|
|
|
|
|
|
|
|
|
|
|
|
def partition(crawlers, ips):
    """Assign every entry of *ips* to one of *crawlers* by hashing it.

    The UTF-8 bytes of each entry are hashed with MD5 (used purely as a
    distribution function, not for security), the digest is read as a
    big-endian integer and reduced modulo ``len(crawlers)`` to pick the
    crawler at that index.

    Args:
        crawlers: Indexable sequence of hashable crawler identifiers.
        ips: Iterable of strings to distribute.

    Returns:
        A ``defaultdict(list)`` mapping crawler -> entries assigned to it,
        in input order. Crawlers that received nothing have no key.
    """
    buckets = defaultdict(list)
    n_crawlers = len(crawlers)

    for address in ips:
        digest = hashlib.md5(
            address.encode('utf-8'), usedforsecurity=False
        ).digest()
        owner = crawlers[int.from_bytes(digest, 'big') % n_crawlers]
        buckets[owner].append(address)

    return buckets
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the partitioning experiment for the configured crawler sets.

    Loads the entries of ``./random.csv``, partitions them across each
    crawler set in the loop below (currently only the 100-crawler set),
    prints the set size and hands the per-crawler counts to
    ``variance.variance``.
    """
    # ips = load_ips('./ips.csv')
    ips = load_ips('./random.csv')

    # Smaller crawler sets, kept for the alternative loop header below.
    c2 = ['c0', 'c1']
    c4 = ['c0', 'c1', 'c2', 'c3']
    c6 = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5']
    c10 = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
    c100 = [f'c{i:02d}' for i in range(100)]

    # for cs in [c2, c4, c6, c10]:
    for cs in [c100]:
        print(f'{len(cs)} crawlers')

        part = partition(cs, ips)

        # Count per configured crawler instead of per key of `part`: a
        # crawler that received no entry has no key there, and dropping its
        # zero would make the distribution look more even than it is.
        # `.get` is used so the defaultdict is not populated as a side effect.
        counts = [len(part.get(c, ())) for c in cs]
        # NOTE(review): the return value is discarded; presumably
        # variance.variance reports its result itself -- confirm.
        variance.variance(counts)

        # Disabled: horizontal bar chart of peers per crawler.
        # ind = np.arange(len(cs))
        # fig, ax = plt.subplots()
        # crawlers = sorted(cs)
        #
        # ax.invert_yaxis()
        # ax.set_ylabel('Crawlers')
        # ax.set_xlabel('Peers')
        # ax.set_title(f'IP Partitioning for {len(cs)} crawlers')
        # ax.set_yticks(ind)
        #
        # ns = []
        # for c, i in sorted(part.items(), key=lambda kv: kv[0]):
        #     print(f'\t{c}: {len(i)}')
        #     ns.append(len(i))
        #
        # hbars = ax.barh(ind, ns)
        # ax.bar_label(hbars, label_type='center')
        # # plt.show()
        # plt.savefig(f'./ip_part_c{len(cs):02}.png')
|
2022-04-15 17:21:44 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Run the experiment only when this file is executed as a script.
if __name__ == '__main__':
    main()
|