masterthesis/codes/partition/partition.py

102 lines
2.5 KiB
Python
Raw Normal View History

2022-04-15 17:21:44 +02:00
#!/usr/bin/env python3
from collections import defaultdict
import hashlib
import matplotlib.pyplot as plt
import numpy as np
2022-04-19 18:38:01 +02:00
import variance
2022-04-15 17:21:44 +02:00
# Reminder: int(ipaddress.IPv4Address("192.168.0.1")) yields the integer
# value of a dotted-quad IPv4 address.
def load_ips(path):
    """Read the entries of a text file, one per line.

    Surrounding whitespace (including the line terminator) is removed from
    every line and blank lines are skipped.  Without this, each entry would
    carry its trailing newline, and a last line lacking a final newline
    would be treated as a different string than the same entry elsewhere.

    Args:
        path: Path of the file to read.

    Returns:
        A list of the non-empty, stripped lines in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path, 'r', encoding='utf-8') as f:
        # Iterate the file lazily instead of materialising raw lines first.
        return [ip for line in f if (ip := line.strip())]
def partition(crawlers, ips):
    """Assign every IP to one crawler based on the MD5 hash of the IP string.

    The 128-bit MD5 digest of the UTF-8 encoded IP is interpreted as a
    big-endian integer and reduced modulo the number of crawlers; the
    remainder is the index of the responsible crawler in ``crawlers``.

    Args:
        crawlers: Sequence of hashable crawler identifiers.
        ips: Iterable of IP strings to distribute.

    Returns:
        A ``defaultdict(list)`` mapping each crawler to the list of IPs
        assigned to it, in input order.  Every crawler is present as a key,
        even when no IP was assigned to it.

    Raises:
        ZeroDivisionError: If ``crawlers`` is empty while ``ips`` is not.
    """
    mod = len(crawlers)
    # Seed every crawler with an empty bucket so that crawlers which receive
    # no IP still show up (with a count of zero) in statistics built on the
    # result instead of silently disappearing.
    result = defaultdict(list, {c: [] for c in crawlers})
    for ip in ips:
        # MD5 is used purely as a distribution function, not for security.
        h = hashlib.md5(ip.encode('utf-8'), usedforsecurity=False).digest()
        num = int.from_bytes(h, 'big')
        result[crawlers[num % mod]].append(ip)
    return result
def main():
    """Partition the IPs from ``./random.csv`` and evaluate the distribution.

    For every configured crawler count, the IPs are distributed with
    ``partition`` and the resulting per-crawler bucket sizes are passed to
    ``variance.variance``.
    """
    # Alternative input data set: './ips.csv'
    ips = load_ips('./random.csv')

    # Crawler counts to evaluate; earlier runs used [2, 4, 6, 10].
    crawler_counts = [100]

    for n in crawler_counts:
        # Zero-padded identifiers: 'c00', 'c01', ...
        cs = [f'c{i:02d}' for i in range(n)]
        print(f'{len(cs)} crawlers')
        part = partition(cs, ips)
        # Only the bucket sizes matter for the statistic, not the keys.
        counts = [len(bucket) for bucket in part.values()]
        # NOTE(review): the return value is ignored, so variance.variance
        # presumably reports its result itself - confirm in variance.py.
        variance.variance(counts)
# Run the experiment only when executed as a script, not when imported.
if __name__ == '__main__':
    main()