masterthesis/codes/partition/partition.py
2022-04-15 17:21:44 +02:00

92 lines
2.2 KiB
Python

#!/usr/bin/env python3
from collections import defaultdict
import hashlib
import matplotlib.pyplot as plt
import numpy as np
# int(ipaddress.IPv4Address("192.168.0.1"))
def load_ips(path):
    """Return the addresses listed in the text file at *path*, one per line.

    Surrounding whitespace, including the line terminator, is removed from
    every entry and blank lines are skipped.  Without this the newline would
    become part of the string that is hashed later on, and a final line
    lacking a terminator would be treated differently from all others.

    Args:
        path: Location of the text file to read.

    Returns:
        A list of non-empty, stripped lines in file order.
    """
    with open(path, 'r', encoding='utf-8') as f:
        # Iterate the file lazily instead of materialising raw lines first.
        return [entry for line in f if (entry := line.strip())]
def partition(crawlers, ips):
    """Assign every entry of *ips* to one of *crawlers* by hashing it.

    The MD5 digest of the UTF-8 encoded entry is read as one big-endian
    integer; that integer modulo ``len(crawlers)`` is the index of the
    crawler that receives the entry.

    Args:
        crawlers: Indexable sequence of hashable crawler identifiers.
        ips: Iterable of strings to distribute.

    Returns:
        A ``defaultdict(list)`` mapping each crawler that received at least
        one entry to its entries, in input order.  Crawlers that received
        nothing have no key.
    """
    buckets = defaultdict(list)
    crawler_count = len(crawlers)
    for address in ips:
        # MD5 is used purely as a spreading function, not for security.
        digest_hex = hashlib.md5(
            address.encode('utf-8'), usedforsecurity=False
        ).hexdigest()
        # Parsing the hex digest equals reading the raw digest big-endian.
        owner = crawlers[int(digest_hex, 16) % crawler_count]
        buckets[owner].append(address)
    return buckets
def main():
    """Partition the peers from ``./ips.csv`` and plot the distribution.

    The peers are distributed among 2, 4, 6 and 10 crawlers (named ``c0``,
    ``c1``, ...).  For each crawler set the number of peers per crawler is
    printed and a horizontal bar chart is saved as ``./ip_part_cNN.png``,
    where ``NN`` is the zero-padded crawler count.
    """
    ips = load_ips('./ips.csv')
    for crawler_count in (2, 4, 6, 10):
        cs = [f'c{k}' for k in range(crawler_count)]
        print(f'{len(cs)} crawlers')
        part = partition(cs, ips)
        crawlers = sorted(cs)

        # Look every crawler up explicitly: partition() only creates entries
        # for crawlers that received at least one peer, and a missing entry
        # would leave fewer bar widths than bar positions.
        ns = [len(part.get(c, ())) for c in crawlers]
        for c, n in zip(crawlers, ns):
            print(f'\t{c}: {n}')

        ind = np.arange(len(crawlers))
        fig, ax = plt.subplots()
        ax.invert_yaxis()  # first crawler at the top
        ax.set_ylabel('Crawlers')
        ax.set_xlabel('Peers')
        ax.set_title(f'IP Partitioning for {len(cs)} crawlers')
        ax.set_yticks(ind)
        hbars = ax.barh(ind, ns)
        ax.bar_label(hbars, label_type='center')
        fig.savefig(f'./ip_part_c{len(cs):02}.png')
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)

        # NOTE(review): the original indentation was lost; the four blank
        # separator lines are assumed to follow each crawler set - confirm.
        print()
        print()
        print()
        print()
# Run the partitioning experiment only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()