#!/usr/bin/env python3
"""Plot how far crawl events drift from their expected schedule.

The script reads a crawl log (see ``load_log``), computes the signed
deviation of every gap between events from its expected length and writes
several scatter plots as PNG files into the current directory.
"""

import statistics
import time
from collections import defaultdict
from datetime import datetime, timezone
from typing import Dict

import matplotlib.pyplot as plt
import numpy as np

# Expected gap between two consecutive crawl events, in seconds.
CRAWL_INTERVAL_SECONDS = 2.5
# Number of crawlers taking turns; one crawler is therefore expected to fire
# every CRAWL_INTERVAL_SECONDS * CRAWLER_COUNT seconds.
CRAWLER_COUNT = 4
NANOS_PER_SECOND = 1_000_000_000


def load_log(path: str) -> Dict[datetime, str]:
    """Parse a crawl log into a mapping of event time to crawler name.

    Every non-blank line must look like
    ``<unix-nanoseconds> , <crawler> , <rest>``; everything after the second
    separator is ignored.

    Args:
        path: Path of the log file to read.

    Returns:
        A dict keyed by naive UTC datetimes whose values are crawler names.
        If two lines carry the same timestamp, the later line wins.

    Raises:
        ValueError: If a non-blank line does not have three fields or its
            timestamp is not an integer.
    """
    time_crawler: Dict[datetime, str] = {}
    with open(path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            try:
                # maxsplit keeps a separator inside the ignored tail from
                # breaking the three-way unpacking.
                unix_nanos, crawler, _ = line.split(' , ', 2)
                seconds = int(unix_nanos) / NANOS_PER_SECOND
            except ValueError as err:
                raise ValueError(
                    f'{path}:{line_no}: malformed log line {line!r}'
                ) from err
            # Same value as the deprecated datetime.utcfromtimestamp():
            # convert in UTC, then drop the tzinfo to stay naive.
            when = datetime.fromtimestamp(seconds, tz=timezone.utc)
            time_crawler[when.replace(tzinfo=None)] = crawler
    return time_crawler


def plot_devi(data: Dict[datetime, str], *,
              interval: float = CRAWL_INTERVAL_SECONDS,
              crawler_count: int = CRAWLER_COUNT) -> None:
    """Plot deviations of the crawl schedule and print per-crawler means.

    All deviations are signed: ``actual gap - expected gap``, so a positive
    value means the later event came later than expected.

    Files written to the current directory:

    * ``time_devi.png`` - deviation of every gap between consecutive events
      (regardless of crawler) from ``interval``.
    * ``xxx.png`` - the same deviations, one series per crawler that fired
      the earlier event of the pair.
    * ``time_devi_<crawler>.png`` - for each crawler, the deviation of the
      gap between its own consecutive events from
      ``interval * crawler_count``, with a least-squares trend line.

    For each crawler one LaTeX table row with the mean deviation is printed.

    Args:
        data: Mapping of event time to crawler name, as returned by
            ``load_log``. Crawler names must be one character followed by an
            integer (e.g. ``c0``) because the number is used in plot labels.
        interval: Expected gap between consecutive events, in seconds.
        crawler_count: Number of crawlers taking turns.

    Raises:
        ValueError: If a crawler name does not end in an integer.
    """
    events = sorted(data.items(), key=lambda kv: kv[0])
    # Expected gap between two events of the *same* crawler.
    period = interval * crawler_count

    diffs = []
    per_diff = defaultdict(list)
    for (prev_time, prev_crawler), (next_time, _) in zip(events, events[1:]):
        # Subtract the datetimes directly: calling .timestamp() on naive UTC
        # values would interpret them as local time (wrong across DST
        # changes) and lose precision by subtracting two large floats.
        diff = (next_time - prev_time).total_seconds() - interval
        diffs.append(diff)
        per_diff[prev_crawler].append(diff)

    # Group over *all* events so the final event is not silently dropped
    # from its crawler's series.
    per_crawler = defaultdict(list)
    for when, crawler in events:
        per_crawler[crawler].append(when)

    # Plot 1: every gap between consecutive events. The x axis is the nominal
    # elapsed time (index * interval), not the measured one.
    elapsed = [interval * i for i in range(len(diffs))]
    fig, ax = plt.subplots()
    ax.set_title('Deviation between crawl events')
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    ax.scatter(elapsed, diffs, label='Deviation from the expected value', s=10)
    fig.legend()
    fig.savefig('./time_devi.png')
    plt.close(fig)

    # Plot 2: the same deviations, split by the crawler of the earlier event.
    fig, ax = plt.subplots()
    ax.set_title('Deviation between crawl events')
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    for crawler, vals in per_diff.items():
        elapsed = [period * i for i in range(len(vals))]
        n = int(crawler[1:])
        ax.scatter(
            elapsed,
            vals,
            label=f'Deviation between c{n} and c{(n + 1) % crawler_count}',
            s=10,
        )
    fig.legend()
    fig.savefig('./xxx.png')
    plt.close(fig)

    # Plot 3: one figure per crawler with the drift of its own period.
    for crawler, times in per_crawler.items():
        deviations = [
            (later - earlier).total_seconds() - period
            for earlier, later in zip(times, times[1:])
        ]
        if not deviations:
            # A crawler with a single event has no interval to plot, and
            # both np.polyfit and statistics.mean reject empty input.
            continue

        devi = np.array(deviations)
        elapsed = period * np.arange(devi.size)
        fig, ax = plt.subplots()
        ax.scatter(elapsed, devi, s=10)
        if devi.size >= 2:
            # A line fit needs at least two points to be well defined.
            m, b = np.polyfit(elapsed, devi, 1)
            ax.plot(elapsed, m * elapsed + b, color='red')
        ax.set_title(f'Timedeviation for {crawler}')
        ax.set_xlabel('Time passed in seconds')
        ax.set_ylabel('Deviation in seconds')
        fig.savefig(f'./time_devi_{crawler}.png')
        plt.close(fig)
        # LaTeX (siunitx) table row: "<crawler> & \num{<mean>} \\".
        print(f'{crawler} & \\num{{{statistics.mean(deviations)}}} \\\\')


def main():
    """Load ``./dummy.log`` and generate all deviation plots."""
    data = load_log('./dummy.log')
    plot_devi(data)


if __name__ == '__main__':
    main()