#!/usr/bin/env python3
"""Plot how far crawl events deviate from their expected schedule.

The input log has one event per line in the form
``<unix_nanos> , <crawler> , <rest>``.  Two kinds of charts are written as
PNG files: one for the spacing between *all* consecutive events, and one per
crawler for the spacing between that crawler's own events.
"""
import os
import time  # noqa: F401  (unused here; kept from the original import list)
from collections import defaultdict
from datetime import datetime, timezone
from typing import Dict, List, Optional, Sequence, Tuple

NANOS_PER_SECOND = 1_000_000_000
FIELD_SEPARATOR = ' , '
# Expected spacing (seconds) between any two consecutive crawl events.
EXPECTED_INTERVAL = 2.5
# Expected spacing (seconds) between two events of the same crawler.
CRAWLER_INTERVAL = 10.0


def load_log(path: str) -> Dict[datetime, str]:
    """Parse a crawl log into a mapping of event time to crawler name.

    Args:
        path: Log file to read.  Every non-blank line must contain at least
            three fields separated by ``' , '``: a Unix timestamp in
            nanoseconds, the crawler name, and a remainder that is ignored.

    Returns:
        A dict keyed by naive UTC ``datetime``.  Because the timestamp is the
        key, a later line with the same timestamp replaces an earlier one.

    Raises:
        ValueError: If a non-blank line has fewer than three fields or its
            first field is not an integer.
    """
    time_crawler: Dict[datetime, str] = {}
    with open(path, 'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines instead of failing to unpack
            # maxsplit keeps a remainder that itself contains the separator
            # from breaking the three-way unpack.
            unix_nanos, crawler, _ = line.split(FIELD_SEPARATOR, 2)
            # Same naive-UTC value as the deprecated utcfromtimestamp().
            when = datetime.fromtimestamp(
                int(unix_nanos) / NANOS_PER_SECOND, tz=timezone.utc
            ).replace(tzinfo=None)
            time_crawler[when] = crawler
    return time_crawler


def _interval_deviations(timestamps: Sequence[datetime],
                         expected: float) -> List[float]:
    """Return |expected - gap| in seconds for each consecutive timestamp pair."""
    # Subtracting datetimes avoids datetime.timestamp(), which interprets
    # naive values as local time and is skewed by local DST transitions.
    return [
        abs(expected - (later - earlier).total_seconds())
        for earlier, later in zip(timestamps, timestamps[1:])
    ]


def _group_by_crawler(
        events: Sequence[Tuple[datetime, str]]) -> Dict[str, List[datetime]]:
    """Group event times by crawler name, preserving the order of *events*."""
    grouped = defaultdict(list)
    # Iterate every event (not just the first item of each consecutive pair)
    # so the final event of the log is attributed to its crawler as well.
    for when, crawler in events:
        grouped[crawler].append(when)
    return dict(grouped)


def _save_line_plot(x: Sequence[float], y: Sequence[float], path: str,
                    title: str, label: Optional[str] = None) -> None:
    """Draw *y* over *x* as a line chart and save it to *path*."""
    # Imported lazily so log parsing works without a plotting stack installed.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.set_title(title)
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    ax.plot(x, y, label=label)
    if label is not None:
        fig.legend()
    # Save and close this specific figure rather than the implicit current one.
    fig.savefig(path)
    plt.close(fig)


def plot_deriv(data: Dict[datetime, str], *,
               expected_interval: float = EXPECTED_INTERVAL,
               crawler_interval: float = CRAWLER_INTERVAL,
               out_dir: str = '.') -> None:
    """Write deviation charts for all events and for each crawler.

    Args:
        data: Mapping of event time to crawler name, as returned by
            ``load_log``.
        expected_interval: Expected seconds between any two consecutive
            events.
        crawler_interval: Expected seconds between two consecutive events of
            the same crawler.
        out_dir: Directory that receives ``time_deriv.png`` and one
            ``time_deriv_<crawler>.png`` per crawler.
    """
    events = sorted(data.items(), key=lambda kv: kv[0])

    overall = _interval_deviations([when for when, _ in events],
                                   expected_interval)
    _save_line_plot(
        [expected_interval * i for i in range(len(overall))],
        overall,
        os.path.join(out_dir, 'time_deriv.png'),
        title='Timedelta between crawl events in seconds',
        label='Deviation from the expected value',
    )

    for crawler, times in _group_by_crawler(events).items():
        deviations = _interval_deviations(times, crawler_interval)
        _save_line_plot(
            [crawler_interval * i for i in range(len(deviations))],
            deviations,
            os.path.join(out_dir, f'time_deriv_{crawler}.png'),
            title=f'Timedeviation for {crawler}',
        )


def main():
    """Load ``./dummy.log`` and write the deviation charts next to it."""
    data = load_log('./dummy.log')
    plot_deriv(data)


if __name__ == '__main__':
    main()