#!/usr/bin/env python3
# masterthesis/codes/frequency_deriv/frequency_deriv.py
# Last modified: 2022-04-21 23:05:11 +02:00
from collections import defaultdict
from datetime import datetime, timezone
from typing import Dict
import time

import matplotlib.pyplot as plt
def load_log(path: str) -> Dict[datetime, str]:
    """Parse a crawl log into a mapping of event time -> crawler name.

    Each line is expected to look like ``<unix_nanos> , <crawler> , <rest>``
    (fields separated by ``' , '``).

    Timestamps are converted to timezone-aware UTC datetimes. The previous
    implementation used the deprecated ``datetime.utcfromtimestamp``, which
    returns *naive* datetimes; calling ``.timestamp()`` on those later (as
    ``plot_deriv`` does) re-interprets them in the machine's local timezone.

    :param path: path of the log file to read.
    :return: dict mapping event datetime (UTC) to crawler name; a later
        event with the exact same timestamp overwrites an earlier one.
    """
    time_crawler = {}
    with open(path, 'r') as f:
        for line in f:
            unix_nanos, crawler, _ = line.split(' , ')
            # Log stores nanoseconds; convert to seconds for fromtimestamp.
            when = datetime.fromtimestamp(
                int(unix_nanos) / 1000000000, tz=timezone.utc)
            time_crawler[when] = crawler
    return time_crawler
def plot_deriv(data: Dict[datetime, str],
               expected_interval: float = 2.5,
               crawler_interval: float = 10.0):
    """Plot how far successive crawl events deviate from their expected cadence.

    Writes ``./time_deriv.png`` for the overall event stream and one
    ``./time_deriv_<crawler>.png`` per crawler.

    :param data: mapping of event time -> crawler name (as from ``load_log``).
    :param expected_interval: expected seconds between any two consecutive
        crawl events (previously hard-coded to 2.5).
    :param crawler_interval: expected seconds between two consecutive events
        of the *same* crawler (previously hard-coded to 10).
    """
    diffs = []
    per_crawler = defaultdict(list)
    events = sorted(data.items(), key=lambda kv: kv[0])
    # Pair each event with its successor; avoid shadowing the builtin `next`.
    for (prev_time, prev_crawler), (next_time, _) in zip(events, events[1:]):
        diffs.append(abs(expected_interval
                         - (next_time.timestamp() - prev_time.timestamp())))
        # NOTE(review): only the earlier event of each pair is recorded, so
        # the very last event's crawler gets no entry — preserved as-is.
        per_crawler[prev_crawler].append(prev_time)
    # Nominal x axis: assume events are `expected_interval` seconds apart.
    x = [expected_interval * i for i in range(len(diffs))]
    fig, ax = plt.subplots()
    ax.set_title('Timedelta between crawl events in seconds')
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    ax.plot(x, diffs, label='Deviation from the expected value')
    fig.legend()
    plt.savefig('./time_deriv.png')
    plt.close()
    # One figure per crawler: deviation from the per-crawler interval.
    for crawler, times in per_crawler.items():
        devi = [abs(crawler_interval - (nxt.timestamp() - pre.timestamp()))
                for pre, nxt in zip(times, times[1:])]
        x = [crawler_interval * i for i in range(len(devi))]
        fig, ax = plt.subplots()
        ax.plot(x, devi)
        ax.set_title(f'Timedeviation for {crawler}')
        ax.set_xlabel('Time passed in seconds')
        ax.set_ylabel('Deviation in seconds')
        plt.savefig(f'./time_deriv_{crawler}.png')
        plt.close()
def main():
    """Entry point: parse the dummy log and render every deviation plot."""
    plot_deriv(load_log('./dummy.log'))


if __name__ == '__main__':
    main()