#!/usr/bin/env python3
# masterthesis/codes/frequency_deriv/frequency_deriv.py
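"""Plot timing deviations of crawl events parsed from a crawler log.

The code assumes consecutive crawl events (across all crawlers) arrive 2.5 s
apart and each individual crawler crawls every 10 s; the generated plots show
how far the observed intervals deviate from that schedule.
"""
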
import statistics
from collections import defaultdict
from datetime import datetime
from typing import Dict

import matplotlib.pyplot as plt
import numpy as np


def load_log(path: str) -> Dict[datetime, str]:
    """Map each crawl timestamp to the crawler that produced it.

    Each log line is expected to look like '<unix_nanos> , <crawler> , <rest>'.
    """
    time_crawler = {}
    with open(path, 'r') as f:
        for line in f:
            unix_nanos, crawler, _ = line.split(' , ')
            # Nanosecond Unix timestamp -> datetime (UTC).
            when = datetime.utcfromtimestamp(int(unix_nanos) / 1_000_000_000)
            time_crawler[when] = crawler
    return time_crawler


def plot_devi(data: Dict[datetime, str]):
    diffs = []
    per_crawler = defaultdict(list)
    per_diff = defaultdict(list)
    # All crawl events, ordered by timestamp.
    sor = sorted(data.items(), key=lambda kv: kv[0])
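
    # Pairwise walk over consecutive events: record how far each gap is from
    # the expected 2.5 s, keyed by the crawler of the earlier event.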
    for prev, nxt in zip(sor, sor[1:]):
        diff = (nxt[0].timestamp() - prev[0].timestamp()) - 2.5
        diffs.append(diff)
        per_crawler[prev[1]].append(prev[0])
        per_diff[prev[1]].append(diff)
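
    # First figure: deviation of every consecutive event pair from the
    # expected 2.5 s gap, plotted over elapsed time.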
    x = [2.5 * i for i in range(len(diffs))]
    fig, ax = plt.subplots()
    ax.set_title('Deviation between crawl events')
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    ax.scatter(x, diffs, label='Deviation from the expected value', s=10)
    fig.legend()
    plt.savefig('./time_devi.png')
    plt.close()
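
    # Second figure: the same deviations, split into one scatter series per
    # crawler (the crawler of the earlier event in each pair).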
    fig, ax = plt.subplots()
    ax.set_title('Deviation between crawl events')
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    for c, vals in per_diff.items():
        x = [10 * i for i in range(len(vals))]
        # Crawler names are 'c0'..'c3'; extract the index for the label.
        n = int(c[1:])
        ax.scatter(x, vals, label=f'Deviation between c{n} and c{(n+1)%4}', s=10)
    fig.legend()
    plt.savefig('./xxx.png')
    plt.close()
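
    # One figure per crawler: deviation of its own crawl interval from the
    # expected 10 s, with a least-squares line to make any drift visible.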
    for c in per_crawler.keys():
        t = per_crawler[c]
        devi = []
        for pre, nex in zip(t, t[1:]):
            devi.append((nex.timestamp() - pre.timestamp()) - 10)
        x = np.array([10 * i for i in range(len(devi))])
        devi = np.array(devi)
        fig, ax = plt.subplots()
        ax.scatter(x, devi, s=10)
        # Linear fit over the deviations.
        m, b = np.polyfit(x, devi, 1)
        plt.plot(x, m * x + b, color='red')
        ax.set_title(f'Time deviation for {c}')
        ax.set_xlabel('Time passed in seconds')
        ax.set_ylabel('Deviation in seconds')
        plt.savefig(f'./time_devi_{c}.png')
        plt.close()
        # LaTeX table row: crawler name & mean deviation (siunitx \num).
        print(f'{c} & \\num{{{statistics.mean(devi)}}} \\\\')


def main():
    data = load_log('./dummy.log')
    plot_devi(data)


if __name__ == '__main__':
    main()