masterthesis/codes/frequency_deriv/frequency_deriv.py
Valentin Brandl 78fd60fc12 Content
2022-04-22 18:09:22 +02:00

98 lines
2.7 KiB
Python

#!/usr/bin/env python3
import statistics
from collections import defaultdict
from typing import Dict
import matplotlib.pyplot as plt
import time
from datetime import datetime
def load_log(path: str) -> Dict[datetime, str]:
    """Parse a crawl log into a mapping of event time to crawler name.

    Every non-blank line must consist of exactly three fields separated by
    the literal string ``' , '``: a Unix timestamp in nanoseconds, the
    crawler name, and a third field that is ignored.

    Args:
        path: Location of the log file to read.

    Returns:
        A dict keyed by timezone-aware UTC datetimes with the crawler name
        as value. Lines whose timestamps convert to the same datetime
        overwrite each other; the last one in the file wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line does not split into exactly three
            fields or its first field is not an integer.
    """
    # Local import: the file header only imports ``datetime`` itself.
    from datetime import timezone

    time_crawler: Dict[datetime, str] = {}
    with open(path, 'r') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # failing in the three-way unpack below.
            if not line.strip():
                continue
            unix_nanos, crawler, _ = line.split(' , ')
            # Aware UTC datetimes: a naive result of utcfromtimestamp() is
            # treated as *local* time by .timestamp(), which distorts
            # intervals around DST changes.
            when = datetime.fromtimestamp(
                int(unix_nanos) / 1000000000, tz=timezone.utc
            )
            time_crawler[when] = crawler
    return time_crawler
def _deviation_axes(title: str):
    """Create a figure/axes pair with the given title and the shared axis labels."""
    fig, ax = plt.subplots()
    ax.set_title(title)
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    return fig, ax


def plot_devi(data: Dict[datetime, str]) -> None:
    """Plot how far the gaps between crawl events deviate from fixed targets.

    The events are sorted by time and three kinds of scatter plots are
    written to the current working directory:

    * ``./time_devi.png`` - for every pair of consecutive events, the
      absolute difference between their gap and 2.5 seconds.
    * ``./xxx.png`` - the same deviations, one series per crawler that
      produced the earlier event of each pair.
    * ``./time_devi_<crawler>.png`` - per crawler, the absolute difference
      between the gap of its consecutive events and 10 seconds. The mean
      of that series is printed to stdout.

    Args:
        data: Mapping of event time to crawler name. Crawler names must have
            the form ``<one character><integer>`` (e.g. ``c0``) because the
            numeric suffix is parsed for the legend labels.

    Raises:
        ValueError: If a crawler name has no integer after its first
            character.
    """
    expected_gap = 2.5    # target gap between any two consecutive events
    crawler_period = 10   # target gap between two events of one crawler
    crawler_count = 4     # modulus used to name the "next" crawler in labels

    events = sorted(data.items(), key=lambda kv: kv[0])

    diffs = []
    per_crawler = defaultdict(list)
    per_diff = defaultdict(list)
    for (prev_time, prev_crawler), (next_time, _) in zip(events, events[1:]):
        # Subtract the datetimes directly: .timestamp() on naive datetimes
        # interprets them as local time and can be off around DST changes.
        diff = abs(expected_gap - (next_time - prev_time).total_seconds())
        diffs.append(diff)
        # NOTE(review): only the earlier event of each pair is recorded, so
        # the very last event never enters ``per_crawler`` - confirm intent.
        per_crawler[prev_crawler].append(prev_time)
        per_diff[prev_crawler].append(diff)

    # Plot 1: deviation of every consecutive gap, on a nominal time axis.
    fig, ax = _deviation_axes('Deviation between crawl events')
    x = [expected_gap * i for i in range(len(diffs))]
    ax.scatter(x, diffs, label='Deviation from the expected value', s=10)
    fig.legend()
    plt.savefig('./time_devi.png')
    plt.close()

    # Plot 2: the same deviations, split by the crawler of the earlier event.
    fig, ax = _deviation_axes('Deviation between crawl events')
    for crawler, vals in per_diff.items():
        x = [crawler_period * i for i in range(len(vals))]
        n = int(crawler[1:])
        ax.scatter(
            x,
            vals,
            label=f'Deviation between c{n} and c{(n + 1) % crawler_count}',
            s=10,
        )
    fig.legend()
    plt.savefig('./xxx.png')
    plt.close()

    # Plot 3: one figure per crawler with the deviation of its own period.
    for crawler, times in per_crawler.items():
        devi = [
            abs(crawler_period - (later - earlier).total_seconds())
            for earlier, later in zip(times, times[1:])
        ]
        x = [crawler_period * i for i in range(len(devi))]
        fig, ax = _deviation_axes(f'Timedeviation for {crawler}')
        ax.scatter(x, devi, s=10)
        plt.savefig(f'./time_devi_{crawler}.png')
        plt.close()
        # statistics.mean() raises StatisticsError on an empty sequence,
        # which happens when a crawler contributed fewer than two events.
        if devi:
            print(f'{crawler}: {statistics.mean(devi)}')
        else:
            print(f'{crawler}: n/a (fewer than two events)')
def main():
    """Read ``./dummy.log`` and render all deviation plots for it."""
    plot_devi(load_log('./dummy.log'))
# Run the analysis only when executed as a script, not when imported.
if '__main__' == __name__:
    main()