Content
BIN  assets/reduced_ranks/0/in_out.png  (new file, 45 KiB)
BIN  assets/reduced_ranks/0/pr.png      (new file, 48 KiB)
BIN  assets/reduced_ranks/0/sr.png      (new file, 54 KiB)
BIN  assets/reduced_ranks/1/in_out.png  (new file, 45 KiB)
BIN  assets/reduced_ranks/1/pr.png      (new file, 53 KiB)
BIN  assets/reduced_ranks/1/sr.png      (new file, 55 KiB)
BIN  assets/reduced_ranks/2/in_out.png  (new file, 45 KiB)
BIN  assets/reduced_ranks/2/pr.png      (new file, 54 KiB)
BIN  assets/reduced_ranks/2/sr.png      (new file, 50 KiB)
BIN  assets/reduced_ranks/3/in_out.png  (new file, 44 KiB)
BIN  assets/reduced_ranks/3/pr.png      (new file, 56 KiB)
BIN  assets/reduced_ranks/3/sr.png      (new file, 51 KiB)
BIN  assets/time_deriv.png              (new file, 44 KiB)
codes/frequency_deriv/.envrc (new file)

eval "$(lorri direnv)"
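The one-liner is the standard direnv hook for lorri: with direnv allowed for this directory, entering codes/frequency_deriv/ asks the lorri daemon for the environment built from the shell.nix below, so the Python dependencies load automatically.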
codes/frequency_deriv/frequency_deriv.py (new file)

#!/usr/bin/env python3

from collections import defaultdict
from datetime import datetime
from typing import Dict

import matplotlib.pyplot as plt


def load_log(path: str) -> Dict[datetime, str]:
    # map each crawl event's timestamp to the crawler that performed it
    time_crawler = {}
    with open(path, 'r') as f:
        for line in f:
            unix_nanos, crawler, _ = line.split(' , ')
            when = datetime.utcfromtimestamp(int(unix_nanos) / 1000000000)
            time_crawler[when] = crawler
    return time_crawler


def plot_deriv(data: Dict[datetime, str]):
    diffs = []
    per_crawler = defaultdict(list)
    sor = list(sorted(data.items(), key=lambda kv: kv[0]))

    # deviation of each gap between consecutive events from the
    # expected 2.5 s (4 crawlers at 6 requests per minute each)
    for prev, nxt in zip(sor, sor[1:]):
        diffs.append(abs(2.5 - (nxt[0].timestamp() - prev[0].timestamp())))
        per_crawler[prev[1]].append(prev[0])

    x = [2.5 * i for i in range(len(diffs))]
    fig, ax = plt.subplots()
    ax.set_title('Timedelta between crawl events in seconds')
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    ax.plot(x, diffs, label='Deviation from the expected value')
    fig.legend()
    plt.savefig('./time_deriv.png')
    plt.close()

    # per-crawler deviation from the expected 10 s revisit interval
    for c in per_crawler.keys():
        t = per_crawler[c]
        devi = []
        for pre, nex in zip(t, t[1:]):
            devi.append(abs(10 - (nex.timestamp() - pre.timestamp())))
        x = [10 * i for i in range(len(devi))]
        fig, ax = plt.subplots()
        ax.plot(x, devi)
        ax.set_title(f'Timedeviation for {c}')
        ax.set_xlabel('Time passed in seconds')
        ax.set_ylabel('Deviation in seconds')
        plt.savefig(f'./time_deriv_{c}.png')
        plt.close()


def main():
    data = load_log('./dummy.log')
    plot_deriv(data)


if __name__ == '__main__':
    main()
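The parser in load_log() implies a fixed input format: three fields joined by ' , ', of which only the first two (unix nanoseconds and crawler name) are used. A minimal sketch of a matching dummy.log, with made-up values, since the actual log produced by the crawlers is not part of this commit:

# illustrative only: unix nanoseconds , crawler id , trailing field (ignored)
lines = [
    '1621324800000000000 , c0 , crawled',
    '1621324802500000000 , c1 , crawled',
]
with open('dummy.log', 'w') as f:
    f.write('\n'.join(lines) + '\n')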
codes/frequency_deriv/shell.nix (new file)

{ pkgs ? import <nixpkgs> {} }:
let
  py-packages = python-packages: with python-packages; [
    matplotlib
    numpy
    networkx
    scipy
  ];
  py-package = pkgs.python3.withPackages py-packages;
in

pkgs.mkShell {
  buildInputs = [
    py-package
  ];
}
BIN  codes/frequency_deriv/time_deriv.png     (new file, 46 KiB)
BIN  codes/frequency_deriv/time_deriv_c0.png  (new file, 41 KiB)
BIN  codes/frequency_deriv/time_deriv_c1.png  (new file, 25 KiB)
BIN  codes/frequency_deriv/time_deriv_c2.png  (new file, 26 KiB)
BIN  codes/frequency_deriv/time_deriv_c3.png  (new file, 46 KiB)
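These five images are the plots written by frequency_deriv.py above: time_deriv.png is the combined deviation plot, and time_deriv_c0.png through time_deriv_c3.png come from the plt.savefig(f'./time_deriv_{c}.png') call, which suggests the crawler names in the underlying log were c0 through c3.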
@@ -23,7 +23,7 @@ def main():
         avg_in.append(v['avg_in'])
         avg_out.append(v['avg_out'])
         known_in.append(v['known_in'])
-        known_out.append(v['known_out'])
+        # known_out.append(v['known_out'])
         number_of_nodes.append(v['number_of_nodes'])


@@ -31,11 +31,12 @@ def main():
     ax.plot(times, avg_in, label='Avg. In')
     # ax.plot(times, avg_out, label='Avg. Out')
     ax.plot(times, known_in, label='Known In')
-    ax.plot(times, known_out, label='Known Out')
+    # ax.plot(times, known_out, label='Known Out')
-    ax.plot(times, number_of_nodes, label='Number of nodes')
+    ax.plot(times, number_of_nodes, label='Total number of nodes')
     ax.set_title(f'Average edge count per hour')
     fig.autofmt_xdate()
     fig.legend()
+    ax.set_ylim(ymin=0)
     plt.savefig(f'./tmp_plot.png')
     # print('created sr plot')
     plt.show()
@@ -124,7 +124,7 @@ def plot2(percentage, algo, data):
     fig, ax = plt.subplots()
     a = 'SensorRank' if algo == 'sr' else 'RageRank'
     ax.set_ylabel(f'{a}')
-    ax.plot(times, mean, label='Avg. Rank')
+    # ax.plot(times, mean, label='Avg. Rank')
     # ax.errorbar(times, mean, stdev, label='Avg. Rank')
     ax.plot(times, mean, label='Avg. Rank')
     # ax.plot(times, known_in, label='Known In') # TODO
@@ -156,13 +156,14 @@ def plot_in_out2(percentage, data):
         known_out.append(d[algo]['known_out'])

     fig, ax = plt.subplots()
-    a = 'SensorRank' if algo == 'sr' else 'RageRank'
-    ax.set_ylabel(f'{a}')
+    # a = 'SensorRank' if algo == 'sr' else 'RageRank'
+    ax.set_ylabel('Incoming edges')
     ax.plot(times, avg_in, label='Avg. In')
     # ax.plot(times, known_in, label='Known In') # TODO
     ax.plot(times, known_in, label='Known In')
-    ax.plot(times, known_out, label='Known out')
-    title = f'In And Out after removing {percentage * 100}% edges'
+    # ax.plot(times, known_out, label='Known out')
+    ax.set_ylim(ymin=0)
+    title = f'In degree after removing {percentage * 100}% edges'
     ax.set_title(title)

     fig.autofmt_xdate()
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3

+import networkx as nx
 import statistics
 import multiprocessing
 from random import sample
@@ -34,6 +35,19 @@ def sensor_rank(graph):
         lambda g, node: sr(g, node, number_of_nodes)
     )

+
+def pr_nx(graph):
+    return nx.algorithms.link_analysis.pagerank(graph, alpha=1.0)
+
+def sr_nx(graph, pr_nx):
+    sr = {}
+    V = len(list(graph.nodes()))
+    for node, rank in pr_nx.items():
+        succs = len(list(graph.successors(node)))
+        if succs != 0:
+            preds = len(list(graph.predecessors(node)))
+            sr[node] = (rank / succs) * (preds / V)
+    return sr
+
 def find_known(g, known):
     nodes = list(filter(lambda n: n.node == known.node, g.nodes()))
     n = len(nodes)
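Written out, the sr_nx loop above computes (notation assumed from the thesis: \(\text{succ}(v)\) and \(\text{pred}(v)\) for the successors and predecessors of \(v\)):

\[
    \text{SR}(v) = \frac{\text{PR}(v)}{\abs{\text{succ}(v)}} \cdot \frac{\abs{\text{pred}(v)}}{\abs{V}}
\]

with \(\text{PR}\) taken from networkx's pagerank at \(\alpha = 1.0\), i.e. pure rank propagation without damping; nodes without successors receive no SensorRank entry.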
@@ -133,6 +147,49 @@ def rank(path):
     res = {'sr': res_sr, 'pr': res_pr}
     return res

+
+def analyze2(g, data):
+    known = find_known(g, rank_with_churn.KNOWN)
+    # avg_r = rank_with_churn.avg_without_known(g)
+    avg_in = rank_with_churn.avg_in(g)
+    kn_in = known_in(g, known)
+    kn_out = known_out(g, known)
+    # d = list(map(lambda node: node.rank, g.nodes()))
+    d = list(map(lambda kv: kv[1], data.items()))
+    mean = statistics.mean(d)
+    stddev = statistics.stdev(d)
+    r_known = None
+    for k, v in data.items():
+        if k.node == known.node:
+            r_known = v
+            break
+    return {
+        'known_rank': r_known,
+        'known_in': kn_in,
+        'known_out': kn_out,
+        # 'avg_rank': avg_r,
+        'avg_in': avg_in,
+        'mean': mean,
+        'stdev': stddev,
+    }
+
+
+def rank2(path):
+    edges = reduce_edges.load_data(path)
+    g = build_graph(edges, initial_rank=rank_with_churn.INITIAL_RANK)
+
+    print('pr start')
+    g_pr = pr_nx(g)
+    print('sr start')
+    g_sr = sr_nx(g, g_pr)
+    print('analyze pr start')
+    res_pr = analyze2(g, g_pr)
+    print('analyze sr start')
+    res_sr = analyze2(g, g_sr)
+    print('done!')
+    res = {'sr': res_sr, 'pr': res_pr}
+    return res
+
+
 def main():
     # pool = multiprocessing.Pool(processes=4)
     params = []
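For orientation, a sketch of the structure rank2 returns and main serializes to JSON (the key names come from analyze2 above; the Ellipsis values are placeholders for the computed floats, not real results):

result = {
    'pr': {'known_rank': ..., 'known_in': ..., 'known_out': ...,
           'avg_in': ..., 'mean': ..., 'stdev': ...},
    'sr': {'known_rank': ..., 'known_in': ..., 'known_out': ...,
           'avg_in': ..., 'mean': ..., 'stdev': ...},
}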
@@ -150,6 +207,7 @@ def main():
     with multiprocessing.Pool(processes=8) as pool:
         l_path_data = pool.map(wohoo, params)
     for path_data in l_path_data:
+        print(f'{path_data=}')
         for path, data in path_data.items():
             with reduce_edges.open_mkdir(path, 'w') as f:
                 json.dump(data, f)
@@ -171,8 +229,8 @@ def wohoo(p):
     # with open() as f:
     #     json.dump(result, f)
     when = datetime.fromtimestamp(float(file.split('/')[-1][:-4]))
-    path = f'./data_reduced/{reduced_percentage:.02f}/{when.timestamp()}.json'
-    result = rank(file)
+    path = f'./data_reduced2/{reduced_percentage:.02f}/{when.timestamp()}.json'
+    result = rank2(file)
     return {path: result}

@@ -4,6 +4,7 @@ let
     matplotlib
     numpy
     networkx
+    scipy
   ];
   py-package = pkgs.python3.withPackages py-packages;
 in
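The scipy addition lines up with the networkx import added above: recent networkx releases implement pagerank on scipy sparse matrices, so the rank2 path presumably fails in this environment without it.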
content.tex (306 changed lines)
@@ -445,7 +445,7 @@ It also allows us to get rid of the state in our strategy since we don't have to
 %}}} load balancing

 %{{{ frequency reduction
-\subsection{Reduction of Request Frequency}
+\subsection{Reduction of Request Frequency}\label{sec:stratRedReqFreq}

 The GameOver Zeus botnet limited the number of requests a peer was allowed to perform and blacklisted peers, that exceeded the limit, as an anti-monitoring mechanism~\cite{bib:andriesse_goz_2013}.
 In an uncoordinated crawler approach, the crawl frequency has to be limited to prevent hitting the request limit.
@@ -584,7 +584,11 @@ Based on this, \emph{SensorRank} is defined as

 Since crawlers never respond to peer list requests, they will always be detectable by the described approach but sensors might benefit from the following technique.

-By responding to peer list requests with plausible data and thereby producing valid outgoing edges from the sensors, we will try to make those metrics less suspicious.
+The PageRank and SensorRank metrics are calculated over the sum of the ranks of a node's predecessors.
+We will investigate how limiting the number of predecessors helps produce inconspicuous ranks for a sensor.
+
+% By responding to peer list requests with plausible data and thereby producing valid outgoing edges from the sensors, we will try to make those metrics less suspicious.
+To counter the SensorBuster metric, outgoing edges to valid peers from the botnet are required so the sensor does not build a \ac{wcc}.
 The challenge here is deciding which peers can be returned without actually supporting the network.
 The following candidates to place on the neighbor list will be investigated:
@@ -606,10 +610,6 @@ The following candidates to place on the neighbor list will be investigated:
 Returning all the other sensors when responding to peer list requests, thereby effectively creating a complete graph \(K_{\abs{C}}\) among the workers, creates valid outgoing edges.
 The resulting graph will still form a \ac{wcc} with now edges back into the main network.

-PageRank is the sum of a node's predecessors ranks divided by the amount of successors each predecessor's successors.
-Predecessors with many successors should therefore reduce the rank.
-By their nature, crawlers have many successors, so they are good candidates to reduce the PageRank of a sensor.
-\todo{crawlers as predecessors}

 %{{{ churned peers
 \subsubsection{Churned Peers After IP Rotation}
@@ -630,6 +630,20 @@ Those peers can be used as fake neighbors and create valid-looking outgoing edge

 %}}} cg nat

+\clearpage{}
+\todo{clearpage?}
+In theory, it would be possible to detect churned peers or peers behind carrier-grade \acs{nat} without coordinating the sensors, but the coordination gives us a few advantages:
+
+\begin{itemize}
+
+    \item A peer might blacklist a sensor which looks exactly the same as a churned peer from the point of view of an uncoordinated sensor.
+          The coordination backend has more knowledge and can detect this if another sensor is still contacted by the peer in question.
+
+    \item The coordination backend can include different streams of information to decide which peers to place in the sensor's neighborhood.
+          Knowledge about geolocations, \ac{as} and their IP rotation behavior can be consulted to make better informed choices for neighborhood candidates.
+
+\end{itemize}
+
 %}}} against graph metrics

 %}}} strategies
@@ -650,7 +664,7 @@ We will compare the variance \(\sigma^2\) and standard derivation \(\sigma\) to
 %{{{ fig:ipPartC02
 \begin{figure}[H]
     \centering
-    \includegraphics[width=.7\linewidth]{ip_part_c02.png}
+    \includegraphics[width=1\linewidth]{ip_part_c02.png}
     \caption{IP based partitioning for 2 crawlers}\label{fig:ipPartC02}
     \begin{align*}
         n &= 2 \\

@@ -668,7 +682,7 @@ We will compare the variance \(\sigma^2\) and standard derivation \(\sigma\) to
 %{{{ fig:ipPartC04
 \begin{figure}[H]
     \centering
-    \includegraphics[width=.7\linewidth]{ip_part_c04.png}
+    \includegraphics[width=1\linewidth]{ip_part_c04.png}
     \caption{IP based partitioning for 4 crawlers}\label{fig:ipPartC04}
     \begin{align*}
         n &= 4 \\

@@ -687,7 +701,7 @@ We will compare the variance \(\sigma^2\) and standard derivation \(\sigma\) to
 %{{{ fig:ipPartC06
 \begin{figure}[H]
     \centering
-    \includegraphics[width=.7\linewidth]{ip_part_c06.png}
+    \includegraphics[width=1\linewidth]{ip_part_c06.png}
     \caption{IP based partitioning for 6 crawlers}\label{fig:ipPartC06}
     \begin{align*}
         n &= 6 \\

@@ -706,7 +720,7 @@ We will compare the variance \(\sigma^2\) and standard derivation \(\sigma\) to
 %{{{ fig:ipPartC10
 \begin{figure}[H]
     \centering
-    \includegraphics[width=.7\linewidth]{ip_part_c10.png}
+    \includegraphics[width=1\linewidth]{ip_part_c10.png}
     \caption{IP based partitioning for 10 crawlers}\label{fig:ipPartC10}
     \begin{align*}
         n &= 10 \\

@@ -742,7 +756,7 @@ Therefore, we simulate the partitioning on a bigger sample of \num{1000000} rand
 %{{{ fig:randIpPartC02
 \begin{figure}[H]
     \centering
-    \includegraphics[width=.8\linewidth]{rand_ip_part_c02.png}
+    \includegraphics[width=1\linewidth]{rand_ip_part_c02.png}
     \caption{IP based partitioning for 2 crawlers on generated dataset}\label{fig:randIpPartC02}
     \begin{align*}
         n &= 2 \\

@@ -761,7 +775,7 @@ Therefore, we simulate the partitioning on a bigger sample of \num{1000000} rand
 %{{{ fig:randIpPartC04
 \begin{figure}[H]
     \centering
-    \includegraphics[width=.8\linewidth]{rand_ip_part_c04.png}
+    \includegraphics[width=1\linewidth]{rand_ip_part_c04.png}
     \caption{IP based partitioning for 4 crawlers on generated dataset}\label{fig:randIpPartC04}
     \begin{align*}
         n &= 4 \\

@@ -780,7 +794,7 @@ Therefore, we simulate the partitioning on a bigger sample of \num{1000000} rand
 %{{{ fig:randIpPartC06
 \begin{figure}[H]
     \centering
-    \includegraphics[width=.8\linewidth]{rand_ip_part_c06.png}
+    \includegraphics[width=1\linewidth]{rand_ip_part_c06.png}
     \caption{IP based partitioning for 6 crawlers on generated dataset}\label{fig:randIpPartC06}
     \begin{align*}
         n &= 6 \\
@@ -831,10 +845,74 @@ Therefore, we simulate the partitioning on a bigger sample of \num{1000000} rand

 As expected, the work is still not perfectly distributed among the crawlers but evenly enough for our use case.
 The derivation for larger botnets is within \SI{0.5}{\percent} of the even distribution.
-This is good enough for balancing the work among workers.
+This is good enough for balancing the tasks among workers.

 %}}} eval load balancing

+%{{{ eval redu requ freq
+\subsection{Reduction of Request Frequency}
+
+To evaluate the request frequency optimization described in \Fref{sec:stratRedReqFreq}, we crawl a simulated peer and check if the requests are evenly distributed and how big the deviation from the theoretically optimal result is.
+To get more realistic results, the crawlers and the simulated peer run on different machines so they are not within the same LAN.
+We use the same parameters as in the example above:
+
+\begin{align*}
+    n &= 4 \\
+    l &= \SI{6}{\request\per\minute} \\
+    f &= \SI{24}{\request\per\minute} \\
+    o &= \SI{2.5}{\second}
+\end{align*}
+
+To recap, this is what the optimal timeline would look like:
+
+\begin{center}
+    \begin{chronology}[10]{0}{60}{0.9\textwidth}
+        \event{0}{\(C_0\)}
+        \event{10}{\(C_0\)}
+        \event{20}{\(C_0\)}
+        \event{30}{\(C_0\)}
+        \event{40}{\(C_0\)}
+        \event{50}{\(C_0\)}
+        \event{60}{\(C_0\)}
+
+        \event{2.5}{\(C_1\)}
+        \event{12.5}{\(C_1\)}
+        \event{22.5}{\(C_1\)}
+        \event{32.5}{\(C_1\)}
+        \event{42.5}{\(C_1\)}
+        \event{52.5}{\(C_1\)}
+
+        \event{5}{\(C_2\)}
+        \event{15}{\(C_2\)}
+        \event{25}{\(C_2\)}
+        \event{35}{\(C_2\)}
+        \event{45}{\(C_2\)}
+        \event{55}{\(C_2\)}
+
+        \event{7.5}{\(C_3\)}
+        \event{17.5}{\(C_3\)}
+        \event{27.5}{\(C_3\)}
+        \event{37.5}{\(C_3\)}
+        \event{47.5}{\(C_3\)}
+        \event{57.5}{\(C_3\)}
+    \end{chronology}
+\end{center}
+
+The ideal distribution would be \SI{2.5}{\second} between each two events.
+Due to network latency and load from crawling other peers, we expect the actual result to deviate from the optimal value over time.
+With this experiment we try to estimate the impact of the latency.
+If it is existent and measurable, the crawlers have to be rescheduled periodically to keep the deviation at an acceptable level.
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=1\linewidth]{time_deriv.png}
+    \caption{Deviation from the expected interval}\label{fig:timeDeriv}
+\end{figure}
+
+%}}} eval redu requ freq
+
 %{{{ eval creating edges
 \subsection{Impact of Additional Edges on Graph Metrics}

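As a cross-check of the parameters in the added subsection (these relations are implied by the timeline rather than stated): with \(n = 4\) crawlers at \(l = \SI{6}{\request\per\minute}\) each, the combined frequency is \(f = n \cdot l = \SI{24}{\request\per\minute}\), one event every \(60/24 = \SI{2.5}{\second}\), which is also the offset \(o\) between consecutive crawlers, while each individual crawler returns every \(60/6 = \SI{10}{\second}\); these are exactly the constants 2.5 and 10 hard-coded in frequency_deriv.py.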
@@ -908,8 +986,6 @@ SensorBuster relies on the assumption that sensors don't have any outgoing edges

 For the \ac{wcc} metric, it is obvious that even a single edge back into the main network is enough to connect the sensor back to the main graph and therefore beat this metric.
-
-\todo{formulieren}

 \subsubsection{Effectiveness against Page- and SensorRank}

 In this section we will evaluate how adding outgoing edges to a sensor impacts it's PageRank and SensorRank values.
@@ -1014,59 +1090,158 @@ Looking at the data in smaller buckets of one hour each, the average number of s
 \end{figure}
 %}}}fig:avg_out_edges

+Experiments were performed in which the incoming edges for the known sensor are reduced by increasing factors, to see when the sensor's rank reaches the overall average.
+
+% \begin{figure}[H]
+%     \centering
+%     \includegraphics[width=1\textwidth]{reduced_ranks/0/in_out.png}
+%     % \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
+% \end{figure}%
+
+\begin{figure}[H]
+    \centering
+    \begin{subfigure}[b]{1\textwidth}
+        \centering
+        \includegraphics[width=.8\linewidth]{reduced_ranks/0/pr.png}
+        \caption{PageRank after removing \SI{0}{\percent} of edges}\label{fig:pr0}
+    \end{subfigure}%
+    \hfill
+    \begin{subfigure}[b]{1\textwidth}
+        \centering
+        \includegraphics[width=.8\linewidth]{reduced_ranks/0/sr.png}
+        \caption{SensorRank after removing \SI{0}{\percent} of edges}\label{fig:sr0}
+    \end{subfigure}%
+    % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
+\end{figure}
+
+\Fref{fig:pr0} and \Fref{fig:sr0} show the situation on the base truth without modifications.
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=.8\textwidth]{reduced_ranks/1/in_out.png}
+    \caption{Incoming edges after removing \SI{10}{\percent} of edges}\label{fig:in1}
+    % \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
+\end{figure}%
+
+\begin{figure}[H]
+    \centering
+    \begin{subfigure}[b]{1\textwidth}
+        \centering
+        \includegraphics[width=.8\linewidth]{reduced_ranks/1/pr.png}
+        \caption{PageRank after removing \SI{10}{\percent} of edges}\label{fig:pr1}
+    \end{subfigure}%
+    \hfill
+    \begin{subfigure}[b]{1\textwidth}
+        \centering
+        \includegraphics[width=.8\linewidth]{reduced_ranks/1/sr.png}
+        \caption{SensorRank after removing \SI{10}{\percent} of edges}\label{fig:sr1}
+    \end{subfigure}%
+    % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
+\end{figure}
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=.8\textwidth]{reduced_ranks/2/in_out.png}
+    \caption{Incoming edges after removing \SI{20}{\percent} of edges}\label{fig:in2}
+    % \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
+\end{figure}%
+
+\begin{figure}[H]
+    \centering
+    \begin{subfigure}[b]{1\textwidth}
+        \centering
+        \includegraphics[width=.8\linewidth]{reduced_ranks/2/pr.png}
+        \caption{PageRank after removing \SI{20}{\percent} of edges}\label{fig:pr2}
+    \end{subfigure}%
+    \hfill
+    \begin{subfigure}[b]{1\textwidth}
+        \centering
+        \includegraphics[width=.8\linewidth]{reduced_ranks/2/sr.png}
+        \caption{SensorRank after removing \SI{20}{\percent} of edges}\label{fig:sr2}
+    \end{subfigure}%
+    % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
+\end{figure}
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=.8\textwidth]{reduced_ranks/3/in_out.png}
+    \caption{Incoming edges after removing \SI{30}{\percent} of edges}\label{fig:in3}
+    % \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
+\end{figure}%
+
+\begin{figure}[H]
+    \centering
+    \begin{subfigure}[b]{1\textwidth}
+        \centering
+        \includegraphics[width=.8\linewidth]{reduced_ranks/3/pr.png}
+        \caption{PageRank after removing \SI{30}{\percent} of edges}\label{fig:pr3}
+    \end{subfigure}%
+    \hfill
+    \begin{subfigure}[b]{1\textwidth}
+        \centering
+        \includegraphics[width=.8\linewidth]{reduced_ranks/3/sr.png}
+        \caption{SensorRank after removing \SI{30}{\percent} of edges}\label{fig:sr3}
+    \end{subfigure}%
+    % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
+\end{figure}
+
+We can see in \Fref{fig:sr2} and \Fref{fig:pr3} that we have to reduce the incoming edges by \SI{20}{\percent} and \SI{30}{\percent} respectively to get average values for SensorRank and PageRank.
+This also means that the amount of incoming edges for a sensor must be about the same as the average amount of incoming edges, as can be seen in \Fref{fig:in3}.
+Depending on the protocol details of the botnet (\eg{} how many incoming edges are allowed per peer), this means that a large number of sensors is needed if we want to monitor the whole network.
+
 % Experiments were performed, in which a percentage of random outgoing edges were added to the known sensor, based on the amount of incoming edges:
-We evaluate the impact of outgoing edges by picking a percentage of random nodes in each bucket and creating edges from the sensor to each of the sampled peers, thereby evening the ratio between \(\deg^{+}\) and \(\deg^{-}\).
+% We evaluate the impact of outgoing edges by picking a percentage of random nodes in each bucket and creating edges from the sensor to each of the sampled peers, thereby evening the ratio between \(\deg^{+}\) and \(\deg^{-}\).


-\begin{figure}[H]
-    \centering
-    \begin{subfigure}[b]{.75\textwidth}
-        \centering
-        \includegraphics[width=1\linewidth]{ranks/pr_75.png}
-        \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
-    \end{subfigure}%
-    \hfill
-    \begin{subfigure}[b]{.75\textwidth}
-        \centering
-        \includegraphics[width=1\linewidth]{ranks/sr_75.png}
-        \caption{SensorRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr75}
-    \end{subfigure}%
-    % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
-\end{figure}
+% \begin{figure}[H]
+%     \centering
+%     \begin{subfigure}[b]{.75\textwidth}
+%         \centering
+%         \includegraphics[width=1\linewidth]{ranks/pr_75.png}
+%         \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
+%     \end{subfigure}%
+%     \hfill
+%     \begin{subfigure}[b]{.75\textwidth}
+%         \centering
+%         \includegraphics[width=1\linewidth]{ranks/sr_75.png}
+%         \caption{SensorRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr75}
+%     \end{subfigure}%
+%     % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
+% \end{figure}

-\begin{figure}[H]
-    \centering
-    \begin{subfigure}[b]{.75\textwidth}
-        \centering
-        \includegraphics[width=1\linewidth]{ranks/pr_100.png}
-        \caption{PageRank after adding \(1.0 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr100}
-    \end{subfigure}%
-    \hfill
-    \begin{subfigure}[b]{.75\textwidth}
-        \centering
-        \includegraphics[width=1\linewidth]{ranks/sr_100.png}
-        \caption{SensorRank after adding \(1.0 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr100}
-    \end{subfigure}%
-    % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
-\end{figure}
+% \begin{figure}[H]
+%     \centering
+%     \begin{subfigure}[b]{.75\textwidth}
+%         \centering
+%         \includegraphics[width=1\linewidth]{ranks/pr_100.png}
+%         \caption{PageRank after adding \(1.0 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr100}
+%     \end{subfigure}%
+%     \hfill
+%     \begin{subfigure}[b]{.75\textwidth}
+%         \centering
+%         \includegraphics[width=1\linewidth]{ranks/sr_100.png}
+%         \caption{SensorRank after adding \(1.0 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr100}
+%     \end{subfigure}%
+%     % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
+% \end{figure}

-\begin{figure}[H]
-    \centering
-    \begin{subfigure}[b]{.75\textwidth}
-        \centering
-        \includegraphics[width=1\linewidth]{ranks/pr_150.png}
-        \caption{PageRank after adding \(1.5 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr150}
-    \end{subfigure}%
-    \hfill
-    \begin{subfigure}[b]{.75\textwidth}
-        \centering
-        \includegraphics[width=1\linewidth]{ranks/sr_150.png}
-        \caption{SensorRank after adding \(1.5 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr150}
-    \end{subfigure}%
-    % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
-\end{figure}
+% \begin{figure}[H]
+%     \centering
+%     \begin{subfigure}[b]{.75\textwidth}
+%         \centering
+%         \includegraphics[width=1\linewidth]{ranks/pr_150.png}
+%         \caption{PageRank after adding \(1.5 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr150}
+%     \end{subfigure}%
+%     \hfill
+%     \begin{subfigure}[b]{.75\textwidth}
+%         \centering
+%         \includegraphics[width=1\linewidth]{ranks/sr_150.png}
+%         \caption{SensorRank after adding \(1.5 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr150}
+%     \end{subfigure}%
+%     % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
+% \end{figure}

-These results show, that simply adding new edges is not enough and we need to limit the incoming edges to improve the Page- and SensorRank metrics.
+% These results show, that simply adding new edges is not enough and we need to limit the incoming edges to improve the Page- and SensorRank metrics.

 %}}} eval creating edges

@@ -1138,6 +1313,11 @@ Collaborative monitoring of \ac{p2p} botnets allows circumventing some anti-moni
 It also enables more effective monitoring systems for larger botnets, since each peer can be visited by only one crawler.
 The current concept of independent crawlers in \ac{bms} can also use multiple workers but there is no way to ensure a peer is not watched by multiple crawlers thereby using unnecessary resources.

+We were able to show that a collaborative monitoring approach for \ac{p2p} botnets helps to circumvent anti-monitoring and monitoring detection mechanisms and is helpful to improve resource usage when monitoring large botnets.
+On the other hand, graph ranking algorithms have proven to be hard to bypass without requiring large amounts of sensor nodes.
+
+Luckily, most of the anti-monitoring and monitoring detection techniques discussed in this work are of academic nature and have not yet been deployed in real-world botnets.
+Further investigation and improvements in \ac{p2p} botnet monitoring are required to prevent a situation where a botmaster implements the currently theoretical concepts and renders monitoring, as it is currently done, ineffective.

 %}}} conclusion