Valentin Brandl 2022-04-21 23:05:11 +02:00
parent 9e2d55815e
commit b5b78140aa
28 changed files with 401 additions and 73 deletions

13 binary image files changed (44 KiB to 56 KiB); previews not shown.

BIN assets/time_deriv.png (new file, 44 KiB); preview not shown.

View File

@@ -0,0 +1 @@
eval "$(lorri direnv)"

View File

@@ -0,0 +1,70 @@
#!/usr/bin/env python3
from collections import defaultdict
from typing import Dict
import matplotlib.pyplot as plt
import time
from datetime import datetime


def load_log(path: str) -> Dict[datetime, str]:
    time_crawler = {}
    with open(path, 'r') as f:
        for line in f:
            unix_nanos, crawler, _ = line.split(' , ')
            when = datetime.utcfromtimestamp(int(unix_nanos) / 1000000000)
            time_crawler[when] = crawler
    return time_crawler


def plot_deriv(data: Dict[datetime, str]):
    diffs = []
    per_crawler = defaultdict(list)
    sor = list(sorted(data.items(), key=lambda kv: kv[0]))
    for prev, next in zip(sor, sor[1:]):
        diffs.append(abs(2.5 - (next[0].timestamp() - prev[0].timestamp())))
        per_crawler[prev[1]].append(prev[0])
    # expected = [2.5] * len(diffs)
    # x = list(range(len(diffs)))
    # x = []
    x = [2.5 * x for x in range(len(diffs))]
    fig, ax = plt.subplots()
    ax.set_title('Timedelta between crawl events in seconds')
    # ax.set_ylabel()
    ax.set_xlabel('Time passed in seconds')
    ax.set_ylabel('Deviation in seconds')
    # ax.plot(x, expected, label='Expected difference')
    ax.plot(x, diffs, label='Deviation from the expected value')
    fig.legend()
    # plt.show()
    plt.savefig('./time_deriv.png')
    plt.close()
    for c in per_crawler.keys():
        t = per_crawler[c]
        devi = []
        for pre, nex in zip(t, t[1:]):
            devi.append(abs(10 - (nex.timestamp() - pre.timestamp())))
        x = [10 * x for x in range(len(devi))]
        fig, ax = plt.subplots()
        ax.plot(x, devi)
        ax.set_title(f'Timedeviation for {c}')
        ax.set_xlabel('Time passed in seconds')
        ax.set_ylabel('Deviation in seconds')
        plt.savefig(f'./time_deriv_{c}.png')
        plt.close()
        # for ts in per_crawler[c]:


def main():
    data = load_log('./dummy.log')
    plot_deriv(data)


if __name__ == '__main__':
    main()
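The parsing in load_log() above implies a very simple log format: one event per line, with a nanosecond Unix timestamp, the crawler's name and a payload field separated by " , ". The following is a minimal sketch for generating such a ./dummy.log to exercise the script; the file name and separator are taken from the code above, while all sample values are invented for illustration and are not part of the commit.

# generate a synthetic ./dummy.log for the plotting script above (illustrative only)
crawlers = ['crawler-0', 'crawler-1', 'crawler-2', 'crawler-3']
start_nanos = 1_650_000_000_000_000_000  # arbitrary start time in nanoseconds
with open('./dummy.log', 'w') as f:
    for i in range(240):
        # one crawl event every 2.5 s, the four crawlers taking turns (10 s per crawler)
        nanos = start_nanos + int(i * 2.5 * 1_000_000_000)
        f.write(f'{nanos} , {crawlers[i % len(crawlers)]} , crawled\n')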

View File

@@ -0,0 +1,16 @@
{ pkgs ? import <nixpkgs> {} }:
let
  py-packages = python-packages: with python-packages; [
    matplotlib
    numpy
    networkx
    scipy
  ];
  py-package = pkgs.python3.withPackages py-packages;
in
pkgs.mkShell {
  buildInputs = [
    py-package
  ];
}
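The shell.nix above only declares the Python environment. As a quick, illustrative sanity check (assuming it is run from inside that environment, for example after the lorri/direnv hook added in this commit has loaded it), the declared libraries can be imported directly:

# sanity check for the Python environment provided by shell.nix (illustrative only)
import importlib

for mod in ('matplotlib', 'numpy', 'networkx', 'scipy'):
    print(mod, importlib.import_module(mod).__version__)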

5 binary image files changed (25 KiB to 46 KiB); previews not shown.
View File

@@ -23,7 +23,7 @@ def main():
        avg_in.append(v['avg_in'])
        avg_out.append(v['avg_out'])
        known_in.append(v['known_in'])
-       known_out.append(v['known_out'])
        # known_out.append(v['known_out'])
        number_of_nodes.append(v['number_of_nodes'])
@@ -31,11 +31,12 @@ def main():
    ax.plot(times, avg_in, label='Avg. In')
    # ax.plot(times, avg_out, label='Avg. Out')
    ax.plot(times, known_in, label='Known In')
-   ax.plot(times, known_out, label='Known Out')
    # ax.plot(times, known_out, label='Known Out')
-   ax.plot(times, number_of_nodes, label='Number of nodes')
    ax.plot(times, number_of_nodes, label='Total number of nodes')
    ax.set_title(f'Average edge count per hour')
    fig.autofmt_xdate()
    fig.legend()
    ax.set_ylim(ymin=0)
    plt.savefig(f'./tmp_plot.png')
    # print('created sr plot')
    plt.show()

View File

@@ -124,7 +124,7 @@ def plot2(percentage, algo, data):
    fig, ax = plt.subplots()
    a = 'SensorRank' if algo == 'sr' else 'RageRank'
    ax.set_ylabel(f'{a}')
-   ax.plot(times, mean, label='Avg. Rank')
    # ax.plot(times, mean, label='Avg. Rank')
    # ax.errorbar(times, mean, stdev, label='Avg. Rank')
    ax.plot(times, mean, label='Avg. Rank')
    # ax.plot(times, known_in, label='Known In') # TODO
@@ -156,13 +156,14 @@ def plot_in_out2(percentage, data):
        known_out.append(d[algo]['known_out'])
    fig, ax = plt.subplots()
-   a = 'SensorRank' if algo == 'sr' else 'RageRank'
    # a = 'SensorRank' if algo == 'sr' else 'RageRank'
-   ax.set_ylabel(f'{a}')
    ax.set_ylabel('Incoming edges')
    ax.plot(times, avg_in, label='Avg. In')
    # ax.plot(times, known_in, label='Known In') # TODO
    ax.plot(times, known_in, label='Known In')
-   ax.plot(times, known_out, label='Known out')
    # ax.plot(times, known_out, label='Known out')
-   title = f'In And Out after removing {percentage * 100}% edges'
    ax.set_ylim(ymin=0)
    title = f'In degree after removing {percentage * 100}% edges'
    ax.set_title(title)
    fig.autofmt_xdate()

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python3
import networkx as nx
import statistics
import multiprocessing
from random import sample
@@ -34,6 +35,19 @@ def sensor_rank(graph):
        lambda g, node: sr(g, node, number_of_nodes)
    )


def pr_nx(graph):
    return nx.algorithms.link_analysis.pagerank(graph, alpha=1.0)


def sr_nx(graph, pr_nx):
    sr = {}
    V = len(list(graph.nodes()))
    for node, rank in pr_nx.items():
        succs = len(list(graph.successors(node)))
        if succs != 0:
            preds = len(list(graph.predecessors(node)))
            sr[node] = (rank / succs) * (preds / V)
    return sr


def find_known(g, known):
    nodes = list(filter(lambda n: n.node == known.node, g.nodes()))
    n = len(nodes)
@@ -133,6 +147,49 @@ def rank(path):
    res = {'sr': res_sr, 'pr': res_pr}
    return res


def analyze2(g, data):
    known = find_known(g, rank_with_churn.KNOWN)
    # avg_r = rank_with_churn.avg_without_known(g)
    avg_in = rank_with_churn.avg_in(g)
    kn_in = known_in(g, known)
    kn_out = known_out(g, known)
    # d = list(map(lambda node: node.rank, g.nodes()))
    d = list(map(lambda kv: kv[1], data.items()))
    mean = statistics.mean(d)
    stddev = statistics.stdev(d)
    r_known = None
    for k, v in data.items():
        if k.node == known.node:
            r_known = v
            break
    return {
        'known_rank': r_known,
        'known_in': kn_in,
        'known_out': kn_out,
        # 'avg_rank': avg_r,
        'avg_in': avg_in,
        'mean': mean,
        'stdev': stddev,
    }


def rank2(path):
    edges = reduce_edges.load_data(path)
    g = build_graph(edges, initial_rank=rank_with_churn.INITIAL_RANK)
    print('pr start')
    g_pr = pr_nx(g)
    print('sr start')
    g_sr = sr_nx(g, g_pr)
    print('analyze pr start')
    res_pr = analyze2(g, g_pr)
    print('analyze sr start')
    res_sr = analyze2(g, g_sr)
    print('done!')
    res = {'sr': res_sr, 'pr': res_pr}
    return res


def main():
    # pool = multiprocessing.Pool(processes=4)
    params = []
@@ -150,6 +207,7 @@ def main():
    with multiprocessing.Pool(processes=8) as pool:
        l_path_data = pool.map(wohoo, params)
    for path_data in l_path_data:
        print(f'{path_data=}')
        for path, data in path_data.items():
            with reduce_edges.open_mkdir(path, 'w') as f:
                json.dump(data, f)
@@ -171,8 +229,8 @@ def wohoo(p):
    # with open() as f:
    # json.dump(result, f)
    when = datetime.fromtimestamp(float(file.split('/')[-1][:-4]))
-   path = f'./data_reduced/{reduced_percentage:.02f}/{when.timestamp()}.json'
    path = f'./data_reduced2/{reduced_percentage:.02f}/{when.timestamp()}.json'
-   result = rank(file)
    result = rank2(file)
    return {path: result}
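For readers who want to try the newly added sr_nx() in isolation, here is a minimal, self-contained sketch of the same computation on a toy directed graph. It only assumes networkx and repeats the per-node formula from sr_nx(), rank(v) / |succ(v)| * |pred(v)| / |V|; the toy graph and node names are invented for illustration and are not part of the commit.

#!/usr/bin/env python3
# toy illustration of the sr_nx() formula above (not part of the commit)
import networkx as nx

g = nx.DiGraph()
# three peers in a cycle, plus a sensor-like node 's' that only has incoming edges
g.add_edges_from([('a', 'b'), ('b', 'c'), ('c', 'a'),
                  ('a', 's'), ('b', 's'), ('c', 's')])

pr = nx.algorithms.link_analysis.pagerank(g, alpha=1.0)

V = g.number_of_nodes()
sr = {}
for node, rank in pr.items():
    succs = g.out_degree(node)
    if succs != 0:  # nodes without successors (the sensor) get no SensorRank
        sr[node] = (rank / succs) * (g.in_degree(node) / V)

print('PageRank:  ', pr)
print('SensorRank:', sr)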

View File

@@ -4,6 +4,7 @@ let
    matplotlib
    numpy
    networkx
    scipy
  ];
  py-package = pkgs.python3.withPackages py-packages;
in

View File

@@ -445,7 +445,7 @@ It also allows us to get rid of the state in our strategy since we don't have to
%}}} load balancing
%{{{ frequency reduction
-\subsection{Reduction of Request Frequency}
\subsection{Reduction of Request Frequency}\label{sec:stratRedReqFreq}
The GameOver Zeus botnet limited the number of requests a peer was allowed to perform and blacklisted peers, that exceeded the limit, as an anti-monitoring mechanism~\cite{bib:andriesse_goz_2013}.
In an uncoordinated crawler approach, the crawl frequency has to be limited to prevent hitting the request limit.
@@ -584,7 +584,11 @@ Based on this, \emph{SensorRank} is defined as
Since crawlers never respond to peer list requests, they will always be detectable by the described approach but sensors might benefit from the following technique.
-By responding to peer list requests with plausible data and thereby producing valid outgoing edges from the sensors, we will try to make those metrics less suspicious.
The PageRank and SensorRank metrics are calculated over the sum of the ranks of a node's predecessors.
We will investigate how limiting the number of predecessors helps produce inconspicuous ranks for a sensor.
% By responding to peer list requests with plausible data and thereby producing valid outgoing edges from the sensors, we will try to make those metrics less suspicious.
To counter the SensorBuster metric, outgoing edges to valid peers from the botnet are required so the sensor does not form a separate \ac{wcc}.
The challenge here is deciding which peers can be returned without actually supporting the network.
The following candidates to place on the neighbor list will be investigated:
@@ -606,10 +610,6 @@
Returning all the other sensors when responding to peer list requests, thereby effectively creating a complete graph \(K_{\abs{C}}\) among the workers, creates valid outgoing edges.
The resulting graph will still form a \ac{wcc} with no edges back into the main network.
-PageRank is the sum of a node's predecessors ranks divided by the amount of successors each predecessor's successors.
-Predecessors with many successors should therefore reduce the rank.
-By their nature, crawlers have many successors, so they are good candidates to reduce the PageRank of a sensor.
-\todo{crawlers as predecessors}
%{{{ churned peers
\subsubsection{Churned Peers After IP Rotation}
@@ -630,6 +630,20 @@ Those peers can be used as fake neighbors and create valid-looking outgoing edge
%}}} cg nat
\clearpage{}
\todo{clearpage?}
In theory, it would be possible to detect churned peers or peers behind carrier-grade \acs{nat} without coordinating the sensors, but the coordination gives us a few advantages:
\begin{itemize}
\item A peer might blacklist a sensor, which, from the point of view of an uncoordinated sensor, looks exactly the same as if the peer had churned.
The coordination backend has more knowledge and can detect this if the peer in question still contacts another sensor.
\item The coordination backend can include different streams of information to decide which peers to place in the sensor's neighborhood.
Knowledge about geolocations, \ac{as} and their IP rotation behavior can be consulted to make better-informed choices for neighborhood candidates.
\end{itemize}
%}}} against graph metrics
%}}} strategies
@@ -650,7 +664,7 @@ We will compare the variance \(\sigma^2\) and standard derivation \(\sigma\) to
%{{{ fig:ipPartC02
\begin{figure}[H]
\centering
-\includegraphics[width=.7\linewidth]{ip_part_c02.png}
\includegraphics[width=1\linewidth]{ip_part_c02.png}
\caption{IP based partitioning for 2 crawlers}\label{fig:ipPartC02}
\begin{align*}
n &= 2 \\
@@ -668,7 +682,7 @@ We will compare the variance \(\sigma^2\) and standard derivation \(\sigma\) to
%{{{ fig:ipPartC04
\begin{figure}[H]
\centering
-\includegraphics[width=.7\linewidth]{ip_part_c04.png}
\includegraphics[width=1\linewidth]{ip_part_c04.png}
\caption{IP based partitioning for 4 crawlers}\label{fig:ipPartC04}
\begin{align*}
n &= 4 \\
@@ -687,7 +701,7 @@ We will compare the variance \(\sigma^2\) and standard derivation \(\sigma\) to
%{{{ fig:ipPartC06
\begin{figure}[H]
\centering
-\includegraphics[width=.7\linewidth]{ip_part_c06.png}
\includegraphics[width=1\linewidth]{ip_part_c06.png}
\caption{IP based partitioning for 6 crawlers}\label{fig:ipPartC06}
\begin{align*}
n &= 6 \\
@@ -706,7 +720,7 @@ We will compare the variance \(\sigma^2\) and standard derivation \(\sigma\) to
%{{{ fig:ipPartC10
\begin{figure}[H]
\centering
-\includegraphics[width=.7\linewidth]{ip_part_c10.png}
\includegraphics[width=1\linewidth]{ip_part_c10.png}
\caption{IP based partitioning for 10 crawlers}\label{fig:ipPartC10}
\begin{align*}
n &= 10 \\
@@ -742,7 +756,7 @@ Therefore, we simulate the partitioning on a bigger sample of \num{1000000} rand
%{{{ fig:randIpPartC02
\begin{figure}[H]
\centering
-\includegraphics[width=.8\linewidth]{rand_ip_part_c02.png}
\includegraphics[width=1\linewidth]{rand_ip_part_c02.png}
\caption{IP based partitioning for 2 crawlers on generated dataset}\label{fig:randIpPartC02}
\begin{align*}
n &= 2 \\
@@ -761,7 +775,7 @@ Therefore, we simulate the partitioning on a bigger sample of \num{1000000} rand
%{{{ fig:randIpPartC04
\begin{figure}[H]
\centering
-\includegraphics[width=.8\linewidth]{rand_ip_part_c04.png}
\includegraphics[width=1\linewidth]{rand_ip_part_c04.png}
\caption{IP based partitioning for 4 crawlers on generated dataset}\label{fig:randIpPartC04}
\begin{align*}
n &= 4 \\
@@ -780,7 +794,7 @@ Therefore, we simulate the partitioning on a bigger sample of \num{1000000} rand
%{{{ fig:randIpPartC06
\begin{figure}[H]
\centering
-\includegraphics[width=.8\linewidth]{rand_ip_part_c06.png}
\includegraphics[width=1\linewidth]{rand_ip_part_c06.png}
\caption{IP based partitioning for 6 crawlers on generated dataset}\label{fig:randIpPartC06}
\begin{align*}
n &= 6 \\
@@ -831,10 +845,74 @@ Therefore, we simulate the partitioning on a bigger sample of \num{1000000} rand
As expected, the work is still not perfectly distributed among the crawlers but evenly enough for our use case.
The derivation for larger botnets is within \SI{0.5}{\percent} of the even distribution.
-This is good enough for balancing the work among workers.
This is good enough for balancing the tasks among workers.
%}}} eval load balancing
%{{{ eval redu requ freq
\subsection{Reduction of Request Frequency}
To evaluate the request frequency optimization described in \Fref{sec:stratRedReqFreq}, we crawl a simulated peer and check whether the requests are evenly distributed and how big the deviation from the theoretically optimal result is.
To get more realistic results, the crawlers and the simulated peer run on different machines, so they are not within the same LAN.
We use the same parameters as in the example above:
\begin{align*}
n &= 4 \\
l &= \SI{6}{\request\per\minute} \\
f &= \SI{24}{\request\per\minute} \\
o &= \SI{2.5}{\second}
\end{align*}
To recap, this is what the optimal timeline would look like:
\begin{center}
\begin{chronology}[10]{0}{60}{0.9\textwidth}
\event{0}{\(C_0\)}
\event{10}{\(C_0\)}
\event{20}{\(C_0\)}
\event{30}{\(C_0\)}
\event{40}{\(C_0\)}
\event{50}{\(C_0\)}
\event{60}{\(C_0\)}
\event{2.5}{\(C_1\)}
\event{12.5}{\(C_1\)}
\event{22.5}{\(C_1\)}
\event{32.5}{\(C_1\)}
\event{42.5}{\(C_1\)}
\event{52.5}{\(C_1\)}
\event{5}{\(C_2\)}
\event{15}{\(C_2\)}
\event{25}{\(C_2\)}
\event{35}{\(C_2\)}
\event{45}{\(C_2\)}
\event{55}{\(C_2\)}
\event{7.5}{\(C_3\)}
\event{17.5}{\(C_3\)}
\event{27.5}{\(C_3\)}
\event{37.5}{\(C_3\)}
\event{47.5}{\(C_3\)}
\event{57.5}{\(C_3\)}
\end{chronology}
\end{center}
The ideal distribution would be \SI{2.5}{\second} between any two consecutive events.
Due to network latency and load from crawling other peers, we expect the actual result to deviate from the optimal value over time.
With this experiment, we try to estimate the impact of that latency.
If the deviation is present and measurable, the crawlers have to be rescheduled periodically to keep it at an acceptable level.
\begin{figure}[H]
\centering
\includegraphics[width=1\linewidth]{time_deriv.png}
\caption{Deviation from the expected interval}\label{fig:timeDeriv}
\end{figure}
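As a quick cross-check, the offsets in the timeline above follow directly from the parameters stated in this section; the short derivation below uses only those values (assuming \(f = n \cdot l\), which matches the numbers given above):
\begin{align*}
	f &= n \cdot l = 4 \cdot 6 = \SI{24}{\request\per\minute}, \\
	o &= \frac{\SI{60}{\second}}{24} = \SI{2.5}{\second}, \\
	t_{i,k} &= i \cdot o + k \cdot \frac{\SI{60}{\second}}{6} = 2.5\,i + 10\,k \,\si{\second},
\end{align*}
which reproduces the \SI{2.5}{\second} spacing between consecutive events and the \SI{10}{\second} interval per crawler assumed in the evaluation script.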
%}}} eval redu requ freq
%{{{ eval creating edges
\subsection{Impact of Additional Edges on Graph Metrics}
@@ -908,8 +986,6 @@ SensorBuster relies on the assumption that sensors don't have any outgoing edges
For the \ac{wcc} metric, it is obvious that even a single edge back into the main network is enough to connect the sensor back to the main graph and therefore beat this metric.
-\todo{formulieren}
\subsubsection{Effectiveness against Page- and SensorRank}
In this section we will evaluate how adding outgoing edges to a sensor impacts it's PageRank and SensorRank values.
@@ -1014,59 +1090,158 @@ Looking at the data in smaller buckets of one hour each, the average number of s
\end{figure}
%}}}fig:avg_out_edges
Experiments were performed in which the incoming edges for the known sensor are reduced by increasing factors, to see when the sensor's rank reaches the overall average.
% \begin{figure}[H]
% \centering
% \includegraphics[width=1\textwidth]{reduced_ranks/0/in_out.png}
% % \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
% \end{figure}%
\begin{figure}[H]
\centering
\begin{subfigure}[b]{1\textwidth}
\centering
\includegraphics[width=.8\linewidth]{reduced_ranks/0/pr.png}
\caption{PageRank after removing \SI{0}{\percent} of edges}\label{fig:pr0}
\end{subfigure}%
\hfill
\begin{subfigure}[b]{1\textwidth}
\centering
\includegraphics[width=.8\linewidth]{reduced_ranks/0/sr.png}
\caption{SensorRank after removing \SI{0}{\percent} of edges}\label{fig:sr0}
\end{subfigure}%
% \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
\end{figure}
\Fref{fig:pr0} and \Fref{fig:sr0} show the situation on the unmodified ground truth.
\begin{figure}[H]
\centering
\includegraphics[width=.8\textwidth]{reduced_ranks/1/in_out.png}
\caption{Incoming edges after removing \SI{10}{\percent} of edges}\label{fig:in1}
% \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
\end{figure}%
\begin{figure}[H]
\centering
\begin{subfigure}[b]{1\textwidth}
\centering
\includegraphics[width=.8\linewidth]{reduced_ranks/1/pr.png}
\caption{PageRank after removing \SI{10}{\percent} of edges}\label{fig:pr1}
\end{subfigure}%
\hfill
\begin{subfigure}[b]{1\textwidth}
\centering
\includegraphics[width=.8\linewidth]{reduced_ranks/1/sr.png}
\caption{SensorRank after removing \SI{10}{\percent} of edges}\label{fig:sr1}
\end{subfigure}%
% \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=.8\textwidth]{reduced_ranks/2/in_out.png}
\caption{Incoming edges after removing \SI{20}{\percent} of edges}\label{fig:in2}
% \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
\end{figure}%
\begin{figure}[H]
\centering
\begin{subfigure}[b]{1\textwidth}
\centering
\includegraphics[width=.8\linewidth]{reduced_ranks/2/pr.png}
\caption{PageRank after removing \SI{20}{\percent} of edges}\label{fig:pr2}
\end{subfigure}%
\hfill
\begin{subfigure}[b]{1\textwidth}
\centering
\includegraphics[width=.8\linewidth]{reduced_ranks/2/sr.png}
\caption{SensorRank after removing \SI{20}{\percent} of edges}\label{fig:sr2}
\end{subfigure}%
% \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=.8\textwidth]{reduced_ranks/3/in_out.png}
\caption{Incoming edges after removing \SI{30}{\percent} of edges}\label{fig:in3}
% \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
\end{figure}%
\begin{figure}[H]
\centering
\begin{subfigure}[b]{1\textwidth}
\centering
\includegraphics[width=.8\linewidth]{reduced_ranks/3/pr.png}
\caption{PageRank after removing \SI{30}{\percent} of edges}\label{fig:pr3}
\end{subfigure}%
\hfill
\begin{subfigure}[b]{1\textwidth}
\centering
\includegraphics[width=.8\linewidth]{reduced_ranks/3/sr.png}
\caption{SensorRank after removing \SI{30}{\percent} of edges}\label{fig:sr3}
\end{subfigure}%
% \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
\end{figure}
We can see in \Fref{fig:sr2} and \Fref{fig:pr3} that we have to reduce the incoming edges by \SI{20}{\percent} and \SI{30}{\percent} respectively to get average values for SensorRank and PageRank.
This also means that the number of incoming edges for a sensor must be about the same as the average number of incoming edges, as can be seen in \Fref{fig:in3}.
Depending on the protocol details of the botnet (\eg{} how many incoming edges are allowed per peer), this means that a large number of sensors is needed if we want to monitor the whole network.
% Experiments were performed, in which a percentage of random outgoing edges were added to the known sensor, based on the amount of incoming edges:
% We evaluate the impact of outgoing edges by picking a percentage of random nodes in each bucket and creating edges from the sensor to each of the sampled peers, thereby evening the ratio between \(\deg^{+}\) and \(\deg^{-}\).
% \begin{figure}[H]
% \centering
% \begin{subfigure}[b]{.75\textwidth}
% \centering
% \includegraphics[width=1\linewidth]{ranks/pr_75.png}
% \caption{PageRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr75}
% \end{subfigure}%
% \hfill
% \begin{subfigure}[b]{.75\textwidth}
% \centering
% \includegraphics[width=1\linewidth]{ranks/sr_75.png}
% \caption{SensorRank after adding \(0.75 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr75}
% \end{subfigure}%
% % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
% \end{figure}
% \begin{figure}[H]
% \centering
% \begin{subfigure}[b]{.75\textwidth}
% \centering
% \includegraphics[width=1\linewidth]{ranks/pr_100.png}
% \caption{PageRank after adding \(1.0 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr100}
% \end{subfigure}%
% \hfill
% \begin{subfigure}[b]{.75\textwidth}
% \centering
% \includegraphics[width=1\linewidth]{ranks/sr_100.png}
% \caption{SensorRank after adding \(1.0 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr100}
% \end{subfigure}%
% % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
% \end{figure}
% \begin{figure}[H]
% \centering
% \begin{subfigure}[b]{.75\textwidth}
% \centering
% \includegraphics[width=1\linewidth]{ranks/pr_150.png}
% \caption{PageRank after adding \(1.5 \times \abs{\text{pred}(v)}\) edges}\label{fig:pr150}
% \end{subfigure}%
% \hfill
% \begin{subfigure}[b]{.75\textwidth}
% \centering
% \includegraphics[width=1\linewidth]{ranks/sr_150.png}
% \caption{SensorRank after adding \(1.5 \times \abs{\text{pred}(v)}\) edges}\label{fig:sr150}
% \end{subfigure}%
% % \caption{SensorRank distribution with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:dist_sr_75}
% \end{figure}
% These results show, that simply adding new edges is not enough and we need to limit the incoming edges to improve the Page- and SensorRank metrics.
%}}} eval creating edges
@@ -1138,6 +1313,11 @@ Collaborative monitoring of \ac{p2p} botnets allows circumventing some anti-moni
It also enables more effective monitoring systems for larger botnets, since each peer can be visited by only one crawler.
The current concept of independent crawlers in \ac{bms} can also use multiple workers but there is no way to ensure a peer is not watched by multiple crawlers thereby using unnecessary resources.
We were able to show that a collaborative monitoring approach for \ac{p2p} botnets helps circumvent anti-monitoring and monitoring detection mechanisms and improves resource usage when monitoring large botnets.
On the other hand, graph ranking algorithms have proven hard to bypass without requiring a large number of sensor nodes.
Luckily, most of the anti-monitoring and monitoring detection techniques discussed in this work are academic in nature and have not yet been deployed in real-world botnets.
Further investigation and improvements in \ac{p2p} botnet monitoring are required to prevent a situation where a botmaster implements these currently theoretical concepts and renders monitoring, as it is currently done, ineffective.
%}}} conclusion
