diff --git a/acronyms.tex b/acronyms.tex index b15ed756..d7bad8b0 100644 --- a/acronyms.tex +++ b/acronyms.tex @@ -63,4 +63,9 @@ long = {Botnet Simulation Framework} } +\DeclareAcronym{wcc}{ + short = {WCC}, + long = {weakly connected component} +} + % vim: set filetype=tex ts=2 sw=2 tw=0 et : diff --git a/assets/dot/sensorbuster1.dot b/assets/dot/sensorbuster1.dot new file mode 100644 index 00000000..8b34d1c7 --- /dev/null +++ b/assets/dot/sensorbuster1.dot @@ -0,0 +1,52 @@ +digraph G { + /* splines = false; */ + node [ shape = "circle" ]; + /* c0 */ + /* c1 */ + /* n0 */ + /* n1 */ + /* n2 */ + /* n3 */ + + /* n0 -> n1; */ + /* n0 -> n2; */ + /* n0 -> n3; */ + /* n1 -> n2; */ + /* n1 -> n3; */ + /* n2 -> n3; */ + + /* /1* c0 -> c1; *1/ */ + /* /1* c1 -> c0; *1/ */ + /* n0 -> c0; */ + /* n1 -> c1; */ + /* n1 -> c0; */ + /* n2 -> c0; */ + /* n3 -> c0; */ + /* n3 -> c1; */ + + c0 + c1 + c2 + n0 + n1 + n2 + + n0 -> n1; + n0 -> n2; + n1 -> n2; + + /* c0 -> c1; */ + /* c0 -> c2; */ + /* c1 -> c0; */ + /* c1 -> c2; */ + /* c2 -> c0; */ + /* c2 -> c1; */ + + + n0 -> c0; + n0 -> c2; + n1 -> c1; + n1 -> c0; + n2 -> c0; + n2 -> c2; +} diff --git a/assets/dot/sensorbuster2.dot b/assets/dot/sensorbuster2.dot new file mode 100644 index 00000000..e526cf09 --- /dev/null +++ b/assets/dot/sensorbuster2.dot @@ -0,0 +1,29 @@ +digraph G { + /* splines = false; */ + node [ shape = "circle" ]; + c0 + c1 + c2 + n0 + n1 + n2 + + n0 -> n1; + n0 -> n2; + n1 -> n2; + + c0 -> c1; + c0 -> c2; + c1 -> c0; + c1 -> c2; + c2 -> c0; + c2 -> c1; + + + n0 -> c0; + n0 -> c2; + n1 -> c1; + n1 -> c0; + n2 -> c0; + n2 -> c2; +} diff --git a/bibliography.bib b/bibliography.bib index 83eb619b..e147acff 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -172,4 +172,21 @@ abstract = {The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. 
But there is still much that can be said objectively about the relative importance of Web pages. This paper describ es PageRank, a method for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.} } +@inproceedings{andriesse_goz_2013, + title = {Highly Resilient Peer-to-Peer Botnets Are Here: {{An}} Analysis of {{Gameover Zeus}}}, + shorttitle = {Highly Resilient Peer-to-Peer Botnets Are Here}, + booktitle = {2013 8th {{International Conference}} on {{Malicious}} and {{Unwanted Software}}: "{{The Americas}}" ({{MALWARE}})}, + author = {Andriesse, Dennis and Rossow, Christian and Stone-Gross, Brett and Plohmann, Daniel and Bos, Herbert}, + date = {2013-10}, + pages = {116--123}, + publisher = {{IEEE}}, + location = {{Fajardo, PR, USA}}, + doi = {10.1109/MALWARE.2013.6703693}, + url = {https://ieeexplore.ieee.org/document/6703693/}, + urldate = {2022-02-27}, + eventtitle = {2013 8th {{International Conference}} on {{Malicious}} and {{Unwanted Software}}: "{{The Americas}}" ({{MALWARE}})}, + isbn = {978-1-4799-2534-6 978-1-4799-2535-3}, + file = {/home/me/Zotero/storage/R3AAQR9Q/Andriesse et al. - 2013 - Highly resilient peer-to-peer botnets are here An.pdf} +} + /* vim: set filetype=bib ts=2 sw=2 tw=0 et :*/ diff --git a/content.tex b/content.tex index 3c27ef27..ed96dd1d 100644 --- a/content.tex +++ b/content.tex @@ -153,7 +153,7 @@ The implementation of the concepts of this work will be done as part of \ac{bms} % TODO: reference for page rank In an earlier project, I implemented different node ranking algorithms (among others \enquote{PageRank}~\cite{page_pagerank_1998}) to detect sensors and crawlers in a botnet, as described in \citetitle{karuppayah_sensorbuster_2017}. 
Both ranking algorithms use the \(\deg^+\) and \(\deg^-\) to weight the nodes. -Another way to enumerate candidates for sensors in a \ac{p2p} botnet is to find weakly connected components in the graph. +Another way to enumerate candidates for sensors in a \ac{p2p} botnet is to find \acp{wcc} in the graph. Sensors will have few to none outgoing edges, since they don't participate actively in the botnet. The goal of this work is to complicate detection mechanisms like this for botmasters, by centralizing the coordination of the system's crawlers and sensors, thereby reducing the node's rank for specific graph metrics. @@ -173,7 +173,7 @@ Further work might even consider autoscaling the monitoring activity using some To validate the result, the old sensor implementation will be compared to the new system using different graph metrics. % TODO: maybe? -If time allows, \ac{bsf}\footnotemark{} will be used to simulate a botnet place sensors in the simulated network and measure the improvement archived by the coordinated monitoring effort. +If time allows, \ac{bsf}\footnotemark{} will be used to simulate a botnet place sensors in the simulated network and measure the improvement achieved by the coordinated monitoring effort. \footnotetext{\url{https://github.com/tklab-tud/BSF}} % TODO: which botnet? @@ -194,23 +194,41 @@ The coordination protocol must allow the following operations: \begin{itemize} % TODO: bestehende session Mechanik verwenden/erweitern - \item \mintinline{go}{registerSensor(capabilities)}: Register new sensor with capabilities (which botnet, available bandwidth, \ldots) + \item \mintinline{go}{registerSensor(capabilities)}: Register new sensor with capabilities (which botnet, available bandwidth, \ldots). This is called periodically and used to determine which crawler is still active, when splitting the workload. - % TODO: failedTries im backend statt eigenem nachrichtentyp + % TODO: failedTries im backend statt eigenem nachrichtentyp: remove? 
\item \mintinline{go}{unreachable(targets)}:
+	\item \mintinline{go}{requestTasks() []PeerTask}: Receive a batch of crawl tasks from the coordinator. The tasks consist of the target peer, if the crawler should start or stop the operation, when it should start and stop monitoring and the frequency.
+
\end{itemize}
+\begin{minted}{go}
+type Peer struct {
+	BotID string
+	IP string
+	Port uint16
+}
+type PeerTask struct {
+	Peer Peer
+	StartAt *Time
+	StopAt *Time
+	Frequency uint
+	StopCrawling bool
+}
+\end{minted}
+
+
%}}} sensor to backend
%{{{ backend to sensor
+% TODO: remove?
\subsubsection{Backend to Sensor}
\begin{itemize}
-	\item \mintinline{go}{startCrawling(targets)}: Start crawling a batch of nodes for a specified time or until stopped, with \mintinline{go}{targets} being a list of targets and each target consists of a botnet identifier, IP address, port, bot identifier, how long and how often this bot should be monitored
-	\item \mintinline{go}{stopCrawling(targets)}: Stop crawling a batch of nodes
+	% \item \mintinline{go}{stopCrawling(targets)}: Stop crawling a batch of nodes
\end{itemize}
@@ -220,5 +238,138 @@ The coordination protocol must allow the following operations:
%}}} methodology
+%{{{ strategies
+\section{Coordination Strategies}
+
+%{{{ frequency reduction
+\subsection{Reduction of Request Frequency}
+
+The GameOver Zeus botnet deployed a blacklisting mechanism, where crawlers are blocked based on their request frequency~\cite{andriesse_goz_2013}.
+In a single-crawler approach, the crawler frequency has to be limited to prevent hitting the request limit.
+Using collaborative crawlers, an arbitrarily fast frequency can be achieved without being blacklisted.
+Let \(L \in \mathbb{N}\) be the frequency limit at which a crawler will be blacklisted and \(F \in \mathbb{N}\) be the crawl frequency that should be achieved.
+The number of crawlers \(C\) required to achieve the frequency \(F\) without being blacklisted and the offset \(O\) between crawlers are defined as
+
+\begin{align*}
+  C &= \left\lceil \frac{F}{L} \right\rceil \\
+  O &= \frac{1 \si{\request}}{F}
+\end{align*}
+
+Taking advantage of the \mintinline{go}{StartAt} field from the \mintinline{go}{PeerTask} returned by the \mintinline{go}{requestTasks} primitive above, the crawlers can be scheduled offset by \(O\) at a frequency \(L\) to ensure that the overall requests to each peer are evenly distributed over time.
+
+Given a limit \(L = 5 \si{\request\per 100\second}\), crawling a botnet at \(F = 20 \si{\request\per 100\second}\) requires \(C = \left\lceil \frac{20\si{\request\per 100\second}}{5\si{\request\per 100\second}} \right\rceil = 4\) crawlers.
+Those crawlers must be scheduled \(O = \frac{1\si{\request}}{20\si{\request\per 100\second}} = 5 \si{\second}\) apart at a frequency of \(L\) for an even request distribution.
+
+
+%{{{ fig:crawler_timeline
+\begin{figure}[h]
+\centering
+\begin{chronology}[10]{0}{100}{0.9\textwidth}
+  \event{0}{\(C_0\)}
+  \event{20}{\(C_0\)}
+  \event{40}{\(C_0\)}
+  \event{60}{\(C_0\)}
+  \event{80}{\(C_0\)}
+  \event{100}{\(C_0\)}
+
+  \event{5}{\(C_1\)}
+  \event{25}{\(C_1\)}
+  \event{45}{\(C_1\)}
+  \event{65}{\(C_1\)}
+  \event{85}{\(C_1\)}
+
+  \event{10}{\(C_2\)}
+  \event{30}{\(C_2\)}
+  \event{50}{\(C_2\)}
+  \event{70}{\(C_2\)}
+  \event{90}{\(C_2\)}
+
+  \event{15}{\(C_3\)}
+  \event{35}{\(C_3\)}
+  \event{55}{\(C_3\)}
+  \event{75}{\(C_3\)}
+  \event{95}{\(C_3\)}
+\end{chronology}
+\caption{Timeline of crawler events as seen from a peer}\label{fig:crawler_timeline}
+\end{figure}
+%}}} fig:crawler_timeline
+
+As can be seen in~\autoref{fig:crawler_timeline}, each crawler \(C_0\) to \(C_3\) performs only \(5 \si{\request\per 100\second}\) while overall achieving \(20 \si{\request\per 100\second}\).
+
+Vice versa, given a number of crawlers \(C\) and a request limit \(L\), the effective frequency \(F\) can be maximized to \(F = C \times L\) without hitting the limit \(L\) and being blocked.
+
+Using the example from above with \(L = 5 \si{\request\per 100\second}\) but now only two crawlers \(C = 2\), it is still possible to achieve an effective frequency of \(F = 2 \times 5 \si{\request\per 100\second} = 10 \si{\request\per 100\second}\) and \(O = \frac{1 \si{\request}}{10 \si{\request\per 100\second}} = 10 \si{\second}\):
+
+%TODO: name
+%{{{ fig:crawler_timeline
+\begin{figure}[h]
+\centering
+\begin{chronology}[10]{0}{100}{0.9\textwidth}
+  \event{0}{\(C_0\)}
+  \event{20}{\(C_0\)}
+  \event{40}{\(C_0\)}
+  \event{60}{\(C_0\)}
+  \event{80}{\(C_0\)}
+  \event{100}{\(C_0\)}
+
+  \event{10}{\(C_1\)}
+  \event{30}{\(C_1\)}
+  \event{50}{\(C_1\)}
+  \event{70}{\(C_1\)}
+  \event{90}{\(C_1\)}
+
+\end{chronology}
+% \caption{Timeline of crawler events as seen from a peer}\label{fig:crawler_timeline}
+\end{figure}
+%}}} fig:crawler_timeline
+
+While the effective frequency of the whole system is halved compared to~\autoref{fig:crawler_timeline}, it is still possible to double the frequency over the limit.
+
+%}}} frequency reduction
+
+%{{{ against graph metrics
+% TODO: sinnvoll?
+\subsection{Working Against Suspicious Graph Metrics}
+
+\citetitle*{karuppayah_sensorbuster_2017} describes different graph metrics to find sensors in \ac{p2p} botnets.
+One of those, \enquote{SensorBuster}, uses \acp{wcc} since crawlers don't have any edges back to the main network in the graph.
+It would be possible to implement the crawlers so they return other crawlers in their peer list responses, but this would still produce a disconnected component, and as long as this component is smaller than the main network, it is still easily detectable since there is no path from the crawler component back to the main network.
+ +% TODO: caption, label +\begin{figure}[h] +\centering +\begin{subfigure}[b]{.5\textwidth} + \centering + \includegraphics[width=1\linewidth]{sensorbuster1.pdf} + \caption{\acp{wcc} for independent crawlers}\label{fig:sensorbuster1} +\end{subfigure}% +\begin{subfigure}[b]{.5\textwidth} + \centering + \includegraphics[width=1\linewidth]{sensorbuster2.pdf} + \caption{\acp{wcc} for collaborated crawlers}\label{fig:sensorbuster2} +\end{subfigure}% +\caption{Differences in graph metrics}\label{fig:sensorbuster} +\end{figure} + +% TODO: pagerank, sensorrank calculations +\begin{figure} + \centering +\begin{tabular}{|l|l|l|l|l|} + \hline + Node & \(\deg_a^{+}\) & \(\deg_a^{-}\) & \(\deg_b^+\) & \(\deg_b^-\) \\ + \hline\hline + n0 & 0 & 4 & 0 & 4 \\ + n1 & 1 & 3 & 1 & 3 \\ + n2 & 2 & 2 & 2 & 2 \\ + c0 & 3 & 0 & 5 & 2 \\ + c1 & 1 & 0 & 3 & 2 \\ + c2 & 2 & 0 & 4 & 2 \\ + \hline +\end{tabular} +\end{figure} + +%}}} against graph metrics + +%}}} strategies % vim: set filetype=tex ts=2 sw=2 tw=0 et foldmethod=marker spell : diff --git a/references/2013-Highly_resilient_peer-to-peer_botnets_are_here_An_analysis_of_Gameover_Zeus.pdf b/references/2013-Highly_resilient_peer-to-peer_botnets_are_here_An_analysis_of_Gameover_Zeus.pdf new file mode 100644 index 00000000..ae3481f5 Binary files /dev/null and b/references/2013-Highly_resilient_peer-to-peer_botnets_are_here_An_analysis_of_Gameover_Zeus.pdf differ diff --git a/report.pdf b/report.pdf index 69978a09..d691ee7d 100644 Binary files a/report.pdf and b/report.pdf differ diff --git a/report.tex b/report.tex index 89952529..c0b6b634 100644 --- a/report.tex +++ b/report.tex @@ -23,11 +23,15 @@ headsepline, ]{OTHRartcl} % document language and hyphenation -\usepackage[main=english,german]{babel} +\usepackage[main=english,english,ngerman]{babel} % math stuff \usepackage{amsmath} +\usepackage{amsfonts} \usepackage{mathtools} +% timelines +\usepackage{chronology} + % code listings \usepackage{minted} @@ -36,7 +40,12 @@ headsepline, 
\usepackage[T1]{fontenc} % language specific quotes and general recommendations for biblatex -\usepackage{lmodern,csquotes,xpatch} +\usepackage{lmodern,xpatch} +\usepackage[autostyle]{csquotes} + +% units +\usepackage{siunitx} +\DeclareSIUnit \request{req} % images \usepackage{graphicx} diff --git a/shell.nix b/shell.nix index 376a64ec..87390fb2 100644 --- a/shell.nix +++ b/shell.nix @@ -13,9 +13,11 @@ let amsmath anyfontsize biblatex + chronology csquotes dejavu latexmk + siunitx # code listings minted