diff --git a/appendix.tex b/appendix.tex index 699d8f7d..fe12b31c 100644 --- a/appendix.tex +++ b/appendix.tex @@ -1,22 +1,24 @@ \appendix % TODO: add to table of contents? -\printbibliography{} +\printbibliography[heading=bibintoc]{} \clearpage % TODO: add to table of contents? +\addcontentsline{toc}{section}{List of Figures} \listoffigures \clearpage % TODO: add to table of contents? +\addcontentsline{toc}{section}{List of Tables} \listoftables \clearpage % TODO: add to table of contents? -\printacronyms{} +\printacronyms[name=List of Acronyms,pages={display=all}]{} \clearpage diff --git a/assets/avg_out_edges.png b/assets/avg_out_edges.png new file mode 100644 index 00000000..602904e2 Binary files /dev/null and b/assets/avg_out_edges.png differ diff --git a/bibliography.bib b/bibliography.bib index 1b70074c..6213d583 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -53,6 +53,17 @@ archivedate = {2021-10-25} } +@online{bib:fbi_takedown_2014, + title = {Taking Down Botnets}, + organization = {Federal Bureau of Investigation}, + author = {Joseph Demarest}, + date = {2014-07-15}, + url = {https://www.fbi.gov/news/testimony/taking-down-botnets}, + urldate = {2022-03-23}, + archiveurl = {https://web.archive.org/web/20220318082034/https://www.fbi.gov/news/testimony/taking-down-botnets}, + archivedate = {2022-03-18}, +} + @online{bib:statista_broadband_2021, title = {Availability of broadband internet to households in Germany from 2017 to 2020, by bandwidth class}, organization = {Statista Inc.}, diff --git a/content.tex b/content.tex index 3606b6bd..0d9642ac 100644 --- a/content.tex +++ b/content.tex @@ -22,7 +22,7 @@ In recent years, \ac{iot} botnets have been responsible for some of the biggest A botnet is a network of infected computers with some means of communication to control the infected systems. Classic botnets use one or more central coordinating hosts called \ac{c2} servers. 
These \ac{c2} servers could use any protocol from \ac{irc} over \ac{http} to Twitter~\cite{bib:pantic_covert_2015} as communication channel with the infected hosts. -Abusive use of infected systems includes several things\todo{things = bad}, \eg{}, \ac{ddos} attacks, banking fraud, as proxies to hide the attacker's identity, send spam emails\dots{} +Abusive use of infected systems includes several things\todo{things = bad}---\ac{ddos} attacks, banking fraud, use as proxies to hide the attacker's identity, sending spam emails\dots{} Analyzing and shutting down a centralized botnet is comparatively easy since every bot knows the IP address, domain name, Twitter handle or \ac{irc} channel the \ac{c2} servers are using. @@ -48,13 +48,15 @@ To complicate take-down attempts, botnet operators came up with a number of idea \todo{better image for p2p, really needed?} %}}}fig:c2vsp2p -A number of botnet operations were shut down like this~\cite{bib:nadji_beheading_2013} and as the defenders upped their game, so did attackers\todo{too informal?}---the idea of \ac{p2p} botnets came up. +A number of botnet operations were shut down like this~\cite{bib:nadji_beheading_2013} and as the defenders upped their game, so did attackers\todo{too informal?}---the concept of \ac{p2p} botnets came up. The idea is to build a decentralized network without \acp{spof} where the \ac{c2} servers are as shown in \autoref{fig:p2p}. In a \ac{p2p} botnet, each node in the network knows a number of its neighbors and connects to those, each of these neighbors has a list of neighbors on his own, and so on. +Any of the nodes in \autoref{fig:p2p} could be the botmaster, but the botmaster does not even have to be online all the time since the peers will stay connected autonomously. +The botmaster only needs to join the network to send new commands or receive stolen data. 
This lack of a \ac{spof} makes \ac{p2p} botnets more resilient to take-down attempts since the communication is not stopped and botmasters can easily rejoin the network and send commands. -The constantly growing damage produced by botnets has many researchers and law enforcement agencies trying to shut down these operations~\cite{bib:nadji_beheading_2013, bib:nadji_still_2017, bib:dittrich_takeover_2012}. +The constantly growing damage produced by botnets has many researchers and law enforcement agencies trying to shut down these operations~\cite{bib:nadji_beheading_2013, bib:nadji_still_2017, bib:dittrich_takeover_2012, bib:fbi_takedown_2014}. The monetary value of these botnets directly correlates with the amount of effort, botmasters are willing to put into implementing defense mechanisms against take-down attempts. Some of these countermeasures include deterrence, which limits the number of allowed bots per IP address or subnet to 1; blacklisting, where known crawlers and sensors are blocked from communicating with other bots in the network (mostly IP based); disinformation, when fake bots are placed in the neighborhood lists, which invalidates the data collected by crawlers; and active retaliation like \ac{ddos} attacks against sensors or crawlers~\cite{bib:andriesse_reliable_2015}. \todo{source for constantly growing, position in text} @@ -64,7 +66,7 @@ Some of these countermeasures include deterrence, which limits the number of all %}}} motivation %{{{ formal model -\subsection{Formal Model of a \ac{p2p} Botnet} +\subsection{Formal Model of a \Acs*{p2p} Botnet} A \ac{p2p} botnet can be modelled as a digraph @@ -255,21 +257,20 @@ type PeerTask struct { Let \(C\) be the set of available crawlers. Without loss of generality, if not stated otherwise, we assume that \(C\) is known when \ac{bms} is started and will not change afterward. There will be no joining or leaving crawlers. 
+This assumption greatly simplifies the implementation, since no changing state has to be tracked, while still allowing the described strategies to be explored. +A production-ready implementation of the described techniques can drop this assumption but might have to recalculate the work distribution once a crawler joins or leaves. %{{{ load balancing -\subsection{Load Balancing} +\subsection{Load Balancing}\label{sec:loadBalancing} -This strategy simply splits the work into even chunks and split it between the available crawlers. -The following sharding conditions come to mind: +This strategy simply splits the work into chunks and distributes them among the available crawlers. +The following sharding strategies will be investigated: \begin{itemize} + \item Round Robin. See~\autoref{sec:rr} + \item Assuming IP addresses are evenly distributed and so are infections, take the IP address as an \SI{32}{\bit} integer modulo \(\abs{C}\). See~\autoref{sec:ip_part} Problem: reassignment if a crawler joins or leaves - - \item Maintain an internal counter/list of tasks for each available crawler and assign to the crawler with the most available resources. See~\autoref{sec:ewd} - Easy reassignment - - \item Round Robin. See~\autoref{sec:rr} \end{itemize} Load balancing in itself does not help prevent the detection of crawlers but it allows better usage of available resources. @@ -283,10 +284,42 @@ Load balancing allows scaling out, which can be more cost-effective. Work is evenly distributed between crawlers according to their capabilities. For the sake of simplicity, we will only consider the bandwidth as capability but it can be extended by any shared property between the crawlers, \eg{} available memory, CPU speed. -For a given crawler \(c \in C\) let \(B_c\) be the total bandwidth of the crawler. -The total available bandwidth is \(B = \sum\limits_{c \in C} B_c\). 
-The weight \(W_c = \frac{B}{B_c}\)\todo{proper def for weight} defines which percentage of the work gets assigned to \(c\). -The set of target peers \(P = \), is partitioned into \(|C|\) subsets according to \(W_c\) and each subset is assigned to its crawler \(c\). +For a given crawler \(c_i \in C\) let \(B(c_i)\) be the total bandwidth of the crawler. +The total available bandwidth is \(b = \sum\limits_{c \in C} B(c)\). +The weight \(W(c_i) = \frac{B(c_i)}{b}\)\todo{proper def for weight} defines which fraction of the work gets assigned to \(c_i\). +The set of target peers \(P\) is partitioned into \(\abs{C}\) subsets according to \(W(c_i)\) and each subset is assigned to its crawler \(c_i\). +The mapping \mintinline{go}{gcd(C)} is the greatest common divisor of the weights of all crawlers in \mintinline{go}{C}, and \(\text{maxWeight}(C) = \max \{ W(c) : c \in C \}\) is the largest of these weights. + +The following weighted round-robin algorithm distributes the work according to the crawlers' capabilities: + +\begin{minted}{go} +work := make(map[string][]strategy.Peer) +commonWeight := 0 +counter := -1 +for _, peer := range peers { + for { + counter += 1 + if counter >= len(crawlers) { + counter = 0 + } + crawler := crawlers[counter] + if counter == 0 { + commonWeight = commonWeight - gcd(weightList...) + if commonWeight <= 0 { + commonWeight = max(weightList...) + if commonWeight == 0 { + return nil, errors.New("invalid common weight") + } + } + } + if weights[crawler] >= commonWeight { + work[crawler] = append(work[crawler], peer) + break + } + } +} +\end{minted} +\todo{reference for wrr} \begin{table}[H] \center @@ -410,7 +443,7 @@ While the effective frequency of the whole system is halved compared to~\autoref %}}} frequency reduction %{{{ against graph metrics -\subsection{Working Against Suspicious Graph Metrics} +\subsection{Preventing Suspicious Graph Metrics} \citetitle*{bib:karuppayah_sensorbuster_2017} describes different graph metrics to find sensors in \ac{p2p} botnets. 
These metrics depend on the uneven ratio between incoming and outgoing edges for crawlers. @@ -571,10 +604,19 @@ The distribution graphs in \autoref{fig:dist_sr_25}, \autoref{fig:dist_sr_50} an For all combinations of initial value and PageRank iterations, the rank for a well known crawler is in the \nth{95} percentile, so for our use case, those parameters do not matter. On average, peers in the analyzed dataset have \num{223} successors over the whole week. -Looking at the data in smaller buckets of one hour each, the average number of successors per peer is \num{90}. +Looking at the data in smaller buckets of one hour each, the average number of successors per peer is \num{90}.\todo{timeline with peers per bucket} + +%{{{ fig:avg_out_edges +\begin{figure}[htbp] +\centering +\includegraphics[width=\linewidth]{./avg_out_edges.png} +\caption{Average outgoing edges per peer per hour}\label{fig:avg_out_edges} +\end{figure} +\todo{use better data?} +%}}}fig:avg_out_edges Churn describes the dynamics of peer participation of \ac{p2p} systems, \eg{} join and leave events~\cite{bib:stutzbach_churn_2006}.\todo{übergang} -Detecting if a peer just left the system, in combination with knowledge about \acp{as}, peers that just left and came from an \ac{as} with dynamic IP allocation (\eg{} many consumer broadband providers in the US and Europe), can be placed into the crawler's neighbourhood list. +By detecting that a peer just left the system and combining this with knowledge about \acp{as}, peers that came from an \ac{as} with dynamic IP allocation (\eg{} many consumer broadband providers in the US and Europe) can be identified and placed into the crawler's neighbourhood list.\todo{what is an AS} If the timing of the churn event correlates with IP rotation in the \ac{as}, it can be assumed, that the peer left due to being assigned a new IP address---not due to connectivity issues or going offline---and will not return using the same IP address. 
These peers, when placed in the neighbourhood list of the crawlers, will introduce paths back into the main network and defeat the \ac{wcc} metric. It also helps with the PageRank and SensorRank metrics since the crawlers start to look like regular peers without actually supporting the network by relaying messages or propagating active peers. @@ -585,7 +627,6 @@ This number will differ between different botnets, depending on implementation d Adding edges from the known crawler to \num{90} random peers to simulate the described strategy gives the following rankings:\todo{table, distribution with random edges} - %}}} against graph metrics %}}} strategies @@ -635,8 +676,17 @@ Current report possibilities are \mintinline{go}{LoggingReport} to simply log ne \mintinline{go}{PingPeer} and \mintinline{go}{CrawlPeer} use the implementation of the botnet \mintinline{go}{Protocol} to perform the actual crawling in predefined intervals, which can be overwritten on a per \mintinline{go}{PeerTask} basis. +The server-side part of the system consists of a \ac{grpc} server to handle the client requests, a scheduler to assign new peers, and a \mintinline{go}{Strategy} interface for modularity over how work is assigned to crawlers. + %}}} implementation +%{{{ conclusion +\section{Conclusion, Lessons Learned}\todo{decide} + + + +%}}} + %{{{ further work \section{Further Work} @@ -654,11 +704,13 @@ Doing so would allow a constant crawl interval for even highly volatile botnets. 
In the end, I would like to thank \begin{itemize} - \item Prof.\ Dr.\ Christoph Skornia for being a helpful supervisor in this and earlier works of mine + \item Prof.\ Dr.\ Christoph Skornia for being a helpful supervisor in this and many earlier works of mine \item Leon Böck for offering the possibility to work on this research project, regular feedback and technical expertise - \item Valentin Sundermann for being available for helpful ad-hoc discussions at any time of day for many years + \item Valentin Sundermann for being available for insightful ad hoc discussions at any time of day for many years + + \item Friends and family who pushed me to continue on this path \end{itemize} %}}} acknowledgments diff --git a/report.pdf b/report.pdf index 9465f853..177cf16f 100644 Binary files a/report.pdf and b/report.pdf differ diff --git a/report.tex b/report.tex index a4394ecc..679820f3 100644 --- a/report.tex +++ b/report.tex @@ -10,7 +10,7 @@ % \documentclass[11pt]{diazessay} \documentclass[a4paper, DIV=13, -12pt, +fontsize=13pt, BCOR=10mm, department=FakIM, % lucida,