diff --git a/.gitignore b/.gitignore index abc2ed71..7b7b0397 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ _minted-paper/ *.fls *.lof *.log +*.lot *.out *.pdf *.run.xml diff --git a/appendix.tex b/appendix.tex index db2e8945..699d8f7d 100644 --- a/appendix.tex +++ b/appendix.tex @@ -10,6 +10,11 @@ \clearpage +% TODO: add to table of contents? +\listoftables + +\clearpage + % TODO: add to table of contents? \printacronyms{} diff --git a/bibliography.bib b/bibliography.bib index 6baca94d..1b70074c 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -284,4 +284,46 @@ langid = {english} } +@article{bib:nadji_still_2017, + title = {Still Beheading Hydras: Botnet Takedowns Then and Now}, + volume = {14}, + issn = {1545-5971}, + url = {http://ieeexplore.ieee.org/document/7312442/}, + doi = {10.1109/TDSC.2015.2496176}, + shorttitle = {Still Beheading Hydras}, + pages = {535--549}, + number = {5}, + journaltitle = {{IEEE} Transactions on Dependable and Secure Computing}, + shortjournal = {{IEEE} Trans. 
Dependable and Secure Comput.}, + author = {Nadji, Yacin and Perdisci, Roberto and Antonakakis, Manos}, + urldate = {2022-03-17}, + date = {2017-09-01} +} + +@report{bib:falliere_sality_2011, + title = {{Sality}: Story of a Peer-to-Peer Viral Network}, + author = {Falliere, Nicolas}, + date = {2011-07}, + institution = {Symantec}, + url = {https://papers.vx-underground.org/archive/Symantec/sality-story-of-peer-to-peer-11-en.pdf}, + urldate = {2022-03-16}, + archiveurl = {https://web.archive.org/web/20161223003320/http://www.symantec.com/content/en/us/enterprise/media/security_response/whitepapers/sality_peer_to_peer_viral_network.pdf}, + archivedate = {2016-12-23}, +} + +@inproceedings{bib:dittrich_takeover_2012, + doi = {10.5555/2228340.2228349}, + author = {Dittrich, David}, + title = {So You Want to Take over a Botnet}, + year = {2012}, + publisher = {USENIX Association}, + address = {USA}, + abstract = {Computer criminals regularly construct large distributed attack networks comprised of many thousands of compromised computers around the globe. Once constituted, these attack networks are used to perform computer crimes, creating yet other sets of victims of secondary computer crimes, such as denial of service attacks, spam delivery, theft of personal and financial information for performing fraud, exfiltration of proprietary information for competitive advantage (industrial espionage), etc. The arms race between criminal actors who create and operate botnets and the computer security industry and research community who are actively trying to take these botnets down is escalating in aggressiveness. 
As the sophistication level of botnet engineering and operations increases, so does the demand on reverse engineering, understanding weaknesses in design that can be exploited on the defensive (or counter-offensive) side, and the possibility that actions to take down or eradicate the botnet may cause unintended consequences.}, + booktitle = {Proceedings of the 5th USENIX Conference on Large-Scale Exploits and Emergent Threats}, + pages = {6}, + numpages = {1}, + location = {San Jose, CA}, + series = {LEET'12} +} + /* vim: set filetype=bib ts=2 sw=2 tw=0 et :*/ diff --git a/content.tex b/content.tex index 9c132bae..6ff54fc1 100644 --- a/content.tex +++ b/content.tex @@ -3,15 +3,15 @@ The internet has become an irreplaceable part of our day-to-day lives. We are always connected via numerous \enquote{smart} and \ac{iot} devices. -We use the internet to communicate, shop, handle financial transactions and much more. -Many personal and professional workflows are so dependent on the internet, that they won't work when being offline, and with the pandemic we are living through, this dependency grew even bigger. +We use the internet to communicate, shop, handle financial transactions, and much more. +Many personal and professional workflows are so dependent on the internet that they won't work when being offline, and with the pandemic we are living through, this dependency grew even bigger. %{{{ motivation \subsection{Motivation} -The number of connected \ac{iot} devices is around 10 billion in 2021 and estimated to be constantly growing over the next years up to 25 billion in 2030~\cite{bib:statista_iot_2020}. -Many of these devices run on outdated software, don't receive any updates and don't follow general security best practices. 
-While in 2016 only \SI{77}{\percent} of German households had a broadband connection with a bandwidth of \SI{50}{\mega\bit\per\second} or more, in 2020 it were already \SI{95}{\percent} with more than 50 Mbit/s and \SI{59}{\percent} with at least \SI{1000}{\mega\bit\per\second}~\cite{bib:statista_broadband_2021}. +The number of connected \ac{iot} devices is around 10 billion in 2021 and is estimated to be constantly growing over the next years up to 25 billion in 2030~\cite{bib:statista_iot_2020}. +Many of these devices run on outdated software, don't receive any updates, and don't follow general security best practices. +While in 2016 only \SI{77}{\percent} of German households had a broadband connection with a bandwidth of \SI{50}{\mega\bit\per\second} or more, in 2020 it was already \SI{95}{\percent} with more than \SI{50}{\mega\bit\per\second} and \SI{59}{\percent} with at least \SI{1000}{\mega\bit\per\second}~\cite{bib:statista_broadband_2021}\todo{graph as image?}. This makes them an attractive target for botmasters since they are easy to infect, always online, behind internet connections that are getting faster and faster, and due to their nature as small devices, often without any direct user interaction, an infection can go unnoticed for a long time. In recent years, \ac{iot} botnets have been responsible for some of the biggest \ac{ddos} attacks ever recorded---creating up to \SI{1}{\tera\bit\per\second} of traffic~\cite{bib:ars_ddos_2016}. @@ -22,13 +22,13 @@ In recent years, \ac{iot} botnets have been responsible for some of the biggest A botnet is a network of infected computers with some means of communication to control the infected systems. Classic botnets use one or more central coordinating hosts called \ac{c2} servers. These \ac{c2} servers could use any protocol from \ac{irc} over \ac{http} to Twitter~\cite{bib:pantic_covert_2015} as communication channel with the infected hosts. 
-The infected systems can be abused for a number of things, \eg{} \ac{ddos} attacks, banking fraud, as proxies to hide the attacker's identity, send spam emails\dots{} +Infected systems can be abused for several purposes, \eg{} \ac{ddos} attacks, banking fraud, serving as proxies to hide the attacker's identity, or sending spam emails\dots{} -Analysing and shutting down a centralized botnet is comparatively easily since every bot knows the IP address, domain name, Twitter handle or \ac{irc} channel the \ac{c2} servers are using. +Analyzing and shutting down a centralized botnet is comparatively easy since every bot knows the IP address, domain name, Twitter handle, or \ac{irc} channel the \ac{c2} servers are using. -A coordinated operation with help from law enforcement, hosting providers, domain registrars and platform providers could shut down or take over the operation by changing how requests are rooted or simply shutting down the controlling servers/accounts. +A coordinated operation with help from law enforcement, hosting providers, domain registrars, and platform providers could shut down or take over the operation by changing how requests are routed or simply shutting down the controlling servers/accounts. -To complicate take-down attempts, botnet operators came up with a number of ideas: \acp{dga} use pseudorandomly generated domain names to render simple domain blacklist based approaches ineffective~\cite{bib:antonakakis_dga_2012} or fast-flux \ac{dns}, where a large pool of IP addresses is used assigned randomly to the \ac{c2} domains to prevent IP based blacklisting~\cite{bib:nazario_as_2008}. 
+To complicate take-down attempts, botnet operators came up with a number of ideas: \acp{dga} use pseudorandomly generated domain names to render simple domain blacklist-based approaches ineffective~\cite{bib:antonakakis_dga_2012} or fast-flux \ac{dns}, where a large pool of IP addresses is randomly assigned to the \ac{c2} domains to prevent IP-based blacklisting~\cite{bib:nazario_as_2008}. %{{{ fig:c2vsp2p \begin{figure}[h] @@ -50,13 +50,13 @@ To complicate take-down attempts, botnet operators came up with a number of idea A number of botnet operations were shut down like this~\cite{bib:nadji_beheading_2013} and as the defenders upped their game, so did attackers\todo{too informal?}---the idea of \ac{p2p} botnets came up. The idea is to build a decentralized network without \acp{spof} where the \ac{c2} servers are as shown in \autoref{fig:p2p}. -In a \ac{p2p} botnet, each node in the network knows a number of its neighbours and connects to those, each of these neighbours has a list of neighbours on his own, and so on. +In a \ac{p2p} botnet, each node in the network knows a number of its neighbors and connects to those; each of these neighbors has a list of neighbors of its own, and so on. This lack of a \ac{spof} makes \ac{p2p} botnets more resilient to take-down attempts since the communication is not stopped and botmasters can easily rejoin the network and send commands. -The damage produced by botnets has been constantly growing and there are many researchers and law enforcement agencies trying to shut down these operations. +The constantly growing damage produced by botnets has many researchers and law enforcement agencies trying to shut down these operations~\cite{bib:nadji_beheading_2013,bib:nadji_still_2017,bib:dittrich_takeover_2012}. The monetary value of these botnets directly correlates with the amount of effort, botmasters are willing to put into implementing defense mechanisms against take-down attempts. 
-Some of these countermeasures include deterrence, which limits the amount of allowed bots per IP address or subnet to 1; blacklisting, where known crawlers and sensors are blocked from communicating with other bots in the network (mostly IP based); disinformation, when fake bots are placed in the neighbourhood lists, which invalidates the data collected by crawlers; and active retaliation like \ac{ddos} attacks against sensors or crawlers~\cite{bib:andriesse_reliable_2015}. +Some of these countermeasures include deterrence, which limits the number of allowed bots per IP address or subnet to 1; blacklisting, where known crawlers and sensors are blocked from communicating with other bots in the network (mostly IP based); disinformation, when fake bots are placed in the neighborhood lists, which invalidates the data collected by crawlers; and active retaliation like \ac{ddos} attacks against sensors or crawlers~\cite{bib:andriesse_reliable_2015}. \todo{source for constantly growing, position in text} \todo{take-down? take down?} @@ -167,7 +167,7 @@ Both ranking algorithms use the \(\deg^+\) and \(\deg^-\) to weight the nodes. Another way to enumerate candidates for sensors in a \ac{p2p} botnet is to find \acp{wcc} in the graph. Sensors will have few to none outgoing edges, since they don't participate actively in the botnet. -The goal of this work is to complicate detection mechanisms like this for botmasters, by centralizing the coordination of the system's crawlers and sensors, thereby reducing the node's rank for specific graph metrics. +The goal of this work is to complicate detection mechanisms like this for botmasters by centralizing the coordination of the system's crawlers and sensors, thereby reducing the node's rank for specific graph metrics. The changes should allow the current sensors to use the new abstraction with as few changes as possible to the existing code. 
The final result should be as general as possible and not depend on any botnet's specific behaviour, but it assumes, that every \ac{p2p} botnet has some kind of \enquote{getNeighbourList} method in the protocol, that allows other peers to request a list of active nodes to connect to. @@ -270,6 +270,10 @@ The following sharding conditions come to mind: \item Round Robin \end{itemize} +Load balancing in itself does not help prevent the detection of crawlers, but it allows better usage of available resources. +No peer will be crawled by more than one crawler, and it allows crawling of bigger botnets where the current approach would reach its limit and could only be worked around by scaling up the machine where the crawler is executed. +Load balancing allows scaling out, which can be more cost-effective. + %}}} load balancing %{{{ frequency reduction @@ -391,12 +395,12 @@ With \(v \in V\), \(\text{succ}(v)\) being the set of successors of \(v\) and \( \] For the first iteration, the PageRank of all nodes is set to the same initial value. When iterating often enough, any value can be chosen~\cite{bib:page_pagerank_1998}.\todo{how often? experiments!} -In our experiments on a snapshot of the Sality botnet exported from \ac{bms} over the span of\todo{export timespan}, 3 iterations were enough to get distinct enough values to detect sensors and crawlers. +In our experiments on a snapshot of the Sality~\cite{bib:falliere_sality_2011} botnet exported from \ac{bms} over the span of \daterange{2021-04-22}{2021-04-29}, 3 iterations were enough to get distinct enough values to detect sensors and crawlers. -\begin{figure}[H] \centering \begin{tabular}{lllll} - Iteration & Avg. PR & Crawler PR & Avg. SR & Crawler SR \\ + \textbf{Iteration} & \textbf{Avg. PR} & \textbf{Crawler PR} & \textbf{Avg. SR} & \textbf{Crawler SR} \\ 1 & wat? & wut? & wit? & wot? \\ 2 & wat? & wut? & wit? & wot? \\ 3 & wat? & wut? & wit? & wot? 
\\ @@ -404,13 +408,13 @@ In our experiments on a snapshot of the Sality botnet exported from \ac{bms} ove 5 & wat? & wut? & wit? & wot? \\ \end{tabular} \caption{Values for PageRank iterations with initial rank \(\forall v \in V : \text{PR}(v) = 0.25\)}\label{fig:pr_iter_table} -\end{figure} +\end{table} \todo{proper table formatting} -\begin{figure}[H] +\begin{table}[H] \centering \begin{tabular}{lllll} - Iteration & Avg. PR & Crawler PR & Avg. SR & Crawler SR \\ + \textbf{Iteration} & \textbf{Avg. PR} & \textbf{Crawler PR} & \textbf{Avg. SR} & \textbf{Crawler SR} \\ 1 & wat? & wut? & wit? & wot? \\ 2 & wat? & wut? & wit? & wot? \\ 3 & wat? & wut? & wit? & wot? \\ @@ -418,13 +422,13 @@ In our experiments on a snapshot of the Sality botnet exported from \ac{bms} ove 5 & wat? & wut? & wit? & wot? \\ \end{tabular} \caption{Values for PageRank iterations with initial rank \(\forall v \in V : \text{PR}(v) = 0.5\)}\label{fig:pr_iter_table} -\end{figure} +\end{table} \todo{proper table formatting} -\begin{figure}[H] +\begin{table}[H] \centering \begin{tabular}{lllll} - Iteration & Avg. PR & Crawler PR & Avg. SR & Crawler SR \\ + \textbf{Iteration} & \textbf{Avg. PR} & \textbf{Crawler PR} & \textbf{Avg. SR} & \textbf{Crawler SR} \\ 1 & wat? & wut? & wit? & wot? \\ 2 & wat? & wut? & wit? & wot? \\ 3 & wat? & wut? & wit? & wot? \\ @@ -432,7 +436,7 @@ In our experiments on a snapshot of the Sality botnet exported from \ac{bms} ove 5 & wat? & wut? & wit? & wot? \\ \end{tabular} \caption{Values for PageRank iterations with initial rank \(\forall v \in V : \text{PR}(v) = 0.75\)}\label{fig:pr_iter_table} -\end{figure} +\end{table} \todo{proper table formatting} The dampingFactor describes the probability of a person visiting links on the web to continue doing so, when using PageRank to rank websites in search results. 
@@ -470,7 +474,7 @@ Based on this, SensorRank is defined as Applying SensorRank PageRank once with an initial rank of \(0.25\) once on the example graphs above results in: \todo{pagerank, sensorrank calculations, proper example graphs, proper table formatting} -\begin{figure}[H] +\begin{table}[H] \centering \begin{tabular}{llllll} Node & \(\deg^{+}\) & \(\deg^{-}\) & In \ac{wcc}? & PageRank & SensorRank \\ @@ -482,7 +486,7 @@ Applying SensorRank PageRank once with an initial rank of \(0.25\) once on the e c2 & 2/4 & 0/2 & yes (1/3) & 0.0/0.125 & 0.0/0.0104 \\ \end{tabular} \caption{Values for metrics from~\autoref{fig:sensorbuster} (a/b)}\label{fig:metrics_table} -\end{figure} +\end{table} \todo{big graphs, how many Kn to get significant?} @@ -550,8 +554,11 @@ Current report possibilities are \mintinline{go}{LoggingReport} to simply log ne %{{{ further work \section{Further Work} -Following this work it should be possible to rewrite the existing crawlers to use the new abstraction. -This might bring some performance issues to light which can be solved by investigating the optimizations from the old implementation and apply them to the new one. +Following this work, it should be possible to rewrite the existing crawlers to use the new abstraction. +This might bring some performance issues to light which can be solved by investigating the optimizations from the old implementation and applying them to the new one. + +Another way to expand on this work is automatically scaling the available crawlers up and down, depending on the botnet size and the number of concurrently online peers. +Doing so would allow a constant crawl interval for even highly volatile botnets. 
%}}} further work diff --git a/references/2011_sality-story-of-peer-to-peer-11-en.pdf b/references/2011_sality-story-of-peer-to-peer-11-en.pdf new file mode 100644 index 00000000..37b9e4fd Binary files /dev/null and b/references/2011_sality-story-of-peer-to-peer-11-en.pdf differ diff --git a/references/2012_dittrick_takeover.pdf b/references/2012_dittrick_takeover.pdf new file mode 100644 index 00000000..c9e0f294 Binary files /dev/null and b/references/2012_dittrick_takeover.pdf differ diff --git a/references/2017_Still_Beheading_Hydras_Botnet_Takedowns_Then_and_Now.pdf b/references/2017_Still_Beheading_Hydras_Botnet_Takedowns_Then_and_Now.pdf new file mode 100644 index 00000000..26b9009e Binary files /dev/null and b/references/2017_Still_Beheading_Hydras_Botnet_Takedowns_Then_and_Now.pdf differ diff --git a/report.tex b/report.tex index 8f796f93..f80b6187 100644 --- a/report.tex +++ b/report.tex @@ -36,6 +36,12 @@ headsepline, \usepackage{todonotes} +% nice tables +\usepackage{booktabs} + +% date formatting +\usepackage[english]{isodate} + % timelines \usepackage{chronology} diff --git a/shell.nix b/shell.nix index 92ec3cd0..947ba118 100644 --- a/shell.nix +++ b/shell.nix @@ -16,8 +16,10 @@ let chronology csquotes dejavu + isodate latexmk siunitx + substr todonotes units