This commit is contained in:
Valentin Brandl 2022-03-15 18:14:22 +01:00
parent 3edb5b2bd1
commit e3142f1938
11 changed files with 212 additions and 70 deletions

1
.gitignore vendored
View File

@ -19,3 +19,4 @@ _minted-report
!report.pdf !report.pdf
!references/*.pdf !references/*.pdf
!assets/*.pdf

View File

@ -50,7 +50,9 @@
\DeclareAcronym{spof}{ \DeclareAcronym{spof}{
short = {SPOF}, short = {SPOF},
long = {single point of failure} long = {single point of failure},
short-plural-form = {SPOF},
long-plural-form = {single points of failure},
} }
\DeclareAcronym{bms}{ \DeclareAcronym{bms}{

Binary file not shown.

View File

@ -8,7 +8,7 @@
pages = {86-124} pages = {86-124}
} }
@article{zhang_building_2014, @article{bib:zhang_building_2014,
title = {Building a Scalable System for Stealthy P2P-Botnet Detection}, title = {Building a Scalable System for Stealthy P2P-Botnet Detection},
volume = {9}, volume = {9},
issn = {1556-6013, 1556-6021}, issn = {1556-6013, 1556-6021},
@ -24,7 +24,7 @@
file = {Full Text:/home/me/Zotero/storage/PFXP8NLV/Zhang et al. - 2014 - Building a Scalable System for Stealthy P2P-Botnet.pdf:application/pdf} file = {Full Text:/home/me/Zotero/storage/PFXP8NLV/Zhang et al. - 2014 - Building a Scalable System for Stealthy P2P-Botnet.pdf:application/pdf}
} }
@incollection{kim_survey_2012, @incollection{bib:kim_survey_2012,
location = {Dordrecht}, location = {Dordrecht},
title = {A Survey on P2P Botnet Detection}, title = {A Survey on P2P Botnet Detection},
volume = {120}, volume = {120},
@ -42,7 +42,7 @@
file = {Full Text:/home/me/Zotero/storage/CMFWF58V/Han and Im - 2012 - A Survey on P2P Botnet Detection.pdf:application/pdf} file = {Full Text:/home/me/Zotero/storage/CMFWF58V/Han and Im - 2012 - A Survey on P2P Botnet Detection.pdf:application/pdf}
} }
@online{statista_iot_2020, @online{bib:statista_iot_2020,
title = {Number of Internet of Things (IoT) Connected Devices Worldwide from 2019 to 2030}, title = {Number of Internet of Things (IoT) Connected Devices Worldwide from 2019 to 2030},
organization = {Statista Inc.}, organization = {Statista Inc.},
publisher = {Transforma Insights}, publisher = {Transforma Insights},
@ -53,7 +53,7 @@
archivedate = {2021-10-25} archivedate = {2021-10-25}
} }
@online{statista_broadband_2021, @online{bib:statista_broadband_2021,
title = {Availability of broadband internet to households in Germany from 2017 to 2020, by bandwidth class}, title = {Availability of broadband internet to households in Germany from 2017 to 2020, by bandwidth class},
organization = {Statista Inc.}, organization = {Statista Inc.},
publisher = {BMVI}, publisher = {BMVI},
@ -64,7 +64,7 @@
archivedate = {2021-03-09} archivedate = {2021-03-09}
} }
@online{ars_ddos_2016, @online{bib:ars_ddos_2016,
title = {Brace yourselves --- source code powering potent IoT DDoSes just went public}, title = {Brace yourselves --- source code powering potent IoT DDoSes just went public},
date = {2016-10-02}, date = {2016-10-02},
author = {Dan Goodin}, author = {Dan Goodin},
@ -75,7 +75,7 @@
archivedate = {2021-10-22}, archivedate = {2021-10-22},
} }
@article{fan_p2p_2014, @article{bib:fan_p2p_2014,
title = {A P2P Botnet Detection Method Used On-line Monitoring and Off-line Detection}, title = {A P2P Botnet Detection Method Used On-line Monitoring and Off-line Detection},
volume = {8}, volume = {8},
issn = {17389976, 17389976}, issn = {17389976, 17389976},
@ -91,7 +91,7 @@
file = {Full Text:/home/me/Zotero/storage/7UI2IFIL/Fan and Xu - 2014 - A P2P Botnet Detection Method Used On-line Monitor.pdf:application/pdf} file = {Full Text:/home/me/Zotero/storage/7UI2IFIL/Fan and Xu - 2014 - A P2P Botnet Detection Method Used On-line Monitor.pdf:application/pdf}
} }
@inproceedings{bock_poster_2019, @inproceedings{bib:bock_poster_2019,
location = {London United Kingdom}, location = {London United Kingdom},
title = {Poster: Challenges of Accurately Measuring Churn in P2P Botnets}, title = {Poster: Challenges of Accurately Measuring Churn in P2P Botnets},
isbn = {978-1-4503-6747-9}, isbn = {978-1-4503-6747-9},
@ -109,7 +109,7 @@
file = {Böck et al. - 2019 - Poster Challenges of Accurately Measuring Churn i.pdf:/home/me/Zotero/storage/FGQXMN3H/Böck et al. - 2019 - Poster Challenges of Accurately Measuring Churn i.pdf:application/pdf} file = {Böck et al. - 2019 - Poster Challenges of Accurately Measuring Churn i.pdf:/home/me/Zotero/storage/FGQXMN3H/Böck et al. - 2019 - Poster Challenges of Accurately Measuring Churn i.pdf:application/pdf}
} }
@inproceedings{karuppayah_boobytrap_2016, @inproceedings{bib:karuppayah_boobytrap_2016,
location = {Kuala Lumpur, Malaysia}, location = {Kuala Lumpur, Malaysia},
title = {{BoobyTrap}: On autonomously detecting and characterizing crawlers in P2P botnets}, title = {{BoobyTrap}: On autonomously detecting and characterizing crawlers in P2P botnets},
isbn = {978-1-4799-6664-6}, isbn = {978-1-4799-6664-6},
@ -126,7 +126,7 @@
file = {Karuppayah et al. - 2016 - BoobyTrap On autonomously detecting and character.pdf:/home/me/Zotero/storage/UAUH5ZAN/Karuppayah et al. - 2016 - BoobyTrap On autonomously detecting and character.pdf:application/pdf} file = {Karuppayah et al. - 2016 - BoobyTrap On autonomously detecting and character.pdf:/home/me/Zotero/storage/UAUH5ZAN/Karuppayah et al. - 2016 - BoobyTrap On autonomously detecting and character.pdf:application/pdf}
} }
@inproceedings{andriesse_reliable_2015, @inproceedings{bib:andriesse_reliable_2015,
location = {Tokyo Japan}, location = {Tokyo Japan},
title = {Reliable Recon in Adversarial Peer-to-Peer Botnets}, title = {Reliable Recon in Adversarial Peer-to-Peer Botnets},
isbn = {978-1-4503-3848-6}, isbn = {978-1-4503-3848-6},
@ -143,7 +143,7 @@
file = {Andriesse et al. - 2015 - Reliable Recon in Adversarial Peer-to-Peer Botnets.pdf:/home/me/Zotero/storage/YJZMYTCB/Andriesse et al. - 2015 - Reliable Recon in Adversarial Peer-to-Peer Botnets.pdf:application/pdf} file = {Andriesse et al. - 2015 - Reliable Recon in Adversarial Peer-to-Peer Botnets.pdf:/home/me/Zotero/storage/YJZMYTCB/Andriesse et al. - 2015 - Reliable Recon in Adversarial Peer-to-Peer Botnets.pdf:application/pdf}
} }
@inproceedings{karuppayah_sensorbuster_2017, @inproceedings{bib:karuppayah_sensorbuster_2017,
title = {{{SensorBuster}}: {{On Identifying Sensor Nodes}} in {{P2P Botnets}}}, title = {{{SensorBuster}}: {{On Identifying Sensor Nodes}} in {{P2P Botnets}}},
shorttitle = {{{SensorBuster}}}, shorttitle = {{{SensorBuster}}},
booktitle = {Proceedings of the 12th {{International Conference}} on {{Availability}}, {{Reliability}} and {{Security}}}, booktitle = {Proceedings of the 12th {{International Conference}} on {{Availability}}, {{Reliability}} and {{Security}}},
@ -162,7 +162,7 @@
series = {{{ARES}} '17} series = {{{ARES}} '17}
} }
@report{page_pagerank_1998, @report{bib:page_pagerank_1998,
title = {{The PageRank Citation Ranking: Bringing Order to the Web}}, title = {{The PageRank Citation Ranking: Bringing Order to the Web}},
shorttitle = {{The PageRank Citation Ranking}}, shorttitle = {{The PageRank Citation Ranking}},
author = {Page, Lawrence and Brin, Sergey and Motwani, Rajeev and Winograd, Terry}, author = {Page, Lawrence and Brin, Sergey and Motwani, Rajeev and Winograd, Terry},
@ -172,7 +172,7 @@
abstract = {The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describ es PageRank, a method for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.} abstract = {The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describ es PageRank, a method for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.}
} }
@inproceedings{andriesse_goz_2013, @inproceedings{bib:andriesse_goz_2013,
title = {Highly Resilient Peer-to-Peer Botnets Are Here: {{An}} Analysis of {{Gameover Zeus}}}, title = {Highly Resilient Peer-to-Peer Botnets Are Here: {{An}} Analysis of {{Gameover Zeus}}},
shorttitle = {Highly Resilient Peer-to-Peer Botnets Are Here}, shorttitle = {Highly Resilient Peer-to-Peer Botnets Are Here},
booktitle = {2013 8th {{International Conference}} on {{Malicious}} and {{Unwanted Software}}: "{{The Americas}}" ({{MALWARE}})}, booktitle = {2013 8th {{International Conference}} on {{Malicious}} and {{Unwanted Software}}: "{{The Americas}}" ({{MALWARE}})},
@ -189,7 +189,7 @@
file = {/home/me/Zotero/storage/R3AAQR9Q/Andriesse et al. - 2013 - Highly resilient peer-to-peer botnets are here An.pdf} file = {/home/me/Zotero/storage/R3AAQR9Q/Andriesse et al. - 2013 - Highly resilient peer-to-peer botnets are here An.pdf}
} }
@inproceedings{stutzbach_churn_2006, @inproceedings{bib:stutzbach_churn_2006,
title = {Understanding Churn in Peer-to-Peer Networks}, title = {Understanding Churn in Peer-to-Peer Networks},
booktitle = {Proceedings of the 6th {{ACM SIGCOMM}} on {{Internet}} Measurement - {{IMC}} '06}, booktitle = {Proceedings of the 6th {{ACM SIGCOMM}} on {{Internet}} Measurement - {{IMC}} '06},
author = {Stutzbach, Daniel and Rejaie, Reza}, author = {Stutzbach, Daniel and Rejaie, Reza},
@ -205,4 +205,83 @@
langid = {english} langid = {english}
} }
@inproceedings{bib:rossow_sok_2013,
location = {Berkeley, {CA}, {USA}},
title = {{SoK}: P2PWNED - Modeling and Evaluating the Resilience of Peer-to-Peer Botnets},
isbn = {978-1-4673-6166-8 978-0-7695-4977-4},
url = {https://ieeexplore.ieee.org/document/6547104/},
doi = {10.1109/SP.2013.17},
shorttitle = {{SoK}},
eventtitle = {2013 {IEEE} Symposium on Security and Privacy ({SP})},
pages = {97--111},
booktitle = {2013 {IEEE} Symposium on Security and Privacy},
publisher = {{IEEE}},
author = {Rossow, Christian and Andriesse, Dennis and Werner, Tillmann and Stone-Gross, Brett and Plohmann, Daniel and Dietrich, Christian J. and Bos, Herbert},
urldate = {2022-03-15},
date = {2013-05},
file = {Submitted Version:/home/me/Zotero/storage/7T8RDXXF/Rossow et al. - 2013 - SoK P2PWNED - Modeling and Evaluating the Resilie.pdf:application/pdf}
}
@inproceedings{bib:antonakakis_dga_2012,
author = {Manos Antonakakis and Roberto Perdisci and Yacin Nadji and Nikolaos Vasiloglou and Saeed Abu-Nimeh and Wenke Lee and David Dagon},
title = {From {Throw-Away} Traffic to Bots: Detecting the Rise of {DGA-Based} Malware},
booktitle = {21st USENIX Security Symposium (USENIX Security 12)},
year = {2012},
isbn = {978-1-931971-95-9},
address = {Bellevue, WA},
pages = {491--506},
url = {https://www.usenix.org/conference/usenixsecurity12/technical-sessions/presentation/antonakakis},
publisher = {USENIX Association},
month = aug,
}
@inproceedings{bib:pantic_covert_2015,
location = {Los Angeles, {CA}, {USA}},
title = {Covert Botnet Command and Control Using Twitter},
isbn = {978-1-4503-3682-6},
url = {http://dl.acm.org/citation.cfm?doid=2818000.2818047},
doi = {10.1145/2818000.2818047},
eventtitle = {the 31st Annual Computer Security Applications Conference},
pages = {171--180},
booktitle = {Proceedings of the 31st Annual Computer Security Applications Conference on - {ACSAC} 2015},
publisher = {{ACM} Press},
author = {Pantic, Nick and Husain, Mohammad I.},
urldate = {2022-03-15},
date = {2015},
langid = {english}
}
@inproceedings{bib:nazario_as_2008,
location = {Fairfax, {VA}},
title = {As the net churns: Fast-flux botnet observations},
isbn = {978-1-4244-3288-2},
url = {https://ieeexplore.ieee.org/document/4690854/},
doi = {10.1109/MALWARE.2008.4690854},
shorttitle = {As the net churns},
eventtitle = {2008 3rd International Conference on Malicious and Unwanted Software ({MALWARE})},
pages = {24--31},
booktitle = {2008 3rd International Conference on Malicious and Unwanted Software ({MALWARE})},
publisher = {{IEEE}},
author = {Nazario, Jose and Holz, Thorsten},
urldate = {2022-03-15},
date = {2008-10}
}
@inproceedings{bib:nadji_beheading_2013,
location = {Berlin, Germany},
title = {Beheading hydras: performing effective botnet takedowns},
isbn = {978-1-4503-2477-9},
url = {http://dl.acm.org/citation.cfm?doid=2508859.2516749},
doi = {10.1145/2508859.2516749},
shorttitle = {Beheading hydras},
eventtitle = {the 2013 {ACM} {SIGSAC} conference},
pages = {121--132},
booktitle = {Proceedings of the 2013 {ACM} {SIGSAC} conference on Computer \& communications security - {CCS} '13},
publisher = {{ACM} Press},
author = {Nadji, Yacin and Antonakakis, Manos and Perdisci, Roberto and Dagon, David and Lee, Wenke},
urldate = {2022-03-15},
date = {2013},
langid = {english}
}
/* vim: set filetype=bib ts=2 sw=2 tw=0 et :*/ /* vim: set filetype=bib ts=2 sw=2 tw=0 et :*/

View File

@ -4,28 +4,31 @@
The internet has become an irreplaceable part of our day-to-day lives. The internet has become an irreplaceable part of our day-to-day lives.
We are always connected via numerous \enquote{smart} and \ac{iot} devices. We are always connected via numerous \enquote{smart} and \ac{iot} devices.
We use the internet to communicate, shop, handle financial transactions and much more. We use the internet to communicate, shop, handle financial transactions and much more.
Many personal and professional workflows are so dependent on the internet, that they won't work when being offline. Many personal and professional workflows are so dependent on the internet, that they won't work when being offline, and with the pandemic we are living through, this dependency grew even bigger.
%{{{ motivation %{{{ motivation
\subsection{Motivation} \subsection{Motivation}
The number of connected \ac{iot} devices is around 10 billion in 2021 and estimated to be constantly growing over the next years up to 25 billion in 2030~\cite{statista_iot_2020}. The number of connected \ac{iot} devices is around 10 billion in 2021 and estimated to be constantly growing over the next years up to 25 billion in 2030~\cite{bib:statista_iot_2020}.
Many of these devices run on outdated software, don't receive any updates and don't follow general security best practices. Many of these devices run on outdated software, don't receive any updates and don't follow general security best practices.
While in 2016 only 77\% of German households had a broadband connection with a bandwidth of 50 Mbit/s or more, in 2020 it was already 95\% with more than 50 Mbit/s and 59\% with at least 1000 Mbit/s~\cite{statista_broadband_2021}. While in 2016 only \SI{77}{\percent} of German households had a broadband connection with a bandwidth of \SI{50}{\mega\bit\per\second} or more, in 2020 it was already \SI{95}{\percent} with more than \SI{50}{\mega\bit\per\second} and \SI{59}{\percent} with at least \SI{1000}{\mega\bit\per\second}~\cite{bib:statista_broadband_2021}.
This makes them an attractive target for botmasters since they are easy to infect, always online, behind internet connections that are getting faster and faster, and due to their nature as small devices, often without any direct user interaction, an infection can go unnoticed for a long time. This makes them an attractive target for botmasters since they are easy to infect, always online, behind internet connections that are getting faster and faster, and due to their nature as small devices, often without any direct user interaction, an infection can go unnoticed for a long time.
In recent years, \ac{iot} botnets have been responsible for some of the biggest \ac{ddos} attacks ever recorded, creating up to 1 Tbit/s of traffic~\cite{ars_ddos_2016}. In recent years, \ac{iot} botnets have been responsible for some of the biggest \ac{ddos} attacks ever recorded---creating up to \SI{1}{\tera\bit\per\second} of traffic~\cite{bib:ars_ddos_2016}.
\todo{what is a bot? Infected systems. Malware. DGA, beispiele, tree vs graph} \todo{what is a bot? Infected systems. Malware. DGA, beispiele, tree vs graph}
A botnet describes a network of connected computers with some way to control the infected systems.
In classic botnets, there are one or more central coordinating hosts called \ac{c2} servers. A botnet is a network of infected computers with some means of communication to control the infected systems.
These \ac{c2} servers could use anything from \ac{irc} over \ac{http} to Twitter as communication channel with the infected systems. Classic botnets use one or more central coordinating hosts called \ac{c2} servers.
The infected systems can be abused for a number of things, \eg{} \ac{ddos} attacks, stealing data from victims, as proxies to hide the attacker's identity, send spam emails\dots{} These \ac{c2} servers could use any protocol from \ac{irc} over \ac{http} to Twitter~\cite{bib:pantic_covert_2015} as communication channel with the infected hosts.
The infected systems can be abused for a number of things, \eg{} \ac{ddos} attacks, banking fraud, as proxies to hide the attacker's identity, send spam emails\dots{}
Analysing and shutting down a centralized botnet is comparatively easy since every bot knows the IP address, domain name, Twitter handle or \ac{irc} channel the \ac{c2} servers are using. Analysing and shutting down a centralized botnet is comparatively easy since every bot knows the IP address, domain name, Twitter handle or \ac{irc} channel the \ac{c2} servers are using.
A targeted operation with help from law enforcement, hosting providers, domain registrars and platform providers could shut down or take over the operation by changing how requests are routed or simply shutting down the controlling servers/accounts. A coordinated operation with help from law enforcement, hosting providers, domain registrars and platform providers could shut down or take over the operation by changing how requests are routed or simply shutting down the controlling servers/accounts.
To complicate take-down attempts, botnet operators came up with a number of ideas: \acp{dga} use pseudorandomly generated domain names to render simple domain blacklist based approaches ineffective~\cite{bib:antonakakis_dga_2012}, and fast-flux \ac{dns} randomly assigns IP addresses from a large pool to the \ac{c2} domains to prevent IP based blacklisting~\cite{bib:nazario_as_2008}.
%{{{ fig:c2vsp2p %{{{ fig:c2vsp2p
\begin{figure}[h] \begin{figure}[h]
@ -45,37 +48,47 @@ A targeted operation with help from law enforcement, hosting providers, domain r
\todo{better image for p2p, really needed?} \todo{better image for p2p, really needed?}
%}}}fig:c2vsp2p %}}}fig:c2vsp2p
A number of botnet operations were shut down like this and as the defenders upped their game, so did attackers\todo{too informal?} --- the idea of \ac{p2p} botnets came up. A number of botnet operations were shut down like this~\cite{bib:nadji_beheading_2013} and as the defenders upped their game, so did attackers\todo{too informal?}---the idea of \ac{p2p} botnets came up.
The idea is to build a decentralized network without single points of failure where the \ac{c2} servers are as shown in \autoref{fig:p2p}. The idea is to build a decentralized network without \acp{spof} where the \ac{c2} servers are as shown in \autoref{fig:p2p}.
In a \ac{p2p} botnet, each node in the network knows a number of its neighbours and connects to those, each of these neighbours has a list of neighbours of its own, and so on. In a \ac{p2p} botnet, each node in the network knows a number of its neighbours and connects to those, each of these neighbours has a list of neighbours of its own, and so on.
This lack of a \ac{spof} makes \ac{p2p} botnets more resilient to take-down attempts since the communication is not stopped and botmasters can easily rejoin the network and send commands. This lack of a \ac{spof} makes \ac{p2p} botnets more resilient to take-down attempts since the communication is not stopped and botmasters can easily rejoin the network and send commands.
Formally, a \ac{p2p} botnet can be modelled as a digraph
\begin{align*}
G &= (V, E)
\end{align*}
With the set of vertices \(V\) describing the bots in the network and the set of edges \(E\) describing the \enquote{is neighbour of} relationships between bots.
For a vertex \(v \in V\), the in and out degree \(\deg^{+}\) and \(\deg^{-}\) describe how many bots know \(v\) or are known by \(v\) respectively.
\begin{align*}
\deg^{+}(v) &= \abs{\{ u \in V \mid (u, v) \in E \}} \\
\deg^{-}(v) &= \abs{\{ u \in V \mid (v, u) \in E \}}
\end{align*}
For a vertex \(v \in V\), the in degree \(\deg^{+}(v) = \abs{\{ u \in V \mid (u, v) \in E \}}\) and out degree \(\deg^{-}(v) = \abs{\{ u \in V \mid (v, u) \in E \}}\) describe how many bots know \(v\) and how many nodes \(v\) knows respectively.
The damage produced by botnets has been constantly growing and there are many researchers and law enforcement agencies trying to shut down these operations. The damage produced by botnets has been constantly growing and there are many researchers and law enforcement agencies trying to shut down these operations.
The monetary value of these botnets directly correlates with the amount of effort, botmasters are willing to put into implementing defense mechanisms against take-down attempts. The monetary value of these botnets directly correlates with the amount of effort, botmasters are willing to put into implementing defense mechanisms against take-down attempts.
Some of these countermeasures include deterrence, which limits the amount of allowed bots per IP address or subnet to 1; blacklisting, where known crawlers and sensors are blocked from communicating with other bots in the network (mostly IP based); disinformation, when fake bots are placed in the neighbourhood lists, which invalidates the data collected by crawlers; and active retaliation like \ac{ddos} attacks against sensors or crawlers~\cite{andriesse_reliable_2015}. Some of these countermeasures include deterrence, which limits the amount of allowed bots per IP address or subnet to 1; blacklisting, where known crawlers and sensors are blocked from communicating with other bots in the network (mostly IP based); disinformation, when fake bots are placed in the neighbourhood lists, which invalidates the data collected by crawlers; and active retaliation like \ac{ddos} attacks against sensors or crawlers~\cite{bib:andriesse_reliable_2015}.
\todo{source for constantly growing, position in text} \todo{source for constantly growing, position in text}
\todo{take-down? take down?} \todo{take-down? take down?}
%}}} motivation %}}} motivation
%{{{ formal model
\subsection{Formal Model of a \ac{p2p} Botnet}
A \ac{p2p} botnet can be modelled as a digraph
\begin{align*}
G &= (V, E)
\end{align*}
With the set of vertices \(V\) describing the bots in the network and the set of edges \(E\) describing the communication flow between bots.
\(\forall v \in V\), the predecessors \(\text{pred}(v)\) and successors \(\text{succ}(v)\) are defined as:
\begin{align*}
\text{succ}(v) &= \{ u \in V \mid (v, u) \in E \} \\
\text{pred}(v) &= \{ u \in V \mid (u, v) \in E \}
\end{align*}
For a vertex \(v \in V\), the in and out degree \(\deg^{+}\) and \(\deg^{-}\) describe how many bots know \(v\) or are known by \(v\) respectively.
\begin{align*}
\deg^{+}(v) &= \abs{\text{pred}(v)} \\
\deg^{-}(v) &= \abs{\text{succ}(v)}
\end{align*}
%}}} formal model
%{{{ detection techniques %{{{ detection techniques
\subsection{Detection Techniques for \Acs*{p2p} Botnets} \subsection{Detection Techniques for \Acs*{p2p} Botnets}
@ -87,7 +100,7 @@ There are two distinct methods to map and get an overview of the network topolog
For passive detection, traffic flows are analysed in large amounts of collected network traffic (\eg{} from \acp{isp}). For passive detection, traffic flows are analysed in large amounts of collected network traffic (\eg{} from \acp{isp}).
This has some advantages in that it is not possible for botmasters to detect or prevent data collection of that kind, but it is not trivial to distinguish valid \ac{p2p} application traffic (\eg{} BitTorrent, Skype, cryptocurrencies, \ldots) from \ac{p2p} bots. This has some advantages in that it is not possible for botmasters to detect or prevent data collection of that kind, but it is not trivial to distinguish valid \ac{p2p} application traffic (\eg{} BitTorrent, Skype, cryptocurrencies, \ldots) from \ac{p2p} bots.
\citeauthor{zhang_building_2014} propose a system of statistical analysis to solve some of these problems in~\cite{zhang_building_2014}. \citeauthor{bib:zhang_building_2014} propose a system of statistical analysis to solve some of these problems in~\cite{bib:zhang_building_2014}.
Also getting access to the required datasets might not be possible for everyone. Also getting access to the required datasets might not be possible for everyone.
\todo{no context} \todo{no context}
@ -95,7 +108,7 @@ Also getting access to the required datasets might not be possible for everyone.
\todo{BotMiner (in zhang\_building\_2014)} \todo{BotMiner (in zhang\_building\_2014)}
\begin{itemize} \begin{itemize}
\item Large scale network analysis (hard to differentiate from legitimate \ac{p2p} traffic (\eg{} BitTorrent), hard to get data, knowledge of some known bots required)~\cite{zhang_building_2014} \item Large scale network analysis (hard to differentiate from legitimate \ac{p2p} traffic (\eg{} BitTorrent), hard to get data, knowledge of some known bots required)~\cite{bib:zhang_building_2014}
\item Heuristics: Same traffic patterns, same malicious behaviour \item Heuristics: Same traffic patterns, same malicious behaviour
@ -145,11 +158,11 @@ There are three subtypes of active detection:
%{{{ methodology %{{{ methodology
\section{Methodology} \section{Methodology}
The implementation of the concepts of this work will be done as part of \ac{bms}\footnotemark, a monitoring platform for \ac{p2p} botnets described by \citeauthor{bock_poster_2019} in~\cite{bock_poster_2019}. The implementation of the concepts of this work will be done as part of \ac{bms}\footnotemark, a monitoring platform for \ac{p2p} botnets described by \citeauthor{bib:bock_poster_2019} in~\cite{bib:bock_poster_2019}.
\footnotetext{\url{https://github.com/Telecooperation/BMS}} \footnotetext{\url{https://github.com/Telecooperation/BMS}}
\Ac{bms} uses a hybrid active approach of crawlers and sensors (reimplementations of the \ac{p2p} protocol of a botnet, that won't perform malicious actions) to collect live data from active botnets. \Ac{bms} uses a hybrid active approach of crawlers and sensors (reimplementations of the \ac{p2p} protocol of a botnet, that won't perform malicious actions) to collect live data from active botnets.
In an earlier project, I implemented different node ranking algorithms (among others \enquote{PageRank}~\cite{page_pagerank_1998}) to detect sensors and crawlers in a botnet, as described in \citetitle{karuppayah_sensorbuster_2017}. In an earlier project, I implemented different node ranking algorithms (among others \enquote{PageRank}~\cite{bib:page_pagerank_1998}) to detect sensors and crawlers in a botnet, as described in \citetitle{bib:karuppayah_sensorbuster_2017}.
Both ranking algorithms use the \(\deg^+\) and \(\deg^-\) to weight the nodes. Both ranking algorithms use the \(\deg^+\) and \(\deg^-\) to weight the nodes.
Another way to enumerate candidates for sensors in a \ac{p2p} botnet is to find \acp{wcc} in the graph. Another way to enumerate candidates for sensors in a \ac{p2p} botnet is to find \acp{wcc} in the graph.
Sensors will have few to no outgoing edges, since they don't participate actively in the botnet. Sensors will have few to no outgoing edges, since they don't participate actively in the botnet.
@ -239,10 +252,30 @@ type PeerTask struct {
%{{{ strategies %{{{ strategies
\section{Coordination Strategies} \section{Coordination Strategies}
Let \(C\) be the set of available crawlers.
%{{{ load balancing
\subsection{Load Balancing}
This strategy simply splits the work into even chunks and distributes them among the available crawlers.
The following sharding conditions come to mind:
\begin{itemize}
\item Assuming IP addresses are evenly distributed and so are infections, take the IP address as a \SI{32}{\bit} integer modulo \(\abs{C}\).
Problem: reassignment if a crawler joins or leaves
\item Maintain an internal counter/list of tasks for each available crawler and assign to the crawler with the most available resources.
Easy reassignment
\item Round Robin
\end{itemize}
%}}} load balancing
%{{{ frequency reduction %{{{ frequency reduction
\subsection{Reduction of Request Frequency} \subsection{Reduction of Request Frequency}
The GameOver Zeus botnet deployed a blacklisting mechanism, where crawlers are blocked based on their request frequency~\cite{andriesse_goz_2013}. The GameOver Zeus botnet deployed a blacklisting mechanism, where crawlers are blocked based on their request frequency~\cite{bib:andriesse_goz_2013}.
In a single crawler approach, the crawler frequency has to be limited to prevent hitting the request limit. In a single crawler approach, the crawler frequency has to be limited to prevent hitting the request limit.
%{{{ fig:old_crawler_timeline %{{{ fig:old_crawler_timeline
@ -266,13 +299,13 @@ The amount of crawlers \(C\) required to achieve the frequency \(F\) without bei
\begin{align*} \begin{align*}
C &= \left\lceil \frac{F}{L} \right\rceil \\ C &= \left\lceil \frac{F}{L} \right\rceil \\
O &= \frac{1 \si{\request}}{F} O &= \frac{\SI{1}{\request}}{F}
\end{align*} \end{align*}
Taking advantage of the \mintinline{go}{StartAt} field from the \mintinline{go}{PeerTask} returned by the \mintinline{go}{requestTasks} primitive above, the crawlers can be scheduled offset by \(O\) at a frequency \(L\) to ensure, the overall requests to each peer are evenly distributed over time. Taking advantage of the \mintinline{go}{StartAt} field from the \mintinline{go}{PeerTask} returned by the \mintinline{go}{requestTasks} primitive above, the crawlers can be scheduled offset by \(O\) at a frequency \(L\) to ensure, the overall requests to each peer are evenly distributed over time.
Given a limit \(L = 5 \si{\request\per 100\second}\), crawling a botnet at \(F = 20 \si{\request\per 100\second}\) requires \(C = \left\lceil \frac{20\si{\request\per 100\second}}{5\si{\request\per 100\second}} \right\rceil = 4\) crawlers. Given a limit \(L = \SI{5}{\request\per 100\second}\), crawling a botnet at \(F = \SI{20}{\request\per 100\second}\) requires \(C = \left\lceil \frac{\SI{20}{\request\per 100\second}}{\SI{5}{\request\per 100\second}} \right\rceil = 4\) crawlers.
Those crawlers must be scheduled \(O = \frac{1\si{\request}}{20\si{\request\per 100\second}} = 5 \si{\second}\) apart at a frequency of \(L\) for an even request distribution. Those crawlers must be scheduled \(O = \frac{\SI{1}{\request}}{\SI{20}{\request\per 100\second}} = \SI{5}{\second}\) apart at a frequency of \(L\) for an even request distribution.
%{{{ fig:crawler_timeline %{{{ fig:crawler_timeline
@ -308,11 +341,11 @@ Those crawlers must be scheduled \(O = \frac{1\si{\request}}{20\si{\request\per
\end{figure} \end{figure}
%}}} fig:crawler_timeline %}}} fig:crawler_timeline
As can be seen in~\autoref{fig:crawler_timeline}, each crawler \(C_0\) to \(C_3\) performs only 5 \si{\request\per 100\second} while overall achieving \(20 \si{\request\per 100\second}\). As can be seen in~\autoref{fig:crawler_timeline}, each crawler \(C_0\) to \(C_3\) performs only \SI{5}{\request\per 100\second} while overall achieving \(\SI{20}{\request\per 100\second}\).
Vice versa, given a number of crawlers \(C\) and a request limit \(L\), the effective frequency \(F\) can be maximized to \(F = C \times L\) without hitting the limit \(L\) and being blocked. Vice versa, given a number of crawlers \(C\) and a request limit \(L\), the effective frequency \(F\) can be maximized to \(F = C \times L\) without hitting the limit \(L\) and being blocked.
Using the example from above with \(L = 5 \si{\request\per 100\second}\) but now only two crawlers \(C = 2\), it is still possible to achieve an effective frequency of \(F = 2 \times 5 \si{\request\per 100\second} = 10 \si{\request\per 100\second}\) and \(O = \frac{1 \si{\request}}{10 \si{\request\per 100\second}} = 10 \si{s}\): Using the example from above with \(L = \SI{5}{\request\per 100\second}\) but now only two crawlers \(C = 2\), it is still possible to achieve an effective frequency of \(F = 2 \times \SI{5}{\request\per 100\second} = \SI{10}{\request\per 100\second}\) and \(O = \frac{\SI{1}{\request}}{\SI{10}{\request\per 100\second}} = \SI{10}{s}\):
%TODO: name %TODO: name
%{{{ fig:crawler_timeline %{{{ fig:crawler_timeline
@ -345,13 +378,13 @@ While the effective frequency of the whole system is halved compared to~\autoref
\todo{sinnvoll?} \todo{sinnvoll?}
\subsection{Working Against Suspicious Graph Metrics} \subsection{Working Against Suspicious Graph Metrics}
\citetitle*{karuppayah_sensorbuster_2017} describes different graph metrics to find sensors in \ac{p2p} botnets. \citetitle*{bib:karuppayah_sensorbuster_2017} describes different graph metrics to find sensors in \ac{p2p} botnets.
One of those, \enquote{SensorBuster}, uses \acp{wcc} since crawlers don't have any edges back to the main network in the graph. One of those, \enquote{SensorBuster}, uses \acp{wcc} since crawlers don't have any edges back to the main network in the graph.
Building a complete graph \(G_C = K_{\abs{C}}\) between the crawlers by making them return the other crawlers on peer list requests would still produce a disconnected component and while being bigger and maybe not as obvious at first glance, it is still easily detectable since there is no path from \(G_C\) back to the main network (see~\autoref{fig:sensorbuster2} and~\autoref{fig:metrics_table}). Building a complete graph \(G_C = K_{\abs{C}}\) between the crawlers by making them return the other crawlers on peer list requests would still produce a disconnected component and while being bigger and maybe not as obvious at first glance, it is still easily detectable since there is no path from \(G_C\) back to the main network (see~\autoref{fig:sensorbuster2} and~\autoref{fig:metrics_table}).
\todo{rank? deg+ - deg-?} \todo{rank? deg+ - deg-?}
With \(v \in V\), \(\text{rank}(v)\), \(\text{succ}(v)\) being the set of successors of \(v\) and \(\text{pred}(v)\) being the set of predecessors of \(v\), PageRank is defined as~\cite{page_pagerank_1998}: With \(v \in V\), \(\text{rank}(v)\), \(\text{succ}(v)\) being the set of successors of \(v\) and \(\text{pred}(v)\) being the set of predecessors of \(v\), PageRank is defined as~\cite{bib:page_pagerank_1998}:
\[ \[
\text{PR}(v) = \text{dampingFactor} \times \sum\limits_{p \in \text{pred}(v)} \frac{\text{rank}(p)}{\abs{\text{succ}(p)}} + \frac{1 - \text{dampingFactor}}{\abs{V}} \text{PR}(v) = \text{dampingFactor} \times \sum\limits_{p \in \text{pred}(v)} \frac{\text{rank}(p)}{\abs{\text{succ}(p)}} + \frac{1 - \text{dampingFactor}}{\abs{V}}
@ -415,7 +448,7 @@ While this works for small networks, the crawlers must account for a significant
\subsubsection{Excursus: Churn} \subsubsection{Excursus: Churn}
Churn describes the dynamics of peer participation of \ac{p2p} systems, \eg{} join and leave events~\cite{stutzbach_churn_2006}. Churn describes the dynamics of peer participation of \ac{p2p} systems, \eg{} join and leave events~\cite{bib:stutzbach_churn_2006}.
By detecting that a peer just left the system and combining this with knowledge about \acp{as}, peers that just left and came from an \ac{as} with dynamic IP allocation (\eg{} many consumer broadband providers in the US and Europe) can be placed into the crawler's neighbourhood list. By detecting that a peer just left the system and combining this with knowledge about \acp{as}, peers that just left and came from an \ac{as} with dynamic IP allocation (\eg{} many consumer broadband providers in the US and Europe) can be placed into the crawler's neighbourhood list.
If the timing of the churn event correlates with IP rotation in the \ac{as}, it can be assumed that the peer left due to being assigned a new IP address and not due to connectivity issues or going offline, and will not return using the same IP address. If the timing of the churn event correlates with IP rotation in the \ac{as}, it can be assumed that the peer left due to being assigned a new IP address and not due to connectivity issues or going offline, and will not return using the same IP address.
These peers, when placed in the neighbourhood list of the crawlers, will introduce paths back into the main network and defeat the \ac{wcc} metric. These peers, when placed in the neighbourhood list of the crawlers, will introduce paths back into the main network and defeat the \ac{wcc} metric.
@ -447,29 +480,44 @@ A new crawler abstraction was created with testability, extensibility and most f
The new implementation consists of three main interfaces: The new implementation consists of three main interfaces:
\begin{itemize} \begin{itemize}
\item \textbf{FindPeer}, to receive new crawl tasks from any source \item \mintinline{go}{FindPeer}, to receive new crawl tasks from any source
\item \textbf{ReportPeer}, to report newly found peers \item \mintinline{go}{ReportPeer}, to report newly found peers
\item \textbf{Protocol}, the actual botnet protocol implementation used to ping a peer and request its neighbourhood list \item \mintinline{go}{Protocol}, the actual botnet protocol implementation used to ping a peer and request its neighbourhood list
\end{itemize} \end{itemize}
Currently there are two sources \textbf{FindPeer} can use: read peers from a file on disk or request them from the \ac{grpc} BMS coordinator. Currently there are two sources \mintinline{go}{FindPeer} can use: read peers from a file on disk or request them from the \ac{grpc} BMS coordinator.
The \textbf{ExactlyOnceFinder} delegate can wrap another \textbf{FindPeer} instance and ensures the source is only requested once. The \mintinline{go}{ExactlyOnceFinder} delegate can wrap another \mintinline{go}{FindPeer} instance and ensures the source is only requested once.
This is used to implement the bootstrapping mechanism of the old crawler, where once, when the crawler is started, the list of bootstrap nodes is loaded from a textfile. This is used to implement the bootstrapping mechanism of the old crawler, where once, when the crawler is started, the list of bootstrap nodes is loaded from a textfile.
\textbf{CombinedFinder} can combine any amount of \textbf{FindPeer} instances and will return the sum of requesting all the sources. \mintinline{go}{CombinedFinder} can combine any amount of \mintinline{go}{FindPeer} instances and will return the sum of requesting all the sources.
The \textbf{PeerTask} instances returned by \textbf{FindPeer} contain the IP address and port of the peer, if the crawler should start or stop the operation, when to start and stop crawling and in which interval the peer should be crawled. The \mintinline{go}{PeerTask} instances returned by \mintinline{go}{FindPeer} contain the IP address and port of the peer, if the crawler should start or stop the operation, when to start and stop crawling and in which interval the peer should be crawled.
For each task, a \textbf{CrawlPeer} and \textbf{PingPeer} worker is started or stopped as specified in the received \textbf{PeerTask}. For each task, a \mintinline{go}{CrawlPeer} and \mintinline{go}{PingPeer} worker is started or stopped as specified in the received \mintinline{go}{PeerTask}.
These tasks use the \textbf{ReportPeer} interface to report any new peer that is found. These tasks use the \mintinline{go}{ReportPeer} interface to report any new peer that is found.
Current report possibilities are \textbf{LoggingReport} to simply log new peers to get feedback from the crawler at runtime, and \textbf{BMSReport} which reports back to \ac{bms}. Current report possibilities are \mintinline{go}{LoggingReport} to simply log new peers to get feedback from the crawler at runtime, and \mintinline{go}{BMSReport} which reports back to \ac{bms}.
\textbf{BatchedReport} delegates to a \textbf{ReportPeer} instance and batches newly found peers up to a specified batch size and only then flushes and actually reports them. \mintinline{go}{BatchedReport} delegates to a \mintinline{go}{ReportPeer} instance and batches newly found peers up to a specified batch size and only then flushes and actually reports them.
\textbf{AutoCommitReport} will automatically flush a delegated \textbf{ReportPeer} instance after a fixed amount of time and is used in combination with \textbf{BatchedReport} to ensure the batches are written regularly, even if the batch limit is not reached yet. \mintinline{go}{AutoCommitReport} will automatically flush a delegated \mintinline{go}{ReportPeer} instance after a fixed amount of time and is used in combination with \mintinline{go}{BatchedReport} to ensure the batches are written regularly, even if the batch limit is not reached yet.
\textbf{CombinedReport} works analogously to \textbf{CombinedFinder} and combines many \textbf{ReportPeer} instances into one. \mintinline{go}{CombinedReport} works analogously to \mintinline{go}{CombinedFinder} and combines many \mintinline{go}{ReportPeer} instances into one.
\textbf{PingPeer} and \textbf{CrawlPeer} use the implementation of the botnet \textbf{Protocol} to perform the actual crawling in predefined intervals, which can be overwritten on a per \textbf{PeerTask} basis. \mintinline{go}{PingPeer} and \mintinline{go}{CrawlPeer} use the implementation of the botnet \mintinline{go}{Protocol} to perform the actual crawling in predefined intervals, which can be overwritten on a per \mintinline{go}{PeerTask} basis.
%}}} implementation %}}} implementation
%{{{ acknowledgments
\section*{Acknowledgments}
In the end, I would like to thank
\begin{itemize}
\item Prof.\ Dr.\ Christoph Skornia for being a helpful supervisor in this and earlier works of mine
\item Leon Böck for offering the possibility to work on this research project, regular feedback and technical expertise
\item Valentin Sundermann for being available for helpful ad-hoc discussions at any time of day for many years
\end{itemize}
%}}} acknowledgments
% vim: set filetype=tex ts=2 sw=2 tw=0 et foldmethod=marker spell : % vim: set filetype=tex ts=2 sw=2 tw=0 et foldmethod=marker spell :

BIN
references/2012_DGA.pdf Normal file

Binary file not shown.

Binary file not shown.

BIN
references/2013_p2pwned.pdf Normal file

Binary file not shown.

Binary file not shown.

View File

@ -50,9 +50,19 @@ headsepline,
\usepackage{lmodern,xpatch} \usepackage{lmodern,xpatch}
\usepackage[autostyle]{csquotes} \usepackage[autostyle]{csquotes}
% formatting numbers
\usepackage{nicefrac}
% units % units
\usepackage{siunitx} \usepackage{siunitx}
\DeclareSIUnit \request{req} \sisetup{%
group-separator={,},
group-minimum-digits=5,
range-phrase={\text{\ensuremath{-}}},
per-mode = fraction,
fraction-function=\nicefrac,
}
\DeclareSIUnit{\request}{req}
\DeclareSIUnit{\bit}{Bit}
% images % images
\usepackage{graphicx} \usepackage{graphicx}
@ -61,6 +71,7 @@ headsepline,
% acronyms % acronyms
\usepackage{acro} \usepackage{acro}
\acsetup{single}
\include{acronyms} \include{acronyms}
% bibliography % bibliography

View File

@ -19,6 +19,7 @@ let
latexmk latexmk
siunitx siunitx
todonotes todonotes
units
# code listings # code listings
minted minted