Updates

commit 49e45e147f
parent aef24e4d69
@@ -82,7 +82,7 @@ def plot(percentage, added_percentage, algo, data):
     # plt.show()


-def main():
+def main2():
     for reduced_percentage in reduce_edges.percentages:
         perc = reduce_edges.percentages.copy()
         perc.append(1.0)
@@ -99,5 +99,92 @@ def main():
             plot_in_out(reduced_percentage, added_percentage, data)


+def plot2(percentage, algo, data):
+    times = []
+    mean = []
+    stdev = []
+    a_avg_in = []
+    known_rank = []
+    known_in = []
+    known_out = []
+    for when, d in sorted(data.items(), key=lambda kv: kv[0]):
+        times.append(when)
+        mean.append(d[algo]['mean'])
+        stdev.append(d[algo]['stdev'])
+        a_avg_in.append(d[algo]['avg_in'])
+        known_rank.append(d[algo]['known_rank'])
+        known_in.append(d[algo]['known_in'])
+        known_out.append(d[algo]['known_out'])
+
+    # avg_out = sum(known_out) / len(known_out)
+    # avg_in = sum(known_in) / len(known_in)
+
+    fig, ax = plt.subplots()
+    a = 'SensorRank' if algo == 'sr' else 'PageRank'
+    ax.set_ylabel(f'{a}')
+    ax.plot(times, mean, label='Avg. Rank')
+    # ax.errorbar(times, mean, stdev, label='Avg. Rank')
+    # ax.plot(times, known_in, label='Known In')  # TODO
+    ax.plot(times, known_rank, label='Known Rank')
+    # title = f'{a} after removing {percentage * 100}% edges and adding {added_percentage * 100}%\nin = {avg_in:.02f} out = {avg_out:.02f}'
+    title = f'{a} after removing {percentage * 100}% edges'
+    ax.set_title(title)
+
+    fig.autofmt_xdate()
+    fig.legend()
+    path = f'./plot_reduced/{percentage:.02f}/{algo}.png'
+    with reduce_edges.open_mkdir(path, 'w'):
+        print('created')
+    plt.savefig(path)
+    plt.close(fig)
+
+
+def plot_in_out2(percentage, data):
+    times = []
+    avg_in = []
+    known_in = []
+    known_out = []
+    # same value, independent of algo
+    algo = 'sr'
+    for when, d in sorted(data.items(), key=lambda kv: kv[0]):
+        times.append(when)
+        avg_in.append(d[algo]['avg_in'])
+        known_in.append(d[algo]['known_in'])
+        known_out.append(d[algo]['known_out'])
+
+    fig, ax = plt.subplots()
+    a = 'SensorRank' if algo == 'sr' else 'PageRank'
+    ax.set_ylabel(f'{a}')
+    ax.plot(times, avg_in, label='Avg. In')
+    # ax.plot(times, known_in, label='Known In')  # TODO
+    ax.plot(times, known_in, label='Known In')
+    ax.plot(times, known_out, label='Known Out')
+    title = f'In And Out after removing {percentage * 100}% edges'
+    ax.set_title(title)
+
+    fig.autofmt_xdate()
+    fig.legend()
+    path = f'./plot_reduced/{percentage:.02f}/in_out.png'
+    with reduce_edges.open_mkdir(path, 'w'):
+        print('created')
+    plt.savefig(path)
+    plt.close(fig)
+
+
+def main():
+    for perc in reduce_edges.percentages:
+        data = {}
+        for file in glob.glob(f'./data_reduced/{perc:.02f}/*.json'):
+            when = datetime.fromtimestamp(float(file.split('/')[-1][:-5]))
+            print(f'{perc=:.02f}, {when=}')
+            data[when] = load_json(file)
+
+        plot2(perc, 'sr', data)
+        plot2(perc, 'pr', data)
+        plot_in_out2(perc, data)
+
+
+if __name__ == '__main__':
+    main()
@@ -94,7 +94,7 @@ def create_crawlers(graph, n_crawlers, n_edges):



-def rank(path, added_percentage):
+def rank(path):
     edges = reduce_edges.load_data(path)
     g = build_graph(edges, initial_rank=rank_with_churn.INITIAL_RANK)

@@ -103,13 +103,13 @@ def rank(path, added_percentage):
     # print(f'removing {len(for_removal)} incoming edges')
     # for edge in for_removal:
     #     g.remove_edge(edge[0], edge[1])
-    n_known_in = len(list(filter(lambda e: e[1] == rank_with_churn.KNOWN, g.edges())))
-    # avg_out = rank_with_churn.avg_out(g)
-    churned_peers = int(n_known_in * added_percentage)
-    known_pred = len(list(g.predecessors(rank_with_churn.KNOWN)))
-    c_out = int(known_pred * added_percentage)
-    crawlers = create_crawlers(g, churned_peers, c_out)
-    print(f'{added_percentage=}, {churned_peers=}')
+    # n_known_in = len(list(filter(lambda e: e[1] == rank_with_churn.KNOWN, g.edges())))
+    # # avg_out = rank_with_churn.avg_out(g)
+    # churned_peers = int(n_known_in * added_percentage)
+    # known_pred = len(list(g.predecessors(rank_with_churn.KNOWN)))
+    # c_out = int(known_pred * added_percentage)
+    # crawlers = create_crawlers(g, churned_peers, c_out)
+    # print(f'{added_percentage=}, {churned_peers=}')
     # assert added_percentage == 0 or churned_peers != 0

     # if churned_peers > 0:
@@ -117,9 +117,9 @@ def rank(path, added_percentage):
     # #     destinations = sample(nodes, churned_peers)
     #     destinations = high_succ(g, churned_peers, rank_with_churn.KNOWN)
     #     print(f'!!!!!! adding destinations: {destinations}')
-    print(f'adding {len(crawlers)=} crawlers with {c_out=} successors')
-    for node in crawlers:
-        g.add_edge(rank_with_churn.KNOWN, node)
+    # print(f'adding {len(crawlers)=} crawlers with {c_out=} successors')
+    # for node in crawlers:
+    #     g.add_edge(rank_with_churn.KNOWN, node)

     print('pr start')
     g_pr = page_rank(page_rank(g))
@@ -136,7 +136,7 @@ def rank(path, added_percentage):
 def main():
     # pool = multiprocessing.Pool(processes=4)
     params = []
-    for reduced_percentage in [0.0]:  # in reduce_edges.percentages:
+    for reduced_percentage in reduce_edges.percentages:
         for file in glob.glob(f'./edges_reduced/{reduced_percentage:.02f}/*.txt'):
             params.append([reduced_percentage, file])
             # p = Proc(reduced_percentage, file)
@@ -151,29 +151,29 @@ def main():
         l_path_data = pool.map(wohoo, params)
         for path_data in l_path_data:
             for path, data in path_data.items():
-                with open(path, 'w') as f:
+                with reduce_edges.open_mkdir(path, 'w') as f:
                     json.dump(data, f)


 def wohoo(p):
-    reduced_percentage = p[0]
-    file = p[1]
-    path_data = {}
+    reduced_percentage, file = p
+    # path_data = {}
     # ps = reduce_edges.percentages.copy()
-    ps = [0.3, 0.5, 0.75]
-    ps.append(0.1)
-    ps.append(1.0)
-    ps.append(1.2)
+    # ps = [0.3, 0.5, 0.75]
+    # ps.append(0.1)
+    # ps.append(1.0)
+    # ps.append(1.2)
     # ps.append(2.0)
-    for added_percentage in ps:
-        when = datetime.fromtimestamp(float(file.split('/')[-1][:-4]))
-        print(f'{reduced_percentage=:.02f}, {added_percentage=:.02f}, {when=}')
-        result = rank(file, added_percentage)
-        path = f'./data_reduced/{reduced_percentage:.02f}/{added_percentage:.02f}/{when.timestamp()}.json'
-        path_data[path] = result
+    # for added_percentage in ps:
+    #     print(f'{reduced_percentage=:.02f}, {added_percentage=:.02f}, {when=}')
+    #     result = rank(file, added_percentage)
+    #     path_data[path] = result
     # with open() as f:
     #     json.dump(result, f)
-    return path_data
+    when = datetime.fromtimestamp(float(file.split('/')[-1][:-4]))
+    path = f'./data_reduced/{reduced_percentage:.02f}/{when.timestamp()}.json'
+    result = rank(file)
+    return {path: result}


 if __name__ == '__main__':
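As an aside, a minimal self-contained sketch of the fan-out pattern main() and wohoo() follow after this change (the worker name and payloads here are hypothetical stand-ins): each worker returns a {path: result} mapping, and only the parent process touches the filesystem.

    import json
    import multiprocessing


    def worker(param):
        # Hypothetical stand-in for wohoo(): compute one result and hand
        # back the output path together with the data instead of writing
        # it inside the worker process.
        percentage, file = param
        result = {'file': file, 'percentage': percentage}
        return {f'./out/{percentage:.02f}.json': result}


    if __name__ == '__main__':
        params = [(0.1, 'a.txt'), (0.5, 'b.txt')]
        with multiprocessing.Pool(processes=2) as pool:
            for path_data in pool.map(worker, params):
                for path, data in path_data.items():
                    # The real code writes via reduce_edges.open_mkdir(path, 'w').
                    print(f'would write {path}: {json.dumps(data)}')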
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+import os
+import multiprocessing
 from datetime import datetime
 from random import sample, seed
 import rank_with_churn
@@ -15,10 +17,15 @@ from node_ranking import (
     # RankedNode,
 )

+
+def open_mkdir(path, mode):
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    return open(path, mode)
+

 def load_data(path):
     data = []
     with open(path, 'r') as f:
-        for line in f.readlines():
+        for line in f:
             data.append(parse_csv(line))

     return data
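For illustration, a small usage sketch of the new open_mkdir helper (the path is hypothetical): it opens a file for writing after creating any missing parent directories.

    # Hypothetical usage of open_mkdir: parent directories are created on
    # demand, so nested output paths need no prior mkdir -p. Note this
    # assumes the path has a directory component; for a bare filename,
    # os.path.dirname(path) returns '' and os.makedirs would fail.
    with open_mkdir('./edges_reduced/0.30/1600000000.0.txt', 'w') as f:
        f.write('10.0.0.1,4711,10.0.0.2,4711\n')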
@@ -26,26 +33,46 @@ def load_data(path):
 def edges_from(g, node):
     return list(filter(lambda e: e[1] == node, g.edges()))


-def remove_edges(path, percentage):
+def remove_edges(path, edges, percentage):
     when = datetime.fromtimestamp(float(path.split('/')[-1][:-4]))
-    print(f'{when=}, {percentage=}')
-    edges = load_data(path)
+    log = f'{when=}, {percentage=}'
+    print(log)
+    # edges = load_data(path)
     g = build_graph(edges)
     edges = edges_from(g, rank_with_churn.KNOWN)
     for_removal = sample(edges, int(len(edges) * percentage))
     for edge in for_removal:
         g.remove_edge(edge[0], edge[1])
-    with open(f'./edges_reduced/{percentage:.02f}/{when.timestamp()}.txt', 'w') as f:
+    path = f'./edges_reduced/{percentage:.02f}/{when.timestamp()}.txt'
+    with open_mkdir(path, 'w') as f:
         for [s, d] in g.edges():
-            row = f'{s.node.ip},{s.node.port},{d.node.ip},{d.node.port}\n'
-            f.write(row)
+            f.write(f'{s.node.ip},{s.node.port},{d.node.ip},{d.node.port}\n')
+    return f'done: {log}'


-percentages = [0.0, 0.3, 0.5, 0.75]
+def work(params):
+    # print(f'starting work {params=}')
+    path, edges, percentage = params
+    return remove_edges(path, edges, percentage)
+
+
+percentages = [0.0, 0.1, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9]
+# percentages = [0.0]
+
+
 def main():
-    for percentage in percentages:
-        for file in glob.glob('./edges/*.txt'):
-            remove_edges(file, percentage)
+    params = []
+    for file in glob.glob('./edges/*.txt'):
+        edges = load_data(file)
+        for percentage in percentages:
+            params.append([file, edges, percentage])
+            # remove_edges(file, percentage)
+
+    print('created params')
+    with multiprocessing.Pool(processes=8) as pool:
+        res = pool.map(work, params)
+        for r in res:
+            print(r)


 if __name__ == '__main__':
@@ -603,7 +603,7 @@ The following candidates to place on the neighbor list will be investigated:

 \subsubsection{Other Sensors or Crawlers}

-Returning all the other sensors when responding to peer list requests, thereby effectively creating a complete graph \(K_\abs{C}\) among the workers, creates valid outgoing edges.
+Returning all the other sensors when responding to peer list requests, thereby effectively creating a complete graph \(K_{\abs{C}}\) among the workers, creates valid outgoing edges.
 The resulting graph will still form a \ac{wcc} with no edges back into the main network.

 PageRank is the sum of a node's predecessors' ranks, each divided by the number of successors of that predecessor.
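Written out as a formula, this is a sketch of the update rule exactly as the sentence above states it, without a damping factor; the \operatorname names are illustrative, while \abs is the document's own macro:

\[
    PR(v) = \sum_{u \in \operatorname{pred}(v)} \frac{PR(u)}{\abs{\operatorname{succ}(u)}}
\]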
BIN report.pdf (binary file not shown)