aboutsummaryrefslogtreecommitdiffstats
path: root/extra/dot/levels.py
blob: ab4bf85b41d8ede1094c3611e5449c27b3f42e33 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
"""
Take a fatcat release ident and emit its citation graph (release -> cited releases) as a graphviz dot file. The traversal is bounded by `max_nodes_total` and `max_nodes`.

    $ python levels.py [RELEASE IDENT]

Throwaway script, expected to be run on the same machine as the ref index currently.
"""

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import sys
import requests

# Release ident to start from: built-in sample ident, overridden by the first
# command line argument when one is given.
seed = "rgzyfbx7t5d53m5amvbllewzqi"
if len(sys.argv) > 1:
    seed = sys.argv[1]

# Limits applied by the traversal in the main section below.
max_nodes_total = 30
max_nodes = 5

# Search handle on the local ref index, shared by all queries in this script.
client = Elasticsearch("http://localhost:9200")
S = Search(using=client, index="fatcat_ref_v01")


def pretty_label(s, k=10):
    """
    Break a string into short lines for use as a graphviz label.

    Whitespace separated words are collected into a line until that line is at
    least `k` characters long; the following word then starts a new line. The
    lines are joined with a literal backslash followed by "n" (not a newline
    character), which is the line break escape in dot labels.

    Every word of the input is kept. Returns an empty string for empty or
    whitespace-only input.
    """
    cur, result = [], []
    for p in s.split():
        if cur and len(" ".join(cur)) >= k:
            # Line is full: flush it, then let the current word open the next
            # line so that it is not lost.
            result.append(" ".join(cur))
            cur = []
        cur.append(p)
    # Flush the last, possibly short, line; skip it when there were no words.
    if cur:
        result.append(" ".join(cur))
    return "\\n".join(result)


def get_dot_label(ident):
    """
    Look up the title of a release via the fatcat API and return it formatted
    as a dot label (see `pretty_label`).

    Falls back to the ident itself when the response contains no title, so a
    node always gets a usable label. Network and JSON decoding errors are not
    caught here.
    """
    resp = requests.get(
        "https://api.fatcat.wiki/v0/release/{}".format(ident),
        timeout=30,  # do not hang forever on an unresponsive API
    )
    title = resp.json().get("title") or ident
    # The label ends up inside a double-quoted dot string; escape backslashes
    # and double quotes so that a title cannot terminate or garble it.
    title = title.replace("\\", "\\\\").replace('"', '\\"')
    return pretty_label(title, k=10)


def unique_targets(ident):
    """
    Return the distinct target release idents found in the ref index for the
    given source release ident, as a list in no particular order.
    """
    query = S.query("match", source_release_ident=ident)
    # A set comprehension collapses repeated targets.
    return list({hit["target_release_ident"] for hit in query.scan()})


if __name__ == "__main__":
    # Only the traversal below needs a deque, so import it locally.
    from collections import deque

    # Breadth-first traversal starting at the seed: a FIFO queue spends the
    # node budget level by level instead of in arbitrary order.
    queue = deque([seed])
    # Idents that have been queued at some point; prevents looking up the same
    # release again when the citation graph contains cycles.
    seen = {seed}
    edges = set()
    expanded = 0
    # Look up the targets of at most `max_nodes_total` releases.
    while queue and expanded < max_nodes_total:
        node = queue.popleft()
        expanded += 1
        # Follow at most `max_nodes` targets per release.
        for n in unique_targets(node)[:max_nodes]:
            if n == node:
                print("skipping self ref: {}".format(n), file=sys.stderr)
                continue
            edges.add((node, n))
            if n not in seen:
                seen.add(n)
                queue.append(n)

    # Every ident that appears in an edge becomes a node; the seed is always
    # included, so it is drawn even when no edges were found.
    nodes = {seed}
    for a, b in edges:
        nodes.add(a)
        nodes.add(b)

    print("digraph G {")
    # add sensible labels; sorted so that repeated runs emit the same file
    for n in sorted(nodes):
        print(""" "{}"  [label="{}"]; """.format(n, get_dot_label(n)))

    for a, b in sorted(edges):
        print(""" "{}" -> "{}"; """.format(a, b))
    print("}")