aboutsummaryrefslogtreecommitdiffstats
path: root/extra/dot/levels_inbound.py
blob: 290ab6f8787b9d83786eec023a1663a6481feacb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python

"""
Take a fatcat release ident and emit its inbound citation structure as a
graphviz dot file. The traversal is bounded by `max_nodes` loop iterations
rather than by a fixed number of levels.
"""

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import sys
import requests

# Release ident the traversal starts from: the first command line argument
# if one was given, otherwise a built-in example ident.
if len(sys.argv) > 1:
    seed = sys.argv[1]
else:
    seed = "rgzyfbx7t5d53m5amvbllewzqi"

# Limit for the iteration counter of the traversal loop further down.
max_nodes = 50

# Search object bound to the reference index of a local Elasticsearch.
client = Elasticsearch("http://localhost:9200")
S = Search(index="fatcat_ref_v01", using=client)

def pretty_label(s, k=10):
    """
    Wrap a string into several lines for use as a graphviz dot label.

    Words (whitespace separated) are collected greedily into a line; a line is
    closed once its length has reached at least `k` characters, so a line may
    be longer than `k`. The lines are joined with a literal backslash followed
    by "n", which dot interprets as a line break inside a label.

    Every word of the input ends up in the output; an empty input yields an
    empty string.
    """
    lines, cur = [], []
    for word in s.split():
        if len(" ".join(cur)) >= k:
            lines.append(" ".join(cur))
            # The word that triggered the flush opens the next line; resetting
            # to an empty list here would silently drop it from the label.
            cur = [word]
        else:
            cur.append(word)
    lines.append(" ".join(cur))
    return "\\n".join(lines)

def get_dot_label(ident):
    """
    Look up the title of a release via the fatcat API and return it as a
    wrapped dot label.

    If the API response carries no (or an empty) title, the ident itself is
    used as the label, so a missing title does not abort the whole run.
    """
    resp = requests.get(
        "https://api.fatcat.wiki/v0/release/{}".format(ident),
        # Without a timeout a stalled connection would hang the script forever.
        timeout=30,
    )
    title = resp.json().get("title") or ident
    # The label is emitted inside double quotes; escape backslashes and quotes
    # so that a title containing them cannot break the dot syntax.
    title = title.replace("\\", "\\\\").replace('"', '\\"')
    return pretty_label(title, k=10)


def unique_sources(ident):
    """
    Return the distinct source release idents of all reference documents
    whose target release ident matches `ident` (i.e. inbound references).

    The result is a list in arbitrary order.
    """
    search = S.query("match", target_release_ident=ident)
    # scan() iterates over all hits; the set comprehension removes duplicates.
    return list({doc["source_release_ident"] for doc in search.scan()})

# Walk the inbound citation graph starting at the seed. `queue` holds idents
# still to be expanded, `visited` those already expanded, and `edges` the
# collected (cited, citing) pairs.
queue = set([seed])
visited = set()
i = 0
edges = set()
while queue:
    node = queue.pop()
    visited.add(node)
    i += 1
    # Stop once the iteration counter reaches the limit; the node popped in
    # this final iteration is not expanded.
    if i == max_nodes:
        break
    # Only follow a handful of citing documents per node to keep the graph small.
    for n in unique_sources(node)[:5]:
        if n == node:
            print("skipping self ref: {}".format(n), file=sys.stderr)
            continue
        edges.add((node, n))
        # Citation cycles would otherwise re-queue nodes that were expanded
        # before, spending the limited budget on repeated queries.
        if n not in visited:
            queue.add(n)

# Every ident that appears on either end of an edge is a node of the graph.
nodes = {endpoint for edge in edges for endpoint in edge}

# Emit the graph in dot syntax on stdout: first one labelled statement per
# node, then one statement per edge.
node_template = """ "{}"  [label="{}"]; """
edge_template = """ "{}" -> "{}"; """

print("digraph G {")
# Use the (wrapped) release title as a readable label instead of the ident.
for ident in nodes:
    print(node_template.format(ident, get_dot_label(ident)))

for tail, head in edges:
    print(edge_template.format(tail, head))
print("}")