diff options
Diffstat (limited to 'extra/dot/levels_inbound.py')
-rw-r--r-- | extra/dot/levels_inbound.py | 73 |
1 files changed, 73 insertions, 0 deletions
#!/usr/bin/env python

"""
Take a document fatcat id and emit the inbound citation structure as a
graphviz dot file.

Usage:

    python levels_inbound.py [RELEASE_IDENT] > graph.dot

Starting at the seed release, the reference index is queried for releases
whose references target the current release. Traversal is breadth-first, so
releases closest to the seed are expanded first; it stops once `max_nodes`
releases have been expanded. Each printed edge `"a" -> "b"` means that
release `b` is a source of a reference whose target is release `a`.

Diagnostics go to stderr, the dot document goes to stdout.
"""

import sys
from collections import deque

# Third-party packages (elasticsearch, elasticsearch_dsl, requests) are
# imported lazily inside the functions that need them, so the pure helpers
# below stay importable without those packages or any running service.

seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"
max_nodes = 50

# Lazily created elasticsearch_dsl Search object, shared by all queries.
_SEARCH = None


def _search():
    """Return the shared Search over the reference index, creating it once."""
    global _SEARCH
    if _SEARCH is None:
        from elasticsearch import Elasticsearch
        from elasticsearch_dsl import Search

        client = Elasticsearch("http://localhost:9200")
        _SEARCH = Search(using=client, index="fatcat_ref_v01")
    return _SEARCH


def escape_dot(text):
    """
    Escape backslashes and double quotes so `text` can be embedded in a
    double-quoted dot string without terminating it early.
    """
    return text.replace("\\", "\\\\").replace('"', '\\"')


def pretty_label(s, k=10):
    """
    Break `s` into lines for use as a dot label.

    Words are accumulated on the current line until that line is at least
    `k` characters long; the next word then starts a new line. Lines are
    joined with a literal backslash-n, which dot renders as a line break.
    Every word of `s` ends up in the result.
    """
    lines, cur = [], []
    for word in s.split():
        if cur and len(" ".join(cur)) >= k:
            lines.append(" ".join(cur))
            cur = []
        # The word that triggered the break opens the new line; it must not
        # be discarded.
        cur.append(word)
    lines.append(" ".join(cur))
    return "\\n".join(lines)


def get_dot_label(ident):
    """
    Return a wrapped, dot-safe label for the release `ident`.

    The title is fetched from the fatcat API. If the request fails, the
    response is not JSON, or it carries no title, a warning is written to
    stderr (for failures) and the ident itself is used as the label, so a
    single bad lookup does not abort the whole graph.
    """
    import requests

    url = "https://api.fatcat.wiki/v0/release/{}".format(ident)
    title = None
    try:
        # Without a timeout a stalled connection would block forever.
        title = requests.get(url, timeout=30).json().get("title")
    except (requests.RequestException, ValueError) as exc:
        print("label lookup failed for {}: {}".format(ident, exc), file=sys.stderr)
    if not title:
        title = ident
    # Escape before wrapping, so the line-break sequences added by
    # pretty_label are not escaped themselves.
    return pretty_label(escape_dot(title), k=10)


def unique_sources(ident):
    """
    Unique inbound references.

    Returns the distinct `source_release_ident` values of all index hits
    whose `target_release_ident` matches `ident`, as a sorted list so that
    callers slicing the result get a reproducible selection.
    """
    s = _search().query("match", target_release_ident=ident)
    return sorted({hit["source_release_ident"] for hit in s.scan()})


def collect_edges(seed, limit, fanout=5, sources=unique_sources):
    """
    Walk inbound references breadth-first, starting at `seed`.

    At most `limit` releases are expanded, and for each expanded release
    only the first `fanout` entries returned by `sources(ident)` are
    considered. Self references are reported on stderr and skipped. A
    release is queued at most once, so citation cycles do not consume the
    expansion budget.

    Returns a set of `(target, source)` tuples.
    """
    queue = deque([seed])
    seen = {seed}
    edges = set()
    expanded = 0
    while queue and expanded < limit:
        node = queue.popleft()
        expanded += 1
        for n in sources(node)[:fanout]:
            if n == node:
                print("skipping self ref: {}".format(n), file=sys.stderr)
                continue
            edges.add((node, n))
            if n not in seen:
                seen.add(n)
                queue.append(n)
    return edges


def render_dot(edges, label=get_dot_label):
    """
    Yield the lines of a dot digraph for `edges`.

    Node declarations (labelled via `label(ident)`) come first, followed by
    the edges; both are emitted in sorted order so the output is stable.
    Lines are yielded one at a time, so output can be printed while labels
    are still being looked up.
    """
    nodes = set()
    for a, b in edges:
        nodes.add(a)
        nodes.add(b)

    yield "digraph G {"
    # add sensible labels
    for n in sorted(nodes):
        yield """ "{}" [label="{}"]; """.format(n, label(n))
    for a, b in sorted(edges):
        yield """ "{}" -> "{}"; """.format(a, b)
    yield "}"


def main():
    """Collect the citation edges around the seed and print them as dot."""
    for line in render_dot(collect_edges(seed, max_nodes)):
        print(line)


if __name__ == "__main__":
    main()