#!/usr/bin/env python
"""
Take a document fatcat id and emit the citation structure as graphviz dot
file.

Starting from a seed release ident, inbound references are followed
breadth-first. The crawl is bounded by ``max_nodes`` (number of nodes taken
off the queue) and by a per-node fan-out limit. The resulting graph is written
to stdout; diagnostics go to stderr.

Usage: pass the seed release ident as first argument (a default is used
otherwise) and redirect stdout into a ``.dot`` file.
"""

import functools
import sys
from collections import deque

seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"
max_nodes = 50


@functools.lru_cache(maxsize=1)
def _search():
    """Build the Elasticsearch search handle once, on first use."""
    # Imported lazily so the pure helpers in this file can be imported (and
    # tested) without the Elasticsearch client stack being installed.
    from elasticsearch import Elasticsearch
    from elasticsearch_dsl import Search

    client = Elasticsearch("http://localhost:9200")
    return Search(using=client, index="fatcat_ref_v01")


def _dot_escape(text):
    """Escape backslashes and double quotes for a double-quoted DOT string."""
    return text.replace("\\", "\\\\").replace('"', '\\"')


def pretty_label(s, k=10):
    """
    Wrap ``s`` into short lines for use as a graphviz label.

    Words are appended to the current line until that line is at least ``k``
    characters long; the next word then starts a new line. Lines are joined
    with a literal backslash-n, which graphviz renders as a line break.
    ``None`` or an empty string yields an empty label.
    """
    lines, cur = [], []
    for word in (s or "").split():
        if cur and len(" ".join(cur)) >= k:
            lines.append(" ".join(cur))
            cur = []
        # The word that triggered the break opens the new line; previously
        # it was silently dropped.
        cur.append(word)
    if cur:
        lines.append(" ".join(cur))
    return "\\n".join(lines)


def get_dot_label(ident):
    """
    Return a wrapped, DOT-safe label for the release ``ident``.

    The title is fetched from the fatcat API. If the request fails or the
    response carries no title, the ident itself is used as label so that one
    bad record does not abort the whole graph.
    """
    import requests  # lazy, see _search()

    url = "https://api.fatcat.wiki/v0/release/{}".format(ident)
    title = None
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        title = resp.json().get("title")
    except (requests.RequestException, ValueError) as exc:
        print("failed to fetch title for {}: {}".format(ident, exc), file=sys.stderr)
    # Escape before wrapping: pretty_label inserts backslash-n separators
    # that must not be escaped again.
    return pretty_label(_dot_escape(title or ident), k=10)


def unique_sources(ident):
    """
    Unique inbound references.

    Returns a sorted list of the distinct ``source_release_ident`` values of
    all index hits whose ``target_release_ident`` matches ``ident``. Sorting
    makes any later truncation of the list reproducible between runs.
    """
    s = _search().query("match", target_release_ident=ident)
    return sorted({hit["source_release_ident"] for hit in s.scan()})


def crawl(seed_ident, limit=None, fanout=5, fetch_sources=None):
    """
    Collect citation edges breadth-first, starting at ``seed_ident``.

    limit: budget of nodes taken off the queue; the crawl stops as soon as
        the ``limit``-th node is dequeued. Defaults to ``max_nodes``.
    fanout: at most this many sources are followed per node.
    fetch_sources: callable mapping an ident to a list of source idents;
        defaults to ``unique_sources``.

    Returns a set of ``(node, source)`` tuples.
    """
    if limit is None:
        limit = max_nodes
    if fetch_sources is None:
        fetch_sources = unique_sources

    pending = deque([seed_ident])
    seen = {seed_ident}
    edges = set()
    dequeued = 0
    while pending:
        node = pending.popleft()
        dequeued += 1
        if dequeued == limit:
            break
        for src in fetch_sources(node)[:fanout]:
            if src == node:
                print("skipping self ref: {}".format(src), file=sys.stderr)
                continue
            edges.add((node, src))
            # Record the edge even for known nodes, but expand each node
            # only once so citation cycles do not eat the budget.
            if src not in seen:
                seen.add(src)
                pending.append(src)
    return edges


def build_dot(edges, label_for=None):
    """
    Render ``edges`` (an iterable of ident pairs) as a graphviz digraph.

    label_for: callable mapping an ident to its label text; defaults to
        ``get_dot_label``. Nodes and edges are emitted in sorted order so the
        output is stable.
    """
    if label_for is None:
        label_for = get_dot_label
    nodes = {ident for edge in edges for ident in edge}
    lines = ["digraph G {"]
    # add sensible labels
    for ident in sorted(nodes):
        lines.append(""" "{}" [label="{}"]; """.format(ident, label_for(ident)))
    for a, b in sorted(edges):
        lines.append(""" "{}" -> "{}"; """.format(a, b))
    lines.append("}")
    return "\n".join(lines)


def main():
    """Crawl from the configured seed and print the dot graph to stdout."""
    print(build_dot(crawl(seed)))


if __name__ == "__main__":
    main()