#!/usr/bin/env python
"""
Take a document fatcat id and emit the citation structure up to X levels as
a graphviz dot file.

    $ python levels.py [RELEASE IDENT]

Throwaway script, expected to be run on the same machine as the ref index
currently.
"""

import collections
import functools
import sys

# NOTE: elasticsearch, elasticsearch_dsl and requests are imported lazily
# inside the functions that need them, so the pure helpers in this file
# (label formatting, graph traversal) can be imported and exercised without
# those packages or a running index.

seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"
max_nodes_total = 30  # traversal budget, see collect_edges() for exact meaning
max_nodes = 5  # maximum number of outgoing references followed per node


@functools.lru_cache(maxsize=1)
def _search():
    """Return the (lazily created, cached) Search object for the ref index."""
    from elasticsearch import Elasticsearch
    from elasticsearch_dsl import Search

    client = Elasticsearch("http://localhost:9200")
    return Search(using=client, index="fatcat_ref_v01")


def dot_escape(text):
    """Escape backslashes and double quotes for use in a quoted DOT string."""
    return text.replace("\\", "\\\\").replace('"', '\\"')


def pretty_label(s, k=10):
    """
    Break a string into short lines for use as a graphviz node label.

    Words are collected greedily; once the current line has reached at least
    `k` characters, it is closed and the next word starts a new line. Lines
    are joined with a literal backslash-n sequence (not a newline character),
    which is what this script writes into the DOT label attribute.

    Returns an empty string for empty or missing (None) input.
    """
    if not s:
        return ""
    cur, result = [], []
    for p in s.split():
        if len(" ".join(cur)) < k:
            cur.append(p)
        else:
            result.append(" ".join(cur))
            # The word that triggered the break opens the next line; it must
            # not be dropped.
            cur = [p]
    if cur:
        result.append(" ".join(cur))
    return "\\n".join(result)


def get_dot_label(ident):
    """
    Return a DOT-safe, line-wrapped label for a release ident.

    The release title is looked up via the public fatcat API. If the lookup
    fails or the response carries no title, the ident itself is used as the
    label, so that a single bad record does not abort the whole graph.
    """
    import requests

    url = "https://api.fatcat.wiki/v0/release/{}".format(ident)
    try:
        # A timeout keeps one stalled request from hanging the script.
        title = requests.get(url, timeout=30).json().get("title")
    except (requests.RequestException, ValueError) as exc:
        print("could not fetch title for {}: {}".format(ident, exc),
              file=sys.stderr)
        title = None
    # Escape before wrapping, so the backslash-n separators inserted by
    # pretty_label are not escaped themselves.
    return pretty_label(dot_escape(title or ident), k=10)


def unique_targets(ident):
    """
    Return the distinct target release idents referenced by `ident`.

    The result is a sorted list, so that truncating it (see `max_nodes`)
    selects the same targets on every run.
    """
    s = _search().query("match", source_release_ident=ident)
    return sorted({hit["target_release_ident"] for hit in s.scan()})


def collect_edges(start, neighbors, limit, fanout):
    """
    Walk the citation graph breadth-first from `start` and return its edges.

    Args:
        start: ident of the node to start from.
        neighbors: callable mapping an ident to a list of target idents.
        limit: traversal budget; the walk stops as soon as the `limit`-th
            node is taken from the queue, i.e. at most `limit - 1` nodes
            are expanded.
        fanout: only the first `fanout` targets of each node are followed.

    Returns:
        A set of `(source, target)` tuples. Self references are skipped
        (and reported on stderr).
    """
    queue = collections.deque([start])
    # Every node is queued at most once, so citation cycles do not eat up
    # the traversal budget with repeated expansions of the same node.
    seen = {start}
    edges = set()
    expanded = 0
    while queue:
        node = queue.popleft()
        expanded += 1
        if expanded == limit:
            break
        for n in neighbors(node)[:fanout]:
            if n == node:
                print("skipping self ref: {}".format(n), file=sys.stderr)
                continue
            edges.add((node, n))
            if n not in seen:
                seen.add(n)
                queue.append(n)
    return edges


def main():
    """Crawl the reference graph from `seed` and print it as a DOT digraph."""
    edges = collect_edges(seed, unique_targets, max_nodes_total, max_nodes)

    nodes = set()
    for a, b in edges:
        nodes.add(a)
        nodes.add(b)

    print("digraph G {")
    # add sensible labels; sorted for reproducible output
    for n in sorted(nodes):
        print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n)))
    for a, b in sorted(edges):
        print(""" "{}" -> "{}"; """.format(a, b))
    print("}")


if __name__ == "__main__":
    main()