#!/usr/bin/env python
"""
Take a document fatcat id and emit the citation structure up to X levels as
graphviz dot file.

The seed release ident is read from the first command line argument (a
built-in default is used when none is given). Starting from the seed, the
reference index is walked breadth-first along source -> target edges until
`max_nodes` nodes have been expanded or no unexpanded node is left. The
collected edges are printed to stdout as a `digraph`.
"""

from collections import deque
import sys

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"
# Upper bound on the number of nodes whose outgoing references are looked up.
max_nodes = 50

client = Elasticsearch("http://localhost:9200")
S = Search(using=client, index="fatcat_ref_v01")


def unique_targets(ident):
    """Return the distinct target release idents referenced by `ident`.

    Runs a `match` query on `source_release_ident` and scans all hits.
    Duplicates are dropped; the result is a list in first-seen order of the
    scan, so repeated calls do not depend on set iteration order.
    """
    s = S.query("match", source_release_ident=ident)
    seen = set()
    targets = []
    for hit in s.scan():
        target = hit["target_release_ident"]
        if target not in seen:
            seen.add(target)
            targets.append(target)
    return targets


def _crawl(start, limit):
    """Breadth-first walk from `start`, expanding at most `limit` nodes.

    Returns a list of unique (source, target) edges in discovery order.
    """
    pending = deque([start])
    # Everything that was ever queued; keeps cycles and shared targets from
    # being queried (and counted against the limit) more than once.
    queued = {start}
    found = []
    expanded = 0
    while pending and expanded < limit:
        node = pending.popleft()  # FIFO: finish one level before the next
        expanded += 1
        for target in unique_targets(node):
            # Each node is expanded once and its targets are unique, so no
            # edge can be appended twice.
            found.append((node, target))
            if target not in queued:
                queued.add(target)
                pending.append(target)
    return found


def _dot_id(ident):
    """Return `ident` as a double-quoted DOT ID.

    Unquoted DOT IDs must not start with a digit (unless they are numerals),
    so identifiers are always quoted; embedded double quotes are escaped.
    """
    return '"{}"'.format(str(ident).replace('"', '\\"'))


edges = _crawl(seed, max_nodes)

print("digraph G {")
for a, b in edges:
    print("{} -> {};".format(_dot_id(a), _dot_id(b)))
print("}")