diff options
Diffstat (limited to 'extra/dot/levels.py')
-rw-r--r-- | extra/dot/levels.py | 46 |
1 files changed, 46 insertions, 0 deletions
#!/usr/bin/env python

"""
Take a fatcat release ident and emit its citation structure as a graphviz dot
file.

Starting at the seed ident, outbound references are followed breadth-first
until at most ``max_nodes`` releases have been expanded (queried for their
targets). The collected edges are printed to stdout as a ``digraph``.

Usage: levels.py [SEED_IDENT]
"""

from collections import deque
import sys

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

# Seed used when no ident is given on the command line.
DEFAULT_SEED = "rgzyfbx7t5d53m5amvbllewzqi"

# Upper bound on the number of releases whose references are looked up.
max_nodes = 50

client = Elasticsearch("http://localhost:9200")
S = Search(using=client, index="fatcat_ref_v01")


def unique_targets(ident):
    """
    Return the distinct ``target_release_ident`` values of all reference
    documents whose ``source_release_ident`` matches `ident`, as a list
    (order unspecified).
    """
    s = S.query("match", source_release_ident=ident)
    # scan() iterates over all matching hits, not just the first page.
    return list({hit["target_release_ident"] for hit in s.scan()})


def crawl(seed, limit=None, fetch=unique_targets):
    """
    Walk the reference graph breadth-first from `seed`.

    At most `limit` nodes are expanded (defaults to the module-level
    ``max_nodes``); `fetch` maps an ident to an iterable of target idents.
    Returns the set of ``(source, target)`` edges that were seen.
    """
    if limit is None:
        limit = max_nodes
    pending = deque([seed])
    # Every ident ever enqueued; prevents re-querying a release that is
    # reachable via several paths or via a citation cycle.
    seen = {seed}
    edges = set()
    expanded = 0
    while pending and expanded < limit:
        node = pending.popleft()  # FIFO keeps the walk level by level
        expanded += 1
        for target in fetch(node):
            edges.add((node, target))
            if target not in seen:
                seen.add(target)
                pending.append(target)
    return edges


def _dot_id(value):
    """Return `value` as a double-quoted DOT ID, escaping embedded quotes."""
    return '"{}"'.format(str(value).replace('"', '\\"'))


def to_dot(edges):
    """
    Render `edges` (an iterable of ``(source, target)`` pairs) as a DOT
    ``digraph`` string. Edges are emitted in sorted order so the output is
    stable between runs.
    """
    lines = ["digraph G {"]
    for a, b in sorted(edges):
        # Quote the IDs: a bare DOT ID must not begin with a digit, which an
        # ident is not guaranteed to satisfy.
        lines.append("{} -> {};".format(_dot_id(a), _dot_id(b)))
    lines.append("}")
    return "\n".join(lines)


def main():
    """Read the optional seed ident from argv and print the DOT graph."""
    seed = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_SEED
    print(to_dot(crawl(seed)))


if __name__ == "__main__":
    main()