aboutsummaryrefslogtreecommitdiffstats
path: root/extra/dot/levels.py
diff options
context:
space:
mode:
Diffstat (limited to 'extra/dot/levels.py')
-rw-r--r-- extra/dot/levels.py 46
1 files changed, 46 insertions, 0 deletions
diff --git a/extra/dot/levels.py b/extra/dot/levels.py
new file mode 100644
index 0000000..ff1af27
--- /dev/null
+++ b/extra/dot/levels.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+"""
+Take a fatcat release ident and emit the citation graph reachable from it (size bounded by max_nodes) as a graphviz dot file.
+"""
+
+from elasticsearch import Elasticsearch
+from elasticsearch_dsl import Search
+import sys
+
+seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"  # start ident: first CLI argument, else a built-in example
+max_nodes = 50  # bounds how many nodes the traversal below processes
+
+client = Elasticsearch("http://localhost:9200")  # expects an Elasticsearch instance on localhost
+S = Search(using=client, index="fatcat_ref_v01")  # base search on the reference index; per-ident queries derive from it
+
+def unique_targets(ident):
+    """Return a list of the distinct target release idents cited by release `ident`."""
+    refs = S.query("match", source_release_ident=ident)
+    # scan() iterates over every matching reference document, not only the first page.
+    targets = {ref["target_release_ident"] for ref in refs.scan()}
+    return list(targets)
+
+# Breadth-first walk from the seed. `queue` is an append-only FIFO read by
+# index; `seen` keeps a release from being queued (and queried) twice when
+# citation paths converge or form a cycle.
+queue = [seed]
+seen = {seed}
+edges = set()
+expanded = 0
+# Expand at most max_nodes releases, closest to the seed first; edges to
+# releases beyond the limit are still recorded, they are just not followed.
+while expanded < min(len(queue), max_nodes):
+    node = queue[expanded]
+    expanded += 1
+    for target in unique_targets(node):
+        edges.add((node, target))
+        if target not in seen:
+            seen.add(target)
+            queue.append(target)
+
+print("digraph G {")
+# Quote ids: a bare DOT ID must not start with a digit, which idents may do.
+for a, b in edges:
+    print('"{}" -> "{}";'.format(a, b))
+print("}")