aboutsummaryrefslogtreecommitdiffstats
path: root/extra/dot/levels_inbound.py
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-04-21 17:49:12 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-04-21 17:49:12 +0200
commit3cb5513cb405af78a01750a29a93be28ac5d90e4 (patch)
tree46774566984606a113a17803da81f3b14ada742f /extra/dot/levels_inbound.py
parentae9e380225be648ced23d814cd1d08d1621976bd (diff)
downloadrefcat-3cb5513cb405af78a01750a29a93be28ac5d90e4.tar.gz
refcat-3cb5513cb405af78a01750a29a93be28ac5d90e4.zip
wip: a few dot examples
Diffstat (limited to 'extra/dot/levels_inbound.py')
-rw-r--r--extra/dot/levels_inbound.py73
1 files changed, 73 insertions, 0 deletions
diff --git a/extra/dot/levels_inbound.py b/extra/dot/levels_inbound.py
new file mode 100644
index 0000000..290ab6f
--- /dev/null
+++ b/extra/dot/levels_inbound.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+"""
+Take a document fatcat id and emit the citation structure up to X levels as graphviz dot file.
+"""
+
+from elasticsearch import Elasticsearch
+from elasticsearch_dsl import Search
+import sys
+import requests
+
# Release ident to start from: first CLI argument, else a fixed example ident.
try:
    seed = sys.argv[1]
except IndexError:
    seed = "rgzyfbx7t5d53m5amvbllewzqi"

# Bound on the traversal performed further below.
max_nodes = 50

# Search handle on the reference index of a local elasticsearch instance.
client = Elasticsearch("http://localhost:9200")
S = Search(index="fatcat_ref_v01", using=client)
+
def pretty_label(s, k=10):
    """
    Wrap ``s`` into short lines suitable for a graphviz label.

    Words are collected on a line until the line is at least ``k``
    characters long; the next word then starts a new line. Lines are joined
    with the two-character sequence backslash + "n", which dot renders as a
    line break inside a label. Every word of ``s`` is kept, and an empty or
    whitespace-only ``s`` yields an empty string.
    """
    lines, cur = [], []
    for word in s.split():
        # Flush a full line first, then keep the word that triggered the
        # flush as the start of the next line instead of dropping it.
        if cur and len(" ".join(cur)) >= k:
            lines.append(" ".join(cur))
            cur = []
        cur.append(word)
    if cur:
        # Only emit the remainder if there is one; avoids a trailing break.
        lines.append(" ".join(cur))
    return "\\n".join(lines)
+
def get_dot_label(ident):
    """
    Return a wrapped graphviz label for the release ``ident``.

    The title is fetched from the public fatcat API. If the response carries
    no title, the identifier itself is used, so a label is always produced.
    Network and JSON decoding errors are not caught and propagate to the
    caller.
    """
    url = "https://api.fatcat.wiki/v0/release/{}".format(ident)
    # Without a timeout a stalled connection would block the script forever.
    resp = requests.get(url, timeout=30)
    title = resp.json().get("title") or ident
    # The label is emitted inside a double-quoted dot string, so escape the
    # characters that would terminate or corrupt it (backslash first).
    title = title.replace("\\", "\\\\").replace('"', '\\"')
    return pretty_label(title, k=10)
+
+
def unique_sources(ident):
    """
    Unique inbound references.

    Return the distinct ``source_release_ident`` values of all documents in
    the reference index whose ``target_release_ident`` matches ``ident``.
    The list is sorted, so callers that only take a prefix of it get the
    same result on every run.
    """
    s = S.query("match", target_release_ident=ident)
    return sorted({hit["source_release_ident"] for hit in s.scan()})
+
# Breadth-first walk over inbound references, starting at the seed.
queue = [seed]  # nodes in discovery order; read front to back as a FIFO
seen = {seed}  # citation graphs may contain cycles; enqueue each node once
edges = set()
i = 0  # number of nodes expanded so far
# Expand at most max_nodes nodes; every expanded node contributes edges.
while i < len(queue) and i < max_nodes:
    node = queue[i]
    i += 1
    # Cap the fan-out per node to keep the rendered graph readable.
    for n in unique_sources(node)[:5]:
        if n == node:
            print("skipping self ref: {}".format(n), file=sys.stderr)
            continue
        edges.add((node, n))
        if n not in seen:
            seen.add(n)
            queue.append(n)
+
# Every identifier that appears at either end of an edge is a graph node.
nodes = {ident for edge in edges for ident in edge}
+
# Write the graph in dot syntax to stdout: node statements with labels
# first, then one edge statement per collected pair.
print("digraph G {")
for ident in nodes:
    label = get_dot_label(ident)
    print(' "{}" [label="{}"]; '.format(ident, label))

for left, right in edges:
    print(' "{}" -> "{}"; '.format(left, right))
print("}")
+