diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-04-19 20:21:10 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-04-19 20:29:17 +0200 |
commit | 7b66e7d1c658a26b8dc90ab8a3f7838a6b97870b (patch) | |
tree | 2bff086a98dbaea00dc881bed61fcd6e93c5d093 /extra | |
parent | bb87a43e838ea7003df2ab1ac4ba6602f0f52b00 (diff) | |
download | refcat-7b66e7d1c658a26b8dc90ab8a3f7838a6b97870b.tar.gz refcat-7b66e7d1c658a26b8dc90ab8a3f7838a6b97870b.zip |
update script
Diffstat (limited to 'extra')
-rw-r--r-- | extra/dot/levels.py | 44 |
1 files changed, 34 insertions, 10 deletions
```diff
diff --git a/extra/dot/levels.py b/extra/dot/levels.py
index ff1af27..96af9c4 100644
--- a/extra/dot/levels.py
+++ b/extra/dot/levels.py
@@ -7,13 +7,31 @@ Take a document fatcat id and emit the citation structure up to X levels as grap
 from elasticsearch import Elasticsearch
 from elasticsearch_dsl import Search
 import sys
+import requests
 
 seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"
-max_nodes = 50
+max_nodes = 30
 
 client = Elasticsearch("http://localhost:9200")
 S = Search(using=client, index="fatcat_ref_v01")
 
+def pretty_label(s, k=10):
+    parts = s.split()
+    cur, result =[], []
+    for p in parts:
+        if len(" ".join(cur)) < k:
+            cur.append(p)
+        else:
+            result.append(" ".join(cur))
+            cur = []
+    result.append(" ".join(cur))
+    return "\\n".join(result)
+
+def get_dot_label(ident):
+    title = requests.get("https://api.fatcat.wiki/v0/release/{}".format(ident)).json().get("title")
+    return pretty_label(title, k=10)
+
+
 def unique_targets(ident):
     s = S.query("match", source_release_ident=ident)
     unique_target_idents = set()
@@ -29,18 +47,24 @@ while queue:
     i += 1
     if i == max_nodes:
         break
-    for n in unique_targets(node):
+    for n in unique_targets(node)[:5]:
+        if n == node:
+            print("skipping self ref: {}".format(n), file=sys.stderr)
+            continue
         edges.add((node, n))
         queue.add(n)
 
+nodes = set()
+for a, b in edges:
+    nodes.add(a)
+    nodes.add(b)
+
 print("digraph G {")
+# add sensible labels
+for n in nodes:
+    print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n)))
+
 for a, b in edges:
-    print("{} -> {};".format(a, b))
+    print(""" "{}" -> "{}"; """.format(a, b))
 print("}")
-# print(edges)
-# s = S.query("match", source_release_ident=seed)
-# for hit in s.scan():
-#     print(hit["target_release_ident"])
-#     k = S.query("match", source_release_ident=hit["target_release_ident"])
-#     for h in k.scan():
-#         print("\t{}".format(h["target_release_ident"]))
+
```