diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-03 00:15:20 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-03 00:15:20 +0200 |
commit | c63436986ba0c36671974fe74c48d0d93727caf3 (patch) | |
tree | e2aadc96be3bd186a3e8705854464d9ac0cc8440 /extra/dot/levels_inbound.py | |
parent | df839db51aab1288e4b4ef36c8323e21662d0141 (diff) | |
download | refcat-c63436986ba0c36671974fe74c48d0d93727caf3.tar.gz refcat-c63436986ba0c36671974fe74c48d0d93727caf3.zip |
update notes on dot
Diffstat (limited to 'extra/dot/levels_inbound.py')
-rw-r--r-- | extra/dot/levels_inbound.py | 64 |
1 files changed, 35 insertions, 29 deletions
```diff
diff --git a/extra/dot/levels_inbound.py b/extra/dot/levels_inbound.py
index 290ab6f..9a5d398 100644
--- a/extra/dot/levels_inbound.py
+++ b/extra/dot/levels_inbound.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
-
 """
 Take a document fatcat id and emit the citation structure up to X levels as graphviz dot file.
+
+Copy-pasted from levels.py - throwaway code.
 """
 
 from elasticsearch import Elasticsearch
@@ -10,14 +11,16 @@ import sys
 import requests
 
 seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"
-max_nodes = 50
+max_nodes_total = 50
+max_nodes = 5
 
 client = Elasticsearch("http://localhost:9200")
 S = Search(using=client, index="fatcat_ref_v01")
 
+
 def pretty_label(s, k=10):
     parts = s.split()
-    cur, result =[], []
+    cur, result = [], []
     for p in parts:
         if len(" ".join(cur)) < k:
             cur.append(p)
@@ -27,8 +30,10 @@ def pretty_label(s, k=10):
     result.append(" ".join(cur))
     return "\\n".join(result)
 
+
 def get_dot_label(ident):
-    title = requests.get("https://api.fatcat.wiki/v0/release/{}".format(ident)).json().get("title")
+    title = requests.get("https://api.fatcat.wiki/v0/release/{}".format(
+        ident)).json().get("title")
     return pretty_label(title, k=10)
 
 
@@ -42,32 +47,33 @@ def unique_sources(ident):
         unique_source_idents.add(hit["source_release_ident"])
     return list(unique_source_idents)
 
-queue = set([seed])
-i = 0
-edges = set()
-while queue:
-    node = queue.pop()
-    i += 1
-    if i == max_nodes:
-        break
-    for n in unique_sources(node)[:5]:
-        if n == node:
-            print("skipping self ref: {}".format(n), file=sys.stderr)
-            continue
-        edges.add((node, n))
-        queue.add(n)
 
-nodes = set()
-for a, b in edges:
-    nodes.add(a)
-    nodes.add(b)
+if __name__ == '__main__':
+    queue = set([seed])
+    edges = set()
+    i = 0
+    while queue:
+        node = queue.pop()
+        i += 1
+        if i == max_nodes_total:
+            break
+        for n in unique_sources(node)[:max_nodes]:
+            if n == node:
+                print("skipping self ref: {}".format(n), file=sys.stderr)
+                continue
+            edges.add((node, n))
+            queue.add(n)
 
-print("digraph G {")
-# add sensible labels
-for n in nodes:
-    print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n)))
+    nodes = set()
+    for a, b in edges:
+        nodes.add(a)
+        nodes.add(b)
 
-for a, b in edges:
-    print(""" "{}" -> "{}"; """.format(a, b))
-print("}")
+    print("digraph G {")
+    # add sensible labels
+    for n in nodes:
+        print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n)))
 
+    for a, b in edges:
+        print(""" "{}" -> "{}"; """.format(a, b))
+    print("}")
```