From c63436986ba0c36671974fe74c48d0d93727caf3 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Sat, 3 Jul 2021 00:15:20 +0200 Subject: update notes on dot --- extra/dot/README.md | 5 ++++ extra/dot/levels.py | 72 +++++++++++++++++++++++++-------------------- extra/dot/levels_inbound.py | 64 ++++++++++++++++++++++------------------ 3 files changed, 80 insertions(+), 61 deletions(-) (limited to 'extra/dot') diff --git a/extra/dot/README.md b/extra/dot/README.md index 6914dc1..bad41b8 100644 --- a/extra/dot/README.md +++ b/extra/dot/README.md @@ -2,3 +2,8 @@ Examples using ref index plus fatcat API to generate citation graph snippets via graphviz. + +![](example.png) + +![](example_inbound.png) + diff --git a/extra/dot/levels.py b/extra/dot/levels.py index 96af9c4..ab4bf85 100644 --- a/extra/dot/levels.py +++ b/extra/dot/levels.py @@ -1,7 +1,10 @@ #!/usr/bin/env python - """ Take a document fatcat id and emit the citation structure up to X levels as graphviz dot file. + + $ python levels.py [RELEASE IDENT] + +Throwaway script, expected to be run on the same machine as the ref index currently. """ from elasticsearch import Elasticsearch @@ -10,14 +13,16 @@ import sys import requests seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi" -max_nodes = 30 +max_nodes_total = 30 +max_nodes = 5 client = Elasticsearch("http://localhost:9200") S = Search(using=client, index="fatcat_ref_v01") + def pretty_label(s, k=10): parts = s.split() - cur, result =[], [] + cur, result = [], [] for p in parts: if len(" ".join(cur)) < k: cur.append(p) @@ -27,8 +32,10 @@ def pretty_label(s, k=10): result.append(" ".join(cur)) return "\\n".join(result) + def get_dot_label(ident): - title = requests.get("https://api.fatcat.wiki/v0/release/{}".format(ident)).json().get("title") + title = (requests.get("https://api.fatcat.wiki/v0/release/{}".format( + ident)).json().get("title")) return pretty_label(title, k=10) @@ -39,32 +46,33 @@ def unique_targets(ident): unique_target_idents.add(hit["target_release_ident"]) return list(unique_target_idents) -queue = set([seed]) -i = 0 -edges = set() -while queue: - node = queue.pop() - i += 1 - if i == max_nodes: - break - for n in unique_targets(node)[:5]: - if n == node: - print("skipping self ref: {}".format(n), file=sys.stderr) - continue - edges.add((node, n)) - queue.add(n) - -nodes = set() -for a, b in edges: - nodes.add(a) - nodes.add(b) - -print("digraph G {") -# add sensible labels -for n in nodes: - print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n))) - -for a, b in edges: - print(""" "{}" -> "{}"; """.format(a, b)) -print("}") +if __name__ == "__main__": + queue = set([seed]) + edges = set() + i = 0 + while queue: + node = queue.pop() + i += 1 + if i == max_nodes_total: + break + for n in unique_targets(node)[:max_nodes]: + if n == node: + print("skipping self ref: {}".format(n), file=sys.stderr) + continue + edges.add((node, n)) + queue.add(n) + + nodes = set() + for a, b in edges: + nodes.add(a) + nodes.add(b) + + print("digraph G {") + # add sensible labels + for n in nodes: + print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n))) + + for a, b in edges: + print(""" "{}" -> "{}"; """.format(a, b)) + print("}") diff --git a/extra/dot/levels_inbound.py b/extra/dot/levels_inbound.py index 290ab6f..9a5d398 100644 --- a/extra/dot/levels_inbound.py +++ b/extra/dot/levels_inbound.py @@ -1,7 +1,8 @@ #!/usr/bin/env python - """ Take a document fatcat id and emit the citation structure up to X levels as graphviz dot file. + +Copy-pasted from levels.py - throwaway code. """ from elasticsearch import Elasticsearch @@ -10,14 +11,16 @@ import sys import requests seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi" -max_nodes = 50 +max_nodes_total = 50 +max_nodes = 5 client = Elasticsearch("http://localhost:9200") S = Search(using=client, index="fatcat_ref_v01") + def pretty_label(s, k=10): parts = s.split() - cur, result =[], [] + cur, result = [], [] for p in parts: if len(" ".join(cur)) < k: cur.append(p) @@ -27,8 +30,10 @@ def pretty_label(s, k=10): result.append(" ".join(cur)) return "\\n".join(result) + def get_dot_label(ident): - title = requests.get("https://api.fatcat.wiki/v0/release/{}".format(ident)).json().get("title") + title = requests.get("https://api.fatcat.wiki/v0/release/{}".format( + ident)).json().get("title") return pretty_label(title, k=10) @@ -42,32 +47,33 @@ def unique_sources(ident): unique_source_idents.add(hit["source_release_ident"]) return list(unique_source_idents) -queue = set([seed]) -i = 0 -edges = set() -while queue: - node = queue.pop() - i += 1 - if i == max_nodes: - break - for n in unique_sources(node)[:5]: - if n == node: - print("skipping self ref: {}".format(n), file=sys.stderr) - continue - edges.add((node, n)) - queue.add(n) -nodes = set() -for a, b in edges: - nodes.add(a) - nodes.add(b) +if __name__ == '__main__': + queue = set([seed]) + edges = set() + i = 0 + while queue: + node = queue.pop() + i += 1 + if i == max_nodes_total: + break + for n in unique_sources(node)[:max_nodes]: + if n == node: + print("skipping self ref: {}".format(n), file=sys.stderr) + continue + edges.add((node, n)) + queue.add(n) -print("digraph G {") -# add sensible labels -for n in nodes: - print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n))) + nodes = set() + for a, b in edges: + nodes.add(a) + nodes.add(b) -for a, b in edges: - print(""" "{}" -> "{}"; """.format(a, b)) -print("}") + print("digraph G {") + # add sensible labels + for n in nodes: + print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n))) + for a, b in edges: + print(""" "{}" -> "{}"; """.format(a, b)) + print("}") -- cgit v1.2.3