1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
#!/usr/bin/env python
"""
Take a document fatcat id and emit the citation structure up to X levels as graphviz dot file.
$ python levels.py [RELEASE IDENT]
Throwaway script, expected to be run on the same machine as the ref index currently.
"""
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import sys
import requests
# Release ident the traversal starts from: first CLI argument, or a built-in
# example ident when none is given.
seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"
# Traversal budget: the main loop stops once its pop counter reaches this value.
max_nodes_total = 30
# Maximum number of targets followed per expanded node.
max_nodes = 5
# Elasticsearch is expected to be reachable on the local machine.
client = Elasticsearch("http://localhost:9200")
# Base search object bound to the "fatcat_ref_v01" index; queries derive from it.
S = Search(using=client, index="fatcat_ref_v01")
def pretty_label(s, k=10):
    """Wrap a title into multiple lines for use as a graphviz dot label.

    Words are appended to the current line until that line is at least `k`
    characters long; the next word then starts a new line. Lines are joined
    with the two-character sequence backslash + "n", which dot renders as a
    line break inside a label.

    Args:
        s: Text to wrap; split on whitespace. ``None`` or an empty string
            yields an empty label.
        k: Minimum length a line must reach before it is wrapped.

    Returns:
        The wrapped label as a single string, with every word of `s` kept.
    """
    lines, cur = [], []
    for word in (s or "").split():
        if cur and len(" ".join(cur)) >= k:
            lines.append(" ".join(cur))
            # Start the next line with the word that triggered the wrap;
            # resetting to an empty list here would silently drop it.
            cur = [word]
        else:
            cur.append(word)
    lines.append(" ".join(cur))
    return "\\n".join(lines)
def get_dot_label(ident):
    """Fetch the title of release `ident` and format it as a dot label.

    The title is looked up via the public fatcat API. If the response carries
    no title, the ident itself is returned so the node stays identifiable
    instead of the script crashing.

    Args:
        ident: Release ident to look up.

    Returns:
        A label string suitable for placing inside a double-quoted dot
        attribute value.
    """
    resp = requests.get(
        "https://api.fatcat.wiki/v0/release/{}".format(ident),
        # Without a timeout a stalled connection would block the script forever.
        timeout=30,
    )
    title = resp.json().get("title")
    if not title:
        return ident
    # Escape backslashes and double quotes so the title cannot terminate or
    # corrupt the quoted dot string it is embedded in.
    escaped = title.replace("\\", "\\\\").replace('"', '\\"')
    return pretty_label(escaped, k=10)
def unique_targets(ident):
    """Return the distinct target release idents referenced by `ident`.

    Scans every document in the ref index whose source_release_ident matches
    `ident` and collects the target_release_ident values, deduplicated. The
    order of the returned list is unspecified.
    """
    query = S.query("match", source_release_ident=ident)
    return list({doc["target_release_ident"] for doc in query.scan()})
if __name__ == "__main__":
    from collections import deque

    # Breadth-first walk over outgoing references, starting at the seed.
    # A FIFO queue expands nodes level by level; `seen` ensures each ident is
    # queued at most once, so cycles and shared references do not burn the
    # expansion budget on nodes that were already handled.
    queue = deque([seed])
    seen = {seed}
    edges = set()
    i = 0
    while queue:
        node = queue.popleft()
        i += 1
        # Budget exhausted: the node popped on this iteration is not expanded.
        if i == max_nodes_total:
            break
        for n in unique_targets(node)[:max_nodes]:
            if n == node:
                print("skipping self ref: {}".format(n), file=sys.stderr)
                continue
            edges.add((node, n))
            if n not in seen:
                seen.add(n)
                queue.append(n)

    # Only idents that take part in at least one edge are emitted as nodes.
    nodes = {ident for edge in edges for ident in edge}

    print("digraph G {")
    # add sensible labels
    for n in nodes:
        print(""" "{}" [label="{}"]; """.format(n, get_dot_label(n)))
    for a, b in edges:
        print(""" "{}" -> "{}"; """.format(a, b))
    print("}")
|