blob: 290ab6f8787b9d83786eec023a1663a6481feacb (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
#!/usr/bin/env python
"""
Take a document fatcat id and emit the citation structure up to X levels as graphviz dot file.
"""
from collections import deque
import sys

import requests
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
# Release ident the walk starts from: first command line argument if given,
# otherwise a built-in example ident.
if len(sys.argv) > 1:
    seed = sys.argv[1]
else:
    seed = "rgzyfbx7t5d53m5amvbllewzqi"

# Budget for the traversal loop below, which stops once its counter of
# dequeued nodes reaches this value.
max_nodes = 50

# Search handle on the reference index of a local Elasticsearch instance.
client = Elasticsearch("http://localhost:9200")
S = Search(using=client, index="fatcat_ref_v01")
def pretty_label(s, k=10):
    """
    Wrap a string into a multi-line label.

    The string is split on whitespace. Words are appended to the current
    line while that line is shorter than ``k`` characters; once the line has
    reached ``k`` characters, the next word opens a new line. Words are never
    split, so a line may be longer than ``k``.

    Returns the lines joined by a literal backslash followed by ``n`` (two
    characters, not a newline). An empty or whitespace-only input yields an
    empty string.
    """
    lines, cur = [], []
    for word in s.split():
        # The `cur` guard keeps us from emitting an empty first line when
        # k <= 0.
        if cur and len(" ".join(cur)) >= k:
            lines.append(" ".join(cur))
            # The word that did not fit must open the next line; resetting
            # to an empty list here would silently drop it from the label.
            cur = [word]
        else:
            cur.append(word)
    lines.append(" ".join(cur))
    return "\\n".join(lines)
def get_dot_label(ident):
    """
    Return a wrapped label for a release, based on its title.

    Fetches the release from the fatcat API and wraps its ``title`` with
    ``pretty_label``. If the response carries no title, the ident itself is
    used so the node still gets a label.

    The result is meant to be placed inside a double-quoted DOT string, so
    backslashes and double quotes in the title are escaped.
    """
    url = "https://api.fatcat.wiki/v0/release/{}".format(ident)
    # Without a timeout a stalled connection would block the script forever.
    resp = requests.get(url, timeout=30)
    title = resp.json().get("title")
    if not title:
        # Missing or empty title: fall back to the ident rather than failing
        # on None inside pretty_label.
        title = ident
    # Escape backslashes first, so the backslashes added for the quotes are
    # not escaped a second time.
    title = title.replace("\\", "\\\\").replace('"', '\\"')
    return pretty_label(title, k=10)
def unique_sources(ident):
    """
    Return the distinct source release idents referencing the given release.

    Runs a match query on ``target_release_ident`` against the module-level
    search object, scans all hits and collects their
    ``source_release_ident`` values. Duplicates are removed through a set, so
    the order of the returned list is not defined.
    """
    query = S.query("match", target_release_ident=ident)
    sources = {hit["source_release_ident"] for hit in query.scan()}
    return list(sources)
# Breadth-first walk over inbound references, starting at the seed.
#
# `queue` is a FIFO, so releases are expanded level by level. `seen` holds
# every ident that has ever been enqueued, so a release reached again
# (e.g. through a citation cycle) is not queried a second time and does not
# use up the node budget.
queue = deque([seed])
seen = {seed}
i = 0  # number of nodes taken off the queue so far
edges = set()
while queue:
    node = queue.popleft()
    i += 1
    # The check runs before expansion, so at most max_nodes - 1 nodes are
    # actually expanded.
    if i == max_nodes:
        break
    # Follow at most five inbound references per node to keep the graph
    # readable.
    for n in unique_sources(node)[:5]:
        if n == node:
            print("skipping self ref: {}".format(n), file=sys.stderr)
            continue
        # Edges are recorded as (target, source).
        edges.add((node, n))
        if n not in seen:
            seen.add(n)
            queue.append(n)
# Every ident that appears on either end of an edge becomes a graph node.
nodes = {ident for edge in edges for ident in edge}

print("digraph G {")

# Node declarations: one per ident, labelled via get_dot_label.
node_line = """ "{}" [label="{}"]; """
for ident in nodes:
    label = get_dot_label(ident)
    print(node_line.format(ident, label))

# Edge declarations, in the (first, second) order stored in `edges`.
edge_line = """ "{}" -> "{}"; """
for edge in edges:
    print(edge_line.format(*edge))

print("}")
|