about summary refs log tree commit diff stats
path: root/extra
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-04-19 20:20:11 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-04-19 20:29:17 +0200
commit bb87a43e838ea7003df2ab1ac4ba6602f0f52b00 (patch)
tree a1fb6bcac633793d1df8dc1542d51728c0c77b8a /extra
parent 6d919ce1c0cf3806945b918f506af9715215319d (diff)
download refcat-bb87a43e838ea7003df2ab1ac4ba6602f0f52b00.tar.gz
refcat-bb87a43e838ea7003df2ab1ac4ba6602f0f52b00.zip
a dot example
Diffstat (limited to 'extra')
-rw-r--r-- extra/dot/example.dot 80
-rw-r--r-- extra/dot/example.png bin 0 -> 508096 bytes
-rw-r--r-- extra/dot/levels.py 46
3 files changed, 126 insertions, 0 deletions
diff --git a/extra/dot/example.dot b/extra/dot/example.dot
new file mode 100644
index 0000000..337a3f1
--- /dev/null
+++ b/extra/dot/example.dot
@@ -0,0 +1,80 @@
+digraph G {  // Example graph: labelled node statements first, then directed edges between node ids.
+ "btc4q4nphzd23olbeacsqop2zu" [label="SMOTE: Synthetic\nOver-sampling\n"];  // node: quoted id plus a label; "\n" inside a label is a line break. NOTE(review): labels look like shortened titles - confirm how they were generated.
+ "si3tvuodjrgydnsz226nugqly4" [label="Context-aware\nrecommendation"];
+ "jux5jscohne6zp6x6kiada7wr4" [label="Google's Neural\nTranslation\nBridging the\nbetween Human\nMachine Translation"];
+ "bi4v5ye2czdknawolxzsm2mx4q" [label="Measuring the\nof a Scientific\nthrough Citation\n"];
+ "3a3i7yfkpveiblvcs5m6oxgv5e" [label="How can we\ncitation behavior?\nstudy of reasons\nciting literature\ncommunication"];
+ "srfdnl66kjhyzhb4zle3ko47fa" [label="Enhancing patent\nby citation\n"];
+ "wuqbyy2425em5aq7ppvnyafewy" [label="The norms of\nbehavior: Prolegomena\nthe footnote"];
+ "kaaamu4fhna25o3jl6rvi3ntmq" [label="Joint latent\nmodels for\nand citations"];
+ "efq5y2p7gjfr5hzqs3xf2abyeq" [label="ADADELTA: An\nLearning Rate\n"];
+ "tyk57fziezanfidyhivcb3jhem" [label="NOVELNEURALMODULATORS"];
+ "bouhrcw5drb3rdrmuxsw7qyzyy" [label="No More Pesky\nRates"];
+ "4nelyozvsnbodcyvgspojczfiq" [label="Citation content\n(CCA): A framework\nsyntactic and\nanalysis of\ncontent"];
+ "irc6sv2lqzghziut6vfhevvodq" [label="Citation context\nthe frequency\nof citation\n"];
+ "j5u4qccgvbfs5m2dzvmeesuhce" [label="Cited Documents\nConcept Symbols"];
+ "revexrw5zrcavbf5bviqtewb3q" [label="A Caution Regarding\nof Thumb for\nInflation Factors"];
+ "ucs6zoa6ircdpoo67xc5uicewi" [label="Interpreting\nof science\ncitation context\na preliminary\n"];
+ "7gethjhuzva4nejqnd4uthpogi" [label="Content-based\nanalysis: The\ngeneration\ncitation analysis"];
+ "kkjaguuugfdyvpfjto4zgez5ka" [label="Combining full\nand bibliometric\nin mapping\ndisciplines"];
+ "7icffyrunra6zkjxdgpiikbtx4" [label="Contextualizing\nfor Scientific\nusing Word\nand Domain\n"];
+ "tq2nkp3y6za6dieasvvf3misj4" [label="Highly cited\npapers and\nreasons why\ncontinue to\ncited"];
+ "6pukukj4yfcs5nrv7bhiiokyte" [label="Some Results\nthe Function\nQuality of\n"];
+ "h6q7uauk25ek7otxu3m4riamey" [label="Listen, Attend\nSpell"];
+ "2ii7d36sybgjrezymmob5vips4" [label="ScispaCy: Fast\nRobust Models\nBiomedical\nLanguage Processing"];
+ "d26wfank3zby7cwhuqnz5toucy" [label="From Softmax\nSparsemax:\nSparse Model\nAttention and\nClassification"];
+ "dth3scgitzfejgyffrtktm5qky" [label="BioBERT: a\nbiomedical\nrepresentation\nfor biomedical\nmining"];
+ "3visrjdhwbe2fjhmrhueclzas4" [label="The synthesis\nspecialty narratives\nco-citation\n"];
+ "cqmoemq4kvgj7i4bnks3pivdo4" [label="AllenNLP: A\nSemantic Natural\nProcessing\n"];
+ "svhtgtqz4jfchdncsp4v3tcydq" [label="Hedging in\nResearch Articles"];
+ "ldwfihhp4fdhxkes4faigrijae" [label="The structure\ndynamics of\nclusters: A\ncocitation\n"];
+ "4gigiwingzbs7jqprdshrobvka" [label="ClinicalBERT:\nClinical Notes\nPredicting\nReadmission"];
+ "krqznmpyazhy7c2fobjmlg6twe" [label="OpenNMT: Open-Source\nfor Neural\nTranslation"];
+ "4xwvps4evbcgfdvqkbyxxj656y" [label="Regression\n"];
+ "j7zgtiuzibeulh2fs6p4olzsqa" [label="SciBERT: A\nLanguage Model\nScientific\n"];
+ "wq4t3qsluja7xbrbr7fppwaxnm" [label="Citation structure\nan emerging\narea on the\nof application"];
+ "iffj75ycfvbv3fupmfj5ru3b5y" [label="Toward a consensus\nof science"];
+ "3qoiai4l65fhzd5dlxqsoifdem" [label="Characterizing\ncited method\nnon-method\nusing citation\nThe role of\n"];
+ "bc4inxvcafb6hejs7yi7xuwkbm" [label="Linguistic\nFeatures Improve\nMachine Translation"];
+ "r27xqgwhu5avjilcqgf2faztyi" [label="Syntactic Scaffolds\nSemantic Structures"];
+ "3eohfubqhbgghbjf632gr2zpnq" [label="Structural\nfor Citation\nClassification\nScientific\n"];
+ "ucs6zoa6ircdpoo67xc5uicewi" -> "iffj75ycfvbv3fupmfj5ru3b5y";  // edge: directed, left id -> right id; presumably citing release -> cited release - TODO confirm against the generator.
+ "3eohfubqhbgghbjf632gr2zpnq" -> "3qoiai4l65fhzd5dlxqsoifdem";
+ "7gethjhuzva4nejqnd4uthpogi" -> "si3tvuodjrgydnsz226nugqly4";
+ "ucs6zoa6ircdpoo67xc5uicewi" -> "wq4t3qsluja7xbrbr7fppwaxnm";
+ "ucs6zoa6ircdpoo67xc5uicewi" -> "svhtgtqz4jfchdncsp4v3tcydq";
+ "3eohfubqhbgghbjf632gr2zpnq" -> "r27xqgwhu5avjilcqgf2faztyi";
+ "j7zgtiuzibeulh2fs6p4olzsqa" -> "2ii7d36sybgjrezymmob5vips4";
+ "7gethjhuzva4nejqnd4uthpogi" -> "srfdnl66kjhyzhb4zle3ko47fa";
+ "bi4v5ye2czdknawolxzsm2mx4q" -> "3a3i7yfkpveiblvcs5m6oxgv5e";
+ "bi4v5ye2czdknawolxzsm2mx4q" -> "btc4q4nphzd23olbeacsqop2zu";
+ "bi4v5ye2czdknawolxzsm2mx4q" -> "revexrw5zrcavbf5bviqtewb3q";
+ "7gethjhuzva4nejqnd4uthpogi" -> "kaaamu4fhna25o3jl6rvi3ntmq";
+ "j7zgtiuzibeulh2fs6p4olzsqa" -> "cqmoemq4kvgj7i4bnks3pivdo4";
+ "3eohfubqhbgghbjf632gr2zpnq" -> "efq5y2p7gjfr5hzqs3xf2abyeq";
+ "krqznmpyazhy7c2fobjmlg6twe" -> "jux5jscohne6zp6x6kiada7wr4";
+ "wq4t3qsluja7xbrbr7fppwaxnm" -> "3visrjdhwbe2fjhmrhueclzas4";
+ "3a3i7yfkpveiblvcs5m6oxgv5e" -> "j5u4qccgvbfs5m2dzvmeesuhce";
+ "revexrw5zrcavbf5bviqtewb3q" -> "4xwvps4evbcgfdvqkbyxxj656y";
+ "krqznmpyazhy7c2fobjmlg6twe" -> "h6q7uauk25ek7otxu3m4riamey";
+ "krqznmpyazhy7c2fobjmlg6twe" -> "bc4inxvcafb6hejs7yi7xuwkbm";
+ "3a3i7yfkpveiblvcs5m6oxgv5e" -> "wuqbyy2425em5aq7ppvnyafewy";
+ "bi4v5ye2czdknawolxzsm2mx4q" -> "ucs6zoa6ircdpoo67xc5uicewi";
+ "krqznmpyazhy7c2fobjmlg6twe" -> "d26wfank3zby7cwhuqnz5toucy";
+ "krqznmpyazhy7c2fobjmlg6twe" -> "tyk57fziezanfidyhivcb3jhem";
+ "3a3i7yfkpveiblvcs5m6oxgv5e" -> "irc6sv2lqzghziut6vfhevvodq";
+ "efq5y2p7gjfr5hzqs3xf2abyeq" -> "bouhrcw5drb3rdrmuxsw7qyzyy";
+ "7gethjhuzva4nejqnd4uthpogi" -> "4nelyozvsnbodcyvgspojczfiq";
+ "7gethjhuzva4nejqnd4uthpogi" -> "kkjaguuugfdyvpfjto4zgez5ka";
+ "3eohfubqhbgghbjf632gr2zpnq" -> "7icffyrunra6zkjxdgpiikbtx4";
+ "3a3i7yfkpveiblvcs5m6oxgv5e" -> "tq2nkp3y6za6dieasvvf3misj4";
+ "ucs6zoa6ircdpoo67xc5uicewi" -> "3visrjdhwbe2fjhmrhueclzas4";
+ "j7zgtiuzibeulh2fs6p4olzsqa" -> "dth3scgitzfejgyffrtktm5qky";
+ "bi4v5ye2czdknawolxzsm2mx4q" -> "7gethjhuzva4nejqnd4uthpogi";
+ "cqmoemq4kvgj7i4bnks3pivdo4" -> "krqznmpyazhy7c2fobjmlg6twe";
+ "j7zgtiuzibeulh2fs6p4olzsqa" -> "4gigiwingzbs7jqprdshrobvka";
+ "3eohfubqhbgghbjf632gr2zpnq" -> "bi4v5ye2czdknawolxzsm2mx4q";
+ "3a3i7yfkpveiblvcs5m6oxgv5e" -> "6pukukj4yfcs5nrv7bhiiokyte";
+ "ucs6zoa6ircdpoo67xc5uicewi" -> "ldwfihhp4fdhxkes4faigrijae";
+ "j7zgtiuzibeulh2fs6p4olzsqa" -> "3eohfubqhbgghbjf632gr2zpnq";
+}
diff --git a/extra/dot/example.png b/extra/dot/example.png
new file mode 100644
index 0000000..1f66422
--- /dev/null
+++ b/extra/dot/example.png
Binary files differ
diff --git a/extra/dot/levels.py b/extra/dot/levels.py
new file mode 100644
index 0000000..ff1af27
--- /dev/null
+++ b/extra/dot/levels.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+"""
+Take a fatcat release ident and emit the citation graph reachable from it as a graphviz dot file; the traversal is bounded by max_nodes rather than by a number of levels.
+"""
+
+import sys
+from collections import deque
+
+from elasticsearch import Elasticsearch
+from elasticsearch_dsl import Search
+
+seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi"
+max_nodes = 50
+
+client = Elasticsearch("http://localhost:9200")
+S = Search(using=client, index="fatcat_ref_v01")
+
+def unique_targets(ident):
+ s = S.query("match", source_release_ident=ident)
+ unique_target_idents = set()
+ for hit in s.scan():
+ unique_target_idents.add(hit["target_release_ident"])
+ return list(unique_target_idents)
+
+queue = set([seed])
+i = 0
+edges = set()
+while queue:
+ node = queue.pop()
+ i += 1
+ if i == max_nodes:
+ break
+ for n in unique_targets(node):
+ edges.add((node, n))
+ queue.add(n)
+
+print("digraph G {")
+for a, b in edges:
+ print("{} -> {};".format(a, b))
+print("}")
+# print(edges)
+# s = S.query("match", source_release_ident=seed)
+# for hit in s.scan():
+# print(hit["target_release_ident"])
+# k = S.query("match", source_release_ident=hit["target_release_ident"])
+# for h in k.scan():
+# print("\t{}".format(h["target_release_ident"]))