diff options
Diffstat (limited to 'extra')
-rw-r--r-- | extra/dot/example.dot | 80 | ||||
-rw-r--r-- | extra/dot/example.png | bin | 0 -> 508096 bytes | |||
-rw-r--r-- | extra/dot/levels.py | 46 |
3 files changed, 126 insertions, 0 deletions
diff --git a/extra/dot/example.dot b/extra/dot/example.dot new file mode 100644 index 0000000..337a3f1 --- /dev/null +++ b/extra/dot/example.dot @@ -0,0 +1,80 @@ +digraph G { + "btc4q4nphzd23olbeacsqop2zu" [label="SMOTE: Synthetic\nOver-sampling\n"]; + "si3tvuodjrgydnsz226nugqly4" [label="Context-aware\nrecommendation"]; + "jux5jscohne6zp6x6kiada7wr4" [label="Google's Neural\nTranslation\nBridging the\nbetween Human\nMachine Translation"]; + "bi4v5ye2czdknawolxzsm2mx4q" [label="Measuring the\nof a Scientific\nthrough Citation\n"]; + "3a3i7yfkpveiblvcs5m6oxgv5e" [label="How can we\ncitation behavior?\nstudy of reasons\nciting literature\ncommunication"]; + "srfdnl66kjhyzhb4zle3ko47fa" [label="Enhancing patent\nby citation\n"]; + "wuqbyy2425em5aq7ppvnyafewy" [label="The norms of\nbehavior: Prolegomena\nthe footnote"]; + "kaaamu4fhna25o3jl6rvi3ntmq" [label="Joint latent\nmodels for\nand citations"]; + "efq5y2p7gjfr5hzqs3xf2abyeq" [label="ADADELTA: An\nLearning Rate\n"]; + "tyk57fziezanfidyhivcb3jhem" [label="NOVELNEURALMODULATORS"]; + "bouhrcw5drb3rdrmuxsw7qyzyy" [label="No More Pesky\nRates"]; + "4nelyozvsnbodcyvgspojczfiq" [label="Citation content\n(CCA): A framework\nsyntactic and\nanalysis of\ncontent"]; + "irc6sv2lqzghziut6vfhevvodq" [label="Citation context\nthe frequency\nof citation\n"]; + "j5u4qccgvbfs5m2dzvmeesuhce" [label="Cited Documents\nConcept Symbols"]; + "revexrw5zrcavbf5bviqtewb3q" [label="A Caution Regarding\nof Thumb for\nInflation Factors"]; + "ucs6zoa6ircdpoo67xc5uicewi" [label="Interpreting\nof science\ncitation context\na preliminary\n"]; + "7gethjhuzva4nejqnd4uthpogi" [label="Content-based\nanalysis: The\ngeneration\ncitation analysis"]; + "kkjaguuugfdyvpfjto4zgez5ka" [label="Combining full\nand bibliometric\nin mapping\ndisciplines"]; + "7icffyrunra6zkjxdgpiikbtx4" [label="Contextualizing\nfor Scientific\nusing Word\nand Domain\n"]; + "tq2nkp3y6za6dieasvvf3misj4" [label="Highly cited\npapers and\nreasons why\ncontinue to\ncited"]; + 
"6pukukj4yfcs5nrv7bhiiokyte" [label="Some Results\nthe Function\nQuality of\n"]; + "h6q7uauk25ek7otxu3m4riamey" [label="Listen, Attend\nSpell"]; + "2ii7d36sybgjrezymmob5vips4" [label="ScispaCy: Fast\nRobust Models\nBiomedical\nLanguage Processing"]; + "d26wfank3zby7cwhuqnz5toucy" [label="From Softmax\nSparsemax:\nSparse Model\nAttention and\nClassification"]; + "dth3scgitzfejgyffrtktm5qky" [label="BioBERT: a\nbiomedical\nrepresentation\nfor biomedical\nmining"]; + "3visrjdhwbe2fjhmrhueclzas4" [label="The synthesis\nspecialty narratives\nco-citation\n"]; + "cqmoemq4kvgj7i4bnks3pivdo4" [label="AllenNLP: A\nSemantic Natural\nProcessing\n"]; + "svhtgtqz4jfchdncsp4v3tcydq" [label="Hedging in\nResearch Articles"]; + "ldwfihhp4fdhxkes4faigrijae" [label="The structure\ndynamics of\nclusters: A\ncocitation\n"]; + "4gigiwingzbs7jqprdshrobvka" [label="ClinicalBERT:\nClinical Notes\nPredicting\nReadmission"]; + "krqznmpyazhy7c2fobjmlg6twe" [label="OpenNMT: Open-Source\nfor Neural\nTranslation"]; + "4xwvps4evbcgfdvqkbyxxj656y" [label="Regression\n"]; + "j7zgtiuzibeulh2fs6p4olzsqa" [label="SciBERT: A\nLanguage Model\nScientific\n"]; + "wq4t3qsluja7xbrbr7fppwaxnm" [label="Citation structure\nan emerging\narea on the\nof application"]; + "iffj75ycfvbv3fupmfj5ru3b5y" [label="Toward a consensus\nof science"]; + "3qoiai4l65fhzd5dlxqsoifdem" [label="Characterizing\ncited method\nnon-method\nusing citation\nThe role of\n"]; + "bc4inxvcafb6hejs7yi7xuwkbm" [label="Linguistic\nFeatures Improve\nMachine Translation"]; + "r27xqgwhu5avjilcqgf2faztyi" [label="Syntactic Scaffolds\nSemantic Structures"]; + "3eohfubqhbgghbjf632gr2zpnq" [label="Structural\nfor Citation\nClassification\nScientific\n"]; + "ucs6zoa6ircdpoo67xc5uicewi" -> "iffj75ycfvbv3fupmfj5ru3b5y"; + "3eohfubqhbgghbjf632gr2zpnq" -> "3qoiai4l65fhzd5dlxqsoifdem"; + "7gethjhuzva4nejqnd4uthpogi" -> "si3tvuodjrgydnsz226nugqly4"; + "ucs6zoa6ircdpoo67xc5uicewi" -> "wq4t3qsluja7xbrbr7fppwaxnm"; + "ucs6zoa6ircdpoo67xc5uicewi" -> 
"svhtgtqz4jfchdncsp4v3tcydq"; + "3eohfubqhbgghbjf632gr2zpnq" -> "r27xqgwhu5avjilcqgf2faztyi"; + "j7zgtiuzibeulh2fs6p4olzsqa" -> "2ii7d36sybgjrezymmob5vips4"; + "7gethjhuzva4nejqnd4uthpogi" -> "srfdnl66kjhyzhb4zle3ko47fa"; + "bi4v5ye2czdknawolxzsm2mx4q" -> "3a3i7yfkpveiblvcs5m6oxgv5e"; + "bi4v5ye2czdknawolxzsm2mx4q" -> "btc4q4nphzd23olbeacsqop2zu"; + "bi4v5ye2czdknawolxzsm2mx4q" -> "revexrw5zrcavbf5bviqtewb3q"; + "7gethjhuzva4nejqnd4uthpogi" -> "kaaamu4fhna25o3jl6rvi3ntmq"; + "j7zgtiuzibeulh2fs6p4olzsqa" -> "cqmoemq4kvgj7i4bnks3pivdo4"; + "3eohfubqhbgghbjf632gr2zpnq" -> "efq5y2p7gjfr5hzqs3xf2abyeq"; + "krqznmpyazhy7c2fobjmlg6twe" -> "jux5jscohne6zp6x6kiada7wr4"; + "wq4t3qsluja7xbrbr7fppwaxnm" -> "3visrjdhwbe2fjhmrhueclzas4"; + "3a3i7yfkpveiblvcs5m6oxgv5e" -> "j5u4qccgvbfs5m2dzvmeesuhce"; + "revexrw5zrcavbf5bviqtewb3q" -> "4xwvps4evbcgfdvqkbyxxj656y"; + "krqznmpyazhy7c2fobjmlg6twe" -> "h6q7uauk25ek7otxu3m4riamey"; + "krqznmpyazhy7c2fobjmlg6twe" -> "bc4inxvcafb6hejs7yi7xuwkbm"; + "3a3i7yfkpveiblvcs5m6oxgv5e" -> "wuqbyy2425em5aq7ppvnyafewy"; + "bi4v5ye2czdknawolxzsm2mx4q" -> "ucs6zoa6ircdpoo67xc5uicewi"; + "krqznmpyazhy7c2fobjmlg6twe" -> "d26wfank3zby7cwhuqnz5toucy"; + "krqznmpyazhy7c2fobjmlg6twe" -> "tyk57fziezanfidyhivcb3jhem"; + "3a3i7yfkpveiblvcs5m6oxgv5e" -> "irc6sv2lqzghziut6vfhevvodq"; + "efq5y2p7gjfr5hzqs3xf2abyeq" -> "bouhrcw5drb3rdrmuxsw7qyzyy"; + "7gethjhuzva4nejqnd4uthpogi" -> "4nelyozvsnbodcyvgspojczfiq"; + "7gethjhuzva4nejqnd4uthpogi" -> "kkjaguuugfdyvpfjto4zgez5ka"; + "3eohfubqhbgghbjf632gr2zpnq" -> "7icffyrunra6zkjxdgpiikbtx4"; + "3a3i7yfkpveiblvcs5m6oxgv5e" -> "tq2nkp3y6za6dieasvvf3misj4"; + "ucs6zoa6ircdpoo67xc5uicewi" -> "3visrjdhwbe2fjhmrhueclzas4"; + "j7zgtiuzibeulh2fs6p4olzsqa" -> "dth3scgitzfejgyffrtktm5qky"; + "bi4v5ye2czdknawolxzsm2mx4q" -> "7gethjhuzva4nejqnd4uthpogi"; + "cqmoemq4kvgj7i4bnks3pivdo4" -> "krqznmpyazhy7c2fobjmlg6twe"; + "j7zgtiuzibeulh2fs6p4olzsqa" -> "4gigiwingzbs7jqprdshrobvka"; + "3eohfubqhbgghbjf632gr2zpnq" -> 
"bi4v5ye2czdknawolxzsm2mx4q"; + "3a3i7yfkpveiblvcs5m6oxgv5e" -> "6pukukj4yfcs5nrv7bhiiokyte"; + "ucs6zoa6ircdpoo67xc5uicewi" -> "ldwfihhp4fdhxkes4faigrijae"; + "j7zgtiuzibeulh2fs6p4olzsqa" -> "3eohfubqhbgghbjf632gr2zpnq"; +} diff --git a/extra/dot/example.png b/extra/dot/example.png Binary files differ new file mode 100644 index 0000000..1f66422 --- /dev/null +++ b/extra/dot/example.png diff --git a/extra/dot/levels.py b/extra/dot/levels.py new file mode 100644 index 0000000..ff1af27 --- /dev/null +++ b/extra/dot/levels.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +""" +Take a document fatcat id and emit the citation structure up to X levels as graphviz dot file. +""" + +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search +import sys + +seed = sys.argv[1] if len(sys.argv) > 1 else "rgzyfbx7t5d53m5amvbllewzqi" +max_nodes = 50 + +client = Elasticsearch("http://localhost:9200") +S = Search(using=client, index="fatcat_ref_v01") + +def unique_targets(ident): + s = S.query("match", source_release_ident=ident) + unique_target_idents = set() + for hit in s.scan(): + unique_target_idents.add(hit["target_release_ident"]) + return list(unique_target_idents) + +queue = set([seed]) +i = 0 +edges = set() +while queue: + node = queue.pop() + i += 1 + if i == max_nodes: + break + for n in unique_targets(node): + edges.add((node, n)) + queue.add(n) + +print("digraph G {") +for a, b in edges: + print("{} -> {};".format(a, b)) +print("}") +# print(edges) +# s = S.query("match", source_release_ident=seed) +# for hit in s.scan(): +# print(hit["target_release_ident"]) +# k = S.query("match", source_release_ident=hit["target_release_ident"]) +# for h in k.scan(): +# print("\t{}".format(h["target_release_ident"])) |