aboutsummaryrefslogtreecommitdiffstats
path: root/extra/wikidata
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-05-06 14:21:39 -0700
committerBryan Newbold <bnewbold@archive.org>2020-05-06 14:21:39 -0700
commit25b4ca85cf535629a7e1cdce02999084e5bc2535 (patch)
tree7aba40cf479c130bc0987ddb9ea5a1204971deb6 /extra/wikidata
parent3e7e3eb8684a91fd7dc97d05f259eec61c525927 (diff)
downloadchocula-25b4ca85cf535629a7e1cdce02999084e5bc2535.tar.gz
chocula-25b4ca85cf535629a7e1cdce02999084e5bc2535.zip
start refactoring files into module
Diffstat (limited to 'extra/wikidata')
-rw-r--r--extra/wikidata/wikidata.sparql35
1 files changed, 35 insertions, 0 deletions
diff --git a/extra/wikidata/wikidata.sparql b/extra/wikidata/wikidata.sparql
new file mode 100644
index 0000000..3f7e2f9
--- /dev/null
+++ b/extra/wikidata/wikidata.sparql
@@ -0,0 +1,35 @@
+# this query fetches basic metadata for about 45k journals from wikidata
+# run at: https://query.wikidata.org/
+#
+# NOTE: the publisher label sometimes falls through to a QID or "t"-prefixed
+# number if the publisher name isn't actually known
+
+# would rather have direct QID instead of URL, but this seems to slow things down too much (query times out)
+#SELECT (REPLACE(STR(?item),".*Q","Q") AS ?wikidata_qid) (SAMPLE(?issn) AS ?issn) (SAMPLE(?title) AS ?title) (SAMPLE(?publisherLabel) AS ?publisher_name) (SAMPLE(?websiteurl) AS ?websiteurl) (str(YEAR(SAMPLE(?inception))) AS ?start_year)
+
+SELECT ?item (SAMPLE(?issn) AS ?issn) (SAMPLE(?title) AS ?title) (SAMPLE(?publisherLabel) AS ?publisher_name) (SAMPLE(?websiteurl) AS ?websiteurl) (str(YEAR(SAMPLE(?inception))) AS ?start_year)
+WHERE
+{
+ # item must be an instance of (P31) at least one of these journal classes
+ { ?item wdt:P31 wd:Q5633421 } # "scientific journal"
+ UNION { ?item wdt:P31 wd:Q773668 } # "open access journal"
+ UNION { ?item wdt:P31 wd:Q737498 } # "academic journal"
+ UNION { ?item wdt:P31 wd:Q49850 } # "journal"
+ UNION { ?item wdt:P31 wd:Q51135530 } # "humanities journal"
+
+ # must have an ISSN (P236): required pattern, so items without one are dropped
+ ?item wdt:P236 ?issn .
+
+ # may have title (P1476), website (P856), publisher (P123), inception date (P571)
+ OPTIONAL{?item wdt:P1476 ?title}
+ OPTIONAL{?item wdt:P856 ?websiteurl}
+ OPTIONAL{?item wdt:P123 ?publisher}
+ OPTIONAL{?item wdt:P571 ?inception}
+
+ # "label service" to turn publisher entity into name (first available of the listed languages)
+ SERVICE wikibase:label { bd:serviceParam wikibase:language "en,pt,es,de,fr,ja".
+ ?publisher rdfs:label ?publisherLabel. }
+} GROUP BY ?item
+
+# could include all known ISSNs instead of just one with:
+# (GROUP_CONCAT(DISTINCT ?issn; SEPARATOR=";") AS ?issns)