aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/harvest/datacite.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-11-14 23:23:25 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2018-11-14 23:23:25 -0800
commit 5c47be5b0468c13db868548dccfdf1af50813b0c (patch)
tree e1e9c8f5f2cab464f9b0c9cf09a329f3c6c6a270 /python/fatcat_tools/harvest/datacite.py
parent ef50d665b31a8cb1b0ddc38671b3334b0fa6fc14 (diff)
download fatcat-5c47be5b0468c13db868548dccfdf1af50813b0c.tar.gz
fatcat-5c47be5b0468c13db868548dccfdf1af50813b0c.zip
initial work on metadata harvest bots
Diffstat (limited to 'python/fatcat_tools/harvest/datacite.py')
-rw-r--r-- python/fatcat_tools/harvest/datacite.py 29
1 files changed, 29 insertions, 0 deletions
diff --git a/python/fatcat_tools/harvest/datacite.py b/python/fatcat_tools/harvest/datacite.py
new file mode 100644
index 00000000..12860810
--- /dev/null
+++ b/python/fatcat_tools/harvest/datacite.py
@@ -0,0 +1,29 @@
+
+"""
+Datacite has a REST API as well as an OAI-PMH endpoint.
+
+They have about 8 million records.
+
+bulk export notes: https://github.com/datacite/datacite/issues/188
+
+Fundamentally, this is very similar to Crossref. We don't have a scrape yet...
+maybe we could/should use this script for that, and dump to JSON?
+"""
+
+from fatcat_tools.harvest.ingest_common import DoiApiHarvest
+
+class HarvestDataciteWorker(DoiApiHarvest):
+
+ def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
+ api_host_url="https://api.datacite.org/works",
+ start_date=None, end_date=None):
+ super().__init__(kafka_hosts=kafka_hosts,
+ produce_topic=produce_topic,
+ state_topic=state_topic,
+ api_host_url=api_host_url,
+ contact_email=contact_email,
+ start_date=start_date,
+ end_date=end_date)
+
+ self.update_filter_name = "update"
+