diff options
Diffstat (limited to 'python/fatcat_tools/harvest/datacite.py')
-rw-r--r-- | python/fatcat_tools/harvest/datacite.py | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/python/fatcat_tools/harvest/datacite.py b/python/fatcat_tools/harvest/datacite.py new file mode 100644 index 00000000..12860810 --- /dev/null +++ b/python/fatcat_tools/harvest/datacite.py @@ -0,0 +1,29 @@ + +""" +datacite has a REST API as well as OAI-PMH endpoint. + +have about 8 million + +bulk export notes: https://github.com/datacite/datacite/issues/188 + +fundamentally, very similar to crossref. don't have a scrape... maybe +could/should use this script for that, and dump to JSON? +""" + +from fatcat_tools.harvest.ingest_common import DoiApiHarvest + +class HarvestDataciteWorker(DoiApiHarvest): + + def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email, + api_host_url="https://api.datacite.org/works", + start_date=None, end_date=None): + super().__init__(kafka_hosts=kafka_hosts, + produce_topic=produce_topic, + state_topic=state_topic, + api_host_url=api_host_url, + contact_email=contact_email, + start_date=start_date, + end_date=end_date) + + self.update_filter_name = "update" + |