From f21d28315aa632cdb9f84ea8787762d1e27b4310 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 15 Nov 2018 12:21:45 -0800
Subject: refactoring harvesters

---
 python/fatcat_tools/harvest/datacite.py | 29 -----------------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 python/fatcat_tools/harvest/datacite.py

(limited to 'python/fatcat_tools/harvest/datacite.py')

diff --git a/python/fatcat_tools/harvest/datacite.py b/python/fatcat_tools/harvest/datacite.py
deleted file mode 100644
index 12860810..00000000
--- a/python/fatcat_tools/harvest/datacite.py
+++ /dev/null
@@ -1,29 +0,0 @@
-
-"""
-datacite has a REST API as well as OAI-PMH endpoint.
-
-have about 8 million
-
-bulk export notes: https://github.com/datacite/datacite/issues/188
-
-fundamentally, very similar to crossref. don't have a scrape... maybe
-could/should use this script for that, and dump to JSON?
-"""
-
-from fatcat_tools.harvest.ingest_common import DoiApiHarvest
-
-class HarvestDataciteWorker(DoiApiHarvest):
-
-    def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
-            api_host_url="https://api.datacite.org/works",
-            start_date=None, end_date=None):
-        super().__init__(kafka_hosts=kafka_hosts,
-                         produce_topic=produce_topic,
-                         state_topic=state_topic,
-                         api_host_url=api_host_url,
-                         contact_email=contact_email,
-                         start_date=start_date,
-                         end_date=end_date)
-
-        self.update_filter_name = "update"
-
-- 
cgit v1.2.3