Diffstat (limited to 'python/fatcat_tools/harvest/crossrefish.py')
 python/fatcat_tools/harvest/crossrefish.py | 39 ---------------------------------------
 1 file changed, 0 insertions(+), 39 deletions(-)
diff --git a/python/fatcat_tools/harvest/crossrefish.py b/python/fatcat_tools/harvest/crossrefish.py
deleted file mode 100644
index a88cedbd..00000000
--- a/python/fatcat_tools/harvest/crossrefish.py
+++ /dev/null
@@ -1,39 +0,0 @@
-
-"""
-Notes on crossref API:
-
-- from-index-date is the updated time
-- is-update can be false, to catch only new or only old works
-
-https://api.crossref.org/works?filter=from-index-date:2018-11-14,is-update:false&rows=2
-
-I think the design is going to have to be a cronjob or long-running job
-(with long sleeps) which publishes "success through" to a separate state
-queue, as simple YYYY-MM-DD strings.
-
-Within a day, will need to use a resumption token. Maybe should use a
-crossref library... meh.
-
-will want to have some mechanism in kafka consumer (pushing to fatcat) to group
-in batches as well. maybe even pass through as batches? or just use timeouts on
-iteration.
-"""
-
-from fatcat_tools.harvest.ingest_common import DoiApiHarvest
-
-class HarvestCrossrefWorker(DoiApiHarvest):
-
-    def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
-            api_host_url="https://api.crossref.org/works",
-            is_update_filter=None,
-            start_date=None, end_date=None):
-        super().__init__(kafka_hosts=kafka_hosts,
-                         produce_topic=produce_topic,
-                         state_topic=state_topic,
-                         api_host_url=api_host_url,
-                         contact_email=contact_email,
-                         start_date=start_date,
-                         end_date=end_date)
-
-        self.is_update_filter = is_update_filter
-
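The deleted docstring sketches the intended harvest loop: filter works by index date, then deep-page within a single day using a cursor ("resumption token"). A minimal sketch of that walk, not the deleted DoiApiHarvest code: the endpoint, the from-index-date/until-index-date/is-update filters, the cursor parameter, and the mailto "polite pool" parameter are documented Crossref REST API features, while the helper name and defaults below are illustrative assumptions.

# Sketch only: pages through one day's newly-indexed works via the
# Crossref deep-paging cursor. Helper name and defaults are made up.
import requests

def harvest_day(date_str, contact_email, rows=100):
    """Yield all new (non-update) works indexed on one YYYY-MM-DD day."""
    params = {
        "filter": "from-index-date:%s,until-index-date:%s,is-update:false"
                  % (date_str, date_str),
        "rows": rows,
        "cursor": "*",  # "*" starts a fresh deep-paging walk
        "mailto": contact_email,  # identifies the client for the polite pool
    }
    while True:
        resp = requests.get("https://api.crossref.org/works", params=params)
        resp.raise_for_status()
        message = resp.json()["message"]
        if not message["items"]:
            return  # cursor exhausted: the whole day has been fetched
        yield from message["items"]
        # feed the server-issued cursor back in to fetch the next page
        params["cursor"] = message["next-cursor"]

A driving cronjob could publish date_str to the state topic once the generator is exhausted, matching the "success through" design described in the note.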

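The note's last paragraph also floats "timeouts on iteration" for batching on the Kafka consumer side. A minimal sketch of that idea, assuming the confluent-kafka Python client; the function name, batch size, and timeout are illustrative assumptions, not the mechanism fatcat later shipped.

# Sketch only: collect messages into batches, flushing when the batch
# fills up or when poll() times out with a partial batch in hand.
from confluent_kafka import Consumer

def batched_messages(consumer, batch_size=100, poll_timeout=5.0):
    """Yield lists of Kafka messages, flushing on size or poll timeout."""
    batch = []
    while True:
        msg = consumer.poll(timeout=poll_timeout)
        if msg is None:
            # timeout hit with no new message: flush any partial batch
            if batch:
                yield batch
                batch = []
            continue
        if msg.error():
            raise RuntimeError(msg.error())
        batch.append(msg)
        if len(batch) >= batch_size:
            yield batch
            batch = []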