summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/harvest/crossrefish.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2018-11-15 12:21:45 -0800
committerBryan Newbold <bnewbold@robocracy.org>2018-11-15 12:21:45 -0800
commitf21d28315aa632cdb9f84ea8787762d1e27b4310 (patch)
tree58c6ad0d34260e1d656247ddffa8ee047a8eb520 /python/fatcat_tools/harvest/crossrefish.py
parent5c47be5b0468c13db868548dccfdf1af50813b0c (diff)
downloadfatcat-f21d28315aa632cdb9f84ea8787762d1e27b4310.tar.gz
fatcat-f21d28315aa632cdb9f84ea8787762d1e27b4310.zip
refactoring harvesters
Diffstat (limited to 'python/fatcat_tools/harvest/crossrefish.py')
-rw-r--r--python/fatcat_tools/harvest/crossrefish.py39
1 files changed, 0 insertions, 39 deletions
diff --git a/python/fatcat_tools/harvest/crossrefish.py b/python/fatcat_tools/harvest/crossrefish.py
deleted file mode 100644
index a88cedbd..00000000
--- a/python/fatcat_tools/harvest/crossrefish.py
+++ /dev/null
@@ -1,39 +0,0 @@
-
-"""
-Notes on crossref API:
-
-- from-index-date is the updated time
-- is-update can be false, to catch only new or only old works
-
-https://api.crossref.org/works?filter=from-index-date:2018-11-14,is-update:false&rows=2
-
-I think the design is going to have to be a cronjob or long-running job
-(with long sleeps) which publishes "success through" to a separate state
-queue, as simple YYYY-MM-DD strings.
-
-Within a day, will need to use a resumption token. Maybe should use a
-crossref library... meh.
-
-We will want some mechanism in the kafka consumer (pushing to fatcat) to group
-in batches as well. Maybe even pass through as batches? Or just use timeouts on
-iteration.
-"""
-
-from fatcat_tools.harvest.ingest_common import DoiApiHarvest
-
-class HarvestCrossrefWorker(DoiApiHarvest):
- # DoiApiHarvest subclass for the Crossref works API; the only thing it defines is this constructor.
- def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
- api_host_url="https://api.crossref.org/works",  # default endpoint; callers may override it
- is_update_filter=None,  # presumably the value for Crossref's `is-update` filter -- TODO confirm against the base class
- start_date=None, end_date=None):
- super().__init__(kafka_hosts=kafka_hosts,  # every argument except is_update_filter is forwarded by keyword
- produce_topic=produce_topic,
- state_topic=state_topic,
- api_host_url=api_host_url,
- contact_email=contact_email,
- start_date=start_date,
- end_date=end_date)
- # is_update_filter is not passed to the base class; it is only stored on the instance.
- self.is_update_filter = is_update_filter
-