summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_harvest.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-11-15 12:21:45 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-11-15 12:21:45 -0800
commit: f21d28315aa632cdb9f84ea8787762d1e27b4310 (patch)
tree: 58c6ad0d34260e1d656247ddffa8ee047a8eb520 /python/fatcat_harvest.py
parent: 5c47be5b0468c13db868548dccfdf1af50813b0c (diff)
download: fatcat-f21d28315aa632cdb9f84ea8787762d1e27b4310.tar.gz
download: fatcat-f21d28315aa632cdb9f84ea8787762d1e27b4310.zip
refactoring harvesters
Diffstat (limited to 'python/fatcat_harvest.py')
-rwxr-xr-x python/fatcat_harvest.py 66
1 files changed, 66 insertions, 0 deletions
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
new file mode 100755
index 00000000..dd98d22a
--- /dev/null
+++ b/python/fatcat_harvest.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+import datetime
+from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker
+
+def run_crossref(args):  # build a HarvestCrossrefWorker from the parsed CLI args and call run_once() on it
+ worker = HarvestCrossrefWorker(
+ args.kafka_hosts,  # --kafka-hosts value, passed positionally
+ produce_topic="fatcat-{}.crossref".format(args.env),  # topic names are namespaced by --env
+ state_topic="fatcat-{}.crossref-state".format(args.env),
+ contact_email=args.contact_email,
+ start_date=args.start_date,  # when parsed by main(): a datetime.date from mkdate, or None if the flag was omitted
+ end_date=args.end_date)  # same convention as start_date
+ worker.run_once()  # what run_once() does is defined in fatcat_tools.harvest, not in this file
+
+def run_datacite(args):  # build a HarvestDataciteWorker from the parsed CLI args and call run_once() on it
+ worker = HarvestDataciteWorker(
+ args.kafka_hosts,  # --kafka-hosts value, passed positionally
+ produce_topic="fatcat-{}.datacite".format(args.env),  # topic names are namespaced by --env
+ state_topic="fatcat-{}.datacite-state".format(args.env),
+ contact_email=args.contact_email,
+ start_date=args.start_date,  # when parsed by main(): a datetime.date from mkdate, or None if the flag was omitted
+ end_date=args.end_date)  # same convention as start_date
+ worker.run_once()  # what run_once() does is defined in fatcat_tools.harvest, not in this file
+
+def mkdate(raw):  # argparse `type=` converter: turn a "YYYY-MM-DD" string into a datetime.date
+ return datetime.datetime.strptime(raw, "%Y-%m-%d").date()  # strptime raises ValueError if raw does not match the format
+
+def main():  # CLI entry point: parse arguments, then dispatch to the handler of the chosen subcommand
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--debug',
+ action='store_true',
+ help="enable debug logging")  # NOTE(review): args.debug is not read anywhere in the code shown here
+ parser.add_argument('--kafka-hosts',
+ default="localhost:9092",
+ help="list of Kafka brokers (host/port) to use")
+ parser.add_argument('--env',
+ default="qa",
+ help="Kafka topic namespace to use (eg, prod, qa)")  # interpolated into the "fatcat-{}..." topic names
+ parser.add_argument('--start-date',
+ default=None, type=mkdate,  # mkdate parses "%Y-%m-%d"; None when the flag is omitted
+ help="begining of harvest period")  # NOTE(review): typo in help text ("beginning"); left as-is, it is runtime output
+ parser.add_argument('--end-date',
+ default=None, type=mkdate,  # mkdate parses "%Y-%m-%d"; None when the flag is omitted
+ help="end of harvest period")
+ parser.add_argument('--contact-email',
+ default="undefined", # better? NOTE(review): placeholder string, not an address; passed to the worker unchanged
+ help="contact email to use in API header")
+ subparsers = parser.add_subparsers()  # the options above belong to the top-level parser, so they go before the subcommand name
+
+ sub_crossref = subparsers.add_parser('crossref')
+ sub_crossref.set_defaults(func=run_crossref)  # selecting `crossref` stores its handler on args.func
+
+ sub_datacite = subparsers.add_parser('datacite')
+ sub_datacite.set_defaults(func=run_datacite)  # selecting `datacite` stores its handler on args.func
+
+ args = parser.parse_args()
+ if not args.__dict__.get("func"):  # "func" is only set via a subparser's set_defaults, so it is missing when no subcommand was given
+ print("tell me what to do!")
+ sys.exit(-1)  # non-zero exit status: nothing was run
+ args.func(args)  # calls run_crossref(args) or run_datacite(args)
+
+if __name__ == '__main__':  # run the CLI only when executed as a script, not when imported
+ main()