diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-15 12:21:45 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-15 12:21:45 -0800 |
commit | f21d28315aa632cdb9f84ea8787762d1e27b4310 (patch) | |
tree | 58c6ad0d34260e1d656247ddffa8ee047a8eb520 /python/fatcat_harvest.py | |
parent | 5c47be5b0468c13db868548dccfdf1af50813b0c (diff) | |
download | fatcat-f21d28315aa632cdb9f84ea8787762d1e27b4310.tar.gz fatcat-f21d28315aa632cdb9f84ea8787762d1e27b4310.zip |
refactoring harvesters
Diffstat (limited to 'python/fatcat_harvest.py')
-rwxr-xr-x | python/fatcat_harvest.py | 66 |
1 files changed, 66 insertions, 0 deletions
#!/usr/bin/env python3
# diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
# new file mode 100755, index 00000000..dd98d22a (@@ -0,0 +1,66 @@)
"""
Command-line entry point for the fatcat metadata harvesters.

Each sub-command (`crossref`, `datacite`) constructs the matching harvest
worker from `fatcat_tools.harvest`, pointing it at Kafka topics namespaced by
`--env`, and calls its `run_once()` method.

Example:

    ./fatcat_harvest.py --env qa --start-date 2018-11-01 crossref
"""

import sys
import argparse
import datetime
from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker


def _run_harvester(worker_cls, source, args):
    """
    Instantiate `worker_cls` for the named `source` and call `run_once()`.

    `source` is the topic suffix (eg, "crossref"); records are produced to
    "fatcat-{env}.{source}" and state is kept in "fatcat-{env}.{source}-state".
    `args` is the parsed argparse namespace (kafka_hosts, env, contact_email,
    start_date, end_date).
    """
    worker = worker_cls(
        args.kafka_hosts,
        produce_topic="fatcat-{}.{}".format(args.env, source),
        state_topic="fatcat-{}.{}-state".format(args.env, source),
        contact_email=args.contact_email,
        start_date=args.start_date,
        end_date=args.end_date)
    worker.run_once()


def run_crossref(args):
    """Sub-command handler: run the Crossref harvest worker once."""
    _run_harvester(HarvestCrossrefWorker, "crossref", args)


def run_datacite(args):
    """Sub-command handler: run the Datacite harvest worker once."""
    _run_harvester(HarvestDataciteWorker, "datacite", args)


def mkdate(raw):
    """
    Parse a "YYYY-MM-DD" string into a `datetime.date`.

    Raises ValueError on malformed input; when used as an argparse `type=`
    callable, argparse turns that into a usage error.
    """
    return datetime.datetime.strptime(raw, "%Y-%m-%d").date()


def main(argv=None):
    """
    Parse command-line arguments and dispatch to the chosen sub-command.

    `argv` is an optional list of argument strings; when None (the default)
    argparse reads `sys.argv[1:]`, so existing `main()` callers are unchanged.
    Exits with status -1 if no sub-command was given.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): --debug is accepted but nothing in this file consumes
    # args.debug -- confirm whether the workers are expected to read it.
    parser.add_argument('--debug',
        action='store_true',
        help="enable debug logging")
    parser.add_argument('--kafka-hosts',
        default="localhost:9092",
        help="list of Kafka brokers (host/port) to use")
    parser.add_argument('--env',
        default="qa",
        help="Kafka topic namespace to use (eg, prod, qa)")
    parser.add_argument('--start-date',
        default=None, type=mkdate,
        help="beginning of harvest period")
    parser.add_argument('--end-date',
        default=None, type=mkdate,
        help="end of harvest period")
    parser.add_argument('--contact-email',
        default="undefined", # better?
        help="contact email to use in API header")
    subparsers = parser.add_subparsers()

    sub_crossref = subparsers.add_parser('crossref')
    sub_crossref.set_defaults(func=run_crossref)

    sub_datacite = subparsers.add_parser('datacite')
    sub_datacite.set_defaults(func=run_datacite)

    args = parser.parse_args(argv)
    # `func` is only set by a sub-parser; it is absent when none was chosen.
    func = getattr(args, "func", None)
    if not func:
        print("tell me what to do!", file=sys.stderr)
        parser.print_usage(sys.stderr)
        sys.exit(-1)
    func(args)


if __name__ == '__main__':
    main()