aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_harvest.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-12-11 17:24:11 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-12-11 17:24:14 -0800
commit: 7831f78cc9ccef7331c9176dbecb34f8afc9b54f (patch)
tree: 53d82b7e044fd228d18eef9a5b682ecb6ce22bb4 /python/fatcat_harvest.py
parent: e5199300f8c4be2d2c60c18e341d774ae44a1def (diff)
downloadfatcat-7831f78cc9ccef7331c9176dbecb34f8afc9b54f.tar.gz
fatcat-7831f78cc9ccef7331c9176dbecb34f8afc9b54f.zip
improve argparse usage
Use --fatcat-api-url instead of (ambiguous) --host-url for commands that aren't deployed/running via systemd. TODO: update the other --host-url usage, and either roll out the change consistently or support the old arg as an alias during cut-over. Use argparse.ArgumentDefaultsHelpFormatter (thanks Martin!). Add help messages for all sub-commands, both as documentation and as a way to get argparse to print available commands in a more readable format.
Diffstat (limited to 'python/fatcat_harvest.py')
-rwxr-xr-x python/fatcat_harvest.py 18
1 files changed, 10 insertions, 8 deletions
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index 1b92a5fd..58bef9ca 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -73,10 +73,8 @@ def mkdate(raw):
return datetime.datetime.strptime(raw, "%Y-%m-%d").date()
def main():
- parser = argparse.ArgumentParser()
- parser.add_argument('--debug',
- action='store_true',
- help="enable debug logging")
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--kafka-hosts',
default="localhost:9092",
help="list of Kafka brokers (host/port) to use")
@@ -97,16 +95,20 @@ def main():
help="continue harvesting indefinitely in a loop?")
subparsers = parser.add_subparsers()
- sub_crossref = subparsers.add_parser('crossref')
+ sub_crossref = subparsers.add_parser('crossref',
+ help="harvest DOI metadata from Crossref API (JSON)")
sub_crossref.set_defaults(func=run_crossref)
- sub_datacite = subparsers.add_parser('datacite')
+ sub_datacite = subparsers.add_parser('datacite',
+ help="harvest DOI metadata from Datacite API (JSON)")
sub_datacite.set_defaults(func=run_datacite)
- sub_arxiv = subparsers.add_parser('arxiv')
+ sub_arxiv = subparsers.add_parser('arxiv',
+ help="harvest metadata from arxiv.org OAI-PMH endpoint (XML)")
sub_arxiv.set_defaults(func=run_arxiv)
- sub_pubmed = subparsers.add_parser('pubmed')
+ sub_pubmed = subparsers.add_parser('pubmed',
+ help="harvest MEDLINE/PubMed metadata from daily FTP updates (XML)")
sub_pubmed.set_defaults(func=run_pubmed)
# DOAJ stuff disabled because API range-requests are broken