diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 18:01:48 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 18:01:48 -0800 |
commit | 0a427dae89357bef0c45830b22b5f18e894747ba (patch) | |
tree | 2efa636f1489b230c392e44edd90640df5388b4c /python/fatcat_import.py | |
parent | d7d42a21b0b652496d26a10457a23fe6b615da90 (diff) | |
download | fatcat-0a427dae89357bef0c45830b22b5f18e894747ba.tar.gz fatcat-0a427dae89357bef0c45830b22b5f18e894747ba.zip |
batch size as a general import param
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 17 |
1 files changed, 4 insertions, 13 deletions
```diff
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 23f7e869..eeb80b01 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -52,6 +52,9 @@ def main():
     parser.add_argument('--kafka-env',
         default="qa",
         help="Kafka topic namespace to use (eg, prod, qa)")
+    parser.add_argument('--batch-size',
+        help="size of batch to send",
+        default=100, type=int)
     subparsers = parser.add_subparsers()
 
     sub_crossref = subparsers.add_parser('crossref')
@@ -68,9 +71,6 @@ def main():
     sub_crossref.add_argument('--extid-map-file',
         help="DOI-to-other-identifiers sqlite3 database",
         default=None, type=str)
-    sub_crossref.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
     sub_crossref.add_argument('--kafka-mode',
         action='store_true',
         help="consume from kafka topic (not stdin)")
@@ -86,9 +86,6 @@ def main():
     sub_orcid.add_argument('json_file',
         help="orcid JSON file to import from (or stdin)",
         default=sys.stdin, type=argparse.FileType('r'))
-    sub_orcid.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
 
     sub_journal_metadata = subparsers.add_parser('journal-metadata')
     sub_journal_metadata.set_defaults(
@@ -98,9 +95,6 @@ def main():
     sub_journal_metadata.add_argument('json_file',
         help="Journal JSON metadata file to import from (or stdin)",
         default=sys.stdin, type=argparse.FileType('r'))
-    sub_journal_metadata.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
 
     sub_matched = subparsers.add_parser('matched')
     sub_matched.set_defaults(
@@ -110,9 +104,6 @@ def main():
     sub_matched.add_argument('json_file',
         help="JSON file to import from (or stdin)",
         default=sys.stdin, type=argparse.FileType('r'))
-    sub_matched.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
     sub_matched.add_argument('--bezerk-mode',
         action='store_true',
         help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)")
@@ -133,7 +124,7 @@ def main():
         help="if this is an import of longtail OA content (sets an 'extra' flag)")
     sub_grobid_metadata.add_argument('--bezerk-mode',
         action='store_true',
-        help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)")
+        help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)")
 
     args = parser.parse_args()
     if not args.__dict__.get("func"):
```