diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 18:01:48 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 18:01:48 -0800 | 
| commit | 0a427dae89357bef0c45830b22b5f18e894747ba (patch) | |
| tree | 2efa636f1489b230c392e44edd90640df5388b4c | |
| parent | d7d42a21b0b652496d26a10457a23fe6b615da90 (diff) | |
| download | fatcat-0a427dae89357bef0c45830b22b5f18e894747ba.tar.gz fatcat-0a427dae89357bef0c45830b22b5f18e894747ba.zip | |
batch size as a general import param
| -rwxr-xr-x | python/fatcat_import.py | 17 | 
1 file changed, 4 insertions, 13 deletions
```diff
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 23f7e869..eeb80b01 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -52,6 +52,9 @@ def main():
     parser.add_argument('--kafka-env',
         default="qa",
         help="Kafka topic namespace to use (eg, prod, qa)")
+    parser.add_argument('--batch-size',
+        help="size of batch to send",
+        default=100, type=int)
     subparsers = parser.add_subparsers()
 
     sub_crossref = subparsers.add_parser('crossref')
@@ -68,9 +71,6 @@ def main():
     sub_crossref.add_argument('--extid-map-file',
         help="DOI-to-other-identifiers sqlite3 database",
         default=None, type=str)
-    sub_crossref.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
     sub_crossref.add_argument('--kafka-mode',
         action='store_true',
         help="consume from kafka topic (not stdin)")
@@ -86,9 +86,6 @@ def main():
     sub_orcid.add_argument('json_file',
         help="orcid JSON file to import from (or stdin)",
         default=sys.stdin, type=argparse.FileType('r'))
-    sub_orcid.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
 
     sub_journal_metadata = subparsers.add_parser('journal-metadata')
     sub_journal_metadata.set_defaults(
@@ -98,9 +95,6 @@ def main():
     sub_journal_metadata.add_argument('json_file',
         help="Journal JSON metadata file to import from (or stdin)",
         default=sys.stdin, type=argparse.FileType('r'))
-    sub_journal_metadata.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
 
     sub_matched = subparsers.add_parser('matched')
     sub_matched.set_defaults(
@@ -110,9 +104,6 @@ def main():
     sub_matched.add_argument('json_file',
         help="JSON file to import from (or stdin)",
         default=sys.stdin, type=argparse.FileType('r'))
-    sub_matched.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
     sub_matched.add_argument('--bezerk-mode',
         action='store_true',
         help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)")
@@ -133,7 +124,7 @@ def main():
         help="if this is an import of longtail OA content (sets an 'extra' flag)")
     sub_grobid_metadata.add_argument('--bezerk-mode',
         action='store_true',
-        help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)")
+        help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)")
 
     args = parser.parse_args()
     if not args.__dict__.get("func"):
```
