diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-28 17:25:20 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-28 17:25:41 -0700 |
commit | 58b1f6fbbf42c3d64cbce1e9f250302a2be1f6da (patch) | |
tree | 6b373b9f5ea5ddf2326d1b3651258ad68ef52aef /python/fatcat_import.py | |
parent | 5d32a5826eced059c4bf90b40b2c41d3ff84e47a (diff) | |
download | fatcat-58b1f6fbbf42c3d64cbce1e9f250302a2be1f6da.tar.gz fatcat-58b1f6fbbf42c3d64cbce1e9f250302a2be1f6da.zip |
fixes for grobid metadata importer
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 15 |
1 files changed, 15 insertions, 0 deletions
```diff
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index c799dcd3..ce232e31 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -7,6 +7,7 @@ from fatcat.crossref_importer import FatcatCrossrefImporter
 from fatcat.orcid_importer import FatcatOrcidImporter
 from fatcat.issn_importer import FatcatIssnImporter
 from fatcat.matched_importer import FatcatMatchedImporter
+from fatcat.grobid_metadata_importer import FatcatGrobidMetadataImporter
 
 def run_import_crossref(args):
     fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file,
@@ -30,6 +31,11 @@ def run_import_matched(args):
     fmi.process_batch(args.json_file, size=args.batch_size)
     fmi.describe_run()
 
+def run_import_grobid_metadata(args):
+    fmi = FatcatGrobidMetadataImporter(args.host_url)
+    fmi.process_source(args.tsv_file, group_size=args.group_size)
+    fmi.describe_run()
+
 def health(args):
     rfac = RawFatcatApiClient(args.host_url)
     print(rfac.health())
@@ -92,6 +98,15 @@ def main():
         help="size of batch to send",
         default=50, type=int)
 
+    sub_import_grobid_metadata = subparsers.add_parser('import-grobid-metadata')
+    sub_import_grobid_metadata.set_defaults(func=run_import_grobid_metadata)
+    sub_import_grobid_metadata.add_argument('tsv_file',
+        help="TSV file to import from (or stdin)",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_import_grobid_metadata.add_argument('--group-size',
+        help="editgroup group size to use",
+        default=75, type=int)
+
     sub_health = subparsers.add_parser('health')
     sub_health.set_defaults(func=health)
 
```