From 58b1f6fbbf42c3d64cbce1e9f250302a2be1f6da Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 28 Sep 2018 17:25:20 -0700 Subject: fixes for grobid metadata importer --- python/fatcat/importer_common.py | 2 +- python/fatcat_import.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py index 95bec8a1..8dfee875 100644 --- a/python/fatcat/importer_common.py +++ b/python/fatcat/importer_common.py @@ -41,7 +41,7 @@ class FatcatImporter: for i, row in enumerate(source): self.create_row(row, editgroup=eg.id) if i > 0 and (i % group_size) == 0: - self.api.accept_editgroup(eg) + self.api.accept_editgroup(eg.id) eg = self.api.create_editgroup( fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) self.processed_lines = self.processed_lines + 1 diff --git a/python/fatcat_import.py b/python/fatcat_import.py index c799dcd3..ce232e31 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -7,6 +7,7 @@ from fatcat.crossref_importer import FatcatCrossrefImporter from fatcat.orcid_importer import FatcatOrcidImporter from fatcat.issn_importer import FatcatIssnImporter from fatcat.matched_importer import FatcatMatchedImporter +from fatcat.grobid_metadata_importer import FatcatGrobidMetadataImporter def run_import_crossref(args): fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file, @@ -30,6 +31,11 @@ def run_import_matched(args): fmi.process_batch(args.json_file, size=args.batch_size) fmi.describe_run() +def run_import_grobid_metadata(args): + fmi = FatcatGrobidMetadataImporter(args.host_url) + fmi.process_source(args.tsv_file, group_size=args.group_size) + fmi.describe_run() + def health(args): rfac = RawFatcatApiClient(args.host_url) print(rfac.health()) @@ -92,6 +98,15 @@ def main(): help="size of batch to send", default=50, type=int) + sub_import_grobid_metadata = subparsers.add_parser('import-grobid-metadata') + sub_import_grobid_metadata.set_defaults(func=run_import_grobid_metadata) + sub_import_grobid_metadata.add_argument('tsv_file', + help="TSV file to import from (or stdin)", + default=sys.stdin, type=argparse.FileType('r')) + sub_import_grobid_metadata.add_argument('--group-size', + help="editgroup group size to use", + default=75, type=int) + sub_health = subparsers.add_parser('health') sub_health.set_defaults(func=health) -- cgit v1.2.3