about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-09-28 17:25:20 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2018-09-28 17:25:41 -0700
commit 58b1f6fbbf42c3d64cbce1e9f250302a2be1f6da (patch)
tree 6b373b9f5ea5ddf2326d1b3651258ad68ef52aef
parent 5d32a5826eced059c4bf90b40b2c41d3ff84e47a (diff)
download fatcat-58b1f6fbbf42c3d64cbce1e9f250302a2be1f6da.tar.gz
fatcat-58b1f6fbbf42c3d64cbce1e9f250302a2be1f6da.zip
fixes for grobid metadata importer
-rw-r--r-- python/fatcat/importer_common.py 2
-rwxr-xr-x python/fatcat_import.py 15
2 files changed, 16 insertions, 1 deletions
diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py
index 95bec8a1..8dfee875 100644
--- a/python/fatcat/importer_common.py
+++ b/python/fatcat/importer_common.py
@@ -41,7 +41,7 @@ class FatcatImporter:
for i, row in enumerate(source):
self.create_row(row, editgroup=eg.id)
if i > 0 and (i % group_size) == 0:
- self.api.accept_editgroup(eg)
+ self.api.accept_editgroup(eg.id)
eg = self.api.create_editgroup(
fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
self.processed_lines = self.processed_lines + 1
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index c799dcd3..ce232e31 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -7,6 +7,7 @@ from fatcat.crossref_importer import FatcatCrossrefImporter
from fatcat.orcid_importer import FatcatOrcidImporter
from fatcat.issn_importer import FatcatIssnImporter
from fatcat.matched_importer import FatcatMatchedImporter
+from fatcat.grobid_metadata_importer import FatcatGrobidMetadataImporter
def run_import_crossref(args):
fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file,
@@ -30,6 +31,11 @@ def run_import_matched(args):
fmi.process_batch(args.json_file, size=args.batch_size)
fmi.describe_run()
+def run_import_grobid_metadata(args):
+ fmi = FatcatGrobidMetadataImporter(args.host_url)
+ fmi.process_source(args.tsv_file, group_size=args.group_size)
+ fmi.describe_run()
+
def health(args):
rfac = RawFatcatApiClient(args.host_url)
print(rfac.health())
@@ -92,6 +98,15 @@ def main():
help="size of batch to send",
default=50, type=int)
+ sub_import_grobid_metadata = subparsers.add_parser('import-grobid-metadata')
+ sub_import_grobid_metadata.set_defaults(func=run_import_grobid_metadata)
+ sub_import_grobid_metadata.add_argument('tsv_file',
+ help="TSV file to import from (or stdin)",
+ default=sys.stdin, type=argparse.FileType('r'))
+ sub_import_grobid_metadata.add_argument('--group-size',
+ help="editgroup group size to use",
+ default=75, type=int)
+
sub_health = subparsers.add_parser('health')
sub_health.set_defaults(func=health)