about summary refs log tree commit diff stats
path: root/python/persist_tool.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/persist_tool.py')
-rwxr-xr-x python/persist_tool.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/python/persist_tool.py b/python/persist_tool.py
index 7187719..80b1156 100755
--- a/python/persist_tool.py
+++ b/python/persist_tool.py
@@ -63,6 +63,17 @@ def run_grobid_disk(args):
)
pusher.run()
+def run_pdftrio(args):
+ worker = PersistPdfTrioWorker(
+ db_url=args.db_url,
+ )
+ pusher = JsonLinePusher(
+ worker,
+ args.json_file,
+ batch_size=100,
+ )
+ pusher.run()
+
def run_ingest_file_result(args):
worker = PersistIngestFileResultWorker(
db_url=args.db_url,
@@ -124,6 +135,13 @@ def main():
help="base directory to output into",
type=str)
+ sub_pdftrio = subparsers.add_parser('pdftrio',
+ help="backfill a pdftrio JSON ('pg') dump into postgresql and s3 (minio)")
+ sub_pdftrio.set_defaults(func=run_pdftrio)
+ sub_pdftrio.add_argument('json_file',
+ help="pdftrio file to import from (or '-' for stdin)",
+ type=argparse.FileType('r'))
+
sub_ingest_file_result = subparsers.add_parser('ingest-file-result',
help="backfill a ingest_file_result JSON dump into postgresql")
sub_ingest_file_result.set_defaults(func=run_ingest_file_result)