about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_ingest.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-11-06 19:16:31 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-11-06 19:16:31 -0800
commit: b1b34d44ce1a416ee70be665b71b99ba9f98d9a3 (patch)
tree: 7ecd5b8478a12aad48b26517360bde2a54e9c977 /python/fatcat_ingest.py
parent: f32ff2bd5ab1dba1dc3108b75b28ce4090d9c00f (diff)
download: fatcat-b1b34d44ce1a416ee70be665b71b99ba9f98d9a3.tar.gz
download: fatcat-b1b34d44ce1a416ee70be665b71b99ba9f98d9a3.zip
ingest tool: support for setting ingest type
Diffstat (limited to 'python/fatcat_ingest.py')
-rwxr-xr-x python/fatcat_ingest.py 4
1 files changed, 4 insertions, 0 deletions
diff --git a/python/fatcat_ingest.py b/python/fatcat_ingest.py
index 68676ad2..b9d71a7c 100755
--- a/python/fatcat_ingest.py
+++ b/python/fatcat_ingest.py
@@ -87,6 +87,7 @@ def _run_search_dump(args, search):
ingest_request = release_ingest_request(
release,
ingest_request_source="fatcat-ingest",
+ ingest_type=args.ingest_type,
)
if not ingest_request:
continue
@@ -214,6 +215,9 @@ def main():
parser.add_argument('--force-recrawl',
action='store_true',
help="Tell ingest worker to skip GWB history lookup and do SPNv2 crawl")
+ parser.add_argument('--ingest-type',
+ default="pdf",
+ help="What medium to ingest (pdf, xml, html)")
subparsers = parser.add_subparsers()
sub_container = subparsers.add_parser('container',