aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-09-17 19:16:24 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-09-17 19:16:24 -0700
commit 351ab23bea9a7a95a9be2b18f65ef51564222a8c (patch)
tree 3bb2814dc72458c022c7bc25b72e78ece39a6baa /python/sandcrawler
parent d3f2b9f75da41d8dd2ff712e34a83472e259c3af (diff)
download sandcrawler-351ab23bea9a7a95a9be2b18f65ef51564222a8c.tar.gz
sandcrawler-351ab23bea9a7a95a9be2b18f65ef51564222a8c.zip
ingest: small bugfix to print pdfextract status on SUCCESS
Diffstat (limited to 'python/sandcrawler')
-rw-r--r-- python/sandcrawler/ingest.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index e8b3551..e8e517a 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -499,7 +499,7 @@ class IngestFileWorker(SandcrawlerWorker):
print("[SUCCESS\t] sha1:{} grobid:{} pdfextract:{}".format(
result.get('file_meta', {}).get('sha1hex'),
result.get('grobid', {}).get('status_code'),
- result.get('pdfextract', {}).get('status'),
+ result.get('pdf_meta', {}).get('status'),
),
file=sys.stderr)
return result