diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-02-18 19:02:48 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-02-18 19:04:30 -0800 |
commit | 3f8ead3d07bed78c750b9f6a8b7e95ebffeff089 (patch) | |
tree | 319d88e3b6fed2004b168217cfffa7d76857f901 /python/sandcrawler/pdftrio.py | |
parent | 4cf5345040b4e8a5d77ca3ceb0f7ea4f8c5778dc (diff) | |
download | sandcrawler-3f8ead3d07bed78c750b9f6a8b7e95ebffeff089.tar.gz sandcrawler-3f8ead3d07bed78c750b9f6a8b7e95ebffeff089.zip |
unpaywall2ingestrequest transform script
Diffstat (limited to 'python/sandcrawler/pdftrio.py')
-rw-r--r-- | python/sandcrawler/pdftrio.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/python/sandcrawler/pdftrio.py b/python/sandcrawler/pdftrio.py
index 7a2e53c..12be9eb 100644
--- a/python/sandcrawler/pdftrio.py
+++ b/python/sandcrawler/pdftrio.py
@@ -180,7 +180,7 @@ class PdfTrioBlobWorker(SandcrawlerWorker):
         result = dict()
         result['file_meta'] = gen_file_metadata(blob)
         result['key'] = result['file_meta']['sha1hex']
-        result['pdf_trio'] = self.pdftrio_client.classify_pdf(blob, mode=mode)
+        result['pdf_trio'] = self.pdftrio_client.classify_pdf(blob, mode=self.mode)
         result['timing'] = dict(
             pdftrio_sec=result['pdf_trio'].pop('_total_sec', None),
             total_sec=time.time() - start_process,
```