From d14715279f03fae6cf0fe1998d7845e3a245e86e Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 25 Jun 2020 14:57:03 -0700 Subject: ensure pdf_meta isn't passed an empty dict() --- python/sandcrawler/pdfextract.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py index 5ef5dfd..301754a 100644 --- a/python/sandcrawler/pdfextract.py +++ b/python/sandcrawler/pdfextract.py @@ -87,6 +87,9 @@ class PdfExtractResult: metadata[k.lower()] = self.pdf_info[k] if 'CreationDate' in self.pdf_info: pdf_created = self.pdf_info['CreationDate'] + metadata_json: Optional[str] = None + if metadata: + metadata_json = json.dumps(metadata, sort_keys=True) return ( self.sha1hex, datetime.datetime.now(), # updated @@ -99,7 +102,7 @@ class PdfExtractResult: pdf_extra.get('permanent_id'), pdf_created, pdf_extra.get('pdf_version'), - metadata and json.dumps(metadata, sort_keys=True), + metadata_json, ) -- cgit v1.2.3