diff options
-rw-r--r-- | python/sandcrawler/pdfextract.py | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py index 5ef5dfd..301754a 100644 --- a/python/sandcrawler/pdfextract.py +++ b/python/sandcrawler/pdfextract.py @@ -87,6 +87,9 @@ class PdfExtractResult: metadata[k.lower()] = self.pdf_info[k] if 'CreationDate' in self.pdf_info: pdf_created = self.pdf_info['CreationDate'] + metadata_json: Optional[str] = None + if metadata: + metadata_json = json.dumps(metadata, sort_keys=True) return ( self.sha1hex, datetime.datetime.now(), # updated @@ -99,7 +102,7 @@ class PdfExtractResult: pdf_extra.get('permanent_id'), pdf_created, pdf_extra.get('pdf_version'), - metadata and json.dumps(metadata, sort_keys=True), + metadata_json, ) |