diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 14:57:03 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 14:57:03 -0700 |
commit | d14715279f03fae6cf0fe1998d7845e3a245e86e (patch) | |
tree | 1baeedf60e3ed5fcf2eaca50334fa10f7ca9a53b | |
parent | f651f171499d011a83a247eefb23c4e8a789b1c8 (diff) | |
download | sandcrawler-d14715279f03fae6cf0fe1998d7845e3a245e86e.tar.gz sandcrawler-d14715279f03fae6cf0fe1998d7845e3a245e86e.zip |
ensure pdf_meta isn't passed an empty dict()
-rw-r--r-- | python/sandcrawler/pdfextract.py | 5 |
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 5ef5dfd..301754a 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -87,6 +87,9 @@ class PdfExtractResult:
                     metadata[k.lower()] = self.pdf_info[k]
             if 'CreationDate' in self.pdf_info:
                 pdf_created = self.pdf_info['CreationDate']
+        metadata_json: Optional[str] = None
+        if metadata:
+            metadata_json = json.dumps(metadata, sort_keys=True)
         return (
             self.sha1hex,
             datetime.datetime.now(), # updated
@@ -99,7 +102,7 @@ class PdfExtractResult:
             pdf_extra.get('permanent_id'),
             pdf_created,
             pdf_extra.get('pdf_version'),
-            metadata and json.dumps(metadata, sort_keys=True),
+            metadata_json,
         )
 
 
```