diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-08-14 16:05:55 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-08-14 16:05:57 -0700 |
commit | 67c2dd909de3c5dada6efe8db2f59ed09e76d439 (patch) | |
tree | 144fdd899cdabdde86efa5ecc002f81228fcce2f /python/fatcat_tools/workers | |
parent | b2bb070a161b4e4b05ab51ab4ab7bae6d1290e4a (diff) | |
download | fatcat-67c2dd909de3c5dada6efe8db2f59ed09e76d439.tar.gz fatcat-67c2dd909de3c5dada6efe8db2f59ed09e76d439.zip |
entity updater: es['publisher_type'] not always set
This is a small bugfix for a production issue.
Diffstat (limited to 'python/fatcat_tools/workers')
-rw-r--r-- | python/fatcat_tools/workers/changelog.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 65a8fcd8..b0dca6eb 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -212,7 +212,7 @@ class EntityUpdatesWorker(FatcatWorker):
         # big publishers *generally* have accurate OA metadata, use
         # preservation networks, and block our crawlers. So unless OA, or
         # explicitly on accept list, or not preserved, skip crawling
-        if es['publisher_type'] == 'big5' and es['is_preserved'] and not (es['is_oa'] or in_acceptlist):
+        if es.get('publisher_type') == 'big5' and es.get('is_preserved') and not (es['is_oa'] or in_acceptlist):
             return False
 
         # if ingest_type is pdf but release_type is almost certainly not a PDF,
```