diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-24 14:03:07 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-24 14:03:07 -0800 |
commit | 7222131f172ef26eebf964e8b17b024d7ccebb24 (patch) | |
tree | 8d7c6f1d0de941dc4201c1ff56602045662c0de2 | |
parent | 19bbc2f1959f19b8ec8e7f0609442d67509e49a8 (diff) | |
download | fatcat-7222131f172ef26eebf964e8b17b024d7ccebb24.tar.gz fatcat-7222131f172ef26eebf964e8b17b024d7ccebb24.zip |
make chocula URL updates more conservative
-rw-r--r-- | python/fatcat_tools/importers/chocula.py | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index 6915ba98..eea50314 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -128,15 +128,15 @@ class ChoculaImporter(EntityImporter):
             existing.publisher = ce.publisher
             existing.container_type = existing.container_type or ce.container_type
             for k in ('urls', 'webarchive_urls'):
-                # update, or clobber/remove any existing values. often
-                # want/need to remove dead URLs
+                # update, which might clobber, but won't remove
                 if ce.extra.get(k):
                     existing.extra[k] = ce.extra.get(k, [])
-                elif k in existing.extra.keys():
-                    existing.extra.pop(k)
+                # note: in some cases we might *want* to clobber existing (if
+                # all URLs found to be bad), but being conservative for now so
+                # we don't clobber human edits
             for k in ('issne', 'issnp', 'country', 'sherpa_romeo', 'ezb',
                       'szczepanski', 'doaj'):
-                # update, but don't remove any existing value
+                # update/overwrite, but don't remove any existing value
                 if ce.extra.get(k):
                     existing.extra[k] = ce.extra[k]
             if ce.extra.get('languages'):