diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-30 16:56:18 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-30 16:56:18 -0800 |
commit | b1efd59c2cad275d126a1bde67c11430d71878db (patch) | |
tree | 819e114ca19bc77c1284db90ad5bc7ae84268ad0 /python/fatcat_tools/importers | |
parent | 1177dafb9b185c7b749ff95ded1a0720792fbb5e (diff) | |
download | fatcat-b1efd59c2cad275d126a1bde67c11430d71878db.tar.gz fatcat-b1efd59c2cad275d126a1bde67c11430d71878db.zip |
chocula importer: handle broken ISSNs in extra metadata
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/chocula.py | 9 |
1 files changed, 7 insertions, 2 deletions
```diff
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index c2f2199d..a9682a17 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -146,9 +146,14 @@ class ChoculaImporter(EntityImporter):
             existing.publication_status = existing.publication_status or ce.publication_status
             # move issne/issnp from "extra" to top-level fields (new schema)
             if not existing.issne:
-                existing.issne = existing.extra.pop("issne", None)
+                tmp_issn = existing.extra.pop("issne", None)
+                # clean up bad ISSNs in extra metadata
+                if tmp_issn and len(tmp_issn) == 9:
+                    existing.issne = tmp_issn
             if not existing.issnp:
-                existing.issnp = existing.extra.pop("issnp", None)
+                tmp_issn = existing.extra.pop("issnp", None)
+                if tmp_issn and len(tmp_issn) == 9:
+                    existing.issnp = tmp_issn
             existing.issne = existing.issne or ce.issne
             existing.issnp = existing.issnp or ce.issnp
             for k in ("urls", "webarchive_urls"):
```