From b1efd59c2cad275d126a1bde67c11430d71878db Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 30 Nov 2021 16:56:18 -0800 Subject: chocula importer: handle broken ISSNs in extra metadata --- python/fatcat_tools/importers/chocula.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py index c2f2199d..a9682a17 100644 --- a/python/fatcat_tools/importers/chocula.py +++ b/python/fatcat_tools/importers/chocula.py @@ -146,9 +146,14 @@ class ChoculaImporter(EntityImporter): existing.publication_status = existing.publication_status or ce.publication_status # move issne/issnp from "extra" to top-level fields (new schema) if not existing.issne: - existing.issne = existing.extra.pop("issne", None) + tmp_issn = existing.extra.pop("issne", None) + # clean up bad ISSNs in extra metadata + if tmp_issn and len(tmp_issn) == 9: + existing.issne = tmp_issn if not existing.issnp: - existing.issnp = existing.extra.pop("issnp", None) + tmp_issn = existing.extra.pop("issnp", None) + if tmp_issn and len(tmp_issn) == 9: + existing.issnp = tmp_issn existing.issne = existing.issne or ce.issne existing.issnp = existing.issnp or ce.issnp for k in ("urls", "webarchive_urls"): -- cgit v1.2.3