diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-30 16:56:18 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-30 16:56:18 -0800 | 
| commit | b1efd59c2cad275d126a1bde67c11430d71878db (patch) | |
| tree | 819e114ca19bc77c1284db90ad5bc7ae84268ad0 /python | |
| parent | 1177dafb9b185c7b749ff95ded1a0720792fbb5e (diff) | |
| download | fatcat-b1efd59c2cad275d126a1bde67c11430d71878db.tar.gz fatcat-b1efd59c2cad275d126a1bde67c11430d71878db.zip | |
chocula importer: handle broken ISSNs in extra metadata
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/chocula.py | 9 | 
1 file changed, 7 insertions, 2 deletions
| diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py index c2f2199d..a9682a17 100644 --- a/python/fatcat_tools/importers/chocula.py +++ b/python/fatcat_tools/importers/chocula.py @@ -146,9 +146,14 @@ class ChoculaImporter(EntityImporter):              existing.publication_status = existing.publication_status or ce.publication_status              # move issne/issnp from "extra" to top-level fields (new schema)              if not existing.issne: -                existing.issne = existing.extra.pop("issne", None) +                tmp_issn = existing.extra.pop("issne", None) +                # clean up bad ISSNs in extra metadata +                if tmp_issn and len(tmp_issn) == 9: +                    existing.issne = tmp_issn              if not existing.issnp: -                existing.issnp = existing.extra.pop("issnp", None) +                tmp_issn = existing.extra.pop("issnp", None) +                if tmp_issn and len(tmp_issn) == 9: +                    existing.issnp = tmp_issn              existing.issne = existing.issne or ce.issne              existing.issnp = existing.issnp or ce.issnp              for k in ("urls", "webarchive_urls"): | 
