diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 19:12:05 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 19:12:05 -0700 |
commit | 02b3a659a951a6611eed59ad5078d5889119bedd (patch) | |
tree | aebe58504753af07cfe9ba9c5d9de54907077d21 /python/fatcat_tools | |
parent | f3f72f11d1cddbb05455066ff58e05f115d042d8 (diff) | |
download | fatcat-02b3a659a951a6611eed59ad5078d5889119bedd.tar.gz fatcat-02b3a659a951a6611eed59ad5078d5889119bedd.zip |
better JALC DOI de-mangling
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/jalc.py | 11 |
1 files changed, 10 insertions, 1 deletions
```diff
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index 1e1c9032..24da60ea 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -90,10 +90,19 @@ class JalcImporter(EntityImporter):
         doi = None
         if record.doi:
             doi = record.doi.string.lower().strip()
+            if doi.startswith('http://dx.doi.org/'):
+                doi = doi.replace('http://dx.doi.org/', '')
+            elif doi.startswith('https://dx.doi.org/'):
+                doi = doi.replace('https://dx.doi.org/', '')
+            elif doi.startswith('http://doi.org/'):
+                doi = doi.replace('http://doi.org/', '')
+            elif doi.startswith('https://doi.org/'):
+                doi = doi.replace('https://doi.org/', '')
             if not doi.startswith('10.'):
                 sys.stderr.write("bogus JALC DOI: {}\n".format(doi))
                 doi = None
-                return None
+        if not doi:
+            return None
 
         contribs = []
         people = record.find_all("Person")
```