diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 09:58:41 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 09:58:50 -0700 |
commit | 5927bee04145a8b398172a17582ed312206f7914 (patch) | |
tree | ad396e84f6cc3de0019b3c6c77268c109388293e /python/fatcat_tools/importers | |
parent | f00c0a6c8217fe9a3872c3744cfdfa9a91285ab6 (diff) | |
download | fatcat-5927bee04145a8b398172a17582ed312206f7914.tar.gz fatcat-5927bee04145a8b398172a17582ed312206f7914.zip |
better JALC and arxiv DOI checks
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/arxiv.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/jalc.py | 2 |
2 files changed, 4 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index cbe66d8c..c8133b30 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -88,7 +88,9 @@ class ArxivRawImporter(EntityImporter): doi = None if metadata.doi and metadata.doi.string: doi = metadata.doi.string.lower().strip() - assert doi.startswith('10.') + if not (doi.startswith('10.') and '/' in doi): + sys.stderr.write("BOGUS DOI: {}\n".format(doi)) + doi = None title = latex_to_text(metadata.title.string) authors = parse_arxiv_authors(metadata.authors.string) contribs = [fatcat_client.ReleaseContrib(raw_name=a, role='author') for a in authors] diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index 8dd4f3e5..fa3c108d 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -98,7 +98,7 @@ class JalcImporter(EntityImporter): doi = doi.replace('http://doi.org/', '') elif doi.startswith('https://doi.org/'): doi = doi.replace('https://doi.org/', '') - if not doi.startswith('10.'): + if not (doi.startswith('10.') and '/' in doi): sys.stderr.write("bogus JALC DOI: {}\n".format(doi)) doi = None if not doi: |