From 5927bee04145a8b398172a17582ed312206f7914 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 22 May 2019 09:58:41 -0700 Subject: better JALC and arxiv DOI checks --- python/fatcat_tools/importers/arxiv.py | 4 +++- python/fatcat_tools/importers/jalc.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'python') diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index cbe66d8c..c8133b30 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -88,7 +88,9 @@ class ArxivRawImporter(EntityImporter): doi = None if metadata.doi and metadata.doi.string: doi = metadata.doi.string.lower().strip() - assert doi.startswith('10.') + if not (doi.startswith('10.') and '/' in doi): + sys.stderr.write("BOGUS DOI: {}\n".format(doi)) + doi = None title = latex_to_text(metadata.title.string) authors = parse_arxiv_authors(metadata.authors.string) contribs = [fatcat_client.ReleaseContrib(raw_name=a, role='author') for a in authors] diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index 8dd4f3e5..fa3c108d 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -98,7 +98,7 @@ class JalcImporter(EntityImporter): doi = doi.replace('http://doi.org/', '') elif doi.startswith('https://doi.org/'): doi = doi.replace('https://doi.org/', '') - if not doi.startswith('10.'): + if not (doi.startswith('10.') and '/' in doi): sys.stderr.write("bogus JALC DOI: {}\n".format(doi)) doi = None if not doi: -- cgit v1.2.3