summary refs log tree commit diff stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 09:58:41 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 09:58:50 -0700
commit 5927bee04145a8b398172a17582ed312206f7914 (patch)
tree ad396e84f6cc3de0019b3c6c77268c109388293e /python/fatcat_tools/importers
parent f00c0a6c8217fe9a3872c3744cfdfa9a91285ab6 (diff)
download fatcat-5927bee04145a8b398172a17582ed312206f7914.tar.gz
fatcat-5927bee04145a8b398172a17582ed312206f7914.zip
better JALC and arxiv DOI checks
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/arxiv.py 4
-rw-r--r-- python/fatcat_tools/importers/jalc.py 2
2 files changed, 4 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index cbe66d8c..c8133b30 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -88,7 +88,9 @@ class ArxivRawImporter(EntityImporter):
doi = None
if metadata.doi and metadata.doi.string:
doi = metadata.doi.string.lower().strip()
- assert doi.startswith('10.')
+ if not (doi.startswith('10.') and '/' in doi:
+ sys.stderr.write("BOGUS DOI: {}\n".format(doi))
+ doi = None
title = latex_to_text(metadata.title.string)
authors = parse_arxiv_authors(metadata.authors.string)
contribs = [fatcat_client.ReleaseContrib(raw_name=a, role='author') for a in authors]
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index 8dd4f3e5..fa3c108d 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -98,7 +98,7 @@ class JalcImporter(EntityImporter):
doi = doi.replace('http://doi.org/', '')
elif doi.startswith('https://doi.org/'):
doi = doi.replace('https://doi.org/', '')
- if not doi.startswith('10.'):
+ if not (doi.startswith('10.') and '/' in doi):
sys.stderr.write("bogus JALC DOI: {}\n".format(doi))
doi = None
if not doi: