aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/arxiv.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2021-11-09 18:13:15 -0800
committerBryan Newbold <bnewbold@robocracy.org>2021-11-09 18:49:46 -0800
commit1024e688bb12d64648ceb638daf049d508f87561 (patch)
tree8995645d370ac89d105a1cd7b1574fcba3bf5a3a /python/fatcat_tools/importers/arxiv.py
parent2fd90ad2cc561fa743a617315824b2744f737575 (diff)
downloadfatcat-1024e688bb12d64648ceb638daf049d508f87561.tar.gz
fatcat-1024e688bb12d64648ceb638daf049d508f87561.zip
importers: use clean_doi() in many more (all?) importers
Diffstat (limited to 'python/fatcat_tools/importers/arxiv.py')
-rw-r--r--python/fatcat_tools/importers/arxiv.py6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index 1d50dd9a..dd2c2284 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -9,6 +9,8 @@ from bs4 import BeautifulSoup
from fatcat_openapi_client import ApiClient, ReleaseEntity
from pylatexenc.latex2text import LatexNodes2Text
+from fatcat_tools.normal import clean_doi
+
from .common import EntityImporter
from .crossref import lookup_license_slug
@@ -127,8 +129,8 @@ class ArxivRawImporter(EntityImporter):
base_id = metadata.id.string
doi = None
if metadata.doi and metadata.doi.string:
- doi = metadata.doi.string.lower().split()[0].strip()
- if not (doi.startswith("10.") and "/" in doi and doi.split("/")[1]):
+ doi = clean_doi(metadata.doi.string.lower().split()[0].strip())
+ if doi and not (doi.startswith("10.") and "/" in doi and doi.split("/")[1]):
sys.stderr.write("BOGUS DOI: {}\n".format(doi))
doi = None
title = latex_to_text(metadata.title.get_text().replace("\n", " "))