From 1024e688bb12d64648ceb638daf049d508f87561 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 9 Nov 2021 18:13:15 -0800
Subject: importers: use clean_doi() in many more (all?) importers

---
 python/fatcat_tools/importers/arxiv.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'python/fatcat_tools/importers/arxiv.py')

diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index 1d50dd9a..dd2c2284 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -9,6 +9,8 @@ from bs4 import BeautifulSoup
 from fatcat_openapi_client import ApiClient, ReleaseEntity
 from pylatexenc.latex2text import LatexNodes2Text
 
+from fatcat_tools.normal import clean_doi
+
 from .common import EntityImporter
 from .crossref import lookup_license_slug
 
@@ -127,8 +129,8 @@ class ArxivRawImporter(EntityImporter):
         base_id = metadata.id.string
         doi = None
         if metadata.doi and metadata.doi.string:
-            doi = metadata.doi.string.lower().split()[0].strip()
-            if not (doi.startswith("10.") and "/" in doi and doi.split("/")[1]):
+            doi = clean_doi(metadata.doi.string.lower().split()[0].strip())
+            if doi and not (doi.startswith("10.") and "/" in doi and doi.split("/")[1]):
                 sys.stderr.write("BOGUS DOI: {}\n".format(doi))
                 doi = None
         title = latex_to_text(metadata.title.get_text().replace("\n", " "))
-- 
cgit v1.2.3