From 1024e688bb12d64648ceb638daf049d508f87561 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 9 Nov 2021 18:13:15 -0800 Subject: importers: use clean_doi() in many more (all?) importers --- python/fatcat_tools/importers/grobid_metadata.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'python/fatcat_tools/importers/grobid_metadata.py') diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index e36e1b48..7c595787 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -7,6 +7,8 @@ from typing import Any, Dict, List, Optional import fatcat_openapi_client from fatcat_openapi_client import ApiClient, FileEntity, ReleaseEntity +from fatcat_tools.normal import clean_doi + from .common import EntityImporter, clean, make_rel_url MAX_ABSTRACT_BYTES = 4096 @@ -133,9 +135,10 @@ class GrobidMetadataImporter(EntityImporter): # only returns year, ever? release_year = int(obj["date"][:4]) - extra = dict() - if obj.get("doi"): - extra["doi"] = obj["doi"] + extra: Dict[str, Any] = dict() + doi = clean_doi(obj.get("doi")) + if doi: + extra["doi"] = doi if obj["journal"] and obj["journal"].get("name"): extra["container_name"] = clean(obj["journal"]["name"]) -- cgit v1.2.3