summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers/grobid_metadata.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-11-09 18:13:15 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-11-09 18:49:46 -0800
commit: 1024e688bb12d64648ceb638daf049d508f87561 (patch)
tree: 8995645d370ac89d105a1cd7b1574fcba3bf5a3a /python/fatcat_tools/importers/grobid_metadata.py
parent: 2fd90ad2cc561fa743a617315824b2744f737575 (diff)
download: fatcat-1024e688bb12d64648ceb638daf049d508f87561.tar.gz
          fatcat-1024e688bb12d64648ceb638daf049d508f87561.zip
importers: use clean_doi() in many more (all?) importers
Diffstat (limited to 'python/fatcat_tools/importers/grobid_metadata.py')
-rw-r--r--  python/fatcat_tools/importers/grobid_metadata.py  9
1 files changed, 6 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index e36e1b48..7c595787 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -7,6 +7,8 @@ from typing import Any, Dict, List, Optional
import fatcat_openapi_client
from fatcat_openapi_client import ApiClient, FileEntity, ReleaseEntity
+from fatcat_tools.normal import clean_doi
+
from .common import EntityImporter, clean, make_rel_url
MAX_ABSTRACT_BYTES = 4096
@@ -133,9 +135,10 @@ class GrobidMetadataImporter(EntityImporter):
# only returns year, ever?
release_year = int(obj["date"][:4])
- extra = dict()
- if obj.get("doi"):
- extra["doi"] = obj["doi"]
+ extra: Dict[str, Any] = dict()
+ doi = clean_doi(obj.get("doi"))
+ if doi:
+ extra["doi"] = doi
if obj["journal"] and obj["journal"].get("name"):
extra["container_name"] = clean(obj["journal"]["name"])