summary refs log tree commit diff stats
path: root/fatcat_scholar/grobid2json.py
diff options
context:
space:
mode:
Diffstat (limited to 'fatcat_scholar/grobid2json.py')
-rwxr-xr-x fatcat_scholar/grobid2json.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/fatcat_scholar/grobid2json.py b/fatcat_scholar/grobid2json.py
index 898275b..4c8543a 100755
--- a/fatcat_scholar/grobid2json.py
+++ b/fatcat_scholar/grobid2json.py
@@ -121,6 +121,8 @@ def biblio_info(elem: ET.Element) -> Dict[str, Any]:
ref["issue"] = elem.findtext('.//{%s}biblScope[@unit="issue"]' % ns)
ref["doi"] = elem.findtext('.//{%s}idno[@type="DOI"]' % ns)
ref["arxiv_id"] = elem.findtext('.//{%s}idno[@type="arXiv"]' % ns)
+ if ref["arxiv_id"] and ref["arxiv_id"].startswith("arXiv:"):
+ ref["arxiv_id"] = ref["arxiv_id"][6:]
ref["pmcid"] = elem.findtext('.//{%s}idno[@type="PMCID"]' % ns)
ref["pmid"] = elem.findtext('.//{%s}idno[@type="PMID"]' % ns)
el = elem.find('.//{%s}biblScope[@unit="page"]' % ns)