diff options
-rwxr-xr-x | fatcat_scholar/grobid2json.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/fatcat_scholar/grobid2json.py b/fatcat_scholar/grobid2json.py index 898275b..4c8543a 100755 --- a/fatcat_scholar/grobid2json.py +++ b/fatcat_scholar/grobid2json.py @@ -121,6 +121,8 @@ def biblio_info(elem: ET.Element) -> Dict[str, Any]: ref["issue"] = elem.findtext('.//{%s}biblScope[@unit="issue"]' % ns) ref["doi"] = elem.findtext('.//{%s}idno[@type="DOI"]' % ns) ref["arxiv_id"] = elem.findtext('.//{%s}idno[@type="arXiv"]' % ns) + if ref["arxiv_id"] and ref["arxiv_id"].startswith("arXiv:"): + ref["arxiv_id"] = ref["arxiv_id"][6:] ref["pmcid"] = elem.findtext('.//{%s}idno[@type="PMCID"]' % ns) ref["pmid"] = elem.findtext('.//{%s}idno[@type="PMID"]' % ns) el = elem.find('.//{%s}biblScope[@unit="page"]' % ns) |