summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-10-13 15:53:32 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-10-13 16:21:31 -0700
commit: 2d755c83895271ad214dcefc234bf7da36e572e3 (patch)
tree: 556f025b19d9178e5b07fc6ea535d9b73b013645
parent: a2799486ed1c43b95bef036375023d225c482bab (diff)
download: fatcat-2d755c83895271ad214dcefc234bf7da36e572e3.tar.gz
download: fatcat-2d755c83895271ad214dcefc234bf7da36e572e3.zip
dblp import: basic support for handles as identifiers
-rw-r--r-- python/fatcat_tools/importers/dblp_release.py 6
1 files changed, 5 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index c64e8b09..670f190b 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -30,7 +30,7 @@ from typing import List, Optional, Any
import fatcat_openapi_client
from fatcat_tools.normal import (clean_doi, clean_str, parse_month,
- clean_orcid,
+ clean_orcid, clean_hdl,
clean_arxiv_id, clean_wikidata_qid, clean_isbn13)
from fatcat_tools.importers.common import EntityImporter
from fatcat_tools.transforms import entity_to_dict
@@ -483,6 +483,7 @@ class DblpReleaseImporter(EntityImporter):
doi: Optional[str] = None
wikidata_qid: Optional[str] = None
arxiv_id: Optional[str] = None
+ hdl: Optional[str] = None
for ee in xml_elem.find_all('ee'):
url = ee.text
# convert DOI-like domains, which mostly have DOIs anyways
@@ -498,12 +499,15 @@ class DblpReleaseImporter(EntityImporter):
elif '://arxiv.org/abs/' in url and not arxiv_id:
arxiv_id = url.replace('http://', '').replace('https://', '').replace('arxiv.org/abs/', '')
arxiv_id = clean_arxiv_id(arxiv_id)
+ elif '://hdl.handle.net' in url and not hdl:
+ hdl = clean_hdl(url)
return fatcat_openapi_client.ReleaseExtIds(
dblp=dblp_key,
doi=doi,
wikidata_qid=wikidata_qid,
arxiv=arxiv_id,
+ hdl=hdl,
)
def dblp_ext_urls(self, xml_elem: Any) -> List[str]: