diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-13 15:53:32 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-13 16:21:31 -0700 |
commit | 2d755c83895271ad214dcefc234bf7da36e572e3 (patch) | |
tree | 556f025b19d9178e5b07fc6ea535d9b73b013645 /python | |
parent | a2799486ed1c43b95bef036375023d225c482bab (diff) | |
download | fatcat-2d755c83895271ad214dcefc234bf7da36e572e3.tar.gz fatcat-2d755c83895271ad214dcefc234bf7da36e572e3.zip |
dblp import: basic support for handles as identifiers
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/importers/dblp_release.py | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index c64e8b09..670f190b 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -30,7 +30,7 @@ from typing import List, Optional, Any
 import fatcat_openapi_client
 
 from fatcat_tools.normal import (clean_doi, clean_str, parse_month,
-    clean_orcid,
+    clean_orcid, clean_hdl,
     clean_arxiv_id, clean_wikidata_qid, clean_isbn13)
 from fatcat_tools.importers.common import EntityImporter
 from fatcat_tools.transforms import entity_to_dict
@@ -483,6 +483,7 @@ class DblpReleaseImporter(EntityImporter):
         doi: Optional[str] = None
         wikidata_qid: Optional[str] = None
         arxiv_id: Optional[str] = None
+        hdl: Optional[str] = None
         for ee in xml_elem.find_all('ee'):
             url = ee.text
             # convert DOI-like domains, which mostly have DOIs anyways
@@ -498,12 +499,15 @@ class DblpReleaseImporter(EntityImporter):
             elif '://arxiv.org/abs/' in url and not arxiv_id:
                 arxiv_id = url.replace('http://', '').replace('https://', '').replace('arxiv.org/abs/', '')
                 arxiv_id = clean_arxiv_id(arxiv_id)
+            elif '://hdl.handle.net' in url and not hdl:
+                hdl = clean_hdl(url)
 
         return fatcat_openapi_client.ReleaseExtIds(
             dblp=dblp_key,
             doi=doi,
             wikidata_qid=wikidata_qid,
             arxiv=arxiv_id,
+            hdl=hdl,
         )
 
     def dblp_ext_urls(self, xml_elem: Any) -> List[str]: