From cb9b2107fbfa9cd3a9111851a371a20ab0e08bdf Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 24 Dec 2020 12:00:01 -0800 Subject: dblp release import: skip arxiv_id releases --- python/fatcat_tools/importers/dblp_release.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py index 5aea4429..daecd765 100644 --- a/python/fatcat_tools/importers/dblp_release.py +++ b/python/fatcat_tools/importers/dblp_release.py @@ -323,6 +323,15 @@ class DblpReleaseImporter(EntityImporter): if err.status != 404: raise err + # Just skip all releases with an arxiv_id for now. Have not decided + # what to do about grouping works and lookup of un-versioned arxiv_id + # yet. Note that this means we will lack coverage of some works which + # have an arxiv preprint, but in those cases we will presumably at + # least have the pre-print copy/record. + if re.ext_ids.arxiv: + self.counts["skip-arxiv"] += 1 + return False + # then try other ext_id lookups if not existing: for extid_type in ('doi', 'wikidata_qid', 'isbn13', 'arxiv'): -- cgit v1.2.3