summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-12-24 12:00:01 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-12-24 12:00:01 -0800
commit: cb9b2107fbfa9cd3a9111851a371a20ab0e08bdf (patch)
tree: de8e1b50d7e0fe41f14c4b51dace6a30d04cdeb4
parent: 42bc443ccedbbf42510f06b1d5531c06f2b15bed (diff)
download: fatcat-cb9b2107fbfa9cd3a9111851a371a20ab0e08bdf.tar.gz
          fatcat-cb9b2107fbfa9cd3a9111851a371a20ab0e08bdf.zip
dblp release import: skip arxiv_id releases
-rw-r--r-- python/fatcat_tools/importers/dblp_release.py 9
1 files changed, 9 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index 5aea4429..daecd765 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -323,6 +323,15 @@ class DblpReleaseImporter(EntityImporter):
if err.status != 404:
raise err
+ # Just skip all releases with an arxiv_id for now. Have not decided
+ # what to do about grouping works and lookup of un-versioned arxiv_id
+ # yet. Note that this means we will lack coverage of some works which
+ # have an arxiv preprint, but in those cases we will presumably at
+ # least have the pre-print copy/record.
+ if re.ext_ids.arxiv:
+ self.counts["skip-arxiv"] += 1
+ return False
+
# then try other ext_id lookups
if not existing:
for extid_type in ('doi', 'wikidata_qid', 'isbn13', 'arxiv'):