summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-23 11:44:10 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-23 11:44:10 -0700
commit: 9fbab0defca5016c0eb5b851ff73e03efac4fac8 (patch)
tree: 1666205d851bb095bb882349471fad41227e623b
parent: 475856250e598216366ed1982e523e883b652139 (diff)
download: fatcat-9fbab0defca5016c0eb5b851ff73e03efac4fac8.tar.gz
download: fatcat-9fbab0defca5016c0eb5b851ff73e03efac4fac8.zip
set superceded flag on 'old' arxiv releases
-rw-r--r-- python/fatcat_tools/importers/arxiv.py | 5
-rw-r--r-- python/tests/import_arxiv.py | 3
2 files changed, 7 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index 182d0471..71b2d134 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -198,6 +198,7 @@ class ArxivRawImporter(EntityImporter):
# group-title
# arxiv: comments, categories, etc
extra_arxiv['base_id'] = base_id
+ extra['superceded'] = True
extra['arxiv'] = extra_arxiv
versions = []
@@ -223,11 +224,13 @@ class ArxivRawImporter(EntityImporter):
license_slug=license_slug,
abstracts=abstracts,
contribs=contribs,
- extra=extra,
+ extra=extra.copy(),
))
# TODO: assert that versions are actually in order?
assert versions
+ versions[-1].extra.pop('superceded')
+
# only apply DOI to most recent version (HACK)
if doi:
versions[-1].ext_ids.doi = doi
diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py
index bbad8fa6..1e649616 100644
--- a/python/tests/import_arxiv.py
+++ b/python/tests/import_arxiv.py
@@ -71,6 +71,7 @@ def test_arxiv_xml_parse(arxiv_importer):
assert len(r1.contribs) == 4
assert r1.extra['arxiv']['categories'] == ['cond-mat.stat-mech', 'physics.bio-ph', 'physics.data-an']
assert r1.extra['arxiv']['base_id'] == '1810.09584'
+ assert r1.extra['superceded'] == True
assert r1.contribs[0].raw_name == "Raphael Chetrite"
assert r1.contribs[0].role == "author"
@@ -92,6 +93,8 @@ def test_arxiv_xml_parse(arxiv_importer):
assert r1.extra['arxiv']['comments'] == "7 pages, 2 figures"
assert r1.extra['arxiv']['categories'] == ["cond-mat.stat-mech", "physics.bio-ph", "physics.data-an"]
+ assert not r2.extra.get('superceded')
+ r2.extra['superceded'] = True
assert r1.extra == r2.extra
assert not r1.refs