From 9fbab0defca5016c0eb5b851ff73e03efac4fac8 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 May 2019 11:44:10 -0700 Subject: set superceded flag on 'old' arxiv releases --- python/fatcat_tools/importers/arxiv.py | 5 ++++- python/tests/import_arxiv.py | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index 182d0471..71b2d134 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -198,6 +198,7 @@ class ArxivRawImporter(EntityImporter): # group-title # arxiv: comments, categories, etc extra_arxiv['base_id'] = base_id + extra['superceded'] = True extra['arxiv'] = extra_arxiv versions = [] @@ -223,11 +224,13 @@ class ArxivRawImporter(EntityImporter): license_slug=license_slug, abstracts=abstracts, contribs=contribs, - extra=extra, + extra=extra.copy(), )) # TODO: assert that versions are actually in order? assert versions + versions[-1].extra.pop('superceded') + # only apply DOI to most recent version (HACK) if doi: versions[-1].ext_ids.doi = doi diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py index bbad8fa6..1e649616 100644 --- a/python/tests/import_arxiv.py +++ b/python/tests/import_arxiv.py @@ -71,6 +71,7 @@ def test_arxiv_xml_parse(arxiv_importer): assert len(r1.contribs) == 4 assert r1.extra['arxiv']['categories'] == ['cond-mat.stat-mech', 'physics.bio-ph', 'physics.data-an'] assert r1.extra['arxiv']['base_id'] == '1810.09584' + assert r1.extra['superceded'] == True assert r1.contribs[0].raw_name == "Raphael Chetrite" assert r1.contribs[0].role == "author" @@ -92,6 +93,8 @@ def test_arxiv_xml_parse(arxiv_importer): assert r1.extra['arxiv']['comments'] == "7 pages, 2 figures" assert r1.extra['arxiv']['categories'] == ["cond-mat.stat-mech", "physics.bio-ph", "physics.data-an"] + assert not r2.extra.get('superceded') + r2.extra['superceded'] = True assert r1.extra == r2.extra assert not r1.refs -- cgit v1.2.3