diff options
| -rw-r--r-- | extra/elasticsearch/release_schema.json | 1 | ||||
| -rw-r--r-- | guide/src/entity_release.md | 10 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/arxiv.py | 5 | ||||
| -rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 1 | ||||
| -rw-r--r-- | python/tests/import_arxiv.py | 3 | 
5 files changed, 13 insertions, 7 deletions
| diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json index 57ff4eb9..85026060 100644 --- a/extra/elasticsearch/release_schema.json +++ b/extra/elasticsearch/release_schema.json @@ -78,6 +78,7 @@              "in_ia":                { "type": "boolean" },              "in_ia_sim":            { "type": "boolean" },              "in_shadows":           { "type": "boolean" }, +            "is_superceded":        { "type": "boolean" },              "author":         { "type": "alias", "path": "contrib_names" },              "journal":        { "type": "alias", "path": "container_name" }, diff --git a/guide/src/entity_release.md b/guide/src/entity_release.md index d9943d2a..872708a2 100644 --- a/guide/src/entity_release.md +++ b/guide/src/entity_release.md @@ -163,15 +163,13 @@ complete or correct in more obscure cases.  - `aliases` (array of strings) for additional titles this release might be    known by  - `container_name` (string) if not matched to a container entity -- `subtitle` (string)  - `group-title` (string) for releases within a collection/group  - `translation_of` (release identifier) if this release is a translation of    another (usually under the same work) -- `withdrawn_date` (string, ISO date format): if this release has been -  retracted (post-publication) or withdrawn (pre- or post-publication), this is -  the datetime of that event. Retractions also result in a `retraction` release -  under the same `work` entity. This is intended to migrate from "extra" to a -  full release entity field. +- `superceded` (boolean) if there is another release under the same work that +  should be referenced/indicated instead. Intended as a temporary hint until +  proper work-based search is implemented. As an example use, all arxiv release +  versions except for the most recent get this set.  
#### `release_type` Vocabulary diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index 182d0471..71b2d134 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -198,6 +198,7 @@ class ArxivRawImporter(EntityImporter):          #   group-title          #   arxiv: comments, categories, etc          extra_arxiv['base_id'] = base_id +        extra['superceded'] = True          extra['arxiv'] = extra_arxiv          versions = [] @@ -223,11 +224,13 @@ class ArxivRawImporter(EntityImporter):                  license_slug=license_slug,                  abstracts=abstracts,                  contribs=contribs, -                extra=extra, +                extra=extra.copy(),              ))          # TODO: assert that versions are actually in order?          assert versions +        versions[-1].extra.pop('superceded') +          # only apply DOI to most recent version (HACK)          if doi:              versions[-1].ext_ids.doi = doi diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index f287fe10..8589d364 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -292,6 +292,7 @@ def container_to_elasticsearch(entity, force_bool=True):      if extra.get('ia'):          if extra['ia'].get('sim'):              any_ia_sim = True +    t['is_superceded'] = bool(extra.get('superceded'))      t['in_doaj'] = bool(in_doaj)      t['in_road'] = bool(in_road) diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py index bbad8fa6..1e649616 100644 --- a/python/tests/import_arxiv.py +++ b/python/tests/import_arxiv.py @@ -71,6 +71,7 @@ def test_arxiv_xml_parse(arxiv_importer):      assert len(r1.contribs) == 4      assert r1.extra['arxiv']['categories'] == ['cond-mat.stat-mech', 'physics.bio-ph', 'physics.data-an']      assert r1.extra['arxiv']['base_id'] == 
'1810.09584' +    assert r1.extra['superceded'] == True      assert r1.contribs[0].raw_name == "Raphael Chetrite"      assert r1.contribs[0].role == "author" @@ -92,6 +93,8 @@ def test_arxiv_xml_parse(arxiv_importer):      assert r1.extra['arxiv']['comments'] == "7 pages, 2 figures"      assert r1.extra['arxiv']['categories'] == ["cond-mat.stat-mech", "physics.bio-ph", "physics.data-an"] +    assert not r2.extra.get('superceded') +    r2.extra['superceded'] = True      assert r1.extra == r2.extra      assert not r1.refs | 
