about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/arxiv.py | 5
-rw-r--r-- python/fatcat_tools/transforms/elasticsearch.py | 1
2 files changed, 5 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index 182d0471..71b2d134 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -198,6 +198,7 @@ class ArxivRawImporter(EntityImporter):
# group-title
# arxiv: comments, categories, etc
extra_arxiv['base_id'] = base_id
+ extra['superceded'] = True
extra['arxiv'] = extra_arxiv
versions = []
@@ -223,11 +224,13 @@ class ArxivRawImporter(EntityImporter):
license_slug=license_slug,
abstracts=abstracts,
contribs=contribs,
- extra=extra,
+ extra=extra.copy(),
))
# TODO: assert that versions are actually in order?
assert versions
+ versions[-1].extra.pop('superceded')
+
# only apply DOI to most recent version (HACK)
if doi:
versions[-1].ext_ids.doi = doi
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index f287fe10..8589d364 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -292,6 +292,7 @@ def container_to_elasticsearch(entity, force_bool=True):
if extra.get('ia'):
if extra['ia'].get('sim'):
any_ia_sim = True
+ t['is_superceded'] = bool(extra.get('superceded'))
t['in_doaj'] = bool(in_doaj)
t['in_road'] = bool(in_road)