diff options
author | bnewbold <bnewbold@archive.org> | 2020-07-24 02:56:13 +0000 |
---|---|---|
committer | bnewbold <bnewbold@archive.org> | 2020-07-24 02:56:13 +0000 |
commit | 2170e5ff46dfb2bba9bf26196de3c3774c16cd4a (patch) | |
tree | e78de45cbca526dfacf7ae9ada31152a790fa0a4 /python | |
parent | 6a87b3d2e1b315d35ccaa13457571d73afaf5e6b (diff) | |
parent | 3e5799e0bb3f0eb7b9f5d690692757cfbf400412 (diff) | |
download | fatcat-2170e5ff46dfb2bba9bf26196de3c3774c16cd4a.tar.gz fatcat-2170e5ff46dfb2bba9bf26196de3c3774c16cd4a.zip |
Merge branch 'bnewbold-preservation-year-offset' into 'master'
preservation year offset
See merge request webgroup/fatcat!67
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 9 | ||||
-rw-r--r-- | python/tests/transform_elasticsearch.py | 46 |
2 files changed, 55 insertions, 0 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 8ec9c164..2eb18fbf 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -1,4 +1,6 @@ +import datetime + import tldextract @@ -114,6 +116,7 @@ def release_to_elasticsearch(entity, force_bool=True): # TODO: mapping... probably by lookup? t['affiliation_rors'] = None + this_year = datetime.date.today().year container = release.container if container: t['publisher'] = container.publisher @@ -130,6 +133,12 @@ def release_to_elasticsearch(entity, force_bool=True): in_kbart = in_jstor for archive in ('portico', 'lockss', 'clockss'): in_kbart = in_kbart or check_kbart(release_year, c_extra['kbart'].get(archive)) + # recent KBART coverage is often not updated for the + # current year. So for current-year publications, consider + # coverage from *last* year to also be included in the + # Keeper + if not in_kbart and release_year == this_year: + in_kbart = check_kbart(this_year - 1, c_extra['kbart'].get(archive)) if c_extra.get('ia'): if c_extra['ia'].get('sim') and release_year: diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py index f791562c..0d96e139 100644 --- a/python/tests/transform_elasticsearch.py +++ b/python/tests/transform_elasticsearch.py @@ -1,5 +1,6 @@ import json +import datetime from fatcat_tools import * from fatcat_openapi_client import * @@ -137,3 +138,48 @@ def test_elasticsearch_changelog_transform(matched_importer): assert es['new_releases'] == 0 assert es['updated'] == 0 assert es['deleted'] == 0 + +def test_elasticsearch_release_kbart_year(): + this_year = datetime.date.today().year + r = ReleaseEntity( + title="something", + release_year=this_year, + license_slug="CC-BY-NC", + ext_ids=ReleaseExtIds(), + refs=[ + ReleaseRef(), + ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"), + ], + ) + r.state = 'active' + r.container = ContainerEntity( + name="dummy journal", + extra={ + "kbart": { + "lockss": { + "year_spans": [[1900, this_year - 2]], + }, + }, + }, + ) + es = release_to_elasticsearch(r) + assert es['release_year'] == this_year + assert es['in_ia'] == False + assert es['in_kbart'] == False + assert es['preservation'] == "none" + + r.container = ContainerEntity( + name="dummy journal", + extra={ + "kbart": { + "lockss": { + "year_spans": [[1900, this_year - 1]], + }, + }, + }, + ) + es = release_to_elasticsearch(r) + assert es['release_year'] == this_year + assert es['in_ia'] == False + assert es['in_kbart'] == True + assert es['preservation'] == "dark" |