From a05ecb7959bc57b8f1c3607e1c941e8e25d9a87b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Jul 2020 14:47:56 -0700 Subject: make in_kbart transform inclusive of last year Frequently when looking at preservation coverage of journals, the current year shows as "un-preserved" when in fact there is robust KBART (keepers, eg CLOCKSS/Portico) coverage. This is partially because we don't update containers with KBART year spans very frequently (which is on us), and partially because KBART reports are often a bit out of date (eg, they don't show coverage for the current year). For that matter, they probably take a few months to update the previous year as well, but that is a larger time span to fudge over. This patch means we will count Portico/LOCKSS/etc coverage for "last year" as coverage of publications dated "this year". Note that for this to be effective/correct, it is assumed that we will update containers with coverage year spans at least once a year, and that we will re-index all releases at least once a year. 
--- python/tests/transform_elasticsearch.py | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'python/tests') diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py index f791562c..0d96e139 100644 --- a/python/tests/transform_elasticsearch.py +++ b/python/tests/transform_elasticsearch.py @@ -1,5 +1,6 @@ import json +import datetime from fatcat_tools import * from fatcat_openapi_client import * @@ -137,3 +138,48 @@ def test_elasticsearch_changelog_transform(matched_importer): assert es['new_releases'] == 0 assert es['updated'] == 0 assert es['deleted'] == 0 + +def test_elasticsearch_release_kbart_year(): + this_year = datetime.date.today().year + r = ReleaseEntity( + title="something", + release_year=this_year, + license_slug="CC-BY-NC", + ext_ids=ReleaseExtIds(), + refs=[ + ReleaseRef(), + ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"), + ], + ) + r.state = 'active' + r.container = ContainerEntity( + name="dummy journal", + extra={ + "kbart": { + "lockss": { + "year_spans": [[1900, this_year - 2]], + }, + }, + }, + ) + es = release_to_elasticsearch(r) + assert es['release_year'] == this_year + assert es['in_ia'] == False + assert es['in_kbart'] == False + assert es['preservation'] == "none" + + r.container = ContainerEntity( + name="dummy journal", + extra={ + "kbart": { + "lockss": { + "year_spans": [[1900, this_year - 1]], + }, + }, + }, + ) + es = release_to_elasticsearch(r) + assert es['release_year'] == this_year + assert es['in_ia'] == False + assert es['in_kbart'] == True + assert es['preservation'] == "dark" -- cgit v1.2.3