aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/transform_elasticsearch.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-07-23 14:47:56 -0700
committerBryan Newbold <bnewbold@robocracy.org>2020-07-23 14:48:00 -0700
commita05ecb7959bc57b8f1c3607e1c941e8e25d9a87b (patch)
tree1ea23eddff08b4e0e098a6019a1c118ed240ac3f /python/tests/transform_elasticsearch.py
parent6a87b3d2e1b315d35ccaa13457571d73afaf5e6b (diff)
downloadfatcat-a05ecb7959bc57b8f1c3607e1c941e8e25d9a87b.tar.gz
fatcat-a05ecb7959bc57b8f1c3607e1c941e8e25d9a87b.zip
make in_kbart transform inclusive of last year
Frequently when looking at preservation coverage of journals, the current year shows as "un-preserved" when in fact there is robust KBART (keepers, eg CLOCKSS/Portico) coverage. This is partially because we don't update containers with KBART year spans very frequently (which is on us), and partially because KBART reports are often a bit out of date (eg, they don't show coverage for the current year). For that matter, they probably take a few months to update the previous year as well, but that is a larger time span to fudge over. This patch means that Portico/LOCKSS/etc coverage for "last year" will count as coverage of publications dated "this year". Note that for this to be effective/correct, it is assumed that we will update containers with coverage year spans at least once a year, and that we will re-index all releases at least once a year.
Diffstat (limited to 'python/tests/transform_elasticsearch.py')
-rw-r--r--python/tests/transform_elasticsearch.py46
1 files changed, 46 insertions, 0 deletions
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index f791562c..0d96e139 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -1,5 +1,6 @@
import json
+import datetime
from fatcat_tools import *
from fatcat_openapi_client import *
@@ -137,3 +138,48 @@ def test_elasticsearch_changelog_transform(matched_importer):
assert es['new_releases'] == 0
assert es['updated'] == 0
assert es['deleted'] == 0
+
+def test_elasticsearch_release_kbart_year():  # checks in_kbart/preservation for a release dated this year against KBART spans ending 2 years ago vs. last year
+ this_year = datetime.date.today().year  # taken from the clock at run time, so the spans below are always relative to "now"
+ r = ReleaseEntity(
+ title="something",
+ release_year=this_year,  # the release under test is dated in the current year
+ license_slug="CC-BY-NC",
+ ext_ids=ReleaseExtIds(),
+ refs=[
+ ReleaseRef(),
+ ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"),
+ ],
+ )
+ r.state = 'active'
+ r.container = ContainerEntity(
+ name="dummy journal",
+ extra={
+ "kbart": {
+ "lockss": {
+ "year_spans": [[1900, this_year - 2]],  # case 1: coverage span ends two years before the release year
+ },
+ },
+ },
+ )
+ es = release_to_elasticsearch(r)
+ assert es['release_year'] == this_year
+ assert es['in_ia'] == False
+ assert es['in_kbart'] == False  # a span ending at this_year - 2 must not count for a release dated this year
+ assert es['preservation'] == "none"
+
+ r.container = ContainerEntity(  # case 2: same release, container replaced with one whose span reaches last year
+ name="dummy journal",
+ extra={
+ "kbart": {
+ "lockss": {
+ "year_spans": [[1900, this_year - 1]],  # coverage span ends the year before the release year
+ },
+ },
+ },
+ )
+ es = release_to_elasticsearch(r)
+ assert es['release_year'] == this_year
+ assert es['in_ia'] == False
+ assert es['in_kbart'] == True  # a span ending last year is expected to count as covering this year's release
+ assert es['preservation'] == "dark"  # with in_ia False and in_kbart True the expected preservation value is "dark"