aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--python/fatcat_tools/transforms/elasticsearch.py9
-rw-r--r--python/tests/transform_elasticsearch.py46
2 files changed, 55 insertions, 0 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 8ec9c164..2eb18fbf 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -1,4 +1,6 @@
+import datetime
+
import tldextract
@@ -114,6 +116,7 @@ def release_to_elasticsearch(entity, force_bool=True):
# TODO: mapping... probably by lookup?
t['affiliation_rors'] = None
+ this_year = datetime.date.today().year
container = release.container
if container:
t['publisher'] = container.publisher
@@ -130,6 +133,12 @@ def release_to_elasticsearch(entity, force_bool=True):
in_kbart = in_jstor
for archive in ('portico', 'lockss', 'clockss'):
in_kbart = in_kbart or check_kbart(release_year, c_extra['kbart'].get(archive))
+ # Recent KBART coverage data is often not yet updated for the
+ # current year. So for current-year publications, treat coverage
+ # of *last* year as meaning the release is also included in the
+ # Keeper.
+ if not in_kbart and release_year == this_year:
+ in_kbart = check_kbart(this_year - 1, c_extra['kbart'].get(archive))
if c_extra.get('ia'):
if c_extra['ia'].get('sim') and release_year:
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index f791562c..0d96e139 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -1,5 +1,6 @@
import json
+import datetime
from fatcat_tools import *
from fatcat_openapi_client import *
@@ -137,3 +138,48 @@ def test_elasticsearch_changelog_transform(matched_importer):
assert es['new_releases'] == 0
assert es['updated'] == 0
assert es['deleted'] == 0
+
+def test_elasticsearch_release_kbart_year():
+ this_year = datetime.date.today().year
+ r = ReleaseEntity(
+ title="something",
+ release_year=this_year,
+ license_slug="CC-BY-NC",
+ ext_ids=ReleaseExtIds(),
+ refs=[
+ ReleaseRef(),
+ ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"),
+ ],
+ )
+ r.state = 'active'
+ r.container = ContainerEntity(
+ name="dummy journal",
+ extra={
+ "kbart": {
+ "lockss": {
+ "year_spans": [[1900, this_year - 2]],
+ },
+ },
+ },
+ )
+ es = release_to_elasticsearch(r)
+ assert es['release_year'] == this_year
+ assert es['in_ia'] == False
+ assert es['in_kbart'] == False
+ assert es['preservation'] == "none"
+
+ r.container = ContainerEntity(
+ name="dummy journal",
+ extra={
+ "kbart": {
+ "lockss": {
+ "year_spans": [[1900, this_year - 1]],
+ },
+ },
+ },
+ )
+ es = release_to_elasticsearch(r)
+ assert es['release_year'] == this_year
+ assert es['in_ia'] == False
+ assert es['in_kbart'] == True
+ assert es['preservation'] == "dark"