From a05ecb7959bc57b8f1c3607e1c941e8e25d9a87b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Jul 2020 14:47:56 -0700 Subject: make in_kbart transform inclusive of last year Frequently when looking at preservation coverage of journals, the current year shows as "un-preserved" when in fact there is robust KBART (keepers, eg CLOCKSS/Portico) coverage. This is partially because we don't update containers with KBART year spans very frequently (which is on us), and partially because KBART reports are often a bit out of date (eg, they don't show coverage for the current year). For that matter, they probably take a few months to update the previous year as well, but that is a larger time span to fudge over. This patch means that Portico/LOCKSS/etc coverage for "last year" will count as coverage of publications dated "this year". Note that for this to be effective/correct, it is assumed that we will update containers with coverage year spans at least once a year, and that we will re-index all releases at least once a year. --- python/fatcat_tools/transforms/elasticsearch.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'python/fatcat_tools/transforms') diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 8ec9c164..0b04db86 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -1,4 +1,6 @@ +import datetime + import tldextract @@ -114,6 +116,7 @@ def release_to_elasticsearch(entity, force_bool=True): # TODO: mapping... probably by lookup? 
t['affiliation_rors'] = None + this_year = datetime.date.today().year container = release.container if container: t['publisher'] = container.publisher @@ -130,6 +133,12 @@ def release_to_elasticsearch(entity, force_bool=True): in_kbart = in_jstor for archive in ('portico', 'lockss', 'clockss'): in_kbart = in_kbart or check_kbart(release_year, c_extra['kbart'].get(archive)) + # recent KBART coverage is often not updated for the + # current year. So for current-year publications, consider + # coverage from *last* year to also be included in the + # Keeper + if not in_kbart and release_year == this_year: + in_kbart = in_kbart or check_kbart(this_year - 1, c_extra['kbart'].get(archive)) if c_extra.get('ia'): if c_extra['ia'].get('sim') and release_year: -- cgit v1.2.3 From 3e5799e0bb3f0eb7b9f5d690692757cfbf400412 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Jul 2020 19:45:18 -0700 Subject: simplify in_kbart check statement Thanks @martin --- python/fatcat_tools/transforms/elasticsearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'python/fatcat_tools/transforms') diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 0b04db86..2eb18fbf 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -138,7 +138,7 @@ def release_to_elasticsearch(entity, force_bool=True): # coverage from *last* year to also be included in the # Keeper if not in_kbart and release_year == this_year: - in_kbart = in_kbart or check_kbart(this_year - 1, c_extra['kbart'].get(archive)) + in_kbart = check_kbart(this_year - 1, c_extra['kbart'].get(archive)) if c_extra.get('ia'): if c_extra['ia'].get('sim') and release_year: -- cgit v1.2.3