diff options
| -rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 9 | ||||
| -rw-r--r-- | python/tests/transform_elasticsearch.py | 46 | 
2 files changed, 55 insertions, 0 deletions
| diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 8ec9c164..2eb18fbf 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -1,4 +1,6 @@ +import datetime +  import tldextract @@ -114,6 +116,7 @@ def release_to_elasticsearch(entity, force_bool=True):      # TODO: mapping... probably by lookup?      t['affiliation_rors'] = None +    this_year = datetime.date.today().year      container = release.container      if container:          t['publisher'] = container.publisher @@ -130,6 +133,12 @@ def release_to_elasticsearch(entity, force_bool=True):                  in_kbart = in_jstor                  for archive in ('portico', 'lockss', 'clockss'):                      in_kbart = in_kbart or check_kbart(release_year, c_extra['kbart'].get(archive)) +                    # recent KBART coverage is often not updated for the +                    # current year. So for current-year publications, consider +                    # coverage from *last* year to also be included in the +                    # Keeper +                    if not in_kbart and release_year == this_year: +                        in_kbart = check_kbart(this_year - 1, c_extra['kbart'].get(archive))              if c_extra.get('ia'):                  if c_extra['ia'].get('sim') and release_year: diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py index f791562c..0d96e139 100644 --- a/python/tests/transform_elasticsearch.py +++ b/python/tests/transform_elasticsearch.py @@ -1,5 +1,6 @@  import json +import datetime  from fatcat_tools import *  from fatcat_openapi_client import * @@ -137,3 +138,48 @@ def test_elasticsearch_changelog_transform(matched_importer):      assert es['new_releases'] == 0      assert es['updated'] == 0      assert es['deleted'] == 0 + +def test_elasticsearch_release_kbart_year(): +    this_year = datetime.date.today().year +    r = ReleaseEntity( +        title="something", +        release_year=this_year, +        license_slug="CC-BY-NC", +        ext_ids=ReleaseExtIds(), +        refs=[ +            ReleaseRef(), +            ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"), +        ], +    ) +    r.state = 'active' +    r.container = ContainerEntity( +        name="dummy journal", +        extra={ +            "kbart": { +                "lockss": { +                    "year_spans": [[1900, this_year - 2]], +                }, +            }, +        }, +    ) +    es = release_to_elasticsearch(r) +    assert es['release_year'] == this_year +    assert es['in_ia'] == False +    assert es['in_kbart'] == False +    assert es['preservation'] == "none" + +    r.container = ContainerEntity( +        name="dummy journal", +        extra={ +            "kbart": { +                "lockss": { +                    "year_spans": [[1900, this_year - 1]], +                }, +            }, +        }, +    ) +    es = release_to_elasticsearch(r) +    assert es['release_year'] == this_year +    assert es['in_ia'] == False +    assert es['in_kbart'] == True +    assert es['preservation'] == "dark" | 
