diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-16 14:33:52 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-16 14:33:52 -0800 |
commit | ebcc86561dabf3974ca11151445e66c0df4431f1 (patch) | |
tree | a7251e0ab7c0e8657ebe72ebaa513cabd95e447d /python/tests/transform_elasticsearch.py | |
parent | 532a25205f2cd2929c4258dee87bc6c53cd5cdc3 (diff) | |
download | fatcat-ebcc86561dabf3974ca11151445e66c0df4431f1.tar.gz fatcat-ebcc86561dabf3974ca11151445e66c0df4431f1.zip |
improve release elasticsearch transform test coverage
Diffstat (limited to 'python/tests/transform_elasticsearch.py')
-rw-r--r-- | python/tests/transform_elasticsearch.py | 95 |
1 file changed, 84 insertions, 11 deletions
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py index 0d96e139..b5f23e76 100644 --- a/python/tests/transform_elasticsearch.py +++ b/python/tests/transform_elasticsearch.py @@ -43,7 +43,7 @@ def test_rich_elasticsearch_convert(): "year_spans": [[1200, 1300]], }, "jstor": { - "year_spans": [[1950, 1960], [1980, 2005]], + "year_spans": [[1000, 1300], [1950, 1960], [1980, 2005]], }, }, "sherpa_romeo": {"color": "blue"}, @@ -63,17 +63,23 @@ def test_rich_elasticsearch_convert(): )] es = release_to_elasticsearch(r) assert es['release_year'] == r.release_year - assert es['in_ia'] == True - assert es['in_jstor'] == False - assert es['in_ia_sim'] == False - assert es['in_ia'] == True - assert es['in_web'] == True - assert es['in_dweb'] == True - assert es['is_oa'] == True - assert es['is_longtail_oa'] == False + assert es['file_count'] == 1 + assert es['fileset_count'] == 0 + assert es['webcapture_count'] == 0 assert es['ref_count'] == 2 assert es['ref_linked_count'] == 1 + assert es['preservation'] == "bright" + assert es['is_oa'] == True + assert es['is_longtail_oa'] == False + assert es['is_preserved'] == True + assert es['in_web'] == True + assert es['in_dweb'] == True + assert es['in_ia'] == True + assert es['in_ia_sim'] == False + assert es['in_kbart'] == True + assert es['in_jstor'] == True + def test_elasticsearch_release_from_json(): r = entity_from_json(open('./tests/files/release_etodop5banbndg3faecnfm6ozi.json', 'r').read(), ReleaseEntity) es = release_to_elasticsearch(r) @@ -85,8 +91,59 @@ def test_elasticsearch_release_from_json(): assert es['issue'] == "11" assert es['volume'] == "118" assert es['number'] == None + + assert es['preservation'] == "dark" + assert es['is_oa'] == False + assert es['is_longtail_oa'] == False + assert es['is_preserved'] == True + assert es['in_web'] == False + assert es['in_dweb'] == False + assert es['in_ia'] == False assert es['in_ia_sim'] == True assert es['in_kbart'] == True + 
assert es['in_jstor'] == False + + # this release has a fileset, and no file + r = entity_from_json(open('./tests/files/release_3mssw2qnlnblbk7oqyv2dafgey.json', 'r').read(), ReleaseEntity) + es = release_to_elasticsearch(r) + + assert es['title'] == "Jakobshavn Glacier Bed Elevation" + assert es['ident'] == "3mssw2qnlnblbk7oqyv2dafgey" + assert es['file_count'] == 0 + assert es['fileset_count'] == 1 + assert es['webcapture_count'] == 0 + + assert es['preservation'] == "dark" + assert es['is_oa'] == True + assert es['is_longtail_oa'] == False + assert es['is_preserved'] == True + assert es['in_web'] == True + assert es['in_dweb'] == True + assert es['in_ia'] == False + assert es['in_ia_sim'] == False + assert es['in_kbart'] == False + assert es['in_jstor'] == False + + # this release has a web capture, and no file (edited the JSON to remove file) + r = entity_from_json(open('./tests/files/release_mjtqtuyhwfdr7j2c3l36uor7uy.json', 'r').read(), ReleaseEntity) + es = release_to_elasticsearch(r) + + assert es['title'] == "Rethinking Personal Digital Archiving, Part 1" + assert es['ident'] == "mjtqtuyhwfdr7j2c3l36uor7uy" + assert es['file_count'] == 0 + assert es['fileset_count'] == 0 + assert es['webcapture_count'] == 1 + + assert es['preservation'] == "bright" + assert es['is_oa'] == True + assert es['is_longtail_oa'] == False + assert es['is_preserved'] == True + assert es['in_web'] == True + assert es['in_dweb'] == False + assert es['in_ia'] == True + assert es['in_ia_sim'] == False + assert es['in_kbart'] == False + assert es['in_jstor'] == False def test_elasticsearch_container_transform(journal_metadata_importer): with open('tests/files/journal_metadata.sample.json', 'r') as f: @@ -164,9 +221,17 @@ def test_elasticsearch_release_kbart_year(): ) es = release_to_elasticsearch(r) assert es['release_year'] == this_year + + assert es['preservation'] == "none" + assert es['is_oa'] == True + assert es['is_longtail_oa'] == False + assert es['is_preserved'] == None + 
assert es['in_web'] == False + assert es['in_dweb'] == False assert es['in_ia'] == False + assert es['in_ia_sim'] == False assert es['in_kbart'] == False - assert es['preservation'] == "none" + assert es['in_jstor'] == False r.container = ContainerEntity( name="dummy journal", @@ -180,6 +245,14 @@ def test_elasticsearch_release_kbart_year(): ) es = release_to_elasticsearch(r) assert es['release_year'] == this_year + + assert es['preservation'] == "dark" + assert es['is_oa'] == True + assert es['is_longtail_oa'] == False + assert es['is_preserved'] == True + assert es['in_web'] == False + assert es['in_dweb'] == False assert es['in_ia'] == False + assert es['in_ia_sim'] == False assert es['in_kbart'] == True - assert es['preservation'] == "dark" + assert es['in_jstor'] == False |