summary | refs | log | tree | commit | diff | stats
path: root/python/tests/transform_elasticsearch.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-12-16 14:33:52 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-12-16 14:33:52 -0800
commit: ebcc86561dabf3974ca11151445e66c0df4431f1 (patch)
tree: a7251e0ab7c0e8657ebe72ebaa513cabd95e447d /python/tests/transform_elasticsearch.py
parent: 532a25205f2cd2929c4258dee87bc6c53cd5cdc3 (diff)
download: fatcat-ebcc86561dabf3974ca11151445e66c0df4431f1.tar.gz
fatcat-ebcc86561dabf3974ca11151445e66c0df4431f1.zip
improve release elasticsearch transform test coverage
Diffstat (limited to 'python/tests/transform_elasticsearch.py')
-rw-r--r-- python/tests/transform_elasticsearch.py 95
1 files changed, 84 insertions, 11 deletions
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index 0d96e139..b5f23e76 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -43,7 +43,7 @@ def test_rich_elasticsearch_convert():
"year_spans": [[1200, 1300]],
},
"jstor": {
- "year_spans": [[1950, 1960], [1980, 2005]],
+ "year_spans": [[1000, 1300], [1950, 1960], [1980, 2005]],
},
},
"sherpa_romeo": {"color": "blue"},
@@ -63,17 +63,23 @@ def test_rich_elasticsearch_convert():
)]
es = release_to_elasticsearch(r)
assert es['release_year'] == r.release_year
- assert es['in_ia'] == True
- assert es['in_jstor'] == False
- assert es['in_ia_sim'] == False
- assert es['in_ia'] == True
- assert es['in_web'] == True
- assert es['in_dweb'] == True
- assert es['is_oa'] == True
- assert es['is_longtail_oa'] == False
+ assert es['file_count'] == 1
+ assert es['fileset_count'] == 0
+ assert es['webcapture_count'] == 0
assert es['ref_count'] == 2
assert es['ref_linked_count'] == 1
+ assert es['preservation'] == "bright"
+ assert es['is_oa'] == True
+ assert es['is_longtail_oa'] == False
+ assert es['is_preserved'] == True
+ assert es['in_web'] == True
+ assert es['in_dweb'] == True
+ assert es['in_ia'] == True
+ assert es['in_ia_sim'] == False
+ assert es['in_kbart'] == True
+ assert es['in_jstor'] == True
+
def test_elasticsearch_release_from_json():
r = entity_from_json(open('./tests/files/release_etodop5banbndg3faecnfm6ozi.json', 'r').read(), ReleaseEntity)
es = release_to_elasticsearch(r)
@@ -85,8 +91,59 @@ def test_elasticsearch_release_from_json():
assert es['issue'] == "11"
assert es['volume'] == "118"
assert es['number'] == None
+
+ assert es['preservation'] == "dark"
+ assert es['is_oa'] == False
+ assert es['is_longtail_oa'] == False
+ assert es['is_preserved'] == True
+ assert es['in_web'] == False
+ assert es['in_dweb'] == False
+ assert es['in_ia'] == False
assert es['in_ia_sim'] == True
assert es['in_kbart'] == True
+ assert es['in_jstor'] == False
+
+ # this release has a fileset, and no file
+ r = entity_from_json(open('./tests/files/release_3mssw2qnlnblbk7oqyv2dafgey.json', 'r').read(), ReleaseEntity)
+ es = release_to_elasticsearch(r)
+
+ assert es['title'] == "Jakobshavn Glacier Bed Elevation"
+ assert es['ident'] == "3mssw2qnlnblbk7oqyv2dafgey"
+ assert es['file_count'] == 0
+ assert es['fileset_count'] == 1
+ assert es['webcapture_count'] == 0
+
+ assert es['preservation'] == "dark"
+ assert es['is_oa'] == True
+ assert es['is_longtail_oa'] == False
+ assert es['is_preserved'] == True
+ assert es['in_web'] == True
+ assert es['in_dweb'] == True
+ assert es['in_ia'] == False
+ assert es['in_ia_sim'] == False
+ assert es['in_kbart'] == False
+ assert es['in_jstor'] == False
+
+ # this release has a web capture, and no file (edited the JSON to remove file)
+ r = entity_from_json(open('./tests/files/release_mjtqtuyhwfdr7j2c3l36uor7uy.json', 'r').read(), ReleaseEntity)
+ es = release_to_elasticsearch(r)
+
+ assert es['title'] == "Rethinking Personal Digital Archiving, Part 1"
+ assert es['ident'] == "mjtqtuyhwfdr7j2c3l36uor7uy"
+ assert es['file_count'] == 0
+ assert es['fileset_count'] == 0
+ assert es['webcapture_count'] == 1
+
+ assert es['preservation'] == "bright"
+ assert es['is_oa'] == True
+ assert es['is_longtail_oa'] == False
+ assert es['is_preserved'] == True
+ assert es['in_web'] == True
+ assert es['in_dweb'] == False
+ assert es['in_ia'] == True
+ assert es['in_ia_sim'] == False
+ assert es['in_kbart'] == False
+ assert es['in_jstor'] == False
def test_elasticsearch_container_transform(journal_metadata_importer):
with open('tests/files/journal_metadata.sample.json', 'r') as f:
@@ -164,9 +221,17 @@ def test_elasticsearch_release_kbart_year():
)
es = release_to_elasticsearch(r)
assert es['release_year'] == this_year
+
+ assert es['preservation'] == "none"
+ assert es['is_oa'] == True
+ assert es['is_longtail_oa'] == False
+ assert es['is_preserved'] == None
+ assert es['in_web'] == False
+ assert es['in_dweb'] == False
assert es['in_ia'] == False
+ assert es['in_ia_sim'] == False
assert es['in_kbart'] == False
- assert es['preservation'] == "none"
+ assert es['in_jstor'] == False
r.container = ContainerEntity(
name="dummy journal",
@@ -180,6 +245,14 @@ def test_elasticsearch_release_kbart_year():
)
es = release_to_elasticsearch(r)
assert es['release_year'] == this_year
+
+ assert es['preservation'] == "dark"
+ assert es['is_oa'] == True
+ assert es['is_longtail_oa'] == False
+ assert es['is_preserved'] == True
+ assert es['in_web'] == False
+ assert es['in_dweb'] == False
assert es['in_ia'] == False
+ assert es['in_ia_sim'] == False
assert es['in_kbart'] == True
- assert es['preservation'] == "dark"
+ assert es['in_jstor'] == False