diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-01-29 16:00:03 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-01-29 21:59:05 -0800 |
commit | 901cf998ce7d8f896cf5d609719b1defd96d01d4 (patch) | |
tree | 85a5eebe76e8ca4df3b1b4217ecc9e829f50583c /python/tests/transform_tests.py | |
parent | 55a4f211532c93d8164b0d4719dc0413005941ea (diff) | |
download | fatcat-901cf998ce7d8f896cf5d609719b1defd96d01d4.tar.gz fatcat-901cf998ce7d8f896cf5d609719b1defd96d01d4.zip |
first implementation of ES file schema
Includes a trivial test and transform, but not any workers or doc
updates.
Diffstat (limited to 'python/tests/transform_tests.py')
-rw-r--r-- | python/tests/transform_tests.py | 25 |
1 files changed, 23 insertions, 2 deletions
diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py
index f254e117..7b583ac4 100644
--- a/python/tests/transform_tests.py
+++ b/python/tests/transform_tests.py
@@ -7,6 +7,7 @@
 from fixtures import api
 from import_journal_metadata import journal_metadata_importer
 from import_crossref import crossref_importer
+from import_matched import matched_importer
 
 def test_basic_elasticsearch_convert(crossref_importer):
     with open('tests/files/crossref-works.single.json', 'r') as f:
@@ -72,14 +73,34 @@ def test_rich_elasticsearch_convert():
     assert es['ref_count'] == 2
     assert es['ref_linked_count'] == 1
 
-def test_elasticsearch_from_json():
+def test_elasticsearch_release_from_json():
     r = entity_from_json(open('./tests/files/math_universe.json', 'r').read(), ReleaseEntity)
     release_to_elasticsearch(r)
 
-def test_elasticsearch_container_convert(journal_metadata_importer):
+def test_elasticsearch_container_transform(journal_metadata_importer):
     with open('tests/files/journal_metadata.sample.json', 'r') as f:
         raw = json.loads(f.readline())
         c = journal_metadata_importer.parse_record(raw)
     c.state = 'active'
     es = container_to_elasticsearch(c)
     assert es['publisher'] == c.publisher
+
+def test_elasticsearch_file_transform(matched_importer):
+    with open('tests/files/example_matched.json', 'r') as f:
+        raw = json.loads(f.readline())
+        f = matched_importer.parse_record(raw)
+
+    f.state = 'active'
+    es = file_to_elasticsearch(f)
+    assert es['sha1'] == f.sha1
+    assert es['sha256'] == f.sha256
+    assert es['md5'] == f.md5
+    assert es['size_bytes'] == f.size
+    assert es['mimetype'] == f.mimetype
+    assert es['in_ia'] == True
+    assert 'publisher' in es['rel']
+
+    # XXX: implement hosts and domain parsing with urlcanon
+    #assert 'journals.plos.org' in es['host']
+    #assert 'plos.org' in es['domain']
+