summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-07 18:45:15 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-07 18:45:15 -0700
commit: be79fb3c3d15bedd6f7ec5250116175a89f37622 (patch)
tree: b753c106ac71e41e3c74469286f467d47e7611bd
parent: fb4b999d5a7d36acd29e26d36c37f1a57dab0eb6 (diff)
download: fatcat-be79fb3c3d15bedd6f7ec5250116175a89f37622.tar.gz
fatcat-be79fb3c3d15bedd6f7ec5250116175a89f37622.zip
better elastic transform tests
-rw-r--r-- python/tests/transform_tests.py 60
1 files changed, 59 insertions, 1 deletions
diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py
index 6d6c6c82..a835ed82 100644
--- a/python/tests/transform_tests.py
+++ b/python/tests/transform_tests.py
@@ -4,10 +4,11 @@ import pytest
from fatcat_tools import *
from fatcat_client import *
from fixtures import api
+from import_journal_metadata import journal_metadata_importer
from import_crossref import crossref_importer
-def test_elasticsearch_convert(crossref_importer):
+def test_basic_elasticsearch_convert(crossref_importer):
with open('tests/files/crossref-works.single.json', 'r') as f:
# not a single line
raw = json.loads(f.read())
@@ -15,6 +16,63 @@ def test_elasticsearch_convert(crossref_importer):
r.state = 'active'
release_to_elasticsearch(r)
+def test_rich_elasticsearch_convert(crossref_importer):
+ r = ReleaseEntity(
+ title="something",
+ release_year=1234,
+ license_slug="CC-BY-NC",
+ )
+ r.state = 'active'
+ r.container = ContainerEntity(
+ name="dummy journal",
+ extra={
+ "ia": {
+ "sim": {
+ "year_spans": [[1000, 1100]],
+ },
+ },
+ "kbart": {
+ "lockss": {
+ "year_spans": [[1200, 1300]],
+ },
+ "jstor": {
+ "year_spans": [[1950, 1960], [1980, 2005]],
+ },
+ },
+ "sherpa_romeo": {"color": "blue"},
+ "doaj": {"as_of": "2010-02-03"},
+ },
+ )
+ r.files = [FileEntity(
+ mimetype="application/pdf",
+ urls=[
+ FileEntityUrls(rel="dweb", url="dat://a954329dlk/thingie"),
+ FileEntityUrls(rel="webarchive", url="https://web.archive.org/web/20001122030405/http://example.com"),
+ FileEntityUrls(rel="web", url="https://archive.org/details/blah/file.pdf"),
+ ],
+ extra={
+ "shadows": {},
+ },
+ )]
+ es = release_to_elasticsearch(r)
+ assert es['release_year'] == r.release_year
+ assert es['in_ia'] == True
+ assert es['in_jstor'] == False
+ assert es['in_ia_sim'] == False
+ assert es['in_ia'] == True
+ assert es['in_web'] == True
+ assert es['in_dweb'] == True
+ assert es['is_oa'] == True
+ assert es['is_longtail_oa'] == False
+
def test_elasticsearch_from_json():
r = entity_from_json(open('./tests/files/math_universe.json', 'r').read(), ReleaseEntity)
release_to_elasticsearch(r)
+
+def test_elasticsearch_container_convert(journal_metadata_importer):
+ with open('tests/files/journal_metadata.sample.json', 'r') as f:
+ raw = json.loads(f.readline())
+ c = journal_metadata_importer.parse_record(raw)
+ c.state = 'active'
+ es = container_to_elasticsearch(c)
+ assert es['publisher'] == c.publisher