1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
import json
import pytest
from fatcat_tools import *
from fatcat_client import *
from fixtures import api
from import_journal_metadata import journal_metadata_importer
from import_crossref import crossref_importer
def test_basic_elasticsearch_convert(crossref_importer):
    """Smoke test: a parsed Crossref record can be run through the release transform.

    Nothing is asserted about the output; the test only checks that
    ``release_to_elasticsearch`` accepts the parsed record without raising.
    ``crossref_importer`` is presumably a pytest fixture -- verify against
    the ``import_crossref`` test module.
    """
    # The sample file is not a single line, so decode the whole file at once.
    with open('tests/files/crossref-works.single.json', 'r') as handle:
        record = json.load(handle)
    release = crossref_importer.parse_record(record)
    release.state = 'active'
    release_to_elasticsearch(release)
def test_rich_elasticsearch_convert(crossref_importer):
    """Check the preservation/access flags derived for a hand-built release.

    Builds a ``ReleaseEntity`` with an attached container (carrying
    ``ia``/``kbart``/``sherpa_romeo``/``doaj`` metadata in ``extra``) and one
    PDF file with dweb, webarchive, and web URLs, then asserts on the
    document returned by ``release_to_elasticsearch``.

    The ``crossref_importer`` argument is not used in the body; it is kept
    so the test signature stays unchanged.
    """
    r = ReleaseEntity(
        title="something",
        release_year=1234,
        license_slug="CC-BY-NC",
    )
    r.state = 'active'
    # release_year (1234) lies outside every year span listed below;
    # presumably that is why the span-based flags are expected to be False.
    r.container = ContainerEntity(
        name="dummy journal",
        extra={
            "ia": {
                "sim": {
                    "year_spans": [[1000, 1100]],
                },
            },
            "kbart": {
                "lockss": {
                    "year_spans": [[1200, 1300]],
                },
                "jstor": {
                    "year_spans": [[1950, 1960], [1980, 2005]],
                },
            },
            "sherpa_romeo": {"color": "blue"},
            "doaj": {"as_of": "2010-02-03"},
        },
    )
    r.files = [FileEntity(
        mimetype="application/pdf",
        urls=[
            FileEntityUrls(rel="dweb", url="dat://a954329dlk/thingie"),
            FileEntityUrls(rel="webarchive", url="https://web.archive.org/web/20001122030405/http://example.com"),
            FileEntityUrls(rel="web", url="https://archive.org/details/blah/file.pdf"),
        ],
        extra={
            "shadows": {},
        },
    )]

    es = release_to_elasticsearch(r)

    assert es['release_year'] == r.release_year
    # NOTE(review): the original asserted es['in_ia'] twice; the duplicate was
    # dropped. It may have been intended to cover a different field -- confirm.
    assert es['in_ia'] == True
    assert es['in_jstor'] == False
    assert es['in_ia_sim'] == False
    assert es['in_web'] == True
    assert es['in_dweb'] == True
    assert es['is_oa'] == True
    assert es['is_longtail_oa'] == False
def test_elasticsearch_from_json():
    """Smoke test: a release deserialized from JSON can be run through the transform.

    Loads ``math_universe.json`` as a ``ReleaseEntity`` via
    ``entity_from_json`` and passes it to ``release_to_elasticsearch``.
    Nothing is asserted about the output; the test only checks that no
    exception is raised.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open('./tests/files/math_universe.json', 'r') as f:
        r = entity_from_json(f.read(), ReleaseEntity)
    release_to_elasticsearch(r)
def test_elasticsearch_container_convert(journal_metadata_importer):
    """Check that the container transform carries the publisher through.

    Parses the first line of the journal metadata sample file, marks the
    resulting container as active, and asserts that the ``publisher`` field
    of the converted document equals the entity's ``publisher``.
    ``journal_metadata_importer`` is presumably a pytest fixture -- verify
    against the ``import_journal_metadata`` test module.
    """
    # Only the first line of the sample file is used.
    with open('tests/files/journal_metadata.sample.json', 'r') as sample:
        first_line = sample.readline()
    container = journal_metadata_importer.parse_record(json.loads(first_line))
    container.state = 'active'
    doc = container_to_elasticsearch(container)
    assert doc['publisher'] == container.publisher
|