summary refs log tree commit diff stats
path: root/python/tests/import_doaj.py
blob: a75b574e0c506d7581ff4e8380662dd9c3c2ab5a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92

import json
import datetime

import pytest

from fatcat_tools.importers import DoajArticleImporter, JsonLinePusher
from fatcat_tools.transforms import entity_to_dict
import fatcat_openapi_client
from fixtures import api
import json


@pytest.fixture(scope="function")
def doaj_importer(api):
    """Yield a ``DoajArticleImporter`` built with ``bezerk_mode=True``.

    The ISSN-to-ISSN-L snippet file handed to the importer stays open while
    the fixture is in use and is closed when the fixture is torn down.
    """
    issn_map_path = "tests/files/ISSN-to-ISSN-L.snip.txt"
    with open(issn_map_path, "r") as issn_map_file:
        importer = DoajArticleImporter(api, issn_map_file, bezerk_mode=True)
        yield importer

@pytest.fixture(scope="function")
def doaj_importer_existing(api):
    """Yield a ``DoajArticleImporter`` built with ``bezerk_mode=False``.

    The ISSN-to-ISSN-L snippet file handed to the importer stays open while
    the fixture is in use and is closed when the fixture is torn down.
    """
    issn_map_path = "tests/files/ISSN-to-ISSN-L.snip.txt"
    with open(issn_map_path, "r") as issn_map_file:
        importer = DoajArticleImporter(api, issn_map_file, bezerk_mode=False)
        yield importer

def test_doaj_importer(doaj_importer):
    """Push the example DOAJ articles through the importer twice.

    The first run has ``bezerk_mode`` switched on and expects five inserts,
    recorded in a new changelog entry. The second run switches it off, calls
    ``reset()``, and expects all five records to be counted as existing with
    the changelog index left unchanged.
    """
    articles_path = "tests/files/example_doaj_articles.json"

    # First run: bezerk mode on.
    index_before_first = doaj_importer.api.get_changelog(limit=1)[0].index
    with open(articles_path, "r") as articles:
        doaj_importer.bezerk_mode = True
        first_counts = JsonLinePusher(doaj_importer, articles).run()
    assert first_counts["insert"] == 5
    assert first_counts["exists"] == 0
    assert first_counts["skip"] == 0

    # fetch most recent editgroup
    changelog_entry = doaj_importer.api.get_changelog_entry(
        index=index_before_first + 1
    )
    editgroup = changelog_entry.editgroup
    assert editgroup.description
    assert "doaj" in editgroup.description.lower()
    assert editgroup.extra["git_rev"]
    assert "fatcat_tools.DoajArticleImporter" in editgroup.extra["agent"]

    # Second run: bezerk mode off, importer reset before pushing again.
    index_before_second = doaj_importer.api.get_changelog(limit=1)[0].index
    with open(articles_path, "r") as articles:
        doaj_importer.bezerk_mode = False
        doaj_importer.reset()
        second_counts = JsonLinePusher(doaj_importer, articles).run()
    assert second_counts["insert"] == 0
    assert second_counts["exists"] == 5
    assert second_counts["skip"] == 0

    # The changelog index must not have advanced during the second run.
    index_after_second = doaj_importer.api.get_changelog(limit=1)[0].index
    assert index_before_second == index_after_second


def test_doaj_dict_parse(doaj_importer):
    """Parse the first example DOAJ record and check the resulting fields.

    Only the first line of the example file is read. The same raw record is
    also pushed through ``JsonLinePusher`` so that the API server gets a
    chance to reject the format.
    """
    # Only the first record is needed, so the file is closed right after it
    # has been read.
    with open("tests/files/example_doaj_articles.json", "r") as f:
        raw = json.loads(f.readline())

    r = doaj_importer.parse_record(raw)
    # ensure the API server is ok with format
    JsonLinePusher(doaj_importer, [json.dumps(raw)]).run()

    assert r.title == "Effect of hydrogen on tensile properties and fracture behavior of PH 13-8 Mo steel"
    assert r.publisher == "Elsevier"
    assert r.release_type == "article-journal"
    assert r.release_stage == "published"
    # XXX: assert r.license_slug == "cc-by-nc-nd"
    assert r.original_title is None
    assert r.ext_ids.doi == "10.1016/j.matdes.2016.06.110"
    assert r.ext_ids.doaj == "e58f08a11ecb495ead55a44ad4f89808"
    assert r.subtitle is None
    assert r.release_date is None
    assert r.release_year == 2016
    assert r.volume == "108"
    assert r.number is None
    assert r.pages == "608-617"
    assert r.version is None
    # XXX: assert r.language == "en"
    # matched by ISSN, so wouldn't be defined normally
    # XXX: assert r.extra['container_name'] == "Materials & Design"
    assert len(r.abstracts) == 1
    assert len(r.abstracts[0].content) == 1033
    assert len(r.contribs) == 5
    assert r.contribs[0].raw_name == "Xinfeng Li"
    assert r.contribs[0].given_name is None
    assert r.contribs[0].surname is None
    assert not r.refs

    # Shown in pytest's captured output when the test fails; useful while
    # the XXX assertions below remain disabled.
    print(r.extra)
    # XXX: assert r.extra['release_month'] == 10
    # XXX: assert r.extra['country'] == 'gb'

    #assert r.extra["doaj"]["subjects"] == [
    #    {"subject": "Plant Genetic Resource for Food and Agriculture"}
    #]