diff options
author | Martin Czygan <martin@archive.org> | 2020-11-24 19:29:07 +0000 |
---|---|---|
committer | Martin Czygan <martin@archive.org> | 2020-11-24 19:29:07 +0000 |
commit | cfd13852d7cb58fcc3387373960adaf3680f0faf (patch) | |
tree | 675954b8b34324fe22fc5a00f3fbb99a21a77a21 /python/tests/import_doaj.py | |
parent | fcfcd3224a113fa90da2045a3c7fe90127088ebe (diff) | |
parent | 1fca5a9822944d0646d2dcba6cf54f27a0ffe5c0 (diff) | |
download | fatcat-cfd13852d7cb58fcc3387373960adaf3680f0faf.tar.gz fatcat-cfd13852d7cb58fcc3387373960adaf3680f0faf.zip |
Merge branch 'bnewbold-doaj-metadata' into 'master'
DOAJ article metadata import
See merge request webgroup/fatcat!89
Diffstat (limited to 'python/tests/import_doaj.py')
-rw-r--r-- | python/tests/import_doaj.py | 142 |
1 files changed, 142 insertions, 0 deletions
diff --git a/python/tests/import_doaj.py b/python/tests/import_doaj.py new file mode 100644 index 00000000..d69aebd7 --- /dev/null +++ b/python/tests/import_doaj.py @@ -0,0 +1,142 @@ + +import json +import datetime + +import pytest +import fatcat_openapi_client + +from fatcat_tools.importers import DoajArticleImporter, JsonLinePusher +from fatcat_tools.transforms import entity_to_dict +from fixtures import * + + +@pytest.fixture(scope="function") +def doaj_importer(api): + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield DoajArticleImporter(api, issn_file, bezerk_mode=True) + +def test_doaj_importer(doaj_importer): + last_index = doaj_importer.api.get_changelog(limit=1)[0].index + with open("tests/files/example_doaj_articles.json", "r") as f: + doaj_importer.bezerk_mode = True + counts = JsonLinePusher(doaj_importer, f).run() + assert counts["insert"] == 5 + assert counts["exists"] == 0 + assert counts["skip"] == 0 + success_changelog = doaj_importer.api.get_changelog(limit=1)[0] + assert last_index + 1 == success_changelog.index + + # fetch most recent editgroup + change = doaj_importer.api.get_changelog_entry(index=last_index + 1) + eg = change.editgroup + assert eg.description + assert "doaj" in eg.description.lower() + assert eg.extra["git_rev"] + assert "fatcat_tools.DoajArticleImporter" in eg.extra["agent"] + + last_index = doaj_importer.api.get_changelog(limit=1)[0].index + with open("tests/files/example_doaj_articles.json", "r") as f: + doaj_importer.bezerk_mode = False + doaj_importer.reset() + counts = JsonLinePusher(doaj_importer, f).run() + assert counts["insert"] == 0 + assert counts["exists"] == 5 + assert counts["skip"] == 0 + assert last_index == doaj_importer.api.get_changelog(limit=1)[0].index + + # cleanup file entities (so other import tests work) + success_editgroup = doaj_importer.api.get_editgroup(success_changelog.editgroup_id) + eg = quick_eg(doaj_importer.api) + for release_edit in success_editgroup.edits.releases: + doaj_importer.api.delete_release(eg.editgroup_id, release_edit.ident) + doaj_importer.api.accept_editgroup(eg.editgroup_id) + +def test_doaj_importer_existing_doi(doaj_importer): + """ + One of the DOAJ test entities has a dummy DOI (10.123/abc); this test + ensures that it isn't clobbered, an then that it gets updated. + """ + with open("tests/files/example_doaj_articles.json", "r") as f: + doaj_importer.reset() + doaj_importer.bezerk_mode = False + doaj_importer.do_updates = False + counts = JsonLinePusher(doaj_importer, f).run() + print(counts) + assert counts["insert"] == 4 + assert counts["exists"] == 1 + assert counts["skip"] == 0 + success_changelog = doaj_importer.api.get_changelog(limit=1)[0] + success_editgroup = doaj_importer.api.get_editgroup(success_changelog.editgroup_id) + + with open("tests/files/example_doaj_articles.json", "r") as f: + doaj_importer.reset() + doaj_importer.bezerk_mode = False + doaj_importer.do_updates = True + counts = JsonLinePusher(doaj_importer, f).run() + print(counts) + assert counts["insert"] == 0 + assert counts["exists"] == 4 + assert counts["update"] == 1 + update_changelog = doaj_importer.api.get_changelog(limit=1)[0] + update_editgroup = doaj_importer.api.get_editgroup(update_changelog.editgroup_id) + + with open("tests/files/example_doaj_articles.json", "r") as f: + doaj_importer.reset() + doaj_importer.bezerk_mode = False + doaj_importer.do_updates = True + counts = JsonLinePusher(doaj_importer, f).run() + print(counts) + assert counts["insert"] == 0 + assert counts["exists"] == 5 + assert counts["update"] == 0 + + # cleanup file entities (so other import tests work) + eg = quick_eg(doaj_importer.api) + for release_edit in success_editgroup.edits.releases: + doaj_importer.api.delete_release(eg.editgroup_id, release_edit.ident) + for release_edit in update_editgroup.edits.releases: + print(release_edit) + doaj_importer.api.update_release( + eg.editgroup_id, + release_edit.ident, + ReleaseEntity( + revision=release_edit.prev_revision, + ext_ids=ReleaseExtIds(), + ), + ) + doaj_importer.api.accept_editgroup(eg.editgroup_id) + +def test_doaj_dict_parse(doaj_importer): + with open("tests/files/example_doaj_articles.json", "r") as f: + raw = json.loads(f.readline()) + r = doaj_importer.parse_record(raw) + + assert r.title == "Effect of hydrogen on tensile properties and fracture behavior of PH 13-8 Mo steel" + assert r.publisher == "Elsevier" + assert r.release_type == "article-journal" + assert r.release_stage == "published" + assert r.license_slug == "cc-by-nc-nd" + assert r.original_title == None + assert r.ext_ids.doi == "10.1016/j.matdes.2016.06.110" + assert r.ext_ids.doaj == "e58f08a11ecb495ead55a44ad4f89808" + assert r.subtitle == None + assert r.release_date == None + assert r.release_year == 2016 + assert r.volume == "108" + assert r.number == None + assert r.pages == "608-617" + assert r.version == None + assert r.language == "en" + # matched by ISSN, so wouldn't be defined normally + assert r.extra['container_name'] == "Materials & Design" + assert len(r.abstracts) == 1 + assert len(r.abstracts[0].content) == 1033 + assert len(r.contribs) == 5 + assert r.contribs[0].raw_name == "Xinfeng Li" + assert r.contribs[0].given_name == None + assert r.contribs[0].surname == None + assert not r.refs + + #print(r.extra) + assert r.extra['release_month'] == 10 + assert r.extra['country'] == 'gb' |