From c66f9b2d98de88a98d3a1737d415bdab4e89027c Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 2 Dec 2020 11:30:03 -0800 Subject: basic test coverage of dblp release importer --- python/tests/files/dblp.dtd | 351 ++++++++++++++++++++++++++++ python/tests/files/example_dblp.xml | 66 ++++++ python/tests/files/example_dblp_article.xml | 14 ++ python/tests/import_dblp.py | 72 ++++++ 4 files changed, 503 insertions(+) create mode 100644 python/tests/files/dblp.dtd create mode 100644 python/tests/files/example_dblp.xml create mode 100644 python/tests/files/example_dblp_article.xml create mode 100644 python/tests/import_dblp.py (limited to 'python') diff --git a/python/tests/files/dblp.dtd b/python/tests/files/dblp.dtd new file mode 100644 index 00000000..7fedacd2 --- /dev/null +++ b/python/tests/files/dblp.dtd @@ -0,0 +1,351 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/python/tests/files/example_dblp.xml b/python/tests/files/example_dblp.xml new file mode 100644 index 00000000..ac846d4f --- /dev/null +++ b/python/tests/files/example_dblp.xml @@ -0,0 +1,66 @@ + + + + + + + +Moira C. Norrie +Michael H. Böhlen +PIM Meets Web 2.0. +15-25 +2008 +ER +http://dx.doi.org/10.1007/978-3-540-87877-3 3 +conf/er/2008 +db/conf/er/er2008.html#Norrie08 + + + +Qing Li +Stefano Spaccapietra +Eric Yu +Antoni Olivé +Conceptual Modeling - ER 2008, 27th International Conference on Conceptual Modeling, Barcelona, Spain, October 20-24, 2008. Proceedings +5231 +2008 +978-3-540-87876-6 +ER +Lecture Notes in Computer Science +Springer +db/conf/er/er2008.html + + + +
+Craig Gentry +Computing arbitrary functions of encrypted data. +97-105 +2010 +53 +Commun. ACM +3 +http://doi.acm.org/10.1145/1666420.1666444 +db/journals/cacm/cacm53.html#Gentry10 +
+ + + +Theory and Applications of Trapdoor Functions (Extended Abstract) +Andrew Chi-Chih Yao +80-91 +conf/focs/FOCS23 +1982 +FOCS +db/conf/focs/focs82.html#Yao82a +http://doi.ieeecomputersociety.org/10.1109/SFCS.1982.45 + + + + +Oded Goldreich +Home Page +http://www.wisdom.weizmann.ac.il/~oded/ + + +
diff --git a/python/tests/files/example_dblp_article.xml b/python/tests/files/example_dblp_article.xml new file mode 100644 index 00000000..d6b192b1 --- /dev/null +++ b/python/tests/files/example_dblp_article.xml @@ -0,0 +1,14 @@ +
+Alexander S. Szalay +Michael H. Böhlen +Nicolas Heist +Jens Lehmann 0001 +Jim Gray, astronomer. +58-65 +2008 +51 +Commun. ACM +11 +http://doi.acm.org/10.1145/1400214.1400231 +db/journals/cacm/cacm51.html#Szalay08 +
# Reconstructed from the following patch hunk (whitespace had been collapsed):
#   diff --git a/python/tests/import_dblp.py b/python/tests/import_dblp.py
#   new file mode 100644
#   index 00000000..fd40eb06
#   --- /dev/null
#   +++ b/python/tests/import_dblp.py
#   @@ -0,0 +1,72 @@
"""Basic test coverage of the dblp release importer."""

import pytest
from bs4 import BeautifulSoup

from fatcat_tools.importers import DblpReleaseImporter, Bs4XmlLargeFilePusher
# NOTE(review): the `api` fixture requested below is not defined in this
# module; presumably it is provided by this wildcard import -- confirm.
from fixtures import *


# Paths are relative to the directory the test suite is launched from.
_ISSN_MAP_PATH = 'tests/files/ISSN-to-ISSN-L.snip.txt'
_EXAMPLE_DUMP_PATH = 'tests/files/example_dblp.xml'
_EXAMPLE_ARTICLE_PATH = 'tests/files/example_dblp_article.xml'


@pytest.fixture(scope="function")
def dblp_importer(api):
    """Yield a DblpReleaseImporter constructed with bezerk_mode=True.

    The ISSN map file stays open for as long as the fixture is in use and is
    closed when the test finishes.
    """
    with open(_ISSN_MAP_PATH, 'r') as issn_file:
        yield DblpReleaseImporter(api, issn_file, bezerk_mode=True, lookup_refs=True)


@pytest.fixture(scope="function")
def dblp_importer_existing(api):
    """Yield a DblpReleaseImporter constructed with bezerk_mode=False.

    Not requested by any test in this module; test_dblp_importer flips
    bezerk_mode on the `dblp_importer` instance instead.
    """
    with open(_ISSN_MAP_PATH, 'r') as issn_file:
        yield DblpReleaseImporter(api, issn_file, bezerk_mode=False, lookup_refs=True)


def _push_example_dump(importer):
    """Run `importer` over the example dblp dump and return the pusher's counts."""
    with open(_EXAMPLE_DUMP_PATH, 'rb') as f:
        pusher = Bs4XmlLargeFilePusher(importer, f, importer.ELEMENT_TYPES, use_lxml=True)
        return pusher.run()


def test_dblp_importer(dblp_importer):
    """Import the example dump twice: once inserting, once hitting existing records."""
    api = dblp_importer.api
    last_index = api.get_changelog(limit=1)[0].index

    # First pass, bezerk mode on: every importable record is inserted.
    dblp_importer.bezerk_mode = True
    counts = _push_example_dump(dblp_importer)
    print(counts)
    assert counts['insert'] == 3
    assert counts['exists'] == 0
    assert counts['skip'] == 1

    # fetch most recent editgroup
    change = api.get_changelog_entry(index=last_index+1)
    eg = change.editgroup
    assert eg.description
    assert "dblp" in eg.description.lower()
    assert eg.extra['git_rev']
    assert "fatcat_tools.DblpReleaseImporter" in eg.extra['agent']

    # check that entity name mangling was fixed on import
    eg = api.get_editgroup(eg.editgroup_id)
    release = api.get_release(eg.edits.releases[0].ident)
    assert release.contribs[1].raw_name == "Michael H. Böhlen"

    # Second pass, bezerk mode off, counters reset: the same records are now
    # reported as existing and the changelog index must not advance.
    last_index = api.get_changelog(limit=1)[0].index
    dblp_importer.bezerk_mode = False
    dblp_importer.reset()
    counts = _push_example_dump(dblp_importer)
    print(counts)
    assert counts['insert'] == 0
    assert counts['exists'] == 3
    assert counts['skip'] == 1
    assert last_index == api.get_changelog(limit=1)[0].index


def test_dblp_xml_parse(dblp_importer):
    """Parse the single-article example file and check the resulting release fields."""
    with open(_EXAMPLE_ARTICLE_PATH, 'r') as f:
        soup = BeautifulSoup(f, "xml")
        r1 = dblp_importer.parse_record(soup.find_all("article")[0])

    # The fixture file's title ends with a period; the expected value does not.
    assert r1.title == "Jim Gray, astronomer"
    assert r1.contribs[0].raw_name == "Alexander S. Szalay"
    # tested above, in LXML import path
    #assert r1.contribs[1].raw_name == "Michael H. Bohlen"
    assert r1.contribs[2].raw_name == "Nicolas Heist"
    # XXX: assert r1.contribs[2].extra['orcid'] == "0000-0002-4354-9138"
    # The fixture file lists this author as "Jens Lehmann 0001".
    assert r1.contribs[3].raw_name == "Jens Lehmann"
    assert r1.ext_ids.dblp == "journals/cacm/Szalay08"
    assert r1.ext_ids.doi == "10.1145/1400214.1400231"
    assert r1.pages == "58-65"
    assert r1.issue == "11"
    assert r1.volume == "51"
    assert r1.release_year == 2008
    assert r1.extra['container_name'] == "Commun. ACM"
    assert r1.extra['dblp']['type'] == "article"

# (patch trailer) -- cgit v1.2.3