about | summary | refs | log | tree | commit | diff | stats
path: root/python/tests/import_arxiv.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-11-02 18:13:14 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-11-02 18:13:14 -0700
commit: cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7 (patch)
tree: 5e4034027b51f3ee4d2a488bb2cbb7a75c3bd0d8 /python/tests/import_arxiv.py
parent: 78f08280edea4ff65ca613ad30005c45cc48dea6 (diff)
download: fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.tar.gz
download: fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.zip
fmt (black): tests/
Diffstat (limited to 'python/tests/import_arxiv.py')
-rw-r--r-- python/tests/import_arxiv.py 51
1 files changed, 30 insertions, 21 deletions
diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py
index 9368fb06..2a297c00 100644
--- a/python/tests/import_arxiv.py
+++ b/python/tests/import_arxiv.py
@@ -1,4 +1,3 @@
-
import pytest
from bs4 import BeautifulSoup
from fixtures import *
@@ -12,35 +11,37 @@ def arxiv_importer(api):
ari._test_override = True
return ari
+
def test_arxiv_importer(arxiv_importer):
last_index = arxiv_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/arxivraw_1810.09584.xml', 'r') as f:
+ with open("tests/files/arxivraw_1810.09584.xml", "r") as f:
arxiv_importer.bezerk_mode = True
counts = Bs4XmlFilePusher(arxiv_importer, f, "record").run()
- assert counts['insert'] == 2
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 2
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = arxiv_importer.api.get_changelog_entry(index=last_index+1)
+ change = arxiv_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "arxiv" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.ArxivRawImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.ArxivRawImporter" in eg.extra["agent"]
last_index = arxiv_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/arxivraw_1810.09584.xml', 'r') as f:
+ with open("tests/files/arxivraw_1810.09584.xml", "r") as f:
arxiv_importer.bezerk_mode = False
arxiv_importer.reset()
counts = Bs4XmlFilePusher(arxiv_importer, f, "record").run()
- assert counts['insert'] == 0
- assert counts['exists'] == 2
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 2
+ assert counts["skip"] == 0
assert last_index == arxiv_importer.api.get_changelog(limit=1)[0].index
+
def test_arxiv_xml_parse(arxiv_importer):
- with open('tests/files/arxivraw_1810.09584.xml', 'r') as f:
+ with open("tests/files/arxivraw_1810.09584.xml", "r") as f:
soup = BeautifulSoup(f, "xml")
r = arxiv_importer.parse_record(soup.find_all("record")[0])
@@ -67,11 +68,15 @@ def test_arxiv_xml_parse(arxiv_importer):
assert r2.release_year == 2019
assert str(r2.release_date) == "2019-01-13"
# matched by ISSN, so shouldn't be in there?
- #assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"
+ # assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"
assert len(r1.contribs) == 4
- assert r1.extra['arxiv']['categories'] == ['cond-mat.stat-mech', 'physics.bio-ph', 'physics.data-an']
- assert r1.extra['arxiv']['base_id'] == '1810.09584'
- assert r1.extra['superceded'] is True
+ assert r1.extra["arxiv"]["categories"] == [
+ "cond-mat.stat-mech",
+ "physics.bio-ph",
+ "physics.data-an",
+ ]
+ assert r1.extra["arxiv"]["base_id"] == "1810.09584"
+ assert r1.extra["superceded"] is True
assert r1.contribs[0].raw_name == "Raphael Chetrite"
assert r1.contribs[0].role == "author"
@@ -90,11 +95,15 @@ def test_arxiv_xml_parse(arxiv_importer):
assert r1.abstracts == r2.abstracts
- assert r1.extra['arxiv']['comments'] == "7 pages, 2 figures"
- assert r1.extra['arxiv']['categories'] == ["cond-mat.stat-mech", "physics.bio-ph", "physics.data-an"]
+ assert r1.extra["arxiv"]["comments"] == "7 pages, 2 figures"
+ assert r1.extra["arxiv"]["categories"] == [
+ "cond-mat.stat-mech",
+ "physics.bio-ph",
+ "physics.data-an",
+ ]
- assert not r2.extra.get('superceded')
- r2.extra['superceded'] = True
+ assert not r2.extra.get("superceded")
+ r2.extra["superceded"] = True
assert r1.extra == r2.extra
assert not r1.refs