aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-01-24 15:21:43 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-01-24 15:21:48 -0800
commit: f955f66789b0078dcb973ce587d2d3b3184e73a7 (patch)
tree: e048502b461fdfa973d4697b3043f4a516694b2f /python/tests
parent: 206acf1b37a1a34d5338c744e17ef2035cd2db58 (diff)
download: fatcat-f955f66789b0078dcb973ce587d2d3b3184e73a7.tar.gz
download: fatcat-f955f66789b0078dcb973ce587d2d3b3184e73a7.zip
allow importing huge contrib/refs lists
The motivation here isn't really to support gigantic contrib/refs lists on principle, but to be able to ingest large corpora without having to decide whether to filter out or crop such lists.
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/files/huge_crossref_doi.json.gz | bin 0 -> 12035 bytes
-rw-r--r-- python/tests/import_crossref.py | 16
2 files changed, 12 insertions, 4 deletions
diff --git a/python/tests/files/huge_crossref_doi.json.gz b/python/tests/files/huge_crossref_doi.json.gz
new file mode 100644
index 00000000..48f58257
--- /dev/null
+++ b/python/tests/files/huge_crossref_doi.json.gz
Binary files differ
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index 193f78f6..6e7f72c5 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -1,5 +1,5 @@
-import json
+import json, gzip
import pytest
from fatcat_tools.importers import CrossrefImporter, JsonLinePusher
from fixtures import api
@@ -15,9 +15,17 @@ def crossref_importer_existing(api):
with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False)
-def test_crossref_importer_batch(crossref_importer):
- with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
- JsonLinePusher(crossref_importer, f).run()
+def test_crossref_importer_huge(crossref_importer):
+ last_index = crossref_importer.api.get_changelog(limit=1)[0].index
+ with gzip.open('tests/files/huge_crossref_doi.json.gz', 'rt') as f:
+ crossref_importer.bezerk_mode = True
+ line = f.readline()
+ mega_blob = [line for i in range(95)]
+ counts = JsonLinePusher(crossref_importer, mega_blob).run()
+ assert counts['insert'] == 95
+ change = crossref_importer.api.get_changelog_entry(index=last_index+1)
+ release = crossref_importer.api.get_release(change.editgroup.edits.releases[0].ident)
+ assert len(release.contribs) == 1014
def test_crossref_importer(crossref_importer):
last_index = crossref_importer.api.get_changelog(limit=1)[0].index