Diffstat (limited to 'python/tests/import_ingest.py')
-rw-r--r--    python/tests/import_ingest.py    60
1 file changed, 60 insertions(+), 0 deletions(-)
diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py
index 44dd5a0b..8f8a34c7 100644
--- a/python/tests/import_ingest.py
+++ b/python/tests/import_ingest.py
@@ -5,6 +5,7 @@ from fixtures import *
from fatcat_tools.importers import (
IngestFileResultImporter,
+ IngestFilesetFileResultImporter,
IngestFilesetResultImporter,
IngestWebResultImporter,
JsonLinePusher,
@@ -26,6 +27,11 @@ def ingest_fileset_importer(api):
yield IngestFilesetResultImporter(api)
+@pytest.fixture(scope="function")
+def ingest_fileset_file_importer(api):
+ yield IngestFilesetFileResultImporter(api)
+
+
# TODO: use API to check that entities actually created...
def test_ingest_importer_basic(ingest_importer):
with open("tests/files/example_ingest.json", "r") as f:
@@ -238,3 +244,57 @@ def test_ingest_fileset_importer(ingest_fileset_importer):
assert counts["exists"] == 7
assert counts["skip"] == 13
assert counts["skip-release-not-found"] == 13
+
+
+def test_ingest_fileset_file_dict_parse(ingest_fileset_file_importer):
+ with open("tests/files/example_fileset_file_ingest_result.json", "r") as f:
+ raw = json.loads(f.readline())
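+    # parse_record() should map a single ingest result line to a file entity
+    # with the hashes, size, mimetype, URLs, and release linkage asserted below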
+ fe = ingest_fileset_file_importer.parse_record(raw)
+ assert fe.sha1 == "6fb020064da66bb7a666c17555611cf6820fc9ae"
+ assert fe.md5 == "dfc41b617564f99a12e6077a6208876f"
+ assert fe.sha256 == "2febad53ff0f163a18d7cbb913275bf99ed2544730cda191458837e2b0da9d18"
+ assert fe.mimetype == "image/tiff"
+ assert fe.size == 410631015
+ assert fe.extra["path"] == "NDVI_Diff_1990_2018_T06.tif"
+ assert len(fe.urls) == 2
+ for u in fe.urls:
+ if u.rel == "repository":
+ assert u.url == "https://ndownloader.figshare.com/files/14460875"
+ if u.rel == "archive":
+ assert (
+ u.url
+ == "https://archive.org/download/springernature.figshare.com-7767695-v1/NDVI_Diff_1990_2018_T06.tif"
+ )
+ assert len(fe.release_ids) == 1
+
+
+def test_ingest_fileset_file_importer(ingest_fileset_file_importer):
+ """
+    Similar to the above, but specifically tests the 'file'/'success-file' import pathway.
+ """
+ last_index = ingest_fileset_file_importer.api.get_changelog(limit=1)[0].index
+ with open("tests/files/example_fileset_file_ingest_result.json", "r") as f:
+ ingest_fileset_file_importer.bezerk_mode = True
+ counts = JsonLinePusher(ingest_fileset_file_importer, f).run()
+ assert counts["insert"] == 16
+ assert counts["exists"] == 0
+ assert counts["skip"] == 4
+ assert counts["skip-bad-hashes"] == 4
+
+ # fetch most recent editgroup
+ change = ingest_fileset_file_importer.api.get_changelog_entry(index=last_index + 1)
+ eg = change.editgroup
+ assert eg.description
+ assert "crawled from web" in eg.description.lower()
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.IngestFilesetFileResultImporter" in eg.extra["agent"]
+
+ # re-insert; should skip
+ with open("tests/files/example_fileset_file_ingest_result.json", "r") as f:
+ ingest_fileset_file_importer.reset()
+ ingest_fileset_file_importer.bezerk_mode = False
+ counts = JsonLinePusher(ingest_fileset_file_importer, f).run()
+ assert counts["insert"] == 0
+ assert counts["exists"] == 16
+ assert counts["skip"] == 4
+ assert counts["skip-bad-hashes"] == 4