summaryrefslogtreecommitdiffstats
path: root/python/tests/import_fileset_generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/tests/import_fileset_generic.py')
-rw-r--r--python/tests/import_fileset_generic.py59
1 files changed, 59 insertions, 0 deletions
diff --git a/python/tests/import_fileset_generic.py b/python/tests/import_fileset_generic.py
new file mode 100644
index 00000000..44310304
--- /dev/null
+++ b/python/tests/import_fileset_generic.py
@@ -0,0 +1,59 @@
+
+import json
+import pytest
+
+from fatcat_tools.importers import FilesetImporter, JsonLinePusher
+from fixtures import *
+
+
+@pytest.fixture(scope="function")
+def fileset_importer(api):
+ yield FilesetImporter(api)
+
+# TODO: use API to check that entities actually created...
+def test_fileset_importer_basic(fileset_importer):
+ with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f:
+ JsonLinePusher(fileset_importer, f).run()
+
+def test_fileset_importer(fileset_importer):
+ last_index = fileset_importer.api.get_changelog(limit=1)[0].index
+ with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f:
+ fileset_importer.bezerk_mode = True
+ counts = JsonLinePusher(fileset_importer, f).run()
+ assert counts['insert'] == 1
+ assert counts['exists'] == 0
+ assert counts['skip'] == 0
+
+ # fetch most recent editgroup
+ change = fileset_importer.api.get_changelog_entry(index=last_index+1)
+ eg = change.editgroup
+ assert eg.description
+ assert "generic fileset" in eg.description.lower()
+ assert eg.extra['git_rev']
+ assert "fatcat_tools.FilesetImporter" in eg.extra['agent']
+
+ # re-insert; should skip
+ with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f:
+ fileset_importer.reset()
+ fileset_importer.bezerk_mode = False
+ counts = JsonLinePusher(fileset_importer, f).run()
+ assert counts['insert'] == 0
+ assert counts['exists'] == 1
+ assert counts['skip'] == 0
+
+def test_fileset_dict_parse(fileset_importer):
+ with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f:
+ raw = json.loads(f.readline())
+ fs = fileset_importer.parse_record(raw)
+
+ assert fs.manifest[0].sha1 == "cc9bd558ca79b30b2966714da7ef4129537fde0c"
+ assert fs.manifest[0].md5 == "742c40404c9a4dbbd77c0985201c639f"
+ assert fs.manifest[0].sha256 == "3a7c07ad17ce3638d5a1dd21f995a496e430b952eef00270ad741d506984370f"
+ assert fs.manifest[0].size == 640500
+ assert fs.manifest[0].path == "070111_LatA_100nM.txt"
+ assert fs.manifest[0].extra['mimetype'] == "text/plain"
+ assert len(fs.urls) == 3
+ for u in fs.urls:
+ if u.rel == "repo":
+ assert u.url == "https://merritt.cdlib.org/d/ark%3A%2Fb5068%2Fd1rp49/1/"
+ assert len(fs.release_ids) == 1