diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 18:13:14 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 18:13:14 -0700 | 
| commit | cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7 (patch) | |
| tree | 5e4034027b51f3ee4d2a488bb2cbb7a75c3bd0d8 /python/tests/import_ingest.py | |
| parent | 78f08280edea4ff65ca613ad30005c45cc48dea6 (diff) | |
| download | fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.tar.gz fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.zip  | |
fmt (black): tests/
Diffstat (limited to 'python/tests/import_ingest.py')
| -rw-r--r-- | python/tests/import_ingest.py | 93 | 
1 file changed, 50 insertions, 43 deletions
diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py index 04a8e7f6..d9e7d294 100644 --- a/python/tests/import_ingest.py +++ b/python/tests/import_ingest.py @@ -1,4 +1,3 @@ -  import json  import pytest @@ -15,94 +14,100 @@ from fatcat_tools.importers import (  def ingest_importer(api):      yield IngestFileResultImporter(api) +  @pytest.fixture(scope="function")  def ingest_web_importer(api):      yield IngestWebResultImporter(api) +  # TODO: use API to check that entities actually created...  def test_ingest_importer_basic(ingest_importer): -    with open('tests/files/example_ingest.json', 'r') as f: +    with open("tests/files/example_ingest.json", "r") as f:          JsonLinePusher(ingest_importer, f).run() +  def test_ingest_importer(ingest_importer):      last_index = ingest_importer.api.get_changelog(limit=1)[0].index -    with open('tests/files/example_ingest.json', 'r') as f: +    with open("tests/files/example_ingest.json", "r") as f:          ingest_importer.bezerk_mode = True          counts = JsonLinePusher(ingest_importer, f).run() -    assert counts['insert'] == 1 -    assert counts['exists'] == 0 -    assert counts['skip'] == 1 +    assert counts["insert"] == 1 +    assert counts["exists"] == 0 +    assert counts["skip"] == 1      # fetch most recent editgroup -    change = ingest_importer.api.get_changelog_entry(index=last_index+1) +    change = ingest_importer.api.get_changelog_entry(index=last_index + 1)      eg = change.editgroup      assert eg.description      assert "crawled from web" in eg.description.lower() -    assert eg.extra['git_rev'] -    assert "fatcat_tools.IngestFileResultImporter" in eg.extra['agent'] +    assert eg.extra["git_rev"] +    assert "fatcat_tools.IngestFileResultImporter" in eg.extra["agent"]      # re-insert; should skip -    with open('tests/files/example_ingest.json', 'r') as f: +    with open("tests/files/example_ingest.json", "r") as f:          ingest_importer.reset()          
ingest_importer.bezerk_mode = False          counts = JsonLinePusher(ingest_importer, f).run() -    assert counts['insert'] == 0 -    assert counts['exists'] == 1 -    assert counts['skip'] == 1 +    assert counts["insert"] == 0 +    assert counts["exists"] == 1 +    assert counts["skip"] == 1 +  def test_ingest_importer_xml(ingest_importer):      last_index = ingest_importer.api.get_changelog(limit=1)[0].index -    with open('tests/files/example_ingest_xml.json', 'r') as f: +    with open("tests/files/example_ingest_xml.json", "r") as f:          ingest_importer.bezerk_mode = True          counts = JsonLinePusher(ingest_importer, f).run()      print(counts) -    assert counts['insert'] == 1 -    assert counts['exists'] == 0 -    assert counts['skip'] == 0 +    assert counts["insert"] == 1 +    assert counts["exists"] == 0 +    assert counts["skip"] == 0      # fetch most recent editgroup -    change = ingest_importer.api.get_changelog_entry(index=last_index+1) +    change = ingest_importer.api.get_changelog_entry(index=last_index + 1)      eg = change.editgroup      assert eg.description      assert "crawled from web" in eg.description.lower() -    assert eg.extra['git_rev'] -    assert "fatcat_tools.IngestFileResultImporter" in eg.extra['agent'] +    assert eg.extra["git_rev"] +    assert "fatcat_tools.IngestFileResultImporter" in eg.extra["agent"]      # re-import should skip -    with open('tests/files/example_ingest_xml.json', 'r') as f: +    with open("tests/files/example_ingest_xml.json", "r") as f:          ingest_importer.reset()          ingest_importer.bezerk_mode = False          counts = JsonLinePusher(ingest_importer, f).run() -    assert counts['insert'] == 0 -    assert counts['exists'] == 1 -    assert counts['skip'] == 0 +    assert counts["insert"] == 0 +    assert counts["exists"] == 1 +    assert counts["skip"] == 0 +  def test_ingest_importer_web(ingest_web_importer):      last_index = ingest_web_importer.api.get_changelog(limit=1)[0].index -  
  with open('tests/files/example_ingest_html.json', 'r') as f: +    with open("tests/files/example_ingest_html.json", "r") as f:          ingest_web_importer.bezerk_mode = True          counts = JsonLinePusher(ingest_web_importer, f).run()      print(counts) -    assert counts['insert'] == 1 -    assert counts['exists'] == 0 -    assert counts['skip'] == 0 +    assert counts["insert"] == 1 +    assert counts["exists"] == 0 +    assert counts["skip"] == 0      # fetch most recent editgroup -    change = ingest_web_importer.api.get_changelog_entry(index=last_index+1) +    change = ingest_web_importer.api.get_changelog_entry(index=last_index + 1)      eg = change.editgroup      assert eg.description      assert "crawled from web" in eg.description.lower() -    assert eg.extra['git_rev'] -    assert "fatcat_tools.IngestWebResultImporter" in eg.extra['agent'] +    assert eg.extra["git_rev"] +    assert "fatcat_tools.IngestWebResultImporter" in eg.extra["agent"]      # re-import should skip -    with open('tests/files/example_ingest_html.json', 'r') as f: +    with open("tests/files/example_ingest_html.json", "r") as f:          ingest_web_importer.reset()          ingest_web_importer.bezerk_mode = False          counts = JsonLinePusher(ingest_web_importer, f).run() -    assert counts['insert'] == 0 -    assert counts['exists'] == 1 -    assert counts['skip'] == 0 +    assert counts["insert"] == 0 +    assert counts["exists"] == 1 +    assert counts["skip"] == 0 +  def test_ingest_importer_stage(ingest_importer, api):      """ @@ -117,29 +122,30 @@ def test_ingest_importer_stage(ingest_importer, api):          dict(request_stage="published", release_stage="draft", status="skip-release-stage"),      ]      ingest_importer.bezerk_mode = True -    with open('tests/files/example_ingest.json', 'r') as f: +    with open("tests/files/example_ingest.json", "r") as f:          raw = json.loads(f.readline())      for row in test_table: -        #print(row) +        # print(row)    
      # set dummy record stage          eg = quick_eg(api)          r1 = api.lookup_release(doi="10.123/abc") -        r1.release_stage = row['release_stage'] +        r1.release_stage = row["release_stage"]          api.update_release(eg.editgroup_id, r1.ident, r1)          api.accept_editgroup(eg.editgroup_id)          # set ingest request stage -        raw['request']['release_stage'] = row['request_stage'] +        raw["request"]["release_stage"] = row["request_stage"]          ingest_importer.reset()          ingest_importer.push_record(raw)          counts = ingest_importer.finish()          print(counts)          assert counts["total"] == 1 -        assert counts[row['status']] == 1 +        assert counts[row["status"]] == 1 +  def test_ingest_dict_parse(ingest_importer): -    with open('tests/files/example_ingest.json', 'r') as f: +    with open("tests/files/example_ingest.json", "r") as f:          raw = json.loads(f.readline())          f = ingest_importer.parse_record(raw)          assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" @@ -154,14 +160,15 @@ def test_ingest_dict_parse(ingest_importer):                  assert u.url.startswith("https://web.archive.org/")          assert len(f.release_ids) == 1 +  def test_ingest_dict_parse_old(ingest_importer): -    with open('tests/files/example_ingest.old.json', 'r') as f: +    with open("tests/files/example_ingest.old.json", "r") as f:          raw = json.loads(f.readline())          # ancient ingest requests had no type; skip them          f = ingest_importer.parse_record(raw)          assert f is None -        raw['request']['ingest_type'] = 'pdf' +        raw["request"]["ingest_type"] = "pdf"          f = ingest_importer.parse_record(raw)          assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313"  | 
