From 03d7c929e1b415cbd612d612b9b1c9725f5690bb Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 14 Sep 2018 16:53:28 -0700 Subject: switch manifest importer to be json-based --- python/tests/files/example_matched.json | 6 +++--- python/tests/manifest.py | 34 --------------------------------- 2 files changed, 3 insertions(+), 37 deletions(-) delete mode 100644 python/tests/manifest.py (limited to 'python/tests') diff --git a/python/tests/files/example_matched.json b/python/tests/files/example_matched.json index 79db1296..d9d71669 100644 --- a/python/tests/files/example_matched.json +++ b/python/tests/files/example_matched.json @@ -1,3 +1,3 @@ -{ "dois": ["10.123/abc"], "sha1": "00242a192acc258bdfdb151943419437f440c313", "md5": "f4de91152c7ab9fdc2a128f962faebff", "sha256": "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size": 255629, "cdx": { "dt": "20170227164644", "url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable" }, "mimetype": "application/pdf" } -{ "dois": ["10.123/abc"], "sha1": "3f242a192acc258bdfdb151943419437f440c313", "md5": "f4de91152c7ab9fdc2a128f962faebff", "sha256": "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size": 255629, "cdx": { "dt": "20170227164644", "url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable" }, "mimetype": "application/pdf" } -{ "dois": ["10.456/1231123"], "sha1": "000000000000258bdfdb151943419437f440c313", "md5": "000000000000b9fdc2a128f962faebff", "sha256": "000000000000620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size": 123, "cdx": { "dt": "20000000164644", "url": "http://no-plos.org/plosme" }, "mimetype": "application/txt" } +{ "dois": ["10.123/abc"], "sha1": "00242a192acc258bdfdb151943419437f440c313", "md5": "f4de91152c7ab9fdc2a128f962faebff", "sha256": "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size": 255629, "cdx": [{ "dt": "20170227164644", "url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable" }], "mimetype": "application/pdf" } +{ "dois": ["10.123/abc"], "sha1": "3f242a192acc258bdfdb151943419437f440c313", "md5": "f4de91152c7ab9fdc2a128f962faebff", "sha256": "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size": 255629, "cdx": [{ "dt": "20170227164644", "url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable" }], "mimetype": "application/pdf" , "urls": ["http://other.tld/asdf"]} +{ "dois": ["10.456/1231123"], "sha1": "000000000000258bdfdb151943419437f440c313", "md5": "000000000000b9fdc2a128f962faebff", "sha256": "000000000000620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size": 123, "cdx": [{ "dt": "20000000164644", "url": "http://no-plos.org/plosme" }], "mimetype": "application/txt" } diff --git a/python/tests/manifest.py b/python/tests/manifest.py deleted file mode 100644 index 030d9e48..00000000 --- a/python/tests/manifest.py +++ /dev/null @@ -1,34 +0,0 @@ - -import json -import pytest -from fatcat.manifest_importer import FatcatManifestImporter - - -@pytest.fixture(scope="function") -def manifest_importer(): - yield FatcatManifestImporter("http://localhost:9411/v0") - -# TODO: use API to check that entities actually created... -#def test_manifest_importer_batch(manifest_importer): -# with open('tests/files/0000-0001-8254-7103.json', 'r') as f: -# manifest_importer.process_batch(f) - -#def test_manifest_importer(manifest_importer): -# with open('tests/files/0000-0001-8254-7103.json', 'r') as f: -# manifest_importer.process_source(f) - -def test_manifest_row_parse(manifest_importer): - # (sha1, mimetype, size_bytes, md5, doi, url, datetime) = row - - c = manifest_importer.parse_manifest_row( - (None, None, None, None, None, None, None)) - assert c == None - - c = manifest_importer.parse_manifest_row( - ("7d97e98f8af710c7e7fe703abc8f639e0ee507c4", "application/pdf", "12345", "8af710c7e7fe703abc8f639e0ee507c4", "10.1234/asdf", "https://example.com/thing.pdf", "200001010000")) - assert c.sha1 == "7d97e98f8af710c7e7fe703abc8f639e0ee507c4" - assert c.mimetype == "application/pdf" - assert c.urls[0].url == "https://example.com/thing.pdf" - assert c.urls[0].rel == "web" - assert c.urls[1].url == "https://web.archive.org/web/200001010000/https://example.com/thing.pdf" - assert c.urls[1].rel == "webarchive" -- cgit v1.2.3