aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/import_file_meta.py
blob: 2555f35122906664e2c547acd8192e51fda56167 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json

import pytest
from fatcat_openapi_client import FileEntity
from fixtures import *

from fatcat_tools.importers import FileMetaImporter, JsonLinePusher


@pytest.fixture(scope="function")
def file_meta_importer(api):
    """Yield a new ``FileMetaImporter`` built on the ``api`` fixture for each test function."""
    importer = FileMetaImporter(api)
    yield importer


def test_file_meta_importer_basic(file_meta_importer):
    """Run the importer over the example JSON lines and check its counters.

    Two file entities are created and accepted up front: one carrying full
    metadata (size, md5, sha1, sha256, mimetype) and one stub with only a
    sha1 and mimetype. The importer is then pushed through
    ``tests/files/example_file_meta.json`` and the resulting counts are
    compared against the expected values.

    The created entities are deleted in a ``finally`` clause so that a
    failing assertion does not leave them behind for other tests.
    """
    api = file_meta_importer.api

    # insert two file entities
    # NOTE(review): quick_eg presumably comes from the `fixtures` star import
    # and returns a fresh editgroup -- confirm.
    eg = quick_eg(api)
    # with full metadata
    f1edit = api.create_file(
        eg.editgroup_id,
        FileEntity(
            size=372121,
            md5="e1fd97475c8aa102568f5d70a1bd0c07",
            sha1="0000045687dad717ed6512e395b04ec9c00995b7",
            sha256="51bdc9e40cc175089fcb60b0b188e6cbcdcddb1ff8acbe6b039b8f8fff0afff0",
            mimetype="application/pdf",
        ),
    )
    # partial/stub metadata
    f2edit = api.create_file(
        eg.editgroup_id,
        FileEntity(
            sha1="00000376ad49f56145721503f1eb5e6e49e779fd",
            mimetype="application/pdf",
        ),
    )
    api.accept_editgroup(eg.editgroup_id)

    try:
        # explicit encoding: do not depend on the platform's locale default
        with open("tests/files/example_file_meta.json", "r", encoding="utf-8") as f:
            counts = JsonLinePusher(file_meta_importer, f).run()

        assert counts["insert"] == 0
        assert counts["exists"] == 0
        assert counts["update"] == 1
        assert counts["skip-no-match"] == 4
        assert counts["skip-missing-field"] == 1
        assert counts["skip-existing-complete"] == 1
    finally:
        # cleanup file entities; runs even when an assertion above fails so
        # the entities created here do not persist past this test
        cleanup_eg = quick_eg(api)
        api.delete_file(cleanup_eg.editgroup_id, f1edit.ident)
        api.delete_file(cleanup_eg.editgroup_id, f2edit.ident)
        api.accept_editgroup(cleanup_eg.editgroup_id)


def test_file_meta_dict_parse(file_meta_importer):
    """Parse the first JSON line of the example file and check each parsed field."""
    # Only the first record is needed; read it and release the file right away.
    with open("tests/files/example_file_meta.json", "r") as handle:
        first_line = handle.readline()

    entity = file_meta_importer.parse_record(json.loads(first_line))

    assert entity.sha1 == "00000088bbc15a03ab89d8da6c356bf25aea9519"
    assert entity.md5 == "f4308d58dc8806232c30edc56a896412"
    assert entity.sha256 == "593f5a260129bb89ed316c9ddcb7b2f9c2e3da8adf87d29f212b423de32a2c59"
    assert entity.mimetype == "application/pdf"
    assert entity.size == 354118