1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
import json
import pytest
import datetime
from copy import copy
from fatcat_client import *
from fatcat_client.rest import ApiException
from fixtures import *
def test_webcapture(api):
    """Round-trip a webcapture through create / accept / fetch and compare it.

    A release and a webcapture referencing that release are created in the
    same editgroup, the editgroup is accepted, and the webcapture is then
    fetched both by ident and by revision. The fetched copies must agree with
    each other and with what was submitted, and the release fetched with
    ``expand="webcaptures"`` must carry the same CDX rows.
    """
    editgroup = quick_eg(api)

    release = ReleaseEntity(title="test webcapture release")
    release_edit = api.create_release(release, editgroup_id=editgroup.editgroup_id)

    # Every timestamp below comes from its own now() call (a fixed value such
    # as "2012-01-02T03:04:05Z" was used here previously).
    capture_time = datetime.datetime.now(datetime.timezone.utc)
    tarball_row = WebcaptureEntityCdx(
        surt="site,example,)/data/thing.tar.gz",
        timestamp=datetime.datetime.now(datetime.timezone.utc),
        url="http://example.site/data/thing.tar.gz",
        mimetype="application/gzip",
        status_code=200,
        sha1="455face3598611458efe1f072e58624790a67266",
        sha256="c7b49f3e84cd1b7cb0b0e3e9f632b7be7e21b4dc229df23331f880a8a7dfa75a",
    )
    readme_row = WebcaptureEntityCdx(
        surt="site,example,)/README.md",
        timestamp=datetime.datetime.now(datetime.timezone.utc),
        url="http://example.site/README.md",
        mimetype="text/markdown",
        status_code=200,
        sha1="455face3598611458efe1f072e58624790a67266",
        sha256="429bcafa4d3d0072d5b2511e12c85c1aac1d304011d1c406da14707f7b9cd905",
    )
    wayback_url = FileEntityUrls(rel="wayback", url="https://web.archive.org/web/")

    submitted = WebcaptureEntity(
        original_url="http://example.site",
        timestamp=capture_time,
        cdx=[tarball_row, readme_row],
        archive_urls=[wayback_url],
        release_ids=[release_edit.ident],
    )

    webcapture_edit = api.create_webcapture(
        submitted, editgroup_id=editgroup.editgroup_id)
    api.accept_editgroup(editgroup.editgroup_id)
    fetched = api.get_webcapture(webcapture_edit.ident)

    # The same entity looked up by revision must match the edit and the
    # ident-based lookup.
    fetched_by_rev = api.get_webcapture_revision(webcapture_edit.revision)
    assert webcapture_edit.revision == fetched_by_rev.revision
    assert fetched.revision == fetched_by_rev.revision
    assert fetched.timestamp == fetched_by_rev.timestamp

    # Submitted vs. fetched fields. archive_urls is compared member by member:
    # whole-list equality was tried and did not hold (cause not yet known).
    submitted_url = submitted.archive_urls[0]
    fetched_url = fetched.archive_urls[0]
    assert submitted_url.rel == fetched_url.rel
    assert submitted_url.url == fetched_url.url
    assert submitted.cdx == fetched.cdx
    assert submitted.release_ids == fetched.release_ids
    assert submitted.timestamp == fetched.timestamp
    assert submitted.original_url == fetched.original_url

    # Release expansion: the linked release should expose the webcapture.
    expanded_release = api.get_release(release_edit.ident, expand="webcaptures")
    print(expanded_release)
    assert expanded_release.webcaptures[0].cdx == submitted.cdx
def test_webcapture_examples(api):
    """Spot-check fields of the webcapture ``aaaaaaaaaaaaa53xaaaaaaaaam``.

    NOTE(review): presumably this ident belongs to pre-loaded example data in
    the test database -- confirm against the fixture setup.
    """
    example = api.get_webcapture("aaaaaaaaaaaaa53xaaaaaaaaam")

    cdx_rows = example.cdx
    assert cdx_rows[0].surt == "org,asheesh)/"
    assert cdx_rows[1].sha1 == "a637f1d27d9bcb237310ed29f19c07e1c8cf0aa5"
    assert example.archive_urls[1].rel == "warc"
def test_bad_webcapture(api):
    """Check that the API rejects every webcapture listed in ``bad_list``.

    Each entry is submitted with ``create_webcapture`` inside a fresh
    editgroup and must raise ``ApiException``.
    """
    eg = quick_eg(api)

    bad_list = [
        # NOTE(review): presumably rejected because `timestamp` is a bare
        # integer rather than a datetime -- confirm which validation actually
        # fires. (This entry used to be labelled "good", but the loop below
        # requires it to be refused.)
        WebcaptureEntity(cdx=[
            WebcaptureEntityCdx(
                surt="site,example,)/123.jpg",
                url="http://example.site/123.jpg",
                sha1="455face3598611458efe1f072e58624790a67266",
                timestamp=201506071122,
            ),
        ]),
    ]

    for b in bad_list:
        # Use the ApiException name this module imports explicitly; the
        # dotted path `fatcat_client.rest.ApiException` only resolves if some
        # star-import happens to bind the `fatcat_client` module name.
        with pytest.raises(ApiException):
            api.create_webcapture(b, editgroup_id=eg.editgroup_id)
|