summary | refs | log | tree | commit | diff | stats
path: root/python/tests
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-01-30 00:08:41 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2020-01-30 00:08:41 -0800
commit 4cbee44529dd967c966ed3f2cc2bb80176be4e43 (patch)
tree eb434db2b532786fc9a3c6420c17c06db788a758 /python/tests
parent 461376e6c6107da9a1c0a41c379465ef1c39f051 (diff)
download fatcat-4cbee44529dd967c966ed3f2cc2bb80176be4e43.tar.gz
fatcat-4cbee44529dd967c966ed3f2cc2bb80176be4e43.zip
implement host+domain parsing for file ES transform
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/transform_elasticsearch.py 7
1 files changed, 3 insertions, 4 deletions
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index c247e745..e67681c6 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -106,11 +106,10 @@ def test_elasticsearch_file_transform(matched_importer):
assert es['size_bytes'] == f.size
assert es['mimetype'] == f.mimetype
assert es['in_ia'] == True
- assert 'web' in es['rels']
- # XXX: implement hosts and domain parsing with urlcanon
- #assert 'journals.plos.org' in es['host']
- #assert 'plos.org' in es['domain']
+ assert 'web' in es['rels']
+ assert 'www.zhros.ru' in es['hosts']
+ assert 'zhros.ru' in es['domains']
def test_elasticsearch_changelog_transform(matched_importer):
ce = entity_from_json(open('./tests/files/changelog_3469683.json', 'r').read(), ChangelogEntry)