diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 7 | ||||
-rw-r--r-- | python/tests/transform_elasticsearch.py | 2 |
2 files changed, 4 insertions, 5 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 42669bbf..5a492fb4 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -497,13 +497,12 @@ def file_to_elasticsearch(entity): sha1 = entity.sha1, sha256 = entity.sha256, md5 = entity.md5, - rel = [u.rel for u in entity.urls], ) # TODO: domain, hosts (from urls; use proper urlcanon) - t['rel'] = list(set([u.rel for u in entity.urls])) - t['host'] = [] - t['domain'] = [] + t['rels'] = list(set([u.rel for u in entity.urls])) + t['hosts'] = [] + t['domains'] = [] in_ia = False for u in entity.urls: diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py index 89a4eef8..c247e745 100644 --- a/python/tests/transform_elasticsearch.py +++ b/python/tests/transform_elasticsearch.py @@ -106,7 +106,7 @@ def test_elasticsearch_file_transform(matched_importer): assert es['size_bytes'] == f.size assert es['mimetype'] == f.mimetype assert es['in_ia'] == True - assert 'web' in es['rel'] + assert 'web' in es['rels'] # XXX: implement hosts and domain parsing with urlcanon #assert 'journals.plos.org' in es['host'] |