diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 7 | ||||
| -rw-r--r-- | python/tests/transform_elasticsearch.py | 2 | 
2 files changed, 4 insertions, 5 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 42669bbf..5a492fb4 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -497,13 +497,12 @@ def file_to_elasticsearch(entity):
         sha1 = entity.sha1,
         sha256 = entity.sha256,
         md5 = entity.md5,
-        rel = [u.rel for u in entity.urls],
     )
 
     # TODO: domain, hosts (from urls; use proper urlcanon)
-    t['rel'] = list(set([u.rel for u in entity.urls]))
-    t['host'] = []
-    t['domain'] = []
+    t['rels'] = list(set([u.rel for u in entity.urls]))
+    t['hosts'] = []
+    t['domains'] = []
 
     in_ia = False
     for u in entity.urls:
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index 89a4eef8..c247e745 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -106,7 +106,7 @@ def test_elasticsearch_file_transform(matched_importer):
     assert es['size_bytes'] == f.size
     assert es['mimetype'] == f.mimetype
     assert es['in_ia'] == True
-    assert 'web' in es['rel']
+    assert 'web' in es['rels']
 
     # XXX: implement hosts and domain parsing with urlcanon
     #assert 'journals.plos.org' in es['host']
