diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-01-29 23:56:27 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-01-29 23:56:27 -0800 |
commit | ade1eb9ff955ca5ba58acdc8b76e344c9cc54790 (patch) | |
tree | 192e6756fe0b4bc83b43323f8e5b47419eb8ed1b | |
parent | 749295a16dd18aa64b87f6b7c3e0e6052931161a (diff) | |
download | fatcat-ade1eb9ff955ca5ba58acdc8b76e344c9cc54790.tar.gz fatcat-ade1eb9ff955ca5ba58acdc8b76e344c9cc54790.zip |
fix ES file schema plural field names
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 7 | ||||
-rw-r--r-- | python/tests/transform_elasticsearch.py | 2 |
2 files changed, 4 insertions, 5 deletions
```diff
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 42669bbf..5a492fb4 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -497,13 +497,12 @@ def file_to_elasticsearch(entity):
         sha1 = entity.sha1,
         sha256 = entity.sha256,
         md5 = entity.md5,
-        rel = [u.rel for u in entity.urls],
     )
 
     # TODO: domain, hosts (from urls; use proper urlcanon)
-    t['rel'] = list(set([u.rel for u in entity.urls]))
-    t['host'] = []
-    t['domain'] = []
+    t['rels'] = list(set([u.rel for u in entity.urls]))
+    t['hosts'] = []
+    t['domains'] = []
 
     in_ia = False
     for u in entity.urls:
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index 89a4eef8..c247e745 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -106,7 +106,7 @@ def test_elasticsearch_file_transform(matched_importer):
     assert es['size_bytes'] == f.size
     assert es['mimetype'] == f.mimetype
     assert es['in_ia'] == True
-    assert 'web' in es['rel']
+    assert 'web' in es['rels']
 
     # XXX: implement hosts and domain parsing with urlcanon
     #assert 'journals.plos.org' in es['host']
```