From a42d5f0d00e76bf8474647fae4e1d9d61693a7d9 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 4 Jun 2020 14:01:34 -0700
Subject: ES schema: add best_url to file schema

This will increase index size (URLs are often long in our corpus, and we
have many file entities), but seems worth it.

Initially added `ia_url` as a second field, guaranteed to always be an
*.archive.org URL, but `best_url` defaults to that anyway, so it didn't
seem worthwhile.
---
 extra/elasticsearch/file_schema.json | 1 +
 1 file changed, 1 insertion(+)

(limited to 'extra/elasticsearch/file_schema.json')

diff --git a/extra/elasticsearch/file_schema.json b/extra/elasticsearch/file_schema.json
index 9c8ee64c..0fa25c3a 100644
--- a/extra/elasticsearch/file_schema.json
+++ b/extra/elasticsearch/file_schema.json
@@ -44,6 +44,7 @@
     "rels": { "type": "keyword", "normalizer": "default" },
     "in_ia": { "type": "boolean" },
     "in_ia_petabox": { "type": "boolean" },
+    "best_url": { "type": "keyword", "normalizer": "default" },
 
     "release_id": { "type": "alias", "path": "release_ids" },
     "sha1hex": { "type": "alias", "path": "sha1" },
--
cgit v1.2.3