diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-20 20:21:17 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-20 20:22:25 -0700 |
commit | 038395d5975b8dd0c790bf35cb020d1cd4e6cb46 (patch) | |
tree | e214776e5d3440ac41759459e92ca7014812ea9f | |
parent | 5f23ec1956eb5a53848c480a2f8399ac2f3f30ff (diff) | |
download | fatcat-038395d5975b8dd0c790bf35cb020d1cd4e6cb46.tar.gz fatcat-038395d5975b8dd0c790bf35cb020d1cd4e6cb46.zip |
include creator_ids in release elastic schema
Intent is to allow fast creator search/lookup
-rw-r--r-- | CHANGELOG.md | 1 |
-rw-r--r-- | extra/elasticsearch/release_schema.json | 1 |
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 6 |
3 files changed, 8 insertions, 0 deletions
```diff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e9d2814..401fb202 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -65,6 +65,7 @@ This version includes some backward-incompatible changes.
 - allow expanding `releases` for file, fileset, and webcapture entities.
   Expanded release entities have their abstracts and refs hidden by default
   (for performance)
+- `creator_ids` in release elasticsearch schema, for lookups
 
 ## [0.2.2] - 2019-05-08
 
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json
index ef195e1b..63aab55c 100644
--- a/extra/elasticsearch/release_schema.json
+++ b/extra/elasticsearch/release_schema.json
@@ -57,6 +57,7 @@
             "container_type": { "type": "keyword" },
             "contrib_count": { "type": "integer" },
             "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+            "creator_ids": { "type": "keyword" },
             "ref_count": { "type": "integer" },
             "file_count": { "type": "integer" },
             "fileset_count": { "type": "integer" },
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index a8e0227b..7f7f2f1b 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -86,10 +86,16 @@ def release_to_elasticsearch(entity, force_bool=True):
     t['ref_count'] = len(release.refs or [])
     t['contrib_count'] = len(release.contribs or [])
     contrib_names = []
+    creator_ids = []
     for c in (release.contribs or []):
         if c.raw_name:
             contrib_names.append(c.raw_name)
+        elif c.surname:
+            contrib_names.append(c.surname)
+        if c.creator_id:
+            creator_ids.append(c.creator_id)
     t['contrib_names'] = contrib_names
+    t['creator_ids'] = creator_ids
 
     container = release.container
     if container:
```