summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-20 20:21:17 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-20 20:22:25 -0700
commit: 038395d5975b8dd0c790bf35cb020d1cd4e6cb46 (patch)
tree: e214776e5d3440ac41759459e92ca7014812ea9f
parent: 5f23ec1956eb5a53848c480a2f8399ac2f3f30ff (diff)
download: fatcat-038395d5975b8dd0c790bf35cb020d1cd4e6cb46.tar.gz
fatcat-038395d5975b8dd0c790bf35cb020d1cd4e6cb46.zip
include creator_ids in release elastic schema
Intent is to allow fast creator search/lookup
-rw-r--r-- CHANGELOG.md 1
-rw-r--r-- extra/elasticsearch/release_schema.json 1
-rw-r--r-- python/fatcat_tools/transforms/elasticsearch.py 6
3 files changed, 8 insertions, 0 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e9d2814..401fb202 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -65,6 +65,7 @@ This version includes some backward-incompatible changes.
- allow expanding `releases` for file, fileset, and webcapture entities.
Expanded release entities have their abstracts and refs hidden by default
(for performance)
+- `creator_ids` in release elasticsearch schema, for lookups
## [0.2.2] - 2019-05-08
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json
index ef195e1b..63aab55c 100644
--- a/extra/elasticsearch/release_schema.json
+++ b/extra/elasticsearch/release_schema.json
@@ -57,6 +57,7 @@
"container_type": { "type": "keyword" },
"contrib_count": { "type": "integer" },
"contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "creator_ids": { "type": "keyword" },
"ref_count": { "type": "integer" },
"file_count": { "type": "integer" },
"fileset_count": { "type": "integer" },
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index a8e0227b..7f7f2f1b 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -86,10 +86,16 @@ def release_to_elasticsearch(entity, force_bool=True):
t['ref_count'] = len(release.refs or [])
t['contrib_count'] = len(release.contribs or [])
contrib_names = []
+ creator_ids = []
for c in (release.contribs or []):
if c.raw_name:
contrib_names.append(c.raw_name)
+ elif c.surname:
+ contrib_names.append(c.surname)
+ if c.creator_id:
+ creator_ids.append(c.creator_id)
t['contrib_names'] = contrib_names
+ t['creator_ids'] = creator_ids
container = release.container
if container: