aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-04-17 15:32:18 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-04-17 15:32:20 -0700
commit: 026e352f5d99652f088b6bcdc28d43106b8f52d2 (patch)
tree: 5e5122bec5419e73d3c5a6cb7df6a91e29b99e0a
parent: e749b576bdb9bbec3bcef7e4e389e7fbbf916c84 (diff)
download: fatcat-026e352f5d99652f088b6bcdc28d43106b8f52d2.tar.gz
download: fatcat-026e352f5d99652f088b6bcdc28d43106b8f52d2.zip
ES changelog worker: fixes for ident; fetch update from API if needed
The API fetch update may be needed for old changelog entries in the kafka feed.
-rw-r--r-- python/fatcat_tools/workers/elasticsearch.py 11
1 files changed, 9 insertions, 2 deletions
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py
index 525f372b..15d7aae3 100644
--- a/python/fatcat_tools/workers/elasticsearch.py
+++ b/python/fatcat_tools/workers/elasticsearch.py
@@ -103,13 +103,20 @@ class ElasticsearchReleaseWorker(FatcatWorker):
if entity_dict.get('name') and not entity_dict.get('title'):
continue
entity = entity_from_json(json_str, self.entity_type, api_client=ac)
+ if self.entity_type == "changelog":
+ key = entity.index
+ # might need to fetch from API
+ if not (entity.editgroup and entity.editgroup.editor):
+ entity = ac.get_changelog_entry(entity.index, expand="editgroup,editor")
+ else:
+ key = entity.ident
# TODO: handle deletions from index
bulk_actions.append(json.dumps({
- "index": { "_id": entity.ident, },
+ "index": { "_id": key, },
}))
bulk_actions.append(json.dumps(
self.transform_func(entity)))
- print("Upserting, eg, {} (of {} releases in elasticsearch)".format(entity.ident, len(batch)))
+ print("Upserting, eg, {} (of {} {} in elasticsearch)".format(entity.ident, len(batch), self.entity_type))
elasticsearch_endpoint = "{}/{}/{}/_bulk".format(
self.elasticsearch_backend,
self.elasticsearch_index,