diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-04-17 15:32:18 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-04-17 15:32:20 -0700 |
commit | 026e352f5d99652f088b6bcdc28d43106b8f52d2 (patch) | |
tree | 5e5122bec5419e73d3c5a6cb7df6a91e29b99e0a /python/fatcat_tools | |
parent | e749b576bdb9bbec3bcef7e4e389e7fbbf916c84 (diff) | |
download | fatcat-026e352f5d99652f088b6bcdc28d43106b8f52d2.tar.gz fatcat-026e352f5d99652f088b6bcdc28d43106b8f52d2.zip |
ES changelog worker: fixes for ident; fetch update from API if needed
Fetching the entry from the API may be needed for old changelog entries in the
Kafka feed, which can arrive without the expanded editgroup and editor.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/workers/elasticsearch.py | 11 |
1 files changed, 9 insertions, 2 deletions
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py index 525f372b..15d7aae3 100644 --- a/python/fatcat_tools/workers/elasticsearch.py +++ b/python/fatcat_tools/workers/elasticsearch.py @@ -103,13 +103,20 @@ class ElasticsearchReleaseWorker(FatcatWorker): if entity_dict.get('name') and not entity_dict.get('title'): continue entity = entity_from_json(json_str, self.entity_type, api_client=ac) + if self.entity_type == "changelog": + key = entity.index + # might need to fetch from API + if not (entity.editgroup and entity.editgroup.editor): + entity = ac.get_changelog_entry(entity.index, expand="editgroup,editor") + else: + key = entity.ident # TODO: handle deletions from index bulk_actions.append(json.dumps({ - "index": { "_id": entity.ident, }, + "index": { "_id": key, }, })) bulk_actions.append(json.dumps( self.transform_func(entity))) - print("Upserting, eg, {} (of {} releases in elasticsearch)".format(entity.ident, len(batch))) + print("Upserting, eg, {} (of {} {} in elasticsearch)".format(entity.ident, len(batch), self.entity_type)) elasticsearch_endpoint = "{}/{}/{}/_bulk".format( self.elasticsearch_backend, self.elasticsearch_index, |