author    Bryan Newbold <bnewbold@robocracy.org>  2020-12-16 14:58:07 -0800
committer Bryan Newbold <bnewbold@robocracy.org>  2020-12-16 15:00:18 -0800
commit    f60ba0ea04081ac0095c12d8ecbaa48b3da74aee (patch)
tree      d4de1d1d529776205568f55ad7f724e398e442c9 /python/fatcat_tools
parent    0174f7976e4cf5f288539e8a82ba07cc8f45f5c8 (diff)
entity update worker: treat fileset and webcapture updates like file updates
When webcapture or fileset entities are updated, the release entities associated with them also need to be updated (and work entities, recursively). A remaining TODO is to handle the case where a release_id is *removed* as well as *added*, and to re-process those releases as well.
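In outline, the propagation chain this patch adds mirrors the existing file handling. A simplified sketch (variable and API names follow the diff below; Kafka production and error handling are omitted):

    # simplified sketch of the propagation added by this patch
    for ident in set(fileset_ids):
        fileset_entity = api.get_fileset(ident, expand=None)
        release_ids.extend(fileset_entity.release_ids or [])
    for ident in set(webcapture_ids):
        webcapture_entity = api.get_webcapture(ident, expand=None)
        release_ids.extend(webcapture_entity.release_ids or [])
    # the existing release loop then picks up these idents and, via
    # release.work_id, propagates the update to work entities as well
    for ident in set(release_ids):
        release = api.get_release(ident, expand="files,filesets,webcaptures,container")
        if release.work_id:
            work_ids.append(release.work_id)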
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--  python/fatcat_tools/workers/changelog.py | 28
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 2111a20d..94791770 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -326,6 +326,8 @@ class EntityUpdatesWorker(FatcatWorker):
release_ids = []
new_release_ids = []
file_ids = []
+ fileset_ids = []
+ webcapture_ids = []
container_ids = []
work_ids = []
release_edits = cle['editgroup']['edits']['releases']
@@ -337,6 +339,12 @@ class EntityUpdatesWorker(FatcatWorker):
file_edits = cle['editgroup']['edits']['files']
for e in file_edits:
file_ids.append(e['ident'])
+ fileset_edits = cle['editgroup']['edits']['filesets']
+ for e in fileset_edits:
+ fileset_ids.append(e['ident'])
+ webcapture_edits = cle['editgroup']['edits']['webcaptures']
+ for e in webcapture_edits:
+ webcapture_ids.append(e['ident'])
container_edits = cle['editgroup']['edits']['containers']
for e in container_edits:
container_ids.append(e['ident'])
@@ -348,8 +356,8 @@ class EntityUpdatesWorker(FatcatWorker):
for ident in set(file_ids):
file_entity = self.api.get_file(ident, expand=None)
# update release when a file changes
- # TODO: fetch old revision as well, and only update
- # releases for which list changed
+ # TODO: also fetch old version of file and update any *removed*
+ # release idents (and same for filesets, webcapture updates)
release_ids.extend(file_entity.release_ids or [])
file_dict = self.api.api_client.sanitize_for_serialization(file_entity)
producer.produce(
@@ -358,6 +366,19 @@ class EntityUpdatesWorker(FatcatWorker):
key=ident.encode('utf-8'),
on_delivery=fail_fast,
)
+
+ # TODO: topic for fileset updates
+ for ident in set(fileset_ids):
+ fileset_entity = self.api.get_fileset(ident, expand=None)
+ # update release when a fileset changes
+ release_ids.extend(fileset_entity.release_ids or [])
+
+ # TODO: topic for webcapture updates
+ for ident in set(webcapture_ids):
+ webcapture_entity = self.api.get_webcapture(ident, expand=None)
+ # update release when a webcapture changes
+ release_ids.extend(webcapture_entity.release_ids or [])
+
for ident in set(container_ids):
container = self.api.get_container(ident)
container_dict = self.api.api_client.sanitize_for_serialization(container)
@@ -367,6 +388,7 @@ class EntityUpdatesWorker(FatcatWorker):
key=ident.encode('utf-8'),
on_delivery=fail_fast,
)
+
for ident in set(release_ids):
release = self.api.get_release(ident, expand="files,filesets,webcaptures,container")
if release.work_id:
@@ -378,7 +400,7 @@ class EntityUpdatesWorker(FatcatWorker):
key=ident.encode('utf-8'),
on_delivery=fail_fast,
)
- # filter to "new" active releases with no matched files
+ # for ingest requests, filter to "new" active releases with no matched files
if release.ident in new_release_ids:
ir = release_ingest_request(release, ingest_request_source='fatcat-changelog')
if ir and not release.files and self.want_live_ingest(release, ir):
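The TODO about *removed* release idents is not addressed by this patch. One possible approach is to diff the old and new revisions of each edited entity; a rough sketch only, assuming the changelog edit objects expose a prev_revision field and that a revision-lookup call such as get_file_revision is available (neither is shown in this diff):

    # hypothetical sketch of the removed-release handling mentioned in the
    # TODO; prev_revision and get_file_revision are assumptions, not part
    # of this patch
    for e in file_edits:
        prev_rev = e.get('prev_revision')
        if not prev_rev:
            continue
        old_file = api.get_file_revision(prev_rev, expand=None)
        new_file = api.get_file(e['ident'], expand=None)
        removed = set(old_file.release_ids or []) - set(new_file.release_ids or [])
        # releases that lost a file association also need re-processing;
        # the same pattern would apply to fileset and webcapture edits
        release_ids.extend(removed)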