diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-30 16:05:19 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-30 16:08:08 -0800 |
commit | b392eaf397f4a7d1b127e9947cda9addeac7f7c0 (patch) | |
tree | 216997be3b5c9942587cac22c5fad26b4b5a9699 | |
parent | 0587f40814f945acd6a77cf1711b3b8f6123a7ec (diff) | |
download | fatcat-b392eaf397f4a7d1b127e9947cda9addeac7f7c0.tar.gz fatcat-b392eaf397f4a7d1b127e9947cda9addeac7f7c0.zip |
container merger: fix bug with filtering by release count
Also apply the "human edit" and "release count" checks only to the dupe
(to-be-redirected) idents.
-rw-r--r-- | python/fatcat_tools/mergers/containers.py | 28 |
1 file changed, 15 insertions, 13 deletions
diff --git a/python/fatcat_tools/mergers/containers.py b/python/fatcat_tools/mergers/containers.py index 1b9975e5..1d24743c 100644 --- a/python/fatcat_tools/mergers/containers.py +++ b/python/fatcat_tools/mergers/containers.py @@ -113,6 +113,20 @@ class ContainerMerger(EntityMerger): if getattr(entities[ident], evidence["extid_type"]) != evidence["extid"]: self.counts["skip-extid-mismatch"] += 1 return 0 + resp = self.http_session.get(f"https://fatcat.wiki/container/{ident}/stats.json") + resp.raise_for_status() + stats = resp.json() + release_counts[ident] = stats["total"] + + if not primary_id: + primary_id = self.choose_primary_container( + list(entities.values()), redirects, release_counts + ) + dupe_ids = [d for d in dupe_ids if d != primary_id] + + assert primary_id not in dupe_ids + + for ident in dupe_ids: if not self.clobber_human_edited: edit_history = self.api.get_container_history(ident) for edit in edit_history: @@ -120,10 +134,6 @@ class ContainerMerger(EntityMerger): print(f"skipping container_{ident}: human edited", file=sys.stderr) self.counts["skip-human-edited"] += 1 return 0 - resp = self.http_session.get(f"https://fatcat.wiki/container/{ident}/stats.json") - resp.raise_for_status() - stats = resp.json() - release_counts[ident] = stats["total"] if self.max_container_releases is not None: if release_counts[ident] > self.max_container_releases: self.counts["skip-container-release-count"] += 1 @@ -131,15 +141,7 @@ class ContainerMerger(EntityMerger): f"skipping container_{ident}: release count {release_counts[ident]}", file=sys.stderr, ) - continue - - if not primary_id: - primary_id = self.choose_primary_container( - list(entities.values()), redirects, release_counts - ) - dupe_ids = [d for d in dupe_ids if d != primary_id] - - assert primary_id not in dupe_ids + return 0 if self.dry_run_mode: eg_id = "dummy-editgroup-id" |