From b392eaf397f4a7d1b127e9947cda9addeac7f7c0 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 30 Nov 2021 16:05:19 -0800
Subject: container merger: fix bug with filtering by release count

Also apply the "human edit" and "release count" checks only to the dupe
(to-be-redirected) idents.
---
 python/fatcat_tools/mergers/containers.py | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'python/fatcat_tools')

diff --git a/python/fatcat_tools/mergers/containers.py b/python/fatcat_tools/mergers/containers.py
index 1b9975e5..1d24743c 100644
--- a/python/fatcat_tools/mergers/containers.py
+++ b/python/fatcat_tools/mergers/containers.py
@@ -113,6 +113,20 @@ class ContainerMerger(EntityMerger):
             if getattr(entities[ident], evidence["extid_type"]) != evidence["extid"]:
                 self.counts["skip-extid-mismatch"] += 1
                 return 0
+            resp = self.http_session.get(f"https://fatcat.wiki/container/{ident}/stats.json")
+            resp.raise_for_status()
+            stats = resp.json()
+            release_counts[ident] = stats["total"]
+
+        if not primary_id:
+            primary_id = self.choose_primary_container(
+                list(entities.values()), redirects, release_counts
+            )
+            dupe_ids = [d for d in dupe_ids if d != primary_id]
+
+        assert primary_id not in dupe_ids
+
+        for ident in dupe_ids:
             if not self.clobber_human_edited:
                 edit_history = self.api.get_container_history(ident)
                 for edit in edit_history:
@@ -120,10 +134,6 @@ class ContainerMerger(EntityMerger):
                         print(f"skipping container_{ident}: human edited", file=sys.stderr)
                         self.counts["skip-human-edited"] += 1
                         return 0
-            resp = self.http_session.get(f"https://fatcat.wiki/container/{ident}/stats.json")
-            resp.raise_for_status()
-            stats = resp.json()
-            release_counts[ident] = stats["total"]
             if self.max_container_releases is not None:
                 if release_counts[ident] > self.max_container_releases:
                     self.counts["skip-container-release-count"] += 1
@@ -131,15 +141,7 @@ class ContainerMerger(EntityMerger):
                         f"skipping container_{ident}: release count {release_counts[ident]}",
                         file=sys.stderr,
                     )
-                    continue
-
-        if not primary_id:
-            primary_id = self.choose_primary_container(
-                list(entities.values()), redirects, release_counts
-            )
-            dupe_ids = [d for d in dupe_ids if d != primary_id]
-
-        assert primary_id not in dupe_ids
+                    return 0
 
         if self.dry_run_mode:
             eg_id = "dummy-editgroup-id"
-- 
cgit v1.2.3