about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2021-11-30 16:05:19 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2021-11-30 16:08:08 -0800
commit b392eaf397f4a7d1b127e9947cda9addeac7f7c0 (patch)
tree 216997be3b5c9942587cac22c5fad26b4b5a9699 /python/fatcat_tools
parent 0587f40814f945acd6a77cf1711b3b8f6123a7ec (diff)
download fatcat-b392eaf397f4a7d1b127e9947cda9addeac7f7c0.tar.gz
fatcat-b392eaf397f4a7d1b127e9947cda9addeac7f7c0.zip
container merger: fix bug with filtering by release count
Also apply the "human edit" and "release count" checks only to the dupe (to-be-redirected) idents.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/mergers/containers.py 28
1 files changed, 15 insertions, 13 deletions
diff --git a/python/fatcat_tools/mergers/containers.py b/python/fatcat_tools/mergers/containers.py
index 1b9975e5..1d24743c 100644
--- a/python/fatcat_tools/mergers/containers.py
+++ b/python/fatcat_tools/mergers/containers.py
@@ -113,6 +113,20 @@ class ContainerMerger(EntityMerger):
if getattr(entities[ident], evidence["extid_type"]) != evidence["extid"]:
self.counts["skip-extid-mismatch"] += 1
return 0
+ resp = self.http_session.get(f"https://fatcat.wiki/container/{ident}/stats.json")
+ resp.raise_for_status()
+ stats = resp.json()
+ release_counts[ident] = stats["total"]
+
+ if not primary_id:
+ primary_id = self.choose_primary_container(
+ list(entities.values()), redirects, release_counts
+ )
+ dupe_ids = [d for d in dupe_ids if d != primary_id]
+
+ assert primary_id not in dupe_ids
+
+ for ident in dupe_ids:
if not self.clobber_human_edited:
edit_history = self.api.get_container_history(ident)
for edit in edit_history:
@@ -120,10 +134,6 @@ class ContainerMerger(EntityMerger):
print(f"skipping container_{ident}: human edited", file=sys.stderr)
self.counts["skip-human-edited"] += 1
return 0
- resp = self.http_session.get(f"https://fatcat.wiki/container/{ident}/stats.json")
- resp.raise_for_status()
- stats = resp.json()
- release_counts[ident] = stats["total"]
if self.max_container_releases is not None:
if release_counts[ident] > self.max_container_releases:
self.counts["skip-container-release-count"] += 1
@@ -131,15 +141,7 @@ class ContainerMerger(EntityMerger):
f"skipping container_{ident}: release count {release_counts[ident]}",
file=sys.stderr,
)
- continue
-
- if not primary_id:
- primary_id = self.choose_primary_container(
- list(entities.values()), redirects, release_counts
- )
- dupe_ids = [d for d in dupe_ids if d != primary_id]
-
- assert primary_id not in dupe_ids
+ return 0
if self.dry_run_mode:
eg_id = "dummy-editgroup-id"