From f7a75a019c9dee35542e6f92ec37937df36ff756 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 15 Dec 2020 19:30:05 -0800 Subject: html ingest: small fixes to try_update() code path There is currently no test coverage for most of the try_update() code; the inserts were run manually during testing. --- python/fatcat_tools/importers/ingest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index 4fbd19f1..1e04e712 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -480,9 +480,9 @@ class IngestWebResultImporter(IngestFileResultImporter): def try_update(self, wc): - # check for existing edits-in-progress with same file hash + # check for existing edits-in-progress with same URL for other in self._entity_queue: - if other.sha1 == wc.sha1: + if other.original_url == wc.original_url: self.counts['skip-in-queue'] += 1 return False @@ -501,9 +501,9 @@ class IngestWebResultImporter(IngestFileResultImporter): self.counts['skip-release-has-webcapture'] += 1 return False - # TODO: for now, never update - self.counts['skip-update-disabled'] += 1 - return False + # Ok, if we got here then no existing web capture for (first) release, + # so go ahead and insert! + return True def insert_batch(self, batch): self.api.create_webcapture_auto_batch(fatcat_openapi_client.WebcaptureAutoBatch( -- cgit v1.2.3