diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-15 19:30:05 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-15 19:30:07 -0800 |
commit | f7a75a019c9dee35542e6f92ec37937df36ff756 (patch) | |
tree | d74978d96598eb86044feb7435b8dde75bd2cb9e /python/fatcat_tools | |
parent | 6bffa5d8938be66076ae514288a31693f9fefc77 (diff) | |
download | fatcat-f7a75a019c9dee35542e6f92ec37937df36ff756.tar.gz fatcat-f7a75a019c9dee35542e6f92ec37937df36ff756.zip |
html ingest: small fixes to try_update() code path
There is currently no test coverage for most of the try_update() code; the
inserts were run manually in testing.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/ingest.py | 10 |
1 file changed, 5 insertions, 5 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 4fbd19f1..1e04e712 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -480,9 +480,9 @@ class IngestWebResultImporter(IngestFileResultImporter):
 
     def try_update(self, wc):
 
-        # check for existing edits-in-progress with same file hash
+        # check for existing edits-in-progress with same URL
         for other in self._entity_queue:
-            if other.sha1 == wc.sha1:
+            if other.original_url == wc.original_url:
                 self.counts['skip-in-queue'] += 1
                 return False
 
@@ -501,9 +501,9 @@ class IngestWebResultImporter(IngestFileResultImporter):
             self.counts['skip-release-has-webcapture'] += 1
             return False
 
-        # TODO: for now, never update
-        self.counts['skip-update-disabled'] += 1
-        return False
+        # Ok, if we got here then no existing web capture for (first) release,
+        # so go ahead and insert!
+        return True
 
     def insert_batch(self, batch):
         self.api.create_webcapture_auto_batch(fatcat_openapi_client.WebcaptureAutoBatch(