summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-12-15 19:30:05 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-12-15 19:30:07 -0800
commit: f7a75a019c9dee35542e6f92ec37937df36ff756 (patch)
tree: d74978d96598eb86044feb7435b8dde75bd2cb9e /python/fatcat_tools/importers
parent: 6bffa5d8938be66076ae514288a31693f9fefc77 (diff)
download: fatcat-f7a75a019c9dee35542e6f92ec37937df36ff756.tar.gz
          fatcat-f7a75a019c9dee35542e6f92ec37937df36ff756.zip
html ingest: small fixes to try_update() code path
Don't currently have test coverage for most try_update() code; run the inserts manually in testing.
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/ingest.py 10
1 file changed, 5 insertions, 5 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 4fbd19f1..1e04e712 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -480,9 +480,9 @@ class IngestWebResultImporter(IngestFileResultImporter):
def try_update(self, wc):
- # check for existing edits-in-progress with same file hash
+ # check for existing edits-in-progress with same URL
for other in self._entity_queue:
- if other.sha1 == wc.sha1:
+ if other.original_url == wc.original_url:
self.counts['skip-in-queue'] += 1
return False
@@ -501,9 +501,9 @@ class IngestWebResultImporter(IngestFileResultImporter):
self.counts['skip-release-has-webcapture'] += 1
return False
- # TODO: for now, never update
- self.counts['skip-update-disabled'] += 1
- return False
+ # Ok, if we got here then no existing web capture for (first) release,
+ # so go ahead and insert!
+ return True
def insert_batch(self, batch):
self.api.create_webcapture_auto_batch(fatcat_openapi_client.WebcaptureAutoBatch(