diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-15 19:30:05 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-15 19:30:07 -0800 | 
| commit | f7a75a019c9dee35542e6f92ec37937df36ff756 (patch) | |
| tree | d74978d96598eb86044feb7435b8dde75bd2cb9e /python | |
| parent | 6bffa5d8938be66076ae514288a31693f9fefc77 (diff) | |
| download | fatcat-f7a75a019c9dee35542e6f92ec37937df36ff756.tar.gz fatcat-f7a75a019c9dee35542e6f92ec37937df36ff756.zip | |
html ingest: small fixes to try_update() code path
There is currently no test coverage for most of the try_update() code path;
the inserts were run manually during testing.
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 10 | 
1 file changed, 5 insertions, 5 deletions
| diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index 4fbd19f1..1e04e712 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -480,9 +480,9 @@ class IngestWebResultImporter(IngestFileResultImporter):      def try_update(self, wc): -        # check for existing edits-in-progress with same file hash +        # check for existing edits-in-progress with same URL          for other in self._entity_queue: -            if other.sha1 == wc.sha1: +            if other.original_url == wc.original_url:                  self.counts['skip-in-queue'] += 1                  return False @@ -501,9 +501,9 @@ class IngestWebResultImporter(IngestFileResultImporter):              self.counts['skip-release-has-webcapture'] += 1              return False -        # TODO: for now, never update -        self.counts['skip-update-disabled'] += 1 -        return False +        # Ok, if we got here then no existing web capture for (first) release, +        # so go ahead and insert! +        return True      def insert_batch(self, batch):          self.api.create_webcapture_auto_batch(fatcat_openapi_client.WebcaptureAutoBatch( | 
