diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-12-15 19:16:24 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-12-15 19:16:24 -0800 |
commit | 1b5370e29c484b3c0bc7ee9346bf81c9064d4db5 (patch) | |
tree | b796345045cae8fff9526e73aeaaf1896ad27f3b | |
parent | db5df5c7de7e691c5aab01b3a12bd7308df23b8d (diff) | |
download | sandcrawler-1b5370e29c484b3c0bc7ee9346bf81c9064d4db5.tar.gz sandcrawler-1b5370e29c484b3c0bc7ee9346bf81c9064d4db5.zip |
persist: html_meta is ON CONFLICT DO UPDATE
-rw-r--r-- | python/sandcrawler/persist.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index d840da2..0fd54a4 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -215,7 +215,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
 
         html_meta_batch = [self.result_to_html_meta(r) for r in batch if r.get('hit') and r.get('html_body')]
         if html_meta_batch:
-            resp = self.db.insert_html_meta(self.cur, html_meta_batch, on_conflict="nothing")
+            resp = self.db.insert_html_meta(self.cur, html_meta_batch, on_conflict="update")
             self.counts['insert-html_meta'] += resp[0]
             self.counts['update-html_meta'] += resp[1]
 