From 1b5370e29c484b3c0bc7ee9346bf81c9064d4db5 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 15 Dec 2020 19:16:24 -0800
Subject: persist: html_meta is ON CONFLICT DO UPDATE

---
 python/sandcrawler/persist.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index d840da2..0fd54a4 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -215,7 +215,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
 
         html_meta_batch = [self.result_to_html_meta(r) for r in batch if r.get('hit') and r.get('html_body')]
         if html_meta_batch:
-            resp = self.db.insert_html_meta(self.cur, html_meta_batch, on_conflict="nothing")
+            resp = self.db.insert_html_meta(self.cur, html_meta_batch, on_conflict="update")
             self.counts['insert-html_meta'] += resp[0]
             self.counts['update-html_meta'] += resp[1]
 
-- 
cgit v1.2.3