aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-12-15 19:16:24 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-12-15 19:16:24 -0800
commit 1b5370e29c484b3c0bc7ee9346bf81c9064d4db5 (patch)
tree b796345045cae8fff9526e73aeaaf1896ad27f3b
parent db5df5c7de7e691c5aab01b3a12bd7308df23b8d (diff)
download sandcrawler-1b5370e29c484b3c0bc7ee9346bf81c9064d4db5.tar.gz
sandcrawler-1b5370e29c484b3c0bc7ee9346bf81c9064d4db5.zip
persist: html_meta is ON CONFLICT DO UPDATE
-rw-r--r-- python/sandcrawler/persist.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index d840da2..0fd54a4 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -215,7 +215,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
html_meta_batch = [self.result_to_html_meta(r) for r in batch if r.get('hit') and r.get('html_body')]
if html_meta_batch:
- resp = self.db.insert_html_meta(self.cur, html_meta_batch, on_conflict="nothing")
+ resp = self.db.insert_html_meta(self.cur, html_meta_batch, on_conflict="update")
self.counts['insert-html_meta'] += resp[0]
self.counts['update-html_meta'] += resp[1]