about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/persist.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-26 14:42:36 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-26 14:42:36 -0700
commit: 505bd30c6517f1544e0e0c5ab59515c2d3f73562 (patch)
tree: f0b6f55d426b68a7857326e0f1cdb317c657d72e /python/sandcrawler/persist.py
parent: 326bbc56cb01b87d58307785d2b2a6df09c0e3c2 (diff)
download: sandcrawler-505bd30c6517f1544e0e0c5ab59515c2d3f73562.tar.gz
sandcrawler-505bd30c6517f1544e0e0c5ab59515c2d3f73562.zip
start adding python type annotations to db and persist code
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- python/sandcrawler/persist.py | 6
1 file changed, 4 insertions, 2 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index b714bc7..bb76e54 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -266,7 +266,8 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
self.result_to_html_meta(r) for r in batch if r.get('hit') and r.get('html_body')
]
if html_meta_batch:
- resp = self.db.insert_html_meta(self.cur, html_meta_batch, on_conflict="update")
+ rows = [d.to_sql_tuple() for d in html_meta_batch]
+ resp = self.db.insert_html_meta(self.cur, rows, on_conflict="update")
self.counts['insert-html_meta'] += resp[0]
self.counts['update-html_meta'] += resp[1]
@@ -534,7 +535,8 @@ class PersistPdfTextWorker(SandcrawlerWorker):
self.counts['s3-put'] += 1
if not self.s3_only:
- resp = self.db.insert_pdf_meta(self.cur, parsed_batch, on_conflict="update")
+ rows = [r.to_sql_tuple() for r in parsed_batch]
+ resp = self.db.insert_pdf_meta(self.cur, rows, on_conflict="update")
self.counts['insert-pdf-meta'] += resp[0]
self.counts['update-pdf-meta'] += resp[1]