diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 14:42:36 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 14:42:36 -0700 |
commit | 505bd30c6517f1544e0e0c5ab59515c2d3f73562 (patch) | |
tree | f0b6f55d426b68a7857326e0f1cdb317c657d72e /python/sandcrawler/persist.py | |
parent | 326bbc56cb01b87d58307785d2b2a6df09c0e3c2 (diff) | |
download | sandcrawler-505bd30c6517f1544e0e0c5ab59515c2d3f73562.tar.gz sandcrawler-505bd30c6517f1544e0e0c5ab59515c2d3f73562.zip |
start adding python type annotations to db and persist code
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- | python/sandcrawler/persist.py | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index b714bc7..bb76e54 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -266,7 +266,8 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
             self.result_to_html_meta(r) for r in batch if r.get('hit') and r.get('html_body')
         ]
         if html_meta_batch:
-            resp = self.db.insert_html_meta(self.cur, html_meta_batch, on_conflict="update")
+            rows = [d.to_sql_tuple() for d in html_meta_batch]
+            resp = self.db.insert_html_meta(self.cur, rows, on_conflict="update")
             self.counts['insert-html_meta'] += resp[0]
             self.counts['update-html_meta'] += resp[1]
 
@@ -534,7 +535,8 @@ class PersistPdfTextWorker(SandcrawlerWorker):
                 self.counts['s3-put'] += 1
 
         if not self.s3_only:
-            resp = self.db.insert_pdf_meta(self.cur, parsed_batch, on_conflict="update")
+            rows = [r.to_sql_tuple() for r in parsed_batch]
+            resp = self.db.insert_pdf_meta(self.cur, rows, on_conflict="update")
             self.counts['insert-pdf-meta'] += resp[0]
             self.counts['update-pdf-meta'] += resp[1]
 