diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-17 11:10:36 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-17 11:10:36 -0700 |
commit | 3287f08a788107815f366019060a7cbcfe9505d2 (patch) | |
tree | 29a867d2cf84d116b26be37508d4ea6462dede88 /python/sandcrawler/persist.py | |
parent | 5a6bf449ac78586bf150216fe2310be178eeb6c3 (diff) | |
download | sandcrawler-3287f08a788107815f366019060a7cbcfe9505d2.tar.gz sandcrawler-3287f08a788107815f366019060a7cbcfe9505d2.zip |
workers: refactor to pass key to process()
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- | python/sandcrawler/persist.py | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index f2a4893..338cdfc 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -35,7 +35,7 @@ class PersistCdxWorker(SandcrawlerWorker):
         self.db = SandcrawlerPostgresClient(db_url)
         self.cur = self.db.conn.cursor()
 
-    def process(self, record):
+    def process(self, record, key=None):
         """
         Only do batches (as transactions)
         """
@@ -60,7 +60,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
         self.db = SandcrawlerPostgresClient(db_url)
         self.cur = self.db.conn.cursor()
 
-    def process(self, record):
+    def process(self, record, key=None):
         """
         Only do batches (as transactions)
         """
@@ -203,7 +203,7 @@ class PersistIngestRequestWorker(PersistIngestFileResultWorker):
     def __init__(self, db_url, **kwargs):
         super().__init__(db_url=db_url)
 
-    def process(self, record):
+    def process(self, record, key=None):
         """
         Only do batches (as transactions)
         """
@@ -243,7 +243,7 @@ class PersistGrobidWorker(SandcrawlerWorker):
         self.db_only = kwargs.get('db_only', False)
         assert not (self.s3_only and self.db_only), "Only one of s3_only and db_only allowed"
 
-    def process(self, record):
+    def process(self, record, key=None):
         """
         Only do batches (as transactions)
         """
@@ -327,7 +327,7 @@ class PersistGrobidDiskWorker(SandcrawlerWorker):
         )
         return obj_path
 
-    def process(self, record):
+    def process(self, record, key=None):
         if record.get('status_code') != 200 or not record.get('tei_xml'):
             return False
 
@@ -347,7 +347,7 @@ class PersistPdfTrioWorker(SandcrawlerWorker):
         self.db = SandcrawlerPostgresClient(db_url)
         self.cur = self.db.conn.cursor()
 
-    def process(self, record):
+    def process(self, record, key=None):
         """
         Only do batches (as transactions)
         """