diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 18:12:23 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 18:12:23 -0700 |
commit | 485dd2cfd120c52bbc5cc7745e44176d1003b40d (patch) | |
tree | 966bf78a4bd3cc1f6c94efb8fc3054a8a441dab0 /python/sandcrawler/persist.py | |
parent | 7087e7f65d8b81e29af44a43c1067bb2ec618c4e (diff) | |
download | sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.tar.gz sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.zip |
lint collection membership (last lint for now)
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- | python/sandcrawler/persist.py | 10 |
1 file changed, 5 insertions, 5 deletions
```diff
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index d47a8cb..8ec5979 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -86,7 +86,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
             raw['link_source_id'] = raw['fatcat']['release_ident']
 
         for k in ('ingest_type', 'base_url', 'link_source', 'link_source_id'):
-            if not k in raw:
+            if k not in raw:
                 self.counts['skip-request-fields'] += 1
                 return None
         if raw['ingest_type'] not in ('pdf', 'xml', 'html'):
@@ -120,10 +120,10 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
         if there is a problem with conversion, return None and set skip count
         """
         for k in ('request', 'hit', 'status'):
-            if not k in raw:
+            if k not in raw:
                 self.counts['skip-result-fields'] += 1
                 return None
-        if not 'base_url' in raw['request']:
+        if 'base_url' not in raw['request']:
             self.counts['skip-result-fields'] += 1
             return None
         ingest_type = raw['request'].get('ingest_type')
@@ -181,9 +181,9 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
         if there is a problem with conversion, return None and set skip count
         """
         for k in ('request', 'hit', 'status'):
-            if not k in raw:
+            if k not in raw:
                 return None
-        if not 'base_url' in raw['request']:
+        if 'base_url' not in raw['request']:
             return None
         ingest_type = raw['request'].get('ingest_type')
         if ingest_type not in ('dataset'):
```