aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/persist.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-26 18:12:23 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-26 18:12:23 -0700
commit: 485dd2cfd120c52bbc5cc7745e44176d1003b40d (patch)
tree: 966bf78a4bd3cc1f6c94efb8fc3054a8a441dab0 /python/sandcrawler/persist.py
parent: 7087e7f65d8b81e29af44a43c1067bb2ec618c4e (diff)
download: sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.tar.gz
sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.zip
lint collection membership (last lint for now)
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- python/sandcrawler/persist.py 10
1 files changed, 5 insertions, 5 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index d47a8cb..8ec5979 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -86,7 +86,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
raw['link_source_id'] = raw['fatcat']['release_ident']
for k in ('ingest_type', 'base_url', 'link_source', 'link_source_id'):
- if not k in raw:
+ if k not in raw:
self.counts['skip-request-fields'] += 1
return None
if raw['ingest_type'] not in ('pdf', 'xml', 'html'):
@@ -120,10 +120,10 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
if there is a problem with conversion, return None and set skip count
"""
for k in ('request', 'hit', 'status'):
- if not k in raw:
+ if k not in raw:
self.counts['skip-result-fields'] += 1
return None
- if not 'base_url' in raw['request']:
+ if 'base_url' not in raw['request']:
self.counts['skip-result-fields'] += 1
return None
ingest_type = raw['request'].get('ingest_type')
@@ -181,9 +181,9 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
if there is a problem with conversion, return None and set skip count
"""
for k in ('request', 'hit', 'status'):
- if not k in raw:
+ if k not in raw:
return None
- if not 'base_url' in raw['request']:
+ if 'base_url' not in raw['request']:
return None
ingest_type = raw['request'].get('ingest_type')
if ingest_type not in ('dataset'):