lint collection membership (last lint for now)

author: Bryan Newbold <bnewbold@archive.org> 2021-10-26 18:12:23 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-26 18:12:23 -0700
commit: 485dd2cfd120c52bbc5cc7745e44176d1003b40d (patch)
tree: 966bf78a4bd3cc1f6c94efb8fc3054a8a441dab0 /python/sandcrawler/persist.py
parent: 7087e7f65d8b81e29af44a43c1067bb2ec618c4e (diff)
download: sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.tar.gz sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.zip
1 files changed, 5 insertions, 5 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index d47a8cb..8ec5979 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -86,7 +86,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
             raw['link_source_id'] = raw['fatcat']['release_ident']
 
         for k in ('ingest_type', 'base_url', 'link_source', 'link_source_id'):
-            if not k in raw:
+            if k not in raw:
                 self.counts['skip-request-fields'] += 1
                 return None
         if raw['ingest_type'] not in ('pdf', 'xml', 'html'):
@@ -120,10 +120,10 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
         if there is a problem with conversion, return None and set skip count
         """
         for k in ('request', 'hit', 'status'):
-            if not k in raw:
+            if k not in raw:
                 self.counts['skip-result-fields'] += 1
                 return None
-        if not 'base_url' in raw['request']:
+        if 'base_url' not in raw['request']:
             self.counts['skip-result-fields'] += 1
             return None
         ingest_type = raw['request'].get('ingest_type')
@@ -181,9 +181,9 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
         if there is a problem with conversion, return None and set skip count
         """
         for k in ('request', 'hit', 'status'):
-            if not k in raw:
+            if k not in raw:
                 return None
-        if not 'base_url' in raw['request']:
+        if 'base_url' not in raw['request']:
             return None
         ingest_type = raw['request'].get('ingest_type')
         if ingest_type not in ('dataset'):
author	Bryan Newbold <bnewbold@archive.org>	2021-10-26 18:12:23 -0700
committer	Bryan Newbold <bnewbold@archive.org>	2021-10-26 18:12:23 -0700
commit	485dd2cfd120c52bbc5cc7745e44176d1003b40d (patch)
tree	966bf78a4bd3cc1f6c94efb8fc3054a8a441dab0 /python/sandcrawler/persist.py
parent	7087e7f65d8b81e29af44a43c1067bb2ec618c4e (diff)
download	sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.tar.gz sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.zip